diff options
Diffstat (limited to 'include/linux')
532 files changed, 16439 insertions, 5362 deletions
diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 641dc4843987..4db54e928b36 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -30,6 +30,7 @@ struct irq_domain_ops; #include <linux/dynamic_debug.h> #include <linux/module.h> #include <linux/mutex.h> +#include <linux/fw_table.h> #include <acpi/acpi_bus.h> #include <acpi/acpi_drivers.h> @@ -37,6 +38,16 @@ struct irq_domain_ops; #include <acpi/acpi_io.h> #include <asm/acpi.h> +#ifdef CONFIG_ACPI_TABLE_LIB +#define EXPORT_SYMBOL_ACPI_LIB(x) EXPORT_SYMBOL_NS_GPL(x, ACPI) +#define __init_or_acpilib +#define __initdata_or_acpilib +#else +#define EXPORT_SYMBOL_ACPI_LIB(x) +#define __init_or_acpilib __init +#define __initdata_or_acpilib __initdata +#endif + static inline acpi_handle acpi_device_handle(struct acpi_device *adev) { return adev ? adev->handle : NULL; @@ -119,21 +130,8 @@ enum acpi_address_range_id { /* Table Handlers */ -union acpi_subtable_headers { - struct acpi_subtable_header common; - struct acpi_hmat_structure hmat; - struct acpi_prmt_module_header prmt; - struct acpi_cedt_header cedt; -}; - typedef int (*acpi_tbl_table_handler)(struct acpi_table_header *table); -typedef int (*acpi_tbl_entry_handler)(union acpi_subtable_headers *header, - const unsigned long end); - -typedef int (*acpi_tbl_entry_handler_arg)(union acpi_subtable_headers *header, - void *arg, const unsigned long end); - /* Debugger support */ struct acpi_debugger_ops { @@ -207,14 +205,6 @@ static inline int acpi_debugger_notify_command_complete(void) (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ ((struct acpi_subtable_header *)entry)->length < sizeof(*entry)) -struct acpi_subtable_proc { - int id; - acpi_tbl_entry_handler handler; - acpi_tbl_entry_handler_arg handler_arg; - void *arg; - int count; -}; - void __iomem *__acpi_map_table(unsigned long phys, unsigned long size); void __acpi_unmap_table(void __iomem *map, unsigned long size); int early_acpi_boot_init(void); @@ -229,16 +219,6 @@ void acpi_reserve_initial_tables (void); void acpi_table_init_complete (void); int acpi_table_init (void); -#ifdef CONFIG_ACPI_TABLE_LIB -#define EXPORT_SYMBOL_ACPI_LIB(x) EXPORT_SYMBOL_NS_GPL(x, ACPI) -#define __init_or_acpilib -#define __initdata_or_acpilib -#else -#define EXPORT_SYMBOL_ACPI_LIB(x) -#define __init_or_acpilib __init -#define __initdata_or_acpilib __initdata -#endif - int acpi_table_parse(char *id, acpi_tbl_table_handler handler); int __init_or_acpilib acpi_table_parse_entries(char *id, unsigned long table_size, int entry_id, @@ -256,10 +236,15 @@ acpi_table_parse_cedt(enum acpi_cedt_type id, int acpi_parse_mcfg (struct acpi_table_header *header); void acpi_table_print_madt_entry (struct acpi_subtable_header *madt); +static inline bool acpi_gicc_is_usable(struct acpi_madt_generic_interrupt *gicc) +{ + return gicc->flags & ACPI_MADT_ENABLED; +} + /* the following numa functions are architecture-dependent */ void acpi_numa_slit_init (struct acpi_table_slit *slit); -#if defined(CONFIG_X86) || defined(CONFIG_IA64) || defined(CONFIG_LOONGARCH) +#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH) void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa); #else static inline void @@ -477,8 +462,6 @@ static inline int acpi_get_node(acpi_handle handle) return 0; } #endif -extern int acpi_paddr_to_node(u64 start_addr, u64 size); - extern int pnpacpi_disabled; #define PXM_INVAL (-1) @@ -789,6 +772,11 @@ static inline bool acpi_dev_present(const char *hid, const char *uid, s64 hrv) struct acpi_device; +static inline bool acpi_dev_uid_match(struct acpi_device *adev, const char *uid2) +{ + return false; +} + static inline bool acpi_dev_hid_uid_match(struct acpi_device *adev, const char *hid2, const char *uid2) { @@ -1100,7 +1088,7 @@ void acpi_os_set_prepare_extended_sleep(int (*func)(u8 sleep_state, acpi_status acpi_os_prepare_extended_sleep(u8 sleep_state, u32 val_a, u32 val_b); -#ifdef CONFIG_X86 +#if defined(CONFIG_SUSPEND) && defined(CONFIG_X86) struct acpi_s2idle_dev_ops { struct list_head list_node; void (*prepare)(void); @@ -1109,15 +1097,14 @@ struct acpi_s2idle_dev_ops { }; int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg); void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg); -#endif /* CONFIG_X86 */ -#ifndef CONFIG_IA64 -void arch_reserve_mem_area(acpi_physical_address addr, size_t size); -#else -static inline void arch_reserve_mem_area(acpi_physical_address addr, - size_t size) +int acpi_get_lps0_constraint(struct acpi_device *adev); +#else /* CONFIG_SUSPEND && CONFIG_X86 */ +static inline int acpi_get_lps0_constraint(struct device *dev) { + return ACPI_STATE_UNKNOWN; } -#endif /* CONFIG_X86 */ +#endif /* CONFIG_SUSPEND && CONFIG_X86 */ +void arch_reserve_mem_area(acpi_physical_address addr, size_t size); #else #define acpi_os_set_prepare_sleep(func, pm1a_ctrl, pm1b_ctrl) do { } while (0) #endif @@ -1476,6 +1463,15 @@ static inline int lpit_read_residency_count_address(u64 *address) } #endif +#ifdef CONFIG_ACPI_PROCESSOR_IDLE +#ifndef arch_get_idle_state_flags +static inline unsigned int arch_get_idle_state_flags(u32 arch_flags) +{ + return 0; +} +#endif +#endif /* CONFIG_ACPI_PROCESSOR_IDLE */ + #ifdef CONFIG_ACPI_PPTT int acpi_pptt_cpu_is_thread(unsigned int cpu); int find_acpi_cpu_topology(unsigned int cpu, int level); @@ -1535,4 +1531,9 @@ static inline void acpi_device_notify(struct device *dev) { } static inline void acpi_device_notify_remove(struct device *dev) { } #endif +static inline void acpi_use_parent_companion(struct device *dev) +{ + ACPI_COMPANION_SET(dev, ACPI_COMPANION(dev->parent)); +} + #endif /*_LINUX_ACPI_H*/ diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index ee7cb6aaff71..1cb65592c95d 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -21,6 +21,7 @@ */ #define IORT_SMMU_V3_PMCG_GENERIC 0x00000000 /* Generic SMMUv3 PMCG */ #define IORT_SMMU_V3_PMCG_HISI_HIP08 0x00000001 /* HiSilicon HIP08 PMCG */ +#define IORT_SMMU_V3_PMCG_HISI_HIP09 0x00000002 /* HiSilicon HIP09 PMCG */ int iort_register_domain_token(int trans_id, phys_addr_t base, struct fwnode_handle *fw_node); diff --git a/include/linux/aer.h b/include/linux/aer.h index 3a3ab05e13fd..f6ea2f57d808 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -41,26 +41,17 @@ struct aer_capability_regs { }; #if defined(CONFIG_PCIEAER) -/* PCIe port driver needs this function to enable AER */ -int pci_enable_pcie_error_reporting(struct pci_dev *dev); -int pci_disable_pcie_error_reporting(struct pci_dev *dev); int pci_aer_clear_nonfatal_status(struct pci_dev *dev); +int pcie_aer_is_native(struct pci_dev *dev); #else -static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev) -{ - return -EINVAL; -} -static inline int pci_disable_pcie_error_reporting(struct pci_dev *dev) -{ - return -EINVAL; -} static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev) { return -EINVAL; } +static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; } #endif -void cper_print_aer(struct pci_dev *dev, int aer_severity, +void pci_print_aer(struct pci_dev *dev, int aer_severity, struct aer_capability_regs *aer); int cper_severity_to_aer(int cper_severity); void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 953e6f12fa1c..dc7ed2f46886 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -32,128 +32,7 @@ struct task_struct; struct pci_dev; extern int amd_iommu_detect(void); -extern int amd_iommu_init_hardware(void); - -/** - * amd_iommu_init_device() - Init device for use with IOMMUv2 driver - * @pdev: The PCI device to initialize - * @pasids: Number of PASIDs to support for this device - * - * This function does all setup for the device pdev so that it can be - * used with IOMMUv2. - * Returns 0 on success or negative value on error. - */ -extern int amd_iommu_init_device(struct pci_dev *pdev, int pasids); - -/** - * amd_iommu_free_device() - Free all IOMMUv2 related device resources - * and disable IOMMUv2 usage for this device - * @pdev: The PCI device to disable IOMMUv2 usage for' - */ -extern void amd_iommu_free_device(struct pci_dev *pdev); - -/** - * amd_iommu_bind_pasid() - Bind a given task to a PASID on a device - * @pdev: The PCI device to bind the task to - * @pasid: The PASID on the device the task should be bound to - * @task: the task to bind - * - * The function returns 0 on success or a negative value on error. - */ -extern int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid, - struct task_struct *task); - -/** - * amd_iommu_unbind_pasid() - Unbind a PASID from its task on - * a device - * @pdev: The device of the PASID - * @pasid: The PASID to unbind - * - * When this function returns the device is no longer using the PASID - * and the PASID is no longer bound to its task. - */ -extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, u32 pasid); - -/** - * amd_iommu_set_invalid_ppr_cb() - Register a call-back for failed - * PRI requests - * @pdev: The PCI device the call-back should be registered for - * @cb: The call-back function - * - * The IOMMUv2 driver invokes this call-back when it is unable to - * successfully handle a PRI request. The device driver can then decide - * which PRI response the device should see. Possible return values for - * the call-back are: - * - * - AMD_IOMMU_INV_PRI_RSP_SUCCESS - Send SUCCESS back to the device - * - AMD_IOMMU_INV_PRI_RSP_INVALID - Send INVALID back to the device - * - AMD_IOMMU_INV_PRI_RSP_FAIL - Send Failure back to the device, - * the device is required to disable - * PRI when it receives this response - * - * The function returns 0 on success or negative value on error. - */ -#define AMD_IOMMU_INV_PRI_RSP_SUCCESS 0 -#define AMD_IOMMU_INV_PRI_RSP_INVALID 1 -#define AMD_IOMMU_INV_PRI_RSP_FAIL 2 - -typedef int (*amd_iommu_invalid_ppr_cb)(struct pci_dev *pdev, - u32 pasid, - unsigned long address, - u16); - -extern int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev, - amd_iommu_invalid_ppr_cb cb); - -#define PPR_FAULT_EXEC (1 << 1) -#define PPR_FAULT_READ (1 << 2) -#define PPR_FAULT_WRITE (1 << 5) -#define PPR_FAULT_USER (1 << 6) -#define PPR_FAULT_RSVD (1 << 7) -#define PPR_FAULT_GN (1 << 8) - -/** - * amd_iommu_device_info() - Get information about IOMMUv2 support of a - * PCI device - * @pdev: PCI device to query information from - * @info: A pointer to an amd_iommu_device_info structure which will contain - * the information about the PCI device - * - * Returns 0 on success, negative value on error - */ - -#define AMD_IOMMU_DEVICE_FLAG_ATS_SUP 0x1 /* ATS feature supported */ -#define AMD_IOMMU_DEVICE_FLAG_PRI_SUP 0x2 /* PRI feature supported */ -#define AMD_IOMMU_DEVICE_FLAG_PASID_SUP 0x4 /* PASID context supported */ -#define AMD_IOMMU_DEVICE_FLAG_EXEC_SUP 0x8 /* Device may request execution - on memory pages */ -#define AMD_IOMMU_DEVICE_FLAG_PRIV_SUP 0x10 /* Device may request - super-user privileges */ - -struct amd_iommu_device_info { - int max_pasids; - u32 flags; -}; - -extern int amd_iommu_device_info(struct pci_dev *pdev, - struct amd_iommu_device_info *info); - -/** - * amd_iommu_set_invalidate_ctx_cb() - Register a call-back for invalidating - * a pasid context. This call-back is - * invoked when the IOMMUv2 driver needs to - * invalidate a PASID context, for example - * because the task that is bound to that - * context is about to exit. - * - * @pdev: The PCI device the call-back should be registered for - * @cb: The call-back function - */ - -typedef void (*amd_iommu_invalidate_ctx)(struct pci_dev *pdev, u32 pasid); -extern int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev, - amd_iommu_invalidate_ctx cb); #else /* CONFIG_AMD_IOMMU */ static inline int amd_iommu_detect(void) { return -ENODEV; } diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h index 446394f84606..6ad02ad9c7b4 100644 --- a/include/linux/amd-pstate.h +++ b/include/linux/amd-pstate.h @@ -70,6 +70,10 @@ struct amd_cpudata { u32 nominal_perf; u32 lowest_nonlinear_perf; u32 lowest_perf; + u32 min_limit_perf; + u32 max_limit_perf; + u32 min_limit_freq; + u32 max_limit_freq; u32 max_freq; u32 min_freq; diff --git a/include/linux/args.h b/include/linux/args.h new file mode 100644 index 000000000000..8ff60a54eb7d --- /dev/null +++ b/include/linux/args.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_ARGS_H +#define _LINUX_ARGS_H + +/* + * How do these macros work? + * + * In __COUNT_ARGS() _0 to _12 are just placeholders from the start + * in order to make sure _n is positioned over the correct number + * from 12 to 0 (depending on X, which is a variadic argument list). + * They serve no purpose other than occupying a position. Since each + * macro parameter must have a distinct identifier, those identifiers + * are as good as any. + * + * In COUNT_ARGS() we use actual integers, so __COUNT_ARGS() returns + * that as _n. + */ + +/* This counts to 12. Any more, it will return 13th argument. */ +#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n +#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + +/* Concatenate two parameters, but allow them to be expanded beforehand. */ +#define __CONCAT(a, b) a ## b +#define CONCATENATE(a, b) __CONCAT(a, b) + +#endif /* _LINUX_ARGS_H */ diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index f196c19f8e55..083f85653716 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -5,6 +5,7 @@ #ifndef __LINUX_ARM_SMCCC_H #define __LINUX_ARM_SMCCC_H +#include <linux/args.h> #include <linux/init.h> #include <uapi/linux/const.h> @@ -66,6 +67,8 @@ #define ARM_SMCCC_VERSION_1_3 0x10003 #define ARM_SMCCC_1_3_SVE_HINT 0x10000 +#define ARM_SMCCC_CALL_HINTS ARM_SMCCC_1_3_SVE_HINT + #define ARM_SMCCC_VERSION_FUNC_ID \ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ @@ -413,31 +416,26 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, #endif -#define ___count_args(_0, _1, _2, _3, _4, _5, _6, _7, _8, x, ...) x - -#define __count_args(...) \ - ___count_args(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1, 0) - -#define __constraint_read_0 "r" (arg0) -#define __constraint_read_1 __constraint_read_0, "r" (arg1) -#define __constraint_read_2 __constraint_read_1, "r" (arg2) -#define __constraint_read_3 __constraint_read_2, "r" (arg3) -#define __constraint_read_4 __constraint_read_3, "r" (arg4) -#define __constraint_read_5 __constraint_read_4, "r" (arg5) -#define __constraint_read_6 __constraint_read_5, "r" (arg6) -#define __constraint_read_7 __constraint_read_6, "r" (arg7) +#define __constraint_read_2 "r" (arg0) +#define __constraint_read_3 __constraint_read_2, "r" (arg1) +#define __constraint_read_4 __constraint_read_3, "r" (arg2) +#define __constraint_read_5 __constraint_read_4, "r" (arg3) +#define __constraint_read_6 __constraint_read_5, "r" (arg4) +#define __constraint_read_7 __constraint_read_6, "r" (arg5) +#define __constraint_read_8 __constraint_read_7, "r" (arg6) +#define __constraint_read_9 __constraint_read_8, "r" (arg7) -#define __declare_arg_0(a0, res) \ +#define __declare_arg_2(a0, res) \ struct arm_smccc_res *___res = res; \ register unsigned long arg0 asm("r0") = (u32)a0 -#define __declare_arg_1(a0, a1, res) \ +#define __declare_arg_3(a0, a1, res) \ typeof(a1) __a1 = a1; \ struct arm_smccc_res *___res = res; \ register unsigned long arg0 asm("r0") = (u32)a0; \ register typeof(a1) arg1 asm("r1") = __a1 -#define __declare_arg_2(a0, a1, a2, res) \ +#define __declare_arg_4(a0, a1, a2, res) \ typeof(a1) __a1 = a1; \ typeof(a2) __a2 = a2; \ struct arm_smccc_res *___res = res; \ @@ -445,7 +443,7 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, register typeof(a1) arg1 asm("r1") = __a1; \ register typeof(a2) arg2 asm("r2") = __a2 -#define __declare_arg_3(a0, a1, a2, a3, res) \ +#define __declare_arg_5(a0, a1, a2, a3, res) \ typeof(a1) __a1 = a1; \ typeof(a2) __a2 = a2; \ typeof(a3) __a3 = a3; \ @@ -455,34 +453,26 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, register typeof(a2) arg2 asm("r2") = __a2; \ register typeof(a3) arg3 asm("r3") = __a3 -#define __declare_arg_4(a0, a1, a2, a3, a4, res) \ +#define __declare_arg_6(a0, a1, a2, a3, a4, res) \ typeof(a4) __a4 = a4; \ - __declare_arg_3(a0, a1, a2, a3, res); \ + __declare_arg_5(a0, a1, a2, a3, res); \ register typeof(a4) arg4 asm("r4") = __a4 -#define __declare_arg_5(a0, a1, a2, a3, a4, a5, res) \ +#define __declare_arg_7(a0, a1, a2, a3, a4, a5, res) \ typeof(a5) __a5 = a5; \ - __declare_arg_4(a0, a1, a2, a3, a4, res); \ + __declare_arg_6(a0, a1, a2, a3, a4, res); \ register typeof(a5) arg5 asm("r5") = __a5 -#define __declare_arg_6(a0, a1, a2, a3, a4, a5, a6, res) \ +#define __declare_arg_8(a0, a1, a2, a3, a4, a5, a6, res) \ typeof(a6) __a6 = a6; \ - __declare_arg_5(a0, a1, a2, a3, a4, a5, res); \ + __declare_arg_7(a0, a1, a2, a3, a4, a5, res); \ register typeof(a6) arg6 asm("r6") = __a6 -#define __declare_arg_7(a0, a1, a2, a3, a4, a5, a6, a7, res) \ +#define __declare_arg_9(a0, a1, a2, a3, a4, a5, a6, a7, res) \ typeof(a7) __a7 = a7; \ - __declare_arg_6(a0, a1, a2, a3, a4, a5, a6, res); \ + __declare_arg_8(a0, a1, a2, a3, a4, a5, a6, res); \ register typeof(a7) arg7 asm("r7") = __a7 -#define ___declare_args(count, ...) __declare_arg_ ## count(__VA_ARGS__) -#define __declare_args(count, ...) ___declare_args(count, __VA_ARGS__) - -#define ___constraints(count) \ - : __constraint_read_ ## count \ - : smccc_sve_clobbers "memory" -#define __constraints(count) ___constraints(count) - /* * We have an output list that is not necessarily used, and GCC feels * entitled to optimise the whole sequence away. "volatile" is what @@ -494,11 +484,14 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, register unsigned long r1 asm("r1"); \ register unsigned long r2 asm("r2"); \ register unsigned long r3 asm("r3"); \ - __declare_args(__count_args(__VA_ARGS__), __VA_ARGS__); \ + CONCATENATE(__declare_arg_, \ + COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__); \ asm volatile(SMCCC_SVE_CHECK \ inst "\n" : \ "=r" (r0), "=r" (r1), "=r" (r2), "=r" (r3) \ - __constraints(__count_args(__VA_ARGS__))); \ + : CONCATENATE(__constraint_read_, \ + COUNT_ARGS(__VA_ARGS__)) \ + : smccc_sve_clobbers "memory"); \ if (___res) \ *___res = (typeof(*___res)){r0, r1, r2, r3}; \ } while (0) @@ -542,8 +535,12 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, */ #define __fail_smccc_1_1(...) \ do { \ - __declare_args(__count_args(__VA_ARGS__), __VA_ARGS__); \ - asm ("" : __constraints(__count_args(__VA_ARGS__))); \ + CONCATENATE(__declare_arg_, \ + COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__); \ + asm ("" : \ + : CONCATENATE(__constraint_read_, \ + COUNT_ARGS(__VA_ARGS__)) \ + : smccc_sve_clobbers "memory"); \ if (___res) \ ___res->a0 = SMCCC_RET_NOT_SUPPORTED; \ } while (0) diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index cc060da51bec..3d0fde57ba90 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -6,6 +6,7 @@ #ifndef _LINUX_ARM_FFA_H #define _LINUX_ARM_FFA_H +#include <linux/bitfield.h> #include <linux/device.h> #include <linux/module.h> #include <linux/types.h> @@ -20,6 +21,7 @@ #define FFA_ERROR FFA_SMC_32(0x60) #define FFA_SUCCESS FFA_SMC_32(0x61) +#define FFA_FN64_SUCCESS FFA_SMC_64(0x61) #define FFA_INTERRUPT FFA_SMC_32(0x62) #define FFA_VERSION FFA_SMC_32(0x63) #define FFA_FEATURES FFA_SMC_32(0x64) @@ -54,6 +56,23 @@ #define FFA_MEM_FRAG_RX FFA_SMC_32(0x7A) #define FFA_MEM_FRAG_TX FFA_SMC_32(0x7B) #define FFA_NORMAL_WORLD_RESUME FFA_SMC_32(0x7C) +#define FFA_NOTIFICATION_BITMAP_CREATE FFA_SMC_32(0x7D) +#define FFA_NOTIFICATION_BITMAP_DESTROY FFA_SMC_32(0x7E) +#define FFA_NOTIFICATION_BIND FFA_SMC_32(0x7F) +#define FFA_NOTIFICATION_UNBIND FFA_SMC_32(0x80) +#define FFA_NOTIFICATION_SET FFA_SMC_32(0x81) +#define FFA_NOTIFICATION_GET FFA_SMC_32(0x82) +#define FFA_NOTIFICATION_INFO_GET FFA_SMC_32(0x83) +#define FFA_FN64_NOTIFICATION_INFO_GET FFA_SMC_64(0x83) +#define FFA_RX_ACQUIRE FFA_SMC_32(0x84) +#define FFA_SPM_ID_GET FFA_SMC_32(0x85) +#define FFA_MSG_SEND2 FFA_SMC_32(0x86) +#define FFA_SECONDARY_EP_REGISTER FFA_SMC_32(0x87) +#define FFA_FN64_SECONDARY_EP_REGISTER FFA_SMC_64(0x87) +#define FFA_MEM_PERM_GET FFA_SMC_32(0x88) +#define FFA_FN64_MEM_PERM_GET FFA_SMC_64(0x88) +#define FFA_MEM_PERM_SET FFA_SMC_32(0x89) +#define FFA_FN64_MEM_PERM_SET FFA_SMC_64(0x89) /* * For some calls it is necessary to use SMC64 to pass or return 64-bit values. @@ -76,6 +95,7 @@ #define FFA_RET_DENIED (-6) #define FFA_RET_RETRY (-7) #define FFA_RET_ABORTED (-8) +#define FFA_RET_NO_DATA (-9) /* FFA version encoding */ #define FFA_MAJOR_VERSION_MASK GENMASK(30, 16) @@ -86,6 +106,7 @@ (FIELD_PREP(FFA_MAJOR_VERSION_MASK, (major)) | \ FIELD_PREP(FFA_MINOR_VERSION_MASK, (minor))) #define FFA_VERSION_1_0 FFA_PACK_VERSION_INFO(1, 0) +#define FFA_VERSION_1_1 FFA_PACK_VERSION_INFO(1, 1) /** * FF-A specification mentions explicitly about '4K pages'. This should @@ -188,6 +209,8 @@ bool ffa_device_is_valid(struct ffa_device *ffa_dev) { return false; } #define module_ffa_driver(__ffa_driver) \ module_driver(__ffa_driver, ffa_register, ffa_unregister) +extern struct bus_type ffa_bus_type; + /* FFA transport related */ struct ffa_partition_info { u16 id; @@ -278,8 +301,8 @@ struct ffa_mem_region { #define FFA_MEM_NON_SHAREABLE (0) #define FFA_MEM_OUTER_SHAREABLE (2) #define FFA_MEM_INNER_SHAREABLE (3) - u8 attributes; - u8 reserved_0; + /* Memory region attributes, upper byte MBZ pre v1.1 */ + u16 attributes; /* * Clear memory region contents after unmapping it from the sender and * before mapping it for any receiver. @@ -317,27 +340,41 @@ struct ffa_mem_region { * memory region. */ u64 tag; - u32 reserved_1; + /* Size of each endpoint memory access descriptor, MBZ pre v1.1 */ + u32 ep_mem_size; /* * The number of `ffa_mem_region_attributes` entries included in this * transaction. */ u32 ep_count; /* - * An array of endpoint memory access descriptors. - * Each one specifies a memory region offset, an endpoint and the - * attributes with which this memory region should be mapped in that - * endpoint's page table. + * 16-byte aligned offset from the base address of this descriptor + * to the first element of the endpoint memory access descriptor array + * Valid only from v1.1 */ - struct ffa_mem_region_attributes ep_mem_access[]; + u32 ep_mem_offset; + /* MBZ, valid only from v1.1 */ + u32 reserved[3]; }; -#define COMPOSITE_OFFSET(x) \ - (offsetof(struct ffa_mem_region, ep_mem_access[x])) #define CONSTITUENTS_OFFSET(x) \ (offsetof(struct ffa_composite_mem_region, constituents[x])) -#define COMPOSITE_CONSTITUENTS_OFFSET(x, y) \ - (COMPOSITE_OFFSET(x) + CONSTITUENTS_OFFSET(y)) + +static inline u32 +ffa_mem_desc_offset(struct ffa_mem_region *buf, int count, u32 ffa_version) +{ + u32 offset = count * sizeof(struct ffa_mem_region_attributes); + /* + * Earlier to v1.1, the endpoint memory descriptor array started at + * offset 32(i.e. offset of ep_mem_offset in the current structure) + */ + if (ffa_version <= FFA_VERSION_1_0) + offset += offsetof(struct ffa_mem_region, ep_mem_offset); + else + offset += sizeof(struct ffa_mem_region); + + return offset; +} struct ffa_mem_ops_args { bool use_txbuf; @@ -367,10 +404,30 @@ struct ffa_mem_ops { int (*memory_lend)(struct ffa_mem_ops_args *args); }; +struct ffa_cpu_ops { + int (*run)(struct ffa_device *dev, u16 vcpu); +}; + +typedef void (*ffa_sched_recv_cb)(u16 vcpu, bool is_per_vcpu, void *cb_data); +typedef void (*ffa_notifier_cb)(int notify_id, void *cb_data); + +struct ffa_notifier_ops { + int (*sched_recv_cb_register)(struct ffa_device *dev, + ffa_sched_recv_cb cb, void *cb_data); + int (*sched_recv_cb_unregister)(struct ffa_device *dev); + int (*notify_request)(struct ffa_device *dev, bool per_vcpu, + ffa_notifier_cb cb, void *cb_data, int notify_id); + int (*notify_relinquish)(struct ffa_device *dev, int notify_id); + int (*notify_send)(struct ffa_device *dev, int notify_id, bool per_vcpu, + u16 vcpu); +}; + struct ffa_ops { const struct ffa_info_ops *info_ops; const struct ffa_msg_ops *msg_ops; const struct ffa_mem_ops *mem_ops; + const struct ffa_cpu_ops *cpu_ops; + const struct ffa_notifier_ops *notifier_ops; }; #endif /* _LINUX_ARM_FFA_H */ diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h index 14dc461b0e82..255701e1251b 100644 --- a/include/linux/arm_sdei.h +++ b/include/linux/arm_sdei.h @@ -47,10 +47,12 @@ int sdei_unregister_ghes(struct ghes *ghes); int sdei_mask_local_cpu(void); int sdei_unmask_local_cpu(void); void __init sdei_init(void); +void sdei_handler_abort(void); #else static inline int sdei_mask_local_cpu(void) { return 0; } static inline int sdei_unmask_local_cpu(void) { return 0; } static inline void sdei_init(void) { } +static inline void sdei_handler_abort(void) { } #endif /* CONFIG_ARM_SDE_INTERFACE */ diff --git a/include/linux/array_size.h b/include/linux/array_size.h new file mode 100644 index 000000000000..06d7d83196ca --- /dev/null +++ b/include/linux/array_size.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_ARRAY_SIZE_H +#define _LINUX_ARRAY_SIZE_H + +#include <linux/compiler.h> + +/** + * ARRAY_SIZE - get the number of elements in array @arr + * @arr: array to be sized + */ +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) + +#endif /* _LINUX_ARRAY_SIZE_H */ diff --git a/include/linux/atmel-mci.h b/include/linux/atmel-mci.h deleted file mode 100644 index 1491af38cc6e..000000000000 --- a/include/linux/atmel-mci.h +++ /dev/null @@ -1,46 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_ATMEL_MCI_H -#define __LINUX_ATMEL_MCI_H - -#include <linux/types.h> -#include <linux/dmaengine.h> - -#define ATMCI_MAX_NR_SLOTS 2 - -/** - * struct mci_slot_pdata - board-specific per-slot configuration - * @bus_width: Number of data lines wired up the slot - * @detect_pin: GPIO pin wired to the card detect switch - * @wp_pin: GPIO pin wired to the write protect sensor - * @detect_is_active_high: The state of the detect pin when it is active - * @non_removable: The slot is not removable, only detect once - * - * If a given slot is not present on the board, @bus_width should be - * set to 0. The other fields are ignored in this case. - * - * Any pins that aren't available should be set to a negative value. - * - * Note that support for multiple slots is experimental -- some cards - * might get upset if we don't get the clock management exactly right. - * But in most cases, it should work just fine. - */ -struct mci_slot_pdata { - unsigned int bus_width; - int detect_pin; - int wp_pin; - bool detect_is_active_high; - bool non_removable; -}; - -/** - * struct mci_platform_data - board-specific MMC/SDcard configuration - * @dma_slave: DMA slave interface to use in data transfers. - * @slot: Per-slot configuration data. - */ -struct mci_platform_data { - void *dma_slave; - dma_filter_fn dma_filter; - struct mci_slot_pdata slot[ATMCI_MAX_NR_SLOTS]; -}; - -#endif /* __LINUX_ATMEL_MCI_H */ diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h index 18f5744dfb5d..5e95faa959c4 100644 --- a/include/linux/atomic/atomic-arch-fallback.h +++ b/include/linux/atomic/atomic-arch-fallback.h @@ -428,6 +428,19 @@ extern void raw_cmpxchg128_relaxed_not_implemented(void); #define raw_sync_cmpxchg arch_sync_cmpxchg +#ifdef arch_sync_try_cmpxchg +#define raw_sync_try_cmpxchg arch_sync_try_cmpxchg +#else +#define raw_sync_try_cmpxchg(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = raw_sync_cmpxchg((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif + /** * raw_atomic_read() - atomic load with relaxed ordering * @v: pointer to atomic_t @@ -459,8 +472,6 @@ raw_atomic_read_acquire(const atomic_t *v) { #if defined(arch_atomic_read_acquire) return arch_atomic_read_acquire(v); -#elif defined(arch_atomic_read) - return arch_atomic_read(v); #else int ret; @@ -508,8 +519,6 @@ raw_atomic_set_release(atomic_t *v, int i) { #if defined(arch_atomic_set_release) arch_atomic_set_release(v, i); -#elif defined(arch_atomic_set) - arch_atomic_set(v, i); #else if (__native_word(atomic_t)) { smp_store_release(&(v)->counter, i); @@ -2575,8 +2584,6 @@ raw_atomic64_read_acquire(const atomic64_t *v) { #if defined(arch_atomic64_read_acquire) return arch_atomic64_read_acquire(v); -#elif defined(arch_atomic64_read) - return arch_atomic64_read(v); #else s64 ret; @@ -2624,8 +2631,6 @@ raw_atomic64_set_release(atomic64_t *v, s64 i) { #if defined(arch_atomic64_set_release) arch_atomic64_set_release(v, i); -#elif defined(arch_atomic64_set) - arch_atomic64_set(v, i); #else if (__native_word(atomic64_t)) { smp_store_release(&(v)->counter, i); @@ -4657,4 +4662,4 @@ raw_atomic64_dec_if_positive(atomic64_t *v) } #endif /* _LINUX_ATOMIC_FALLBACK_H */ -// 202b45c7db600ce36198eb1f1fc2c2d5268ace2d +// eec048affea735b8464f58e6d96992101f8f85f1 diff --git a/include/linux/atomic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h index d401b406ef7c..54d7bbe0aeaa 100644 --- a/include/linux/atomic/atomic-instrumented.h +++ b/include/linux/atomic/atomic-instrumented.h @@ -4998,6 +4998,14 @@ atomic_long_dec_if_positive(atomic_long_t *v) raw_try_cmpxchg128_local(__ai_ptr, __ai_oldp, __VA_ARGS__); \ }) +#define sync_try_cmpxchg(ptr, ...) \ +({ \ + typeof(ptr) __ai_ptr = (ptr); \ + kcsan_mb(); \ + instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ + raw_sync_try_cmpxchg(__ai_ptr, __VA_ARGS__); \ +}) + #endif /* _LINUX_ATOMIC_INSTRUMENTED_H */ -// 1568f875fef72097413caab8339120c065a39aa4 +// 2cc4bc990fef44d3836ec108f11b610f3f438184 diff --git a/include/linux/audit.h b/include/linux/audit.h index 6a3a9e122bb5..51b1b7054a23 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -117,6 +117,8 @@ enum audit_nfcfgop { AUDIT_NFT_OP_OBJ_RESET, AUDIT_NFT_OP_FLOWTABLE_REGISTER, AUDIT_NFT_OP_FLOWTABLE_UNREGISTER, + AUDIT_NFT_OP_SETELEM_RESET, + AUDIT_NFT_OP_RULE_RESET, AUDIT_NFT_OP_INVALID, }; diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index c15221dcb75e..6b3acf15be5c 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -4,6 +4,10 @@ #ifndef _VIRTCHNL_H_ #define _VIRTCHNL_H_ +#include <linux/bitops.h> +#include <linux/overflow.h> +#include <uapi/linux/if_ether.h> + /* Description: * This header file describes the Virtual Function (VF) - Physical Function * (PF) communication protocol used by the drivers for all devices starting @@ -240,6 +244,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); #define VIRTCHNL_VF_OFFLOAD_REQ_QUEUES BIT(6) /* used to negotiate communicating link speeds in Mbps */ #define VIRTCHNL_VF_CAP_ADV_LINK_SPEED BIT(7) +#define VIRTCHNL_VF_OFFLOAD_CRC BIT(10) #define VIRTCHNL_VF_OFFLOAD_VLAN_V2 BIT(15) #define VIRTCHNL_VF_OFFLOAD_VLAN BIT(16) #define VIRTCHNL_VF_OFFLOAD_RX_POLLING BIT(17) @@ -268,10 +273,11 @@ struct virtchnl_vf_resource { u32 rss_key_size; u32 rss_lut_size; - struct virtchnl_vsi_resource vsi_res[1]; + struct virtchnl_vsi_resource vsi_res[]; }; -VIRTCHNL_CHECK_STRUCT_LEN(36, virtchnl_vf_resource); +VIRTCHNL_CHECK_STRUCT_LEN(20, virtchnl_vf_resource); +#define virtchnl_vf_resource_LEGACY_SIZEOF 36 /* VIRTCHNL_OP_CONFIG_TX_QUEUE * VF sends this message to set up parameters for one TX queue. @@ -294,7 +300,13 @@ VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_txq_info); /* VIRTCHNL_OP_CONFIG_RX_QUEUE * VF sends this message to set up parameters for one RX queue. * External data buffer contains one instance of virtchnl_rxq_info. - * PF configures requested queue and returns a status code. + * PF configures requested queue and returns a status code. The + * crc_disable flag disables CRC stripping on the VF. Setting + * the crc_disable flag to 1 will disable CRC stripping for each + * queue in the VF where the flag is set. The VIRTCHNL_VF_OFFLOAD_CRC + * offload must have been set prior to sending this info or the PF + * will ignore the request. This flag should be set the same for + * all of the queues for a VF. */ /* Rx queue config info */ @@ -306,7 +318,7 @@ struct virtchnl_rxq_info { u16 splithdr_enabled; /* deprecated with AVF 1.0 */ u32 databuffer_size; u32 max_pkt_size; - u8 pad0; + u8 crc_disable; u8 rxdid; u8 pad1[2]; u64 dma_ring_addr; @@ -340,10 +352,11 @@ struct virtchnl_vsi_queue_config_info { u16 vsi_id; u16 num_queue_pairs; u32 pad; - struct virtchnl_queue_pair_info qpair[1]; + struct virtchnl_queue_pair_info qpair[]; }; -VIRTCHNL_CHECK_STRUCT_LEN(72, virtchnl_vsi_queue_config_info); +VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_vsi_queue_config_info); +#define virtchnl_vsi_queue_config_info_LEGACY_SIZEOF 72 /* VIRTCHNL_OP_REQUEST_QUEUES * VF sends this message to request the PF to allocate additional queues to @@ -385,10 +398,11 @@ VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_vector_map); struct virtchnl_irq_map_info { u16 num_vectors; - struct virtchnl_vector_map vecmap[1]; + struct virtchnl_vector_map vecmap[]; }; -VIRTCHNL_CHECK_STRUCT_LEN(14, virtchnl_irq_map_info); +VIRTCHNL_CHECK_STRUCT_LEN(2, virtchnl_irq_map_info); +#define virtchnl_irq_map_info_LEGACY_SIZEOF 14 /* VIRTCHNL_OP_ENABLE_QUEUES * VIRTCHNL_OP_DISABLE_QUEUES @@ -459,10 +473,11 @@ VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_ether_addr); struct virtchnl_ether_addr_list { u16 vsi_id; u16 num_elements; - struct virtchnl_ether_addr list[1]; + struct virtchnl_ether_addr list[]; }; -VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_ether_addr_list); +VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_ether_addr_list); +#define virtchnl_ether_addr_list_LEGACY_SIZEOF 12 /* VIRTCHNL_OP_ADD_VLAN * VF sends this message to add one or more VLAN tag filters for receives. @@ -481,10 +496,11 @@ VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_ether_addr_list); struct virtchnl_vlan_filter_list { u16 vsi_id; u16 num_elements; - u16 vlan_id[1]; + u16 vlan_id[]; }; -VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_vlan_filter_list); +VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_vlan_filter_list); +#define virtchnl_vlan_filter_list_LEGACY_SIZEOF 6 /* This enum is used for all of the VIRTCHNL_VF_OFFLOAD_VLAN_V2_CAPS related * structures and opcodes. @@ -711,10 +727,11 @@ struct virtchnl_vlan_filter_list_v2 { u16 vport_id; u16 num_elements; u8 pad[4]; - struct virtchnl_vlan_filter filters[1]; + struct virtchnl_vlan_filter filters[]; }; -VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_vlan_filter_list_v2); +VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_vlan_filter_list_v2); +#define virtchnl_vlan_filter_list_v2_LEGACY_SIZEOF 40 /* VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2 * VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2 @@ -866,18 +883,20 @@ VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_promisc_info); struct virtchnl_rss_key { u16 vsi_id; u16 key_len; - u8 key[1]; /* RSS hash key, packed bytes */ + u8 key[]; /* RSS hash key, packed bytes */ }; -VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_rss_key); +VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_rss_key); +#define virtchnl_rss_key_LEGACY_SIZEOF 6 struct virtchnl_rss_lut { u16 vsi_id; u16 lut_entries; - u8 lut[1]; /* RSS lookup table */ + u8 lut[]; /* RSS lookup table */ }; -VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_rss_lut); +VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_rss_lut); +#define virtchnl_rss_lut_LEGACY_SIZEOF 6 /* VIRTCHNL_OP_GET_RSS_HENA_CAPS * VIRTCHNL_OP_SET_RSS_HENA @@ -911,10 +930,11 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_channel_info); struct virtchnl_tc_info { u32 num_tc; u32 pad; - struct virtchnl_channel_info list[1]; + struct virtchnl_channel_info list[]; }; -VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_tc_info); +VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_tc_info); +#define virtchnl_tc_info_LEGACY_SIZEOF 24 /* VIRTCHNL_ADD_CLOUD_FILTER * VIRTCHNL_DEL_CLOUD_FILTER @@ -1052,10 +1072,11 @@ VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_rdma_qv_info); struct virtchnl_rdma_qvlist_info { u32 num_vectors; - struct virtchnl_rdma_qv_info qv_info[1]; + struct virtchnl_rdma_qv_info qv_info[]; }; -VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_rdma_qvlist_info); +VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_rdma_qvlist_info); +#define virtchnl_rdma_qvlist_info_LEGACY_SIZEOF 16 /* VF reset states - these are written into the RSTAT register: * VFGEN_RSTAT on the VF @@ -1367,6 +1388,31 @@ struct virtchnl_fdir_del { VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del); +#define __vss_byone(p, member, count, old) \ + (struct_size(p, member, count) + (old - 1 - struct_size(p, member, 0))) + +#define __vss_byelem(p, member, count, old) \ + (struct_size(p, member, count - 1) + (old - struct_size(p, member, 0))) + +#define __vss_full(p, member, count, old) \ + (struct_size(p, member, count) + (old - struct_size(p, member, 0))) + +#define __vss(type, func, p, member, count) \ + struct type: func(p, member, count, type##_LEGACY_SIZEOF) + +#define virtchnl_struct_size(p, m, c) \ + _Generic(*p, \ + __vss(virtchnl_vf_resource, __vss_full, p, m, c), \ + __vss(virtchnl_vsi_queue_config_info, __vss_full, p, m, c), \ + __vss(virtchnl_irq_map_info, __vss_full, p, m, c), \ + __vss(virtchnl_ether_addr_list, __vss_full, p, m, c), \ + __vss(virtchnl_vlan_filter_list, __vss_full, p, m, c), \ + __vss(virtchnl_vlan_filter_list_v2, __vss_byelem, p, m, c), \ + __vss(virtchnl_tc_info, __vss_byelem, p, m, c), \ + __vss(virtchnl_rdma_qvlist_info, __vss_byelem, p, m, c), \ + __vss(virtchnl_rss_key, __vss_byone, p, m, c), \ + __vss(virtchnl_rss_lut, __vss_byone, p, m, c)) + /** * virtchnl_vc_validate_vf_msg * @ver: Virtchnl version info @@ -1401,24 +1447,23 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, valid_len = sizeof(struct virtchnl_rxq_info); break; case VIRTCHNL_OP_CONFIG_VSI_QUEUES: - valid_len = sizeof(struct virtchnl_vsi_queue_config_info); + valid_len = virtchnl_vsi_queue_config_info_LEGACY_SIZEOF; if (msglen >= valid_len) { struct virtchnl_vsi_queue_config_info *vqc = (struct virtchnl_vsi_queue_config_info *)msg; - valid_len += (vqc->num_queue_pairs * - sizeof(struct - virtchnl_queue_pair_info)); + valid_len = virtchnl_struct_size(vqc, qpair, + vqc->num_queue_pairs); if (vqc->num_queue_pairs == 0) err_msg_format = true; } break; case VIRTCHNL_OP_CONFIG_IRQ_MAP: - valid_len = sizeof(struct virtchnl_irq_map_info); + valid_len = virtchnl_irq_map_info_LEGACY_SIZEOF; if (msglen >= valid_len) { struct virtchnl_irq_map_info *vimi = (struct virtchnl_irq_map_info *)msg; - valid_len += (vimi->num_vectors * - sizeof(struct virtchnl_vector_map)); + valid_len = virtchnl_struct_size(vimi, vecmap, + vimi->num_vectors); if (vimi->num_vectors == 0) err_msg_format = true; } @@ -1429,23 +1474,24 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, break; case VIRTCHNL_OP_ADD_ETH_ADDR: case VIRTCHNL_OP_DEL_ETH_ADDR: - valid_len = sizeof(struct virtchnl_ether_addr_list); + valid_len = virtchnl_ether_addr_list_LEGACY_SIZEOF; if (msglen >= valid_len) { struct virtchnl_ether_addr_list *veal = (struct virtchnl_ether_addr_list *)msg; - valid_len += veal->num_elements * - sizeof(struct virtchnl_ether_addr); + valid_len = virtchnl_struct_size(veal, list, + veal->num_elements); if (veal->num_elements == 0) err_msg_format = true; } break; case VIRTCHNL_OP_ADD_VLAN: case VIRTCHNL_OP_DEL_VLAN: - valid_len = sizeof(struct virtchnl_vlan_filter_list); + valid_len = virtchnl_vlan_filter_list_LEGACY_SIZEOF; if (msglen >= valid_len) { struct virtchnl_vlan_filter_list *vfl = (struct virtchnl_vlan_filter_list *)msg; - valid_len += vfl->num_elements * sizeof(u16); + valid_len = virtchnl_struct_size(vfl, vlan_id, + vfl->num_elements); if (vfl->num_elements == 0) err_msg_format = true; } @@ -1469,29 +1515,31 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, case VIRTCHNL_OP_RELEASE_RDMA_IRQ_MAP: break; case VIRTCHNL_OP_CONFIG_RDMA_IRQ_MAP: - valid_len = sizeof(struct virtchnl_rdma_qvlist_info); + valid_len = virtchnl_rdma_qvlist_info_LEGACY_SIZEOF; if (msglen >= valid_len) { struct virtchnl_rdma_qvlist_info *qv = (struct virtchnl_rdma_qvlist_info *)msg; - valid_len += ((qv->num_vectors - 1) * - sizeof(struct virtchnl_rdma_qv_info)); + valid_len = virtchnl_struct_size(qv, qv_info, + qv->num_vectors); } break; case VIRTCHNL_OP_CONFIG_RSS_KEY: - valid_len = sizeof(struct virtchnl_rss_key); + valid_len = virtchnl_rss_key_LEGACY_SIZEOF; if (msglen >= valid_len) { struct virtchnl_rss_key *vrk = (struct virtchnl_rss_key *)msg; - valid_len += vrk->key_len - 1; + valid_len = virtchnl_struct_size(vrk, key, + vrk->key_len); } break; case VIRTCHNL_OP_CONFIG_RSS_LUT: - valid_len = sizeof(struct virtchnl_rss_lut); + valid_len = virtchnl_rss_lut_LEGACY_SIZEOF; if (msglen >= valid_len) { struct virtchnl_rss_lut *vrl = (struct virtchnl_rss_lut *)msg; - valid_len += vrl->lut_entries - 1; + valid_len = virtchnl_struct_size(vrl, lut, + vrl->lut_entries); } break; case VIRTCHNL_OP_GET_RSS_HENA_CAPS: @@ -1506,12 +1554,12 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, valid_len = sizeof(struct virtchnl_vf_res_request); break; case VIRTCHNL_OP_ENABLE_CHANNELS: - valid_len = sizeof(struct virtchnl_tc_info); + valid_len = virtchnl_tc_info_LEGACY_SIZEOF; if (msglen >= valid_len) { struct virtchnl_tc_info *vti = (struct virtchnl_tc_info *)msg; - valid_len += (vti->num_tc - 1) * - sizeof(struct virtchnl_channel_info); + valid_len = virtchnl_struct_size(vti, list, + vti->num_tc); if (vti->num_tc == 0) err_msg_format = true; } @@ -1538,13 +1586,13 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, break; case VIRTCHNL_OP_ADD_VLAN_V2: case VIRTCHNL_OP_DEL_VLAN_V2: - valid_len = sizeof(struct virtchnl_vlan_filter_list_v2); + valid_len = virtchnl_vlan_filter_list_v2_LEGACY_SIZEOF; if (msglen >= valid_len) { struct virtchnl_vlan_filter_list_v2 *vfl = (struct virtchnl_vlan_filter_list_v2 *)msg; - valid_len += (vfl->num_elements - 1) * - sizeof(struct virtchnl_vlan_filter); + valid_len = virtchnl_struct_size(vfl, filters, + vfl->num_elements); if (vfl->num_elements == 0) { err_msg_format = true; diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index fbad4fcd408e..1a97277f99b1 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -46,7 +46,6 @@ extern spinlock_t bdi_lock; extern struct list_head bdi_list; extern struct workqueue_struct *bdi_wq; -extern struct workqueue_struct *bdi_async_bio_wq; static inline bool wb_has_dirty_io(struct bdi_writeback *wb) { diff --git a/include/linux/badblocks.h b/include/linux/badblocks.h index 2426276b9bd3..670f2dae692f 100644 --- a/include/linux/badblocks.h +++ b/include/linux/badblocks.h @@ -15,6 +15,7 @@ #define BB_OFFSET(x) (((x) & BB_OFFSET_MASK) >> 9) #define BB_LEN(x) (((x) & BB_LEN_MASK) + 1) #define BB_ACK(x) (!!((x) & BB_ACK_MASK)) +#define BB_END(x) (BB_OFFSET(x) + BB_LEN(x)) #define BB_MAKE(a, l, ack) (((a)<<9) | ((l)-1) | ((u64)(!!(ack)) << 63)) /* Bad block numbers are stored sorted in a single page. @@ -41,6 +42,12 @@ struct badblocks { sector_t size; /* in sectors */ }; +struct badblocks_context { + sector_t start; + sector_t len; + int ack; +}; + int badblocks_check(struct badblocks *bb, sector_t s, int sectors, sector_t *first_bad, int *bad_sectors); int badblocks_set(struct badblocks *bb, sector_t s, int sectors, @@ -63,4 +70,27 @@ static inline void devm_exit_badblocks(struct device *dev, struct badblocks *bb) } badblocks_exit(bb); } + +static inline int badblocks_full(struct badblocks *bb) +{ + return (bb->count >= MAX_BADBLOCKS); +} + +static inline int badblocks_empty(struct badblocks *bb) +{ + return (bb->count == 0); +} + +static inline void set_changed(struct badblocks *bb) +{ + if (bb->changed != 1) + bb->changed = 1; +} + +static inline void clear_changed(struct badblocks *bb) +{ + if (bb->changed != 0) + bb->changed = 0; +} + #endif diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 8d51f69f9f5e..70f97f685bff 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -90,6 +90,16 @@ struct linux_binfmt { #endif } __randomize_layout; +#if IS_ENABLED(CONFIG_BINFMT_MISC) +struct binfmt_misc { + struct list_head entries; + rwlock_t entries_lock; + bool enabled; +} __randomize_layout; + +extern struct binfmt_misc init_binfmt_misc; +#endif + extern void __register_binfmt(struct linux_binfmt *fmt, int insert); /* Registration of default binfmt handlers */ diff --git a/include/linux/bio.h b/include/linux/bio.h index 11984ed29cb8..41d417ee1349 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -253,6 +253,11 @@ static inline struct page *bio_first_page_all(struct bio *bio) return bio_first_bvec_all(bio)->bv_page; } +static inline struct folio *bio_first_folio_all(struct bio *bio) +{ + return page_folio(bio_first_page_all(bio)); +} + static inline struct bio_vec *bio_last_bvec_all(struct bio *bio) { WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)); @@ -488,7 +493,12 @@ extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, extern void bio_copy_data(struct bio *dst, struct bio *src); extern void bio_free_pages(struct bio *bio); void guard_bio_eod(struct bio *bio); -void zero_fill_bio(struct bio *bio); +void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter); + +static inline void zero_fill_bio(struct bio *bio) +{ + zero_fill_bio_iter(bio, bio->bi_iter); +} static inline void bio_release_pages(struct bio *bio, bool mark_dirty) { diff --git a/include/linux/bitmap-str.h b/include/linux/bitmap-str.h new file mode 100644 index 000000000000..17caeca94cab --- /dev/null +++ b/include/linux/bitmap-str.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_BITMAP_STR_H +#define __LINUX_BITMAP_STR_H + +int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, unsigned long *dst, int nbits); +int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits); +extern int bitmap_print_bitmask_to_buf(char *buf, const unsigned long *maskp, + int nmaskbits, loff_t off, size_t count); +extern int bitmap_print_list_to_buf(char *buf, const unsigned long *maskp, + int nmaskbits, loff_t off, size_t count); +int bitmap_parse(const char *buf, unsigned int buflen, unsigned long *dst, int nbits); +int bitmap_parselist(const char *buf, unsigned long *maskp, int nmaskbits); +int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen, + unsigned long *dst, int nbits); + +#endif /* __LINUX_BITMAP_STR_H */ diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 03644237e1ef..99451431e4d6 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -6,10 +6,12 @@ #include <linux/align.h> #include <linux/bitops.h> +#include <linux/errno.h> #include <linux/find.h> #include <linux/limits.h> #include <linux/string.h> #include <linux/types.h> +#include <linux/bitmap-str.h> struct device; @@ -200,14 +202,6 @@ bitmap_find_next_zero_area(unsigned long *map, align_mask, 0); } -int bitmap_parse(const char *buf, unsigned int buflen, - unsigned long *dst, int nbits); -int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, - unsigned long *dst, int nbits); -int bitmap_parselist(const char *buf, unsigned long *maskp, - int nmaskbits); -int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen, - unsigned long *dst, int nbits); void bitmap_remap(unsigned long *dst, const unsigned long *src, const unsigned long *old, const unsigned long *new, unsigned int nbits); int bitmap_bitremap(int oldbit, @@ -216,23 +210,6 @@ void bitmap_onto(unsigned long *dst, const unsigned long *orig, const unsigned long *relmap, unsigned int bits); void bitmap_fold(unsigned long *dst, const unsigned long *orig, unsigned int sz, unsigned int nbits); -int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order); -void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); -int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); - -#ifdef __BIG_ENDIAN -void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits); -#else -#define bitmap_copy_le bitmap_copy -#endif -int bitmap_print_to_pagebuf(bool list, char *buf, - const unsigned long *maskp, int nmaskbits); - -extern int bitmap_print_bitmask_to_buf(char *buf, const unsigned long *maskp, - int nmaskbits, loff_t off, size_t count); - -extern int bitmap_print_list_to_buf(char *buf, const unsigned long *maskp, - int nmaskbits, loff_t off, size_t count); #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) @@ -519,6 +496,66 @@ static inline void bitmap_next_set_region(unsigned long *bitmap, } /** + * bitmap_release_region - release allocated bitmap region + * @bitmap: array of unsigned longs corresponding to the bitmap + * @pos: beginning of bit region to release + * @order: region size (log base 2 of number of bits) to release + * + * This is the complement to __bitmap_find_free_region() and releases + * the found region (by clearing it in the bitmap). + */ +static inline void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order) +{ + bitmap_clear(bitmap, pos, BIT(order)); +} + +/** + * bitmap_allocate_region - allocate bitmap region + * @bitmap: array of unsigned longs corresponding to the bitmap + * @pos: beginning of bit region to allocate + * @order: region size (log base 2 of number of bits) to allocate + * + * Allocate (set bits in) a specified region of a bitmap. + * + * Returns: 0 on success, or %-EBUSY if specified region wasn't + * free (not all bits were zero). + */ +static inline int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order) +{ + unsigned int len = BIT(order); + + if (find_next_bit(bitmap, pos + len, pos) < pos + len) + return -EBUSY; + bitmap_set(bitmap, pos, len); + return 0; +} + +/** + * bitmap_find_free_region - find a contiguous aligned mem region + * @bitmap: array of unsigned longs corresponding to the bitmap + * @bits: number of bits in the bitmap + * @order: region size (log base 2 of number of bits) to find + * + * Find a region of free (zero) bits in a @bitmap of @bits bits and + * allocate them (set them to one). Only consider regions of length + * a power (@order) of two, aligned to that power of two, which + * makes the search algorithm much faster. + * + * Returns: the bit offset in bitmap of the allocated region, + * or -errno on failure. + */ +static inline int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order) +{ + unsigned int pos, end; /* scans bitmap by regions of size order */ + + for (pos = 0; (end = pos + BIT(order)) <= bits; pos = end) { + if (!bitmap_allocate_region(bitmap, pos, order)) + return pos; + } + return -ENOMEM; +} + +/** * BITMAP_FROM_U64() - Represent u64 value in the format suitable for bitmap. * @n: u64 value * diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 495ca198775f..1ab3081c82ed 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -32,8 +32,6 @@ typedef __u32 __bitwise req_flags_t; #define RQF_FLUSH_SEQ ((__force req_flags_t)(1 << 4)) /* merge of different types, fail separately */ #define RQF_MIXED_MERGE ((__force req_flags_t)(1 << 5)) -/* track inflight for MQ */ -#define RQF_MQ_INFLIGHT ((__force req_flags_t)(1 << 6)) /* don't call prep for this one */ #define RQF_DONTPREP ((__force req_flags_t)(1 << 7)) /* use hctx->sched_tags */ @@ -178,14 +176,10 @@ struct request { struct { unsigned int seq; - struct list_head list; rq_end_io_fn *saved_end_io; } flush; - union { - struct __call_single_data csd; - u64 fifo_time; - }; + u64 fifo_time; /* * completion callback. diff --git a/include/linux/blk-pm.h b/include/linux/blk-pm.h index 2580e05a8ab6..004b38a538ff 100644 --- a/include/linux/blk-pm.h +++ b/include/linux/blk-pm.h @@ -15,7 +15,6 @@ extern int blk_pre_runtime_suspend(struct request_queue *q); extern void blk_post_runtime_suspend(struct request_queue *q, int err); extern void blk_pre_runtime_resume(struct request_queue *q); extern void blk_post_runtime_resume(struct request_queue *q); -extern void blk_set_runtime_active(struct request_queue *q); #else static inline void blk_pm_runtime_init(struct request_queue *q, struct device *dev) {} diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 0bad62cca3d0..b29ebd53417d 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -49,10 +49,10 @@ struct block_device { bool bd_write_holder; bool bd_has_submit_bio; dev_t bd_dev; + struct inode *bd_inode; /* will die */ + atomic_t bd_openers; spinlock_t bd_size_lock; /* for bd_inode->i_size updates */ - struct inode * bd_inode; /* will die */ - struct super_block * bd_super; void * bd_claiming; void * bd_holder; const struct blk_holder_ops *bd_holder_ops; @@ -70,6 +70,7 @@ struct block_device { #ifdef CONFIG_FAIL_MAKE_REQUEST bool bd_make_it_fail; #endif + bool bd_ro_warned; /* * keep this out-of-line as it's both big and not needed in the fast * path diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 87d94be7825a..51fa7ffdee83 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -538,6 +538,7 @@ struct request_queue { #define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ #define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ +#define QUEUE_FLAG_HW_WC 18 /* Write back caching supported */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ #define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ @@ -750,7 +751,8 @@ static inline int bdev_read_only(struct block_device *bdev) } bool set_capacity_and_notify(struct gendisk *disk, sector_t size); -bool disk_force_media_change(struct gendisk *disk, unsigned int events); +void disk_force_media_change(struct gendisk *disk); +void bdev_mark_dead(struct block_device *bdev, bool surprise); void add_disk_randomness(struct gendisk *disk) __latent_entropy; void rand_initialize_disk(struct gendisk *disk); @@ -809,7 +811,6 @@ int __register_blkdev(unsigned int major, const char *name, void unregister_blkdev(unsigned int major, const char *name); bool disk_check_media_change(struct gendisk *disk); -int __invalidate_device(struct block_device *bdev, bool kill_dirty); void set_capacity(struct gendisk *disk, sector_t size); #ifdef CONFIG_BLOCK_HOLDER_DEPRECATED @@ -846,6 +847,7 @@ extern const char *blk_op_str(enum req_op op); int blk_status_to_errno(blk_status_t status); blk_status_t errno_to_blk_status(int errno); +const char *blk_status_to_str(blk_status_t status); /* only poll the hardware once, don't continue until a completion was found */ #define BLK_POLL_ONESHOT (1 << 0) @@ -1460,9 +1462,16 @@ void blkdev_show(struct seq_file *seqf, off_t offset); #endif struct blk_holder_ops { - void (*mark_dead)(struct block_device *bdev); + void (*mark_dead)(struct block_device *bdev, bool surprise); + + /* + * Sync the file system mounted on the block device. + */ + void (*sync)(struct block_device *bdev); }; +extern const struct blk_holder_ops fs_holder_ops; + /* * Return the correct open flags for blkdev_get_by_* for super block flags * as stored in sb->s_flags. @@ -1470,14 +1479,25 @@ struct blk_holder_ops { #define sb_open_mode(flags) \ (BLK_OPEN_READ | (((flags) & SB_RDONLY) ? 0 : BLK_OPEN_WRITE)) +struct bdev_handle { + struct block_device *bdev; + void *holder; + blk_mode_t mode; +}; + struct block_device *blkdev_get_by_dev(dev_t dev, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops); struct block_device *blkdev_get_by_path(const char *path, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops); +struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, + const struct blk_holder_ops *hops); +struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode, + void *holder, const struct blk_holder_ops *hops); int bd_prepare_to_claim(struct block_device *bdev, void *holder, const struct blk_holder_ops *hops); void bd_abort_claiming(struct block_device *bdev, void *holder); void blkdev_put(struct block_device *bdev, void *holder); +void bdev_release(struct bdev_handle *handle); /* just for blk-cgroup, don't use elsewhere */ struct block_device *blkdev_get_no_open(dev_t dev); @@ -1521,8 +1541,6 @@ static inline int early_lookup_bdev(const char *pathname, dev_t *dev) } #endif /* CONFIG_BLOCK */ -int fsync_bdev(struct block_device *bdev); - int freeze_bdev(struct block_device *bdev); int thaw_bdev(struct block_device *bdev); diff --git a/include/linux/bootmem_info.h b/include/linux/bootmem_info.h index e1a3c9c9754c..cffa38a73618 100644 --- a/include/linux/bootmem_info.h +++ b/include/linux/bootmem_info.h @@ -60,7 +60,7 @@ static inline void get_page_bootmem(unsigned long info, struct page *page, static inline void free_bootmem_page(struct page *page) { - kmemleak_free_part(page_to_virt(page), PAGE_SIZE); + kmemleak_free_part_phys(PFN_PHYS(page_to_pfn(page)), PAGE_SIZE); free_reserved_page(page); } #endif diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h index 7b121bd780eb..0985221d5478 100644 --- a/include/linux/bpf-cgroup-defs.h +++ b/include/linux/bpf-cgroup-defs.h @@ -28,19 +28,24 @@ enum cgroup_bpf_attach_type { CGROUP_INET6_BIND, CGROUP_INET4_CONNECT, CGROUP_INET6_CONNECT, + CGROUP_UNIX_CONNECT, CGROUP_INET4_POST_BIND, CGROUP_INET6_POST_BIND, CGROUP_UDP4_SENDMSG, CGROUP_UDP6_SENDMSG, + CGROUP_UNIX_SENDMSG, CGROUP_SYSCTL, CGROUP_UDP4_RECVMSG, CGROUP_UDP6_RECVMSG, + CGROUP_UNIX_RECVMSG, CGROUP_GETSOCKOPT, CGROUP_SETSOCKOPT, CGROUP_INET4_GETPEERNAME, CGROUP_INET6_GETPEERNAME, + CGROUP_UNIX_GETPEERNAME, CGROUP_INET4_GETSOCKNAME, CGROUP_INET6_GETSOCKNAME, + CGROUP_UNIX_GETSOCKNAME, CGROUP_INET_SOCK_RELEASE, CGROUP_LSM_START, CGROUP_LSM_END = CGROUP_LSM_START + CGROUP_LSM_NUM - 1, diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 57e9e109257e..a789266feac3 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -48,19 +48,24 @@ to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type) CGROUP_ATYPE(CGROUP_INET6_BIND); CGROUP_ATYPE(CGROUP_INET4_CONNECT); CGROUP_ATYPE(CGROUP_INET6_CONNECT); + CGROUP_ATYPE(CGROUP_UNIX_CONNECT); CGROUP_ATYPE(CGROUP_INET4_POST_BIND); CGROUP_ATYPE(CGROUP_INET6_POST_BIND); CGROUP_ATYPE(CGROUP_UDP4_SENDMSG); CGROUP_ATYPE(CGROUP_UDP6_SENDMSG); + CGROUP_ATYPE(CGROUP_UNIX_SENDMSG); CGROUP_ATYPE(CGROUP_SYSCTL); CGROUP_ATYPE(CGROUP_UDP4_RECVMSG); CGROUP_ATYPE(CGROUP_UDP6_RECVMSG); + CGROUP_ATYPE(CGROUP_UNIX_RECVMSG); CGROUP_ATYPE(CGROUP_GETSOCKOPT); CGROUP_ATYPE(CGROUP_SETSOCKOPT); CGROUP_ATYPE(CGROUP_INET4_GETPEERNAME); CGROUP_ATYPE(CGROUP_INET6_GETPEERNAME); + CGROUP_ATYPE(CGROUP_UNIX_GETPEERNAME); CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME); + CGROUP_ATYPE(CGROUP_UNIX_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE); default: return CGROUP_BPF_ATTACH_TYPE_INVALID; @@ -120,6 +125,7 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk, int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, struct sockaddr *uaddr, + int *uaddrlen, enum cgroup_bpf_attach_type atype, void *t_ctx, u32 *flags); @@ -137,11 +143,12 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, enum cgroup_bpf_attach_type atype); int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level, - int *optname, char __user *optval, + int *optname, sockptr_t optval, int *optlen, char **kernel_optval); + int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, - int optname, char __user *optval, - int __user *optlen, int max_optlen, + int optname, sockptr_t optval, + sockptr_t optlen, int max_optlen, int retval); int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level, @@ -199,9 +206,9 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb) \ ({ \ int __ret = 0; \ - if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk && sk == skb->sk) { \ + if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk) { \ typeof(sk) __sk = sk_to_full_sk(sk); \ - if (sk_fullsock(__sk) && \ + if (sk_fullsock(__sk) && __sk == skb_to_full_sk(skb) && \ cgroup_bpf_sock_enabled(__sk, CGROUP_INET_EGRESS)) \ __ret = __cgroup_bpf_run_filter_skb(__sk, skb, \ CGROUP_INET_EGRESS); \ @@ -230,22 +237,22 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) \ BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET6_POST_BIND) -#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) \ +#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - NULL, NULL); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, NULL, NULL); \ __ret; \ }) -#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) \ +#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) { \ lock_sock(sk); \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - t_ctx, NULL); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, t_ctx, NULL); \ release_sock(sk); \ } \ __ret; \ @@ -256,14 +263,14 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, * (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check * should be bypassed (BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE). */ -#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, bind_flags) \ +#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, bind_flags) \ ({ \ u32 __flags = 0; \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) { \ lock_sock(sk); \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - NULL, &__flags); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, NULL, &__flags); \ release_sock(sk); \ if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE) \ *bind_flags |= BIND_NO_CAP_NET_BIND_SERVICE; \ @@ -276,29 +283,38 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, cgroup_bpf_enabled(CGROUP_INET6_CONNECT)) && \ (sk)->sk_prot->pre_connect) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET4_CONNECT) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT) + +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT) + +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT, NULL) + +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT, NULL) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET6_CONNECT) +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_CONNECT, NULL) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET4_CONNECT, NULL) +#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_SENDMSG, t_ctx) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET6_CONNECT, NULL) +#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_SENDMSG, t_ctx) -#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_SENDMSG, t_ctx) +#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_SENDMSG, t_ctx) -#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_SENDMSG, t_ctx) +#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_RECVMSG, NULL) -#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_RECVMSG, NULL) +#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_RECVMSG, NULL) -#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_RECVMSG, NULL) +#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_RECVMSG, NULL) /* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a * fullsock and its parent fullsock cannot be traced by @@ -377,7 +393,7 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT)) \ - get_user(__ret, optlen); \ + copy_from_sockptr(&__ret, optlen, sizeof(int)); \ __ret; \ }) @@ -477,24 +493,27 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, } #define cgroup_bpf_enabled(atype) (0) -#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) ({ 0; }) +#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype) ({ 0; }) #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, flags) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, flags) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; }) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f58895830ada..cff5bb08820e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -55,7 +55,7 @@ struct cgroup; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; extern struct kobject *btf_kobj; -extern struct bpf_mem_alloc bpf_global_ma; +extern struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma; extern bool bpf_global_ma_set; typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64); @@ -180,14 +180,15 @@ enum btf_field_type { BPF_TIMER = (1 << 1), BPF_KPTR_UNREF = (1 << 2), BPF_KPTR_REF = (1 << 3), - BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF, - BPF_LIST_HEAD = (1 << 4), - BPF_LIST_NODE = (1 << 5), - BPF_RB_ROOT = (1 << 6), - BPF_RB_NODE = (1 << 7), + BPF_KPTR_PERCPU = (1 << 4), + BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF | BPF_KPTR_PERCPU, + BPF_LIST_HEAD = (1 << 5), + BPF_LIST_NODE = (1 << 6), + BPF_RB_ROOT = (1 << 7), + BPF_RB_NODE = (1 << 8), BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD | BPF_RB_NODE | BPF_RB_ROOT, - BPF_REFCOUNT = (1 << 8), + BPF_REFCOUNT = (1 << 9), }; typedef void (*btf_dtor_kfunc_t)(void *); @@ -228,6 +229,18 @@ struct btf_record { struct btf_field fields[]; }; +/* Non-opaque version of bpf_rb_node in uapi/linux/bpf.h */ +struct bpf_rb_node_kern { + struct rb_node rb_node; + void *owner; +} __attribute__((aligned(8))); + +/* Non-opaque version of bpf_list_node in uapi/linux/bpf.h */ +struct bpf_list_node_kern { + struct list_head list_head; + void *owner; +} __attribute__((aligned(8))); + struct bpf_map { /* The first two cachelines with read-mostly members of which some * are also accessed in fast-path (e.g. ops, max_entries). @@ -275,6 +288,7 @@ struct bpf_map { } owner; bool bypass_spec_v1; bool frozen; /* write-once; write-protected by freeze_mutex */ + s64 __percpu *elem_count; }; static inline const char *btf_field_type_name(enum btf_field_type type) @@ -287,6 +301,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type) case BPF_KPTR_UNREF: case BPF_KPTR_REF: return "kptr"; + case BPF_KPTR_PERCPU: + return "percpu_kptr"; case BPF_LIST_HEAD: return "bpf_list_head"; case BPF_LIST_NODE: @@ -312,6 +328,7 @@ static inline u32 btf_field_type_size(enum btf_field_type type) return sizeof(struct bpf_timer); case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: return sizeof(u64); case BPF_LIST_HEAD: return sizeof(struct bpf_list_head); @@ -338,6 +355,7 @@ static inline u32 btf_field_type_align(enum btf_field_type type) return __alignof__(struct bpf_timer); case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: return __alignof__(u64); case BPF_LIST_HEAD: return __alignof__(struct bpf_list_head); @@ -376,6 +394,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr) case BPF_TIMER: case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: break; default: WARN_ON_ONCE(1); @@ -425,7 +444,7 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) size /= sizeof(long); while (size--) - *ldst++ = *lsrc++; + data_race(*ldst++ = *lsrc++); } /* copy everything but bpf_spin_lock, bpf_timer, and kptrs. There could be one of each. */ @@ -640,7 +659,8 @@ enum bpf_type_flag { MEM_RCU = BIT(13 + BPF_BASE_TYPE_BITS), /* Used to tag PTR_TO_BTF_ID | MEM_ALLOC references which are non-owning. - * Currently only valid for linked-list and rbtree nodes. + * Currently only valid for linked-list and rbtree nodes. If the nodes + * have a bpf_refcount_field, they must be tagged MEM_RCU as well. */ NON_OWN_REF = BIT(14 + BPF_BASE_TYPE_BITS), @@ -889,10 +909,14 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size) aux->ctx_field_size = size; } +static bool bpf_is_ldimm64(const struct bpf_insn *insn) +{ + return insn->code == (BPF_LD | BPF_IMM | BPF_DW); +} + static inline bool bpf_pseudo_func(const struct bpf_insn *insn) { - return insn->code == (BPF_LD | BPF_IMM | BPF_DW) && - insn->src_reg == BPF_PSEUDO_FUNC; + return bpf_is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC; } struct bpf_prog_ops { @@ -1015,6 +1039,11 @@ struct btf_func_model { */ #define BPF_TRAMP_F_SHARE_IPMODIFY BIT(6) +/* Indicate that current trampoline is in a tail call context. Then, it has to + * cache and restore tail_call_cnt to avoid infinite tail call loop. + */ +#define BPF_TRAMP_F_TAIL_CALL_CTX BIT(7) + /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 * bytes on x86. */ @@ -1293,7 +1322,7 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, static inline struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info) { - return ERR_PTR(-EOPNOTSUPP); + return NULL; } static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {} #define DEFINE_BPF_DISPATCHER(name) @@ -1364,6 +1393,7 @@ struct bpf_prog_aux { u32 stack_depth; u32 id; u32 func_cnt; /* used by non-func prog as the number of func progs */ + u32 real_func_cnt; /* includes hidden progs, only used for JIT and freeing progs */ u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */ u32 attach_btf_id; /* in-kernel BTF type id to attach to */ u32 ctx_arg_info_size; @@ -1384,6 +1414,8 @@ struct bpf_prog_aux { bool sleepable; bool tail_call_reachable; bool xdp_has_frags; + bool exception_cb; + bool exception_boundary; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; /* function name for valid attach_btf_id */ @@ -1406,6 +1438,7 @@ struct bpf_prog_aux { int cgroup_atype; /* enum cgroup_bpf_attach_type */ struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]; char name[BPF_OBJ_NAME_LEN]; + unsigned int (*bpf_exception_cb)(u64 cookie, u64 sp, u64 bp); #ifdef CONFIG_SECURITY void *security; #endif @@ -1537,6 +1570,53 @@ struct bpf_struct_ops_value; struct btf_member; #define BPF_STRUCT_OPS_MAX_NR_MEMBERS 64 +/** + * struct bpf_struct_ops - A structure of callbacks allowing a subsystem to + * define a BPF_MAP_TYPE_STRUCT_OPS map type composed + * of BPF_PROG_TYPE_STRUCT_OPS progs. + * @verifier_ops: A structure of callbacks that are invoked by the verifier + * when determining whether the struct_ops progs in the + * struct_ops map are valid. + * @init: A callback that is invoked a single time, and before any other + * callback, to initialize the structure. A nonzero return value means + * the subsystem could not be initialized. + * @check_member: When defined, a callback invoked by the verifier to allow + * the subsystem to determine if an entry in the struct_ops map + * is valid. A nonzero return value means that the map is + * invalid and should be rejected by the verifier. + * @init_member: A callback that is invoked for each member of the struct_ops + * map to allow the subsystem to initialize the member. A nonzero + * value means the member could not be initialized. This callback + * is exclusive with the @type, @type_id, @value_type, and + * @value_id fields. + * @reg: A callback that is invoked when the struct_ops map has been + * initialized and is being attached to. Zero means the struct_ops map + * has been successfully registered and is live. A nonzero return value + * means the struct_ops map could not be registered. + * @unreg: A callback that is invoked when the struct_ops map should be + * unregistered. + * @update: A callback that is invoked when the live struct_ops map is being + * updated to contain new values. This callback is only invoked when + * the struct_ops map is loaded with BPF_F_LINK. If not defined, the + * it is assumed that the struct_ops map cannot be updated. + * @validate: A callback that is invoked after all of the members have been + * initialized. This callback should perform static checks on the + * map, meaning that it should either fail or succeed + * deterministically. A struct_ops map that has been validated may + * not necessarily succeed in being registered if the call to @reg + * fails. For example, a valid struct_ops map may be loaded, but + * then fail to be registered due to there being another active + * struct_ops map on the system in the subsystem already. For this + * reason, if this callback is not defined, the check is skipped as + * the struct_ops map will have final verification performed in + * @reg. + * @type: BTF type. + * @value_type: Value type. + * @name: The name of the struct bpf_struct_ops object. + * @func_models: Func models + * @type_id: BTF type id. + * @value_id: BTF value id. + */ struct bpf_struct_ops { const struct bpf_verifier_ops *verifier_ops; int (*init)(struct btf *btf); @@ -1806,6 +1886,7 @@ struct bpf_cg_run_ctx { struct bpf_trace_run_ctx { struct bpf_run_ctx run_ctx; u64 bpf_cookie; + bool is_uprobe; }; struct bpf_tramp_run_ctx { @@ -1854,6 +1935,8 @@ bpf_prog_run_array(const struct bpf_prog_array *array, if (unlikely(!array)) return ret; + run_ctx.is_uprobe = false; + migrate_disable(); old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); item = &array->items[0]; @@ -1878,8 +1961,8 @@ bpf_prog_run_array(const struct bpf_prog_array *array, * rcu-protected dynamically sized maps. */ static __always_inline u32 -bpf_prog_run_array_sleepable(const struct bpf_prog_array __rcu *array_rcu, - const void *ctx, bpf_prog_run_fn run_prog) +bpf_prog_run_array_uprobe(const struct bpf_prog_array __rcu *array_rcu, + const void *ctx, bpf_prog_run_fn run_prog) { const struct bpf_prog_array_item *item; const struct bpf_prog *prog; @@ -1893,6 +1976,8 @@ bpf_prog_run_array_sleepable(const struct bpf_prog_array __rcu *array_rcu, rcu_read_lock_trace(); migrate_disable(); + run_ctx.is_uprobe = true; + array = rcu_dereference_check(array_rcu, rcu_read_lock_trace_held()); if (unlikely(!array)) goto out; @@ -1977,6 +2062,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec); bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b); void bpf_obj_free_timer(const struct btf_record *rec, void *obj); void bpf_obj_free_fields(const struct btf_record *rec, void *obj); +void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu); struct bpf_map *bpf_map_get(u32 ufd); struct bpf_map *bpf_map_get_with_uref(u32 ufd); @@ -2040,6 +2126,35 @@ bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align, } #endif +static inline int +bpf_map_init_elem_count(struct bpf_map *map) +{ + size_t size = sizeof(*map->elem_count), align = size; + gfp_t flags = GFP_USER | __GFP_NOWARN; + + map->elem_count = bpf_map_alloc_percpu(map, size, align, flags); + if (!map->elem_count) + return -ENOMEM; + + return 0; +} + +static inline void +bpf_map_free_elem_count(struct bpf_map *map) +{ + free_percpu(map->elem_count); +} + +static inline void bpf_map_inc_elem_count(struct bpf_map *map) +{ + this_cpu_inc(*map->elem_count); +} + +static inline void bpf_map_dec_elem_count(struct bpf_map *map) +{ + this_cpu_dec(*map->elem_count); +} + extern int sysctl_unprivileged_bpf_disabled; static inline bool bpf_allow_ptr_leaks(void) @@ -2054,12 +2169,12 @@ static inline bool bpf_allow_uninit_stack(void) static inline bool bpf_bypass_spec_v1(void) { - return perfmon_capable(); + return cpu_mitigations_off() || perfmon_capable(); } static inline bool bpf_bypass_spec_v4(void) { - return perfmon_capable(); + return cpu_mitigations_off() || perfmon_capable(); } int bpf_map_new_fd(struct bpf_map *map, int flags); @@ -2073,7 +2188,6 @@ void bpf_link_cleanup(struct bpf_link_primer *primer); void bpf_link_inc(struct bpf_link *link); void bpf_link_put(struct bpf_link *link); int bpf_link_new_fd(struct bpf_link *link); -struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd); struct bpf_link *bpf_link_get_from_fd(u32 ufd); struct bpf_link *bpf_link_get_curr_or_next(u32 *id); @@ -2313,9 +2427,11 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs); int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *reg); + struct bpf_reg_state *reg, bool is_ex_cb); int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog, struct btf *btf, const struct btf_type *t); +const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt, + int comp_idx, const char *tag_key); struct bpf_prog *bpf_prog_by_id(u32 id); struct bpf_link *bpf_link_by_id(u32 id); @@ -2367,6 +2483,9 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, enum bpf_dynptr_type type, u32 offset, u32 size); void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr); + +bool dev_check_flush(void); +bool cpu_map_check_flush(void); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -2619,6 +2738,18 @@ static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr) } #endif /* CONFIG_BPF_SYSCALL */ +static __always_inline int +bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr) +{ + int ret = -EFAULT; + + if (IS_ENABLED(CONFIG_BPF_EVENTS)) + ret = copy_from_kernel_nofault(dst, unsafe_ptr, size); + if (unlikely(ret < 0)) + memset(dst, 0, size); + return ret; +} + void __bpf_free_used_btfs(struct bpf_prog_aux *aux, struct btf_mod_pair *used_btfs, u32 len); @@ -2799,6 +2930,22 @@ static inline int sock_map_bpf_prog_query(const union bpf_attr *attr, #endif /* CONFIG_BPF_SYSCALL */ #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ +static __always_inline void +bpf_prog_inc_misses_counters(const struct bpf_prog_array *array) +{ + const struct bpf_prog_array_item *item; + struct bpf_prog *prog; + + if (unlikely(!array)) + return; + + item = &array->items[0]; + while ((prog = READ_ONCE(item->prog))) { + bpf_prog_inc_misses_counter(prog); + item++; + } +} + #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) void bpf_sk_reuseport_detach(struct sock *sk); int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, @@ -3028,6 +3175,9 @@ enum bpf_text_poke_type { int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *addr1, void *addr2); +void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, + struct bpf_prog *new, struct bpf_prog *old); + void *bpf_arch_text_copy(void *dst, void *src, size_t len); int bpf_arch_text_invalidate(void *dst, size_t len); @@ -3077,4 +3227,9 @@ static inline gfp_t bpf_memcg_flags(gfp_t flags) return flags; } +static inline bool bpf_is_subprog(const struct bpf_prog *prog) +{ + return prog->aux->func_idx != 0; +} + #endif /* _LINUX_BPF_H */ diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h index 3929be5743f4..bb1223b21308 100644 --- a/include/linux/bpf_mem_alloc.h +++ b/include/linux/bpf_mem_alloc.h @@ -11,6 +11,7 @@ struct bpf_mem_caches; struct bpf_mem_alloc { struct bpf_mem_caches __percpu *caches; struct bpf_mem_cache __percpu *cache; + bool percpu; struct work_struct work; }; @@ -27,10 +28,12 @@ void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma); /* kmalloc/kfree equivalent: */ void *bpf_mem_alloc(struct bpf_mem_alloc *ma, size_t size); void bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr); +void bpf_mem_free_rcu(struct bpf_mem_alloc *ma, void *ptr); /* kmem_cache_alloc/free equivalent: */ void *bpf_mem_cache_alloc(struct bpf_mem_alloc *ma); void bpf_mem_cache_free(struct bpf_mem_alloc *ma, void *ptr); +void bpf_mem_cache_free_rcu(struct bpf_mem_alloc *ma, void *ptr); void bpf_mem_cache_raw_free(void *ptr); void *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags); diff --git a/include/linux/bpf_mprog.h b/include/linux/bpf_mprog.h new file mode 100644 index 000000000000..929225f7b095 --- /dev/null +++ b/include/linux/bpf_mprog.h @@ -0,0 +1,343 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2023 Isovalent */ +#ifndef __BPF_MPROG_H +#define __BPF_MPROG_H + +#include <linux/bpf.h> + +/* bpf_mprog framework: + * + * bpf_mprog is a generic layer for multi-program attachment. In-kernel users + * of the bpf_mprog don't need to care about the dependency resolution + * internals, they can just consume it with few API calls. Currently available + * dependency directives are BPF_F_{BEFORE,AFTER} which enable insertion of + * a BPF program or BPF link relative to an existing BPF program or BPF link + * inside the multi-program array as well as prepend and append behavior if + * no relative object was specified, see corresponding selftests for concrete + * examples (e.g. tc_links and tc_opts test cases of test_progs). + * + * Usage of bpf_mprog_{attach,detach,query}() core APIs with pseudo code: + * + * Attach case: + * + * struct bpf_mprog_entry *entry, *entry_new; + * int ret; + * + * // bpf_mprog user-side lock + * // fetch active @entry from attach location + * [...] + * ret = bpf_mprog_attach(entry, &entry_new, [...]); + * if (!ret) { + * if (entry != entry_new) { + * // swap @entry to @entry_new at attach location + * // ensure there are no inflight users of @entry: + * synchronize_rcu(); + * } + * bpf_mprog_commit(entry); + * } else { + * // error path, bail out, propagate @ret + * } + * // bpf_mprog user-side unlock + * + * Detach case: + * + * struct bpf_mprog_entry *entry, *entry_new; + * int ret; + * + * // bpf_mprog user-side lock + * // fetch active @entry from attach location + * [...] + * ret = bpf_mprog_detach(entry, &entry_new, [...]); + * if (!ret) { + * // all (*) marked is optional and depends on the use-case + * // whether bpf_mprog_bundle should be freed or not + * if (!bpf_mprog_total(entry_new)) (*) + * entry_new = NULL (*) + * // swap @entry to @entry_new at attach location + * // ensure there are no inflight users of @entry: + * synchronize_rcu(); + * bpf_mprog_commit(entry); + * if (!entry_new) (*) + * // free bpf_mprog_bundle (*) + * } else { + * // error path, bail out, propagate @ret + * } + * // bpf_mprog user-side unlock + * + * Query case: + * + * struct bpf_mprog_entry *entry; + * int ret; + * + * // bpf_mprog user-side lock + * // fetch active @entry from attach location + * [...] + * ret = bpf_mprog_query(attr, uattr, entry); + * // bpf_mprog user-side unlock + * + * Data/fast path: + * + * struct bpf_mprog_entry *entry; + * struct bpf_mprog_fp *fp; + * struct bpf_prog *prog; + * int ret = [...]; + * + * rcu_read_lock(); + * // fetch active @entry from attach location + * [...] + * bpf_mprog_foreach_prog(entry, fp, prog) { + * ret = bpf_prog_run(prog, [...]); + * // process @ret from program + * } + * [...] + * rcu_read_unlock(); + * + * bpf_mprog locking considerations: + * + * bpf_mprog_{attach,detach,query}() must be protected by an external lock + * (like RTNL in case of tcx). + * + * bpf_mprog_entry pointer can be an __rcu annotated pointer (in case of tcx + * the netdevice has tcx_ingress and tcx_egress __rcu pointer) which gets + * updated via rcu_assign_pointer() pointing to the active bpf_mprog_entry of + * the bpf_mprog_bundle. + * + * Fast path accesses the active bpf_mprog_entry within RCU critical section + * (in case of tcx it runs in NAPI which provides RCU protection there, + * other users might need explicit rcu_read_lock()). The bpf_mprog_commit() + * assumes that for the old bpf_mprog_entry there are no inflight users + * anymore. + * + * The READ_ONCE()/WRITE_ONCE() pairing for bpf_mprog_fp's prog access is for + * the replacement case where we don't swap the bpf_mprog_entry. + */ + +#define bpf_mprog_foreach_tuple(entry, fp, cp, t) \ + for (fp = &entry->fp_items[0], cp = &entry->parent->cp_items[0];\ + ({ \ + t.prog = READ_ONCE(fp->prog); \ + t.link = cp->link; \ + t.prog; \ + }); \ + fp++, cp++) + +#define bpf_mprog_foreach_prog(entry, fp, p) \ + for (fp = &entry->fp_items[0]; \ + (p = READ_ONCE(fp->prog)); \ + fp++) + +#define BPF_MPROG_MAX 64 + +struct bpf_mprog_fp { + struct bpf_prog *prog; +}; + +struct bpf_mprog_cp { + struct bpf_link *link; +}; + +struct bpf_mprog_entry { + struct bpf_mprog_fp fp_items[BPF_MPROG_MAX]; + struct bpf_mprog_bundle *parent; +}; + +struct bpf_mprog_bundle { + struct bpf_mprog_entry a; + struct bpf_mprog_entry b; + struct bpf_mprog_cp cp_items[BPF_MPROG_MAX]; + struct bpf_prog *ref; + atomic64_t revision; + u32 count; +}; + +struct bpf_tuple { + struct bpf_prog *prog; + struct bpf_link *link; +}; + +static inline struct bpf_mprog_entry * +bpf_mprog_peer(const struct bpf_mprog_entry *entry) +{ + if (entry == &entry->parent->a) + return &entry->parent->b; + else + return &entry->parent->a; +} + +static inline void bpf_mprog_bundle_init(struct bpf_mprog_bundle *bundle) +{ + BUILD_BUG_ON(sizeof(bundle->a.fp_items[0]) > sizeof(u64)); + BUILD_BUG_ON(ARRAY_SIZE(bundle->a.fp_items) != + ARRAY_SIZE(bundle->cp_items)); + + memset(bundle, 0, sizeof(*bundle)); + atomic64_set(&bundle->revision, 1); + bundle->a.parent = bundle; + bundle->b.parent = bundle; +} + +static inline void bpf_mprog_inc(struct bpf_mprog_entry *entry) +{ + entry->parent->count++; +} + +static inline void bpf_mprog_dec(struct bpf_mprog_entry *entry) +{ + entry->parent->count--; +} + +static inline int bpf_mprog_max(void) +{ + return ARRAY_SIZE(((struct bpf_mprog_entry *)NULL)->fp_items) - 1; +} + +static inline int bpf_mprog_total(struct bpf_mprog_entry *entry) +{ + int total = entry->parent->count; + + WARN_ON_ONCE(total > bpf_mprog_max()); + return total; +} + +static inline bool bpf_mprog_exists(struct bpf_mprog_entry *entry, + struct bpf_prog *prog) +{ + const struct bpf_mprog_fp *fp; + const struct bpf_prog *tmp; + + bpf_mprog_foreach_prog(entry, fp, tmp) { + if (tmp == prog) + return true; + } + return false; +} + +static inline void bpf_mprog_mark_for_release(struct bpf_mprog_entry *entry, + struct bpf_tuple *tuple) +{ + WARN_ON_ONCE(entry->parent->ref); + if (!tuple->link) + entry->parent->ref = tuple->prog; +} + +static inline void bpf_mprog_complete_release(struct bpf_mprog_entry *entry) +{ + /* In the non-link case prog deletions can only drop the reference + * to the prog after the bpf_mprog_entry got swapped and the + * bpf_mprog ensured that there are no inflight users anymore. + * + * Paired with bpf_mprog_mark_for_release(). + */ + if (entry->parent->ref) { + bpf_prog_put(entry->parent->ref); + entry->parent->ref = NULL; + } +} + +static inline void bpf_mprog_revision_new(struct bpf_mprog_entry *entry) +{ + atomic64_inc(&entry->parent->revision); +} + +static inline void bpf_mprog_commit(struct bpf_mprog_entry *entry) +{ + bpf_mprog_complete_release(entry); + bpf_mprog_revision_new(entry); +} + +static inline u64 bpf_mprog_revision(struct bpf_mprog_entry *entry) +{ + return atomic64_read(&entry->parent->revision); +} + +static inline void bpf_mprog_entry_copy(struct bpf_mprog_entry *dst, + struct bpf_mprog_entry *src) +{ + memcpy(dst->fp_items, src->fp_items, sizeof(src->fp_items)); +} + +static inline void bpf_mprog_entry_clear(struct bpf_mprog_entry *dst) +{ + memset(dst->fp_items, 0, sizeof(dst->fp_items)); +} + +static inline void bpf_mprog_clear_all(struct bpf_mprog_entry *entry, + struct bpf_mprog_entry **entry_new) +{ + struct bpf_mprog_entry *peer; + + peer = bpf_mprog_peer(entry); + bpf_mprog_entry_clear(peer); + peer->parent->count = 0; + *entry_new = peer; +} + +static inline void bpf_mprog_entry_grow(struct bpf_mprog_entry *entry, int idx) +{ + int total = bpf_mprog_total(entry); + + memmove(entry->fp_items + idx + 1, + entry->fp_items + idx, + (total - idx) * sizeof(struct bpf_mprog_fp)); + + memmove(entry->parent->cp_items + idx + 1, + entry->parent->cp_items + idx, + (total - idx) * sizeof(struct bpf_mprog_cp)); +} + +static inline void bpf_mprog_entry_shrink(struct bpf_mprog_entry *entry, int idx) +{ + /* Total array size is needed in this case to enure the NULL + * entry is copied at the end. + */ + int total = ARRAY_SIZE(entry->fp_items); + + memmove(entry->fp_items + idx, + entry->fp_items + idx + 1, + (total - idx - 1) * sizeof(struct bpf_mprog_fp)); + + memmove(entry->parent->cp_items + idx, + entry->parent->cp_items + idx + 1, + (total - idx - 1) * sizeof(struct bpf_mprog_cp)); +} + +static inline void bpf_mprog_read(struct bpf_mprog_entry *entry, u32 idx, + struct bpf_mprog_fp **fp, + struct bpf_mprog_cp **cp) +{ + *fp = &entry->fp_items[idx]; + *cp = &entry->parent->cp_items[idx]; +} + +static inline void bpf_mprog_write(struct bpf_mprog_fp *fp, + struct bpf_mprog_cp *cp, + struct bpf_tuple *tuple) +{ + WRITE_ONCE(fp->prog, tuple->prog); + cp->link = tuple->link; +} + +int bpf_mprog_attach(struct bpf_mprog_entry *entry, + struct bpf_mprog_entry **entry_new, + struct bpf_prog *prog_new, struct bpf_link *link, + struct bpf_prog *prog_old, + u32 flags, u32 id_or_fd, u64 revision); + +int bpf_mprog_detach(struct bpf_mprog_entry *entry, + struct bpf_mprog_entry **entry_new, + struct bpf_prog *prog, struct bpf_link *link, + u32 flags, u32 id_or_fd, u64 revision); + +int bpf_mprog_query(const union bpf_attr *attr, union bpf_attr __user *uattr, + struct bpf_mprog_entry *entry); + +static inline bool bpf_mprog_supported(enum bpf_prog_type type) +{ + switch (type) { + case BPF_PROG_TYPE_SCHED_CLS: + return true; + default: + return false; + } +} +#endif /* __BPF_MPROG_H */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index fc0d6f32c687..94baced5a1ad 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -142,9 +142,13 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_ITER, iter) #ifdef CONFIG_NET BPF_LINK_TYPE(BPF_LINK_TYPE_NETNS, netns) BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp) +BPF_LINK_TYPE(BPF_LINK_TYPE_NETFILTER, netfilter) +BPF_LINK_TYPE(BPF_LINK_TYPE_TCX, tcx) +BPF_LINK_TYPE(BPF_LINK_TYPE_NETKIT, netkit) #endif #ifdef CONFIG_PERF_EVENTS BPF_LINK_TYPE(BPF_LINK_TYPE_PERF_EVENT, perf) #endif BPF_LINK_TYPE(BPF_LINK_TYPE_KPROBE_MULTI, kprobe_multi) BPF_LINK_TYPE(BPF_LINK_TYPE_STRUCT_OPS, struct_ops) +BPF_LINK_TYPE(BPF_LINK_TYPE_UPROBE_MULTI, uprobe_multi) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index f70f9ac884d2..aa4d19d0bc94 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -300,6 +300,18 @@ struct bpf_func_state { bool in_callback_fn; struct tnum callback_ret_range; bool in_async_callback_fn; + bool in_exception_callback_fn; + /* For callback calling functions that limit number of possible + * callback executions (e.g. bpf_loop) keeps track of current + * simulated iteration number. + * Value in frame N refers to number of times callback with frame + * N+1 was simulated, e.g. for the following call: + * + * bpf_loop(..., fn, ...); | suppose current frame is N + * | fn would be simulated in frame N+1 + * | number of simulations is tracked in frame N + */ + u32 callback_depth; /* The following fields should be last. See copy_func_state() */ int acquired_refs; @@ -372,10 +384,25 @@ struct bpf_verifier_state { struct bpf_active_lock active_lock; bool speculative; bool active_rcu_lock; + /* If this state was ever pointed-to by other state's loop_entry field + * this flag would be set to true. Used to avoid freeing such states + * while they are still in use. + */ + bool used_as_loop_entry; /* first and last insn idx of this verifier state */ u32 first_insn_idx; u32 last_insn_idx; + /* If this state is a part of states loop this field points to some + * parent of this state such that: + * - it is also a member of the same states loop; + * - DFS states traversal starting from initial state visits loop_entry + * state before this state. + * Used to compute topmost loop entry for state loops. + * State loops might appear because of open coded iterators logic. + * See get_loop_entry() for more information. + */ + struct bpf_verifier_state *loop_entry; /* jmp history recorded from first to last. * backtracking is using it to go from last to first. * For most states jmp_history_cnt is [0-3]. @@ -383,21 +410,22 @@ struct bpf_verifier_state { */ struct bpf_idx_pair *jmp_history; u32 jmp_history_cnt; + u32 dfs_depth; + u32 callback_unroll_depth; }; -#define bpf_get_spilled_reg(slot, frame) \ +#define bpf_get_spilled_reg(slot, frame, mask) \ (((slot < frame->allocated_stack / BPF_REG_SIZE) && \ - (frame->stack[slot].slot_type[0] == STACK_SPILL)) \ + ((1 << frame->stack[slot].slot_type[0]) & (mask))) \ ? &frame->stack[slot].spilled_ptr : NULL) /* Iterate over 'frame', setting 'reg' to either NULL or a spilled register. */ -#define bpf_for_each_spilled_reg(iter, frame, reg) \ - for (iter = 0, reg = bpf_get_spilled_reg(iter, frame); \ +#define bpf_for_each_spilled_reg(iter, frame, reg, mask) \ + for (iter = 0, reg = bpf_get_spilled_reg(iter, frame, mask); \ iter < frame->allocated_stack / BPF_REG_SIZE; \ - iter++, reg = bpf_get_spilled_reg(iter, frame)) + iter++, reg = bpf_get_spilled_reg(iter, frame, mask)) -/* Invoke __expr over regsiters in __vst, setting __state and __reg */ -#define bpf_for_each_reg_in_vstate(__vst, __state, __reg, __expr) \ +#define bpf_for_each_reg_in_vstate_mask(__vst, __state, __reg, __mask, __expr) \ ({ \ struct bpf_verifier_state *___vstate = __vst; \ int ___i, ___j; \ @@ -409,7 +437,7 @@ struct bpf_verifier_state { __reg = &___regs[___j]; \ (void)(__expr); \ } \ - bpf_for_each_spilled_reg(___j, __state, __reg) { \ + bpf_for_each_spilled_reg(___j, __state, __reg, __mask) { \ if (!__reg) \ continue; \ (void)(__expr); \ @@ -417,6 +445,10 @@ struct bpf_verifier_state { } \ }) +/* Invoke __expr over regsiters in __vst, setting __state and __reg */ +#define bpf_for_each_reg_in_vstate(__vst, __state, __reg, __expr) \ + bpf_for_each_reg_in_vstate_mask(__vst, __state, __reg, 1 << STACK_SPILL, __expr) + /* linked list of verifier states used to prune search */ struct bpf_verifier_state_list { struct bpf_verifier_state state; @@ -480,6 +512,7 @@ struct bpf_insn_aux_data { bool zext_dst; /* this insn zero extends dst reg */ bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */ bool is_iter_next; /* bpf_iter_<type>_next() kfunc call */ + bool call_with_percpu_alloc_ptr; /* {this,per}_cpu_ptr() with prog percpu alloc */ u8 alu_state; /* used in combination with alu_limit */ /* below fields are initialized once */ @@ -490,6 +523,10 @@ struct bpf_insn_aux_data { * this instruction, regardless of any heuristics */ bool force_checkpoint; + /* true if instruction is a call to a helper function that + * accepts callback function as a parameter. + */ + bool calls_callback; }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ @@ -540,7 +577,9 @@ struct bpf_subprog_info { bool has_tail_call; bool tail_call_reachable; bool has_ld_abs; + bool is_cb; bool is_async_cb; + bool is_exception_cb; }; struct bpf_verifier_env; @@ -587,6 +626,8 @@ struct bpf_verifier_env { u32 used_map_cnt; /* number of used maps */ u32 used_btf_cnt; /* number of used BTF objects */ u32 id_gen; /* used to generate unique reg IDs */ + u32 hidden_subprog_cnt; /* number of hidden subprogs */ + int exception_callback_subprog; bool explore_alu_limits; bool allow_ptr_leaks; bool allow_uninit_stack; @@ -594,10 +635,11 @@ struct bpf_verifier_env { bool bypass_spec_v1; bool bypass_spec_v4; bool seen_direct_write; + bool seen_exception; struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ const struct bpf_line_info *prev_linfo; struct bpf_verifier_log log; - struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1]; + struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 2]; /* max + 2 for the fake and exception subprogs */ union { struct bpf_idmap idmap_scratch; struct bpf_idset idset_scratch; @@ -745,7 +787,7 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog) } } -#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | PTR_TRUSTED) +#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | PTR_TRUSTED | NON_OWN_REF) static inline bool bpf_type_has_unsafe_modifiers(u32 type) { diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 5d732f48f787..1394ba302367 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -11,6 +11,7 @@ #define PHY_ID_BCM50610 0x0143bd60 #define PHY_ID_BCM50610M 0x0143bd70 +#define PHY_ID_BCM5221 0x004061e0 #define PHY_ID_BCM5241 0x0143bc30 #define PHY_ID_BCMAC131 0x0143bc70 #define PHY_ID_BCM5481 0x0143bca0 @@ -44,6 +45,7 @@ #define PHY_ID_BCM7366 0x600d8490 #define PHY_ID_BCM7346 0x600d8650 #define PHY_ID_BCM7362 0x600d84b0 +#define PHY_ID_BCM74165 0x359052c0 #define PHY_ID_BCM7425 0x600d86b0 #define PHY_ID_BCM7429 0x600d8730 #define PHY_ID_BCM7435 0x600d8750 @@ -330,6 +332,15 @@ #define BCM54XX_WOL_INT_STATUS (MII_BCM54XX_EXP_SEL_WOL + 0x94) +/* BCM5221 Registers */ +#define BCM5221_AEGSR 0x1C +#define BCM5221_AEGSR_MDIX_STATUS BIT(13) +#define BCM5221_AEGSR_MDIX_MAN_SWAP BIT(12) +#define BCM5221_AEGSR_MDIX_DIS BIT(11) + +#define BCM5221_SHDW_AM4_EN_CLK_LPM BIT(2) +#define BCM5221_SHDW_AM4_FORCE_LPM BIT(1) + /*****************************************************************************/ /* Fast Ethernet Transceiver definitions. */ /*****************************************************************************/ diff --git a/include/linux/btf.h b/include/linux/btf.h index cac9f304e27a..59d404e22814 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -74,6 +74,7 @@ #define KF_ITER_NEW (1 << 8) /* kfunc implements BPF iter constructor */ #define KF_ITER_NEXT (1 << 9) /* kfunc implements BPF iter next method */ #define KF_ITER_DESTROY (1 << 10) /* kfunc implements BPF iter destructor */ +#define KF_RCU_PROTECTED (1 << 11) /* kfunc should be protected by rcu cs when they are invoked */ /* * Tag marking a kernel function as a kfunc. This is meant to minimize the @@ -83,6 +84,17 @@ */ #define __bpf_kfunc __used noinline +#define __bpf_kfunc_start_defs() \ + __diag_push(); \ + __diag_ignore_all("-Wmissing-declarations", \ + "Global kfuncs as their definitions will be in BTF");\ + __diag_ignore_all("-Wmissing-prototypes", \ + "Global kfuncs as their definitions will be in BTF") + +#define __bpf_kfunc_end_defs() __diag_pop() +#define __bpf_hook_start() __bpf_kfunc_start_defs() +#define __bpf_hook_end() __bpf_kfunc_end_defs() + /* * Return the name of the passed struct, if exists, or halt the build if for * example the structure gets renamed. In this way, developers have to revisit @@ -204,13 +216,12 @@ u32 btf_nr_types(const struct btf *btf); bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, const struct btf_member *m, u32 expected_offset, u32 expected_size); -int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t); -int btf_find_timer(const struct btf *btf, const struct btf_type *t); struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t, u32 field_mask, u32 value_size); int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec); bool btf_type_is_void(const struct btf_type *t); s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind); +s32 bpf_find_btf_id(const char *name, u32 kind, struct btf **btf_p); const struct btf_type *btf_type_skip_modifiers(const struct btf *btf, u32 id, u32 *res_id); const struct btf_type *btf_type_resolve_ptr(const struct btf *btf, diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 00950cc03bff..a9cb10b0e2e9 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -49,7 +49,7 @@ word \ ____BTF_ID(symbol, word) #define __ID(prefix) \ - __PASTE(prefix, __COUNTER__) + __PASTE(__PASTE(prefix, __COUNTER__), __LINE__) /* * The BTF_ID defines unique symbol for each ID pointing @@ -267,5 +267,6 @@ MAX_BTF_TRACING_TYPE, extern u32 btf_tracing_ids[]; extern u32 bpf_cgroup_btf_id[]; extern u32 bpf_local_storage_map_btf_id[]; +extern u32 btf_bpf_map_id[]; #endif diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 6cb3e9af78c9..5f23ee599889 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -16,8 +16,6 @@ #include <linux/wait.h> #include <linux/atomic.h> -#ifdef CONFIG_BLOCK - enum bh_state_bits { BH_Uptodate, /* Contains valid data */ BH_Dirty, /* Is dirty */ @@ -173,7 +171,10 @@ static __always_inline int buffer_uptodate(const struct buffer_head *bh) return test_bit_acquire(BH_Uptodate, &bh->b_state); } -#define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) +static inline unsigned long bh_offset(const struct buffer_head *bh) +{ + return (unsigned long)(bh)->b_data & (page_size(bh->b_page) - 1); +} /* If we *know* page->private refers to buffer_heads */ #define page_buffers(page) \ @@ -194,29 +195,20 @@ void buffer_check_dirty_writeback(struct folio *folio, void mark_buffer_dirty(struct buffer_head *bh); void mark_buffer_write_io_error(struct buffer_head *bh); void touch_buffer(struct buffer_head *bh); -void set_bh_page(struct buffer_head *bh, - struct page *page, unsigned long offset); void folio_set_bh(struct buffer_head *bh, struct folio *folio, unsigned long offset); -bool try_to_free_buffers(struct folio *); struct buffer_head *folio_alloc_buffers(struct folio *folio, unsigned long size, - bool retry); + gfp_t gfp); struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size, bool retry); -void create_empty_buffers(struct page *, unsigned long, - unsigned long b_state); -void folio_create_empty_buffers(struct folio *folio, unsigned long blocksize, - unsigned long b_state); +struct buffer_head *create_empty_buffers(struct folio *folio, + unsigned long blocksize, unsigned long b_state); void end_buffer_read_sync(struct buffer_head *bh, int uptodate); void end_buffer_write_sync(struct buffer_head *bh, int uptodate); void end_buffer_async_write(struct buffer_head *bh, int uptodate); /* Things to do with buffers at mapping->private_list */ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode); -int inode_has_buffers(struct inode *); -void invalidate_inode_buffers(struct inode *); -int remove_inode_buffers(struct inode *inode); -int sync_mapping_buffers(struct address_space *mapping); int generic_buffers_fsync_noflush(struct file *file, loff_t start, loff_t end, bool datasync); int generic_buffers_fsync(struct file *file, loff_t start, loff_t end, @@ -233,16 +225,13 @@ void __wait_on_buffer(struct buffer_head *); wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block, unsigned size); -struct buffer_head *__getblk_gfp(struct block_device *bdev, sector_t block, - unsigned size, gfp_t gfp); +struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block, + unsigned size, gfp_t gfp); void __brelse(struct buffer_head *); void __bforget(struct buffer_head *); void __breadahead(struct block_device *, sector_t block, unsigned int size); struct buffer_head *__bread_gfp(struct block_device *, sector_t block, unsigned size, gfp_t gfp); -void invalidate_bh_lrus(void); -void invalidate_bh_lrus_cpu(void); -bool has_bh_in_lru(int cpu, void *dummy); struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); void unlock_buffer(struct buffer_head *bh); @@ -258,8 +247,6 @@ int __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait); void __bh_read_batch(int nr, struct buffer_head *bhs[], blk_opf_t op_flags, bool force_lock); -extern int buffer_heads_over_limit; - /* * Generic address_space_operations implementations for buffer_head-backed * address_spaces. @@ -288,21 +275,9 @@ int cont_write_begin(struct file *, struct address_space *, loff_t, unsigned, struct page **, void **, get_block_t *, loff_t *); int generic_cont_expand_simple(struct inode *inode, loff_t size); -int block_commit_write(struct page *page, unsigned from, unsigned to); +void block_commit_write(struct page *page, unsigned int from, unsigned int to); int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block); -/* Convert errno to return value from ->page_mkwrite() call */ -static inline vm_fault_t block_page_mkwrite_return(int err) -{ - if (err == 0) - return VM_FAULT_LOCKED; - if (err == -EFAULT || err == -EAGAIN) - return VM_FAULT_NOPAGE; - if (err == -ENOMEM) - return VM_FAULT_OOM; - /* -ENOSPC, -EDQUOT, -EIO ... */ - return VM_FAULT_SIGBUS; -} sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int block_truncate_page(struct address_space *, loff_t, get_block_t *); @@ -316,8 +291,6 @@ extern int buffer_migrate_folio_norefs(struct address_space *, #define buffer_migrate_folio_norefs NULL #endif -void buffer_init(void); - /* * inline definitions */ @@ -363,17 +336,38 @@ sb_breadahead(struct super_block *sb, sector_t block) __breadahead(sb->s_bdev, block, sb->s_blocksize); } -static inline struct buffer_head * -sb_getblk(struct super_block *sb, sector_t block) +static inline struct buffer_head *getblk_unmovable(struct block_device *bdev, + sector_t block, unsigned size) { - return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE); + gfp_t gfp; + + gfp = mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS); + gfp |= __GFP_NOFAIL; + + return bdev_getblk(bdev, block, size, gfp); } +static inline struct buffer_head *__getblk(struct block_device *bdev, + sector_t block, unsigned size) +{ + gfp_t gfp; -static inline struct buffer_head * -sb_getblk_gfp(struct super_block *sb, sector_t block, gfp_t gfp) + gfp = mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS); + gfp |= __GFP_MOVABLE | __GFP_NOFAIL; + + return bdev_getblk(bdev, block, size, gfp); +} + +static inline struct buffer_head *sb_getblk(struct super_block *sb, + sector_t block) { - return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, gfp); + return __getblk(sb->s_bdev, block, sb->s_blocksize); +} + +static inline struct buffer_head *sb_getblk_gfp(struct super_block *sb, + sector_t block, gfp_t gfp) +{ + return bdev_getblk(sb->s_bdev, block, sb->s_blocksize, gfp); } static inline struct buffer_head * @@ -410,20 +404,6 @@ static inline void lock_buffer(struct buffer_head *bh) __lock_buffer(bh); } -static inline struct buffer_head *getblk_unmovable(struct block_device *bdev, - sector_t block, - unsigned size) -{ - return __getblk_gfp(bdev, block, size, 0); -} - -static inline struct buffer_head *__getblk(struct block_device *bdev, - sector_t block, - unsigned size) -{ - return __getblk_gfp(bdev, block, size, __GFP_MOVABLE); -} - static inline void bh_readahead(struct buffer_head *bh, blk_opf_t op_flags) { if (!buffer_uptodate(bh) && trylock_buffer(bh)) { @@ -475,9 +455,44 @@ __bread(struct block_device *bdev, sector_t block, unsigned size) return __bread_gfp(bdev, block, size, __GFP_MOVABLE); } +/** + * get_nth_bh - Get a reference on the n'th buffer after this one. + * @bh: The buffer to start counting from. + * @count: How many buffers to skip. + * + * This is primarily useful for finding the nth buffer in a folio; in + * that case you pass the head buffer and the byte offset in the folio + * divided by the block size. It can be used for other purposes, but + * it will wrap at the end of the folio rather than returning NULL or + * proceeding to the next folio for you. + * + * Return: The requested buffer with an elevated refcount. + */ +static inline __must_check +struct buffer_head *get_nth_bh(struct buffer_head *bh, unsigned int count) +{ + while (count--) + bh = bh->b_this_page; + get_bh(bh); + return bh; +} + bool block_dirty_folio(struct address_space *mapping, struct folio *folio); -#else /* CONFIG_BLOCK */ +#ifdef CONFIG_BUFFER_HEAD + +void buffer_init(void); +bool try_to_free_buffers(struct folio *folio); +int inode_has_buffers(struct inode *inode); +void invalidate_inode_buffers(struct inode *inode); +int remove_inode_buffers(struct inode *inode); +int sync_mapping_buffers(struct address_space *mapping); +void invalidate_bh_lrus(void); +void invalidate_bh_lrus_cpu(void); +bool has_bh_in_lru(int cpu, void *dummy); +extern int buffer_heads_over_limit; + +#else /* CONFIG_BUFFER_HEAD */ static inline void buffer_init(void) {} static inline bool try_to_free_buffers(struct folio *folio) { return true; } @@ -485,9 +500,10 @@ static inline int inode_has_buffers(struct inode *inode) { return 0; } static inline void invalidate_inode_buffers(struct inode *inode) {} static inline int remove_inode_buffers(struct inode *inode) { return 1; } static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; } +static inline void invalidate_bh_lrus(void) {} static inline void invalidate_bh_lrus_cpu(void) {} static inline bool has_bh_in_lru(int cpu, void *dummy) { return false; } #define buffer_heads_over_limit 0 -#endif /* CONFIG_BLOCK */ +#endif /* CONFIG_BUFFER_HEAD */ #endif /* _LINUX_BUFFER_HEAD_H */ diff --git a/include/linux/buildid.h b/include/linux/buildid.h index 3b7a0ff4642f..8a582d242f06 100644 --- a/include/linux/buildid.h +++ b/include/linux/buildid.h @@ -2,10 +2,11 @@ #ifndef _LINUX_BUILDID_H #define _LINUX_BUILDID_H -#include <linux/mm_types.h> +#include <linux/types.h> #define BUILD_ID_SIZE_MAX 20 +struct vm_area_struct; int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id, __u32 *size); int build_id_parse_buf(const void *buf, unsigned char *build_id, u32 buf_size); diff --git a/include/linux/cacheflush.h b/include/linux/cacheflush.h index a6189d21f2ba..55f297b2c23f 100644 --- a/include/linux/cacheflush.h +++ b/include/linux/cacheflush.h @@ -7,14 +7,23 @@ struct folio; #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE -#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO +#ifndef flush_dcache_folio void flush_dcache_folio(struct folio *folio); #endif #else static inline void flush_dcache_folio(struct folio *folio) { } -#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO 0 +#define flush_dcache_folio flush_dcache_folio #endif /* ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE */ +#ifndef flush_icache_pages +static inline void flush_icache_pages(struct vm_area_struct *vma, + struct page *page, unsigned int nr) +{ +} +#endif + +#define flush_icache_page(vma, page) flush_icache_pages(vma, page, 1) + #endif /* _LINUX_CACHEFLUSH_H */ diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index a5cfd44fab45..d504eb4b49ab 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -73,6 +73,7 @@ struct cacheinfo { struct cpu_cacheinfo { struct cacheinfo *info_list; + unsigned int per_cpu_data_slice_size; unsigned int num_levels; unsigned int num_leaves; bool cpu_map_populated; diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 982ba245eb41..1b92aed49363 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -195,6 +195,10 @@ int can_restart_now(struct net_device *dev); void can_bus_off(struct net_device *dev); const char *can_get_state_str(const enum can_state state); +void can_state_get_by_berr_counter(const struct net_device *dev, + const struct can_berr_counter *bec, + enum can_state *tx_state, + enum can_state *rx_state); void can_change_state(struct net_device *dev, struct can_frame *cf, enum can_state tx_state, enum can_state rx_state); diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h index c205c51d79c9..d29bb4521947 100644 --- a/include/linux/can/rx-offload.h +++ b/include/linux/can/rx-offload.h @@ -3,7 +3,7 @@ * linux/can/rx-offload.h * * Copyright (c) 2014 David Jander, Protonic Holland - * Copyright (c) 2014-2017 Pengutronix, Marc Kleine-Budde <kernel@pengutronix.de> + * Copyright (c) 2014-2017, 2023 Pengutronix, Marc Kleine-Budde <kernel@pengutronix.de> */ #ifndef _CAN_RX_OFFLOAD_H @@ -44,11 +44,14 @@ int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload); int can_rx_offload_queue_timestamp(struct can_rx_offload *offload, struct sk_buff *skb, u32 timestamp); -unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload, - unsigned int idx, u32 timestamp, - unsigned int *frame_len_ptr); +unsigned int can_rx_offload_get_echo_skb_queue_timestamp(struct can_rx_offload *offload, + unsigned int idx, u32 timestamp, + unsigned int *frame_len_ptr); int can_rx_offload_queue_tail(struct can_rx_offload *offload, struct sk_buff *skb); +unsigned int can_rx_offload_get_echo_skb_queue_tail(struct can_rx_offload *offload, + unsigned int idx, + unsigned int *frame_len_ptr); void can_rx_offload_irq_finish(struct can_rx_offload *offload); void can_rx_offload_threaded_irq_finish(struct can_rx_offload *offload); void can_rx_offload_del(struct can_rx_offload *offload); diff --git a/include/linux/cdx/cdx_bus.h b/include/linux/cdx/cdx_bus.h index bead71b7bc73..94ad2c9017c9 100644 --- a/include/linux/cdx/cdx_bus.h +++ b/include/linux/cdx/cdx_bus.h @@ -21,13 +21,19 @@ struct cdx_controller; enum { + CDX_DEV_BUS_MASTER_CONF, CDX_DEV_RESET_CONF, }; struct cdx_device_config { u8 type; + bool bus_master_enable; }; +typedef int (*cdx_bus_enable_cb)(struct cdx_controller *cdx, u8 bus_num); + +typedef int (*cdx_bus_disable_cb)(struct cdx_controller *cdx, u8 bus_num); + typedef int (*cdx_scan_cb)(struct cdx_controller *cdx); typedef int (*cdx_dev_configure_cb)(struct cdx_controller *cdx, @@ -35,6 +41,19 @@ typedef int (*cdx_dev_configure_cb)(struct cdx_controller *cdx, struct cdx_device_config *dev_config); /** + * CDX_DEVICE - macro used to describe a specific CDX device + * @vend: the 16 bit CDX Vendor ID + * @dev: the 16 bit CDX Device ID + * + * This macro is used to create a struct cdx_device_id that matches a + * specific device. The subvendor and subdevice fields will be set to + * CDX_ANY_ID. + */ +#define CDX_DEVICE(vend, dev) \ + .vendor = (vend), .device = (dev), \ + .subvendor = CDX_ANY_ID, .subdevice = CDX_ANY_ID + +/** * CDX_DEVICE_DRIVER_OVERRIDE - macro used to describe a CDX device with * override_only flags. * @vend: the 16 bit CDX Vendor ID @@ -42,18 +61,24 @@ typedef int (*cdx_dev_configure_cb)(struct cdx_controller *cdx, * @driver_override: the 32 bit CDX Device override_only * * This macro is used to create a struct cdx_device_id that matches only a - * driver_override device. + * driver_override device. The subvendor and subdevice fields will be set to + * CDX_ANY_ID. */ #define CDX_DEVICE_DRIVER_OVERRIDE(vend, dev, driver_override) \ - .vendor = (vend), .device = (dev), .override_only = (driver_override) + .vendor = (vend), .device = (dev), .subvendor = CDX_ANY_ID,\ + .subdevice = CDX_ANY_ID, .override_only = (driver_override) /** * struct cdx_ops - Callbacks supported by CDX controller. + * @bus_enable: enable bus on the controller + * @bus_disable: disable bus on the controller * @scan: scan the devices on the controller * @dev_configure: configuration like reset, master_enable, * msi_config etc for a CDX device */ struct cdx_ops { + cdx_bus_enable_cb bus_enable; + cdx_bus_disable_cb bus_disable; cdx_scan_cb scan; cdx_dev_configure_cb dev_configure; }; @@ -63,12 +88,14 @@ struct cdx_ops { * @dev: Linux device associated with the CDX controller. * @priv: private data * @id: Controller ID + * @controller_registered: controller registered with bus * @ops: CDX controller ops */ struct cdx_controller { struct device *dev; void *priv; u32 id; + bool controller_registered; struct cdx_ops *ops; }; @@ -78,6 +105,10 @@ struct cdx_controller { * @cdx: CDX controller associated with the device * @vendor: Vendor ID for CDX device * @device: Device ID for CDX device + * @subsystem_vendor: Subsystem Vendor ID for CDX device + * @subsystem_device: Subsystem Device ID for CDX device + * @class: Class for the CDX device + * @revision: Revision of the CDX device * @bus_num: Bus number for this CDX device * @dev_num: Device number for this device * @res: array of MMIO region entries @@ -86,6 +117,8 @@ struct cdx_controller { * @dma_mask: Default DMA mask * @flags: CDX device flags * @req_id: Requestor ID associated with CDX device + * @is_bus: Is this bus device + * @enabled: is this bus enabled * @driver_override: driver name to force a match; do not set directly, * because core frees it; use driver_set_override() to * set or clear it. @@ -95,6 +128,10 @@ struct cdx_device { struct cdx_controller *cdx; u16 vendor; u16 device; + u16 subsystem_vendor; + u16 subsystem_device; + u32 class; + u8 revision; u8 bus_num; u8 dev_num; struct resource res[MAX_CDX_DEV_RESOURCES]; @@ -102,6 +139,8 @@ struct cdx_device { u64 dma_mask; u16 flags; u32 req_id; + bool is_bus; + bool enabled; const char *driver_override; }; @@ -170,4 +209,20 @@ extern struct bus_type cdx_bus_type; */ int cdx_dev_reset(struct device *dev); +/** + * cdx_set_master - enables bus-mastering for CDX device + * @cdx_dev: the CDX device to enable + * + * Return: 0 for success, -errno on failure + */ +int cdx_set_master(struct cdx_device *cdx_dev); + +/** + * cdx_clear_master - disables bus-mastering for CDX device + * @cdx_dev: the CDX device to disable + * + * Return: 0 for success, -errno on failure + */ +int cdx_clear_master(struct cdx_device *cdx_dev); + #endif /* _CDX_BUS_H_ */ diff --git a/include/linux/ceph/ceph_debug.h b/include/linux/ceph/ceph_debug.h index d5a5da838caf..11a92a946016 100644 --- a/include/linux/ceph/ceph_debug.h +++ b/include/linux/ceph/ceph_debug.h @@ -19,12 +19,25 @@ pr_debug("%.*s %12.12s:%-4d : " fmt, \ 8 - (int)sizeof(KBUILD_MODNAME), " ", \ kbasename(__FILE__), __LINE__, ##__VA_ARGS__) +# define doutc(client, fmt, ...) \ + pr_debug("%.*s %12.12s:%-4d : [%pU %llu] " fmt, \ + 8 - (int)sizeof(KBUILD_MODNAME), " ", \ + kbasename(__FILE__), __LINE__, \ + &client->fsid, client->monc.auth->global_id, \ + ##__VA_ARGS__) # else /* faux printk call just to see any compiler warnings. */ # define dout(fmt, ...) do { \ if (0) \ printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ } while (0) +# define doutc(client, fmt, ...) do { \ + if (0) \ + printk(KERN_DEBUG "[%pU %llu] " fmt, \ + &client->fsid, \ + client->monc.auth->global_id, \ + ##__VA_ARGS__); \ + } while (0) # endif #else @@ -33,7 +46,32 @@ * or, just wrap pr_debug */ # define dout(fmt, ...) pr_debug(" " fmt, ##__VA_ARGS__) +# define doutc(client, fmt, ...) \ + pr_debug(" [%pU %llu] %s: " fmt, &client->fsid, \ + client->monc.auth->global_id, __func__, ##__VA_ARGS__) #endif +#define pr_notice_client(client, fmt, ...) \ + pr_notice("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_info_client(client, fmt, ...) \ + pr_info("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_warn_client(client, fmt, ...) \ + pr_warn("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_warn_once_client(client, fmt, ...) \ + pr_warn_once("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_err_client(client, fmt, ...) \ + pr_err("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_warn_ratelimited_client(client, fmt, ...) \ + pr_warn_ratelimited("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_err_ratelimited_client(client, fmt, ...) \ + pr_err_ratelimited("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) + #endif diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 49586ff26152..ee1d0e5f9789 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -357,16 +357,26 @@ enum { CEPH_MDS_OP_RENAMESNAP = 0x01403, }; -extern const char *ceph_mds_op_name(int op); +#define IS_CEPH_MDS_OP_NEWINODE(op) (op == CEPH_MDS_OP_CREATE || \ + op == CEPH_MDS_OP_MKNOD || \ + op == CEPH_MDS_OP_MKDIR || \ + op == CEPH_MDS_OP_SYMLINK) +extern const char *ceph_mds_op_name(int op); -#define CEPH_SETATTR_MODE 1 -#define CEPH_SETATTR_UID 2 -#define CEPH_SETATTR_GID 4 -#define CEPH_SETATTR_MTIME 8 -#define CEPH_SETATTR_ATIME 16 -#define CEPH_SETATTR_SIZE 32 -#define CEPH_SETATTR_CTIME 64 +#define CEPH_SETATTR_MODE (1 << 0) +#define CEPH_SETATTR_UID (1 << 1) +#define CEPH_SETATTR_GID (1 << 2) +#define CEPH_SETATTR_MTIME (1 << 3) +#define CEPH_SETATTR_ATIME (1 << 4) +#define CEPH_SETATTR_SIZE (1 << 5) +#define CEPH_SETATTR_CTIME (1 << 6) +#define CEPH_SETATTR_MTIME_NOW (1 << 7) +#define CEPH_SETATTR_ATIME_NOW (1 << 8) +#define CEPH_SETATTR_BTIME (1 << 9) +#define CEPH_SETATTR_KILL_SGUID (1 << 10) +#define CEPH_SETATTR_FSCRYPT_AUTH (1 << 11) +#define CEPH_SETATTR_FSCRYPT_FILE (1 << 12) /* * Ceph setxattr request flags. @@ -479,7 +489,7 @@ union ceph_mds_request_args_ext { #define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */ #define CEPH_MDS_FLAG_ASYNC 4 /* request is asynchronous */ -struct ceph_mds_request_head_old { +struct ceph_mds_request_head_legacy { __le64 oldest_client_tid; __le32 mdsmap_epoch; /* on client */ __le32 flags; /* CEPH_MDS_FLAG_* */ @@ -492,9 +502,9 @@ struct ceph_mds_request_head_old { union ceph_mds_request_args args; } __attribute__ ((packed)); -#define CEPH_MDS_REQUEST_HEAD_VERSION 1 +#define CEPH_MDS_REQUEST_HEAD_VERSION 3 -struct ceph_mds_request_head { +struct ceph_mds_request_head_old { __le16 version; /* struct version */ __le64 oldest_client_tid; __le32 mdsmap_epoch; /* on client */ @@ -508,6 +518,26 @@ struct ceph_mds_request_head { union ceph_mds_request_args_ext args; } __attribute__ ((packed)); +struct ceph_mds_request_head { + __le16 version; /* struct version */ + __le64 oldest_client_tid; + __le32 mdsmap_epoch; /* on client */ + __le32 flags; /* CEPH_MDS_FLAG_* */ + __u8 num_retry, num_fwd; /* legacy count retry and fwd attempts */ + __le16 num_releases; /* # include cap/lease release records */ + __le32 op; /* mds op code */ + __le32 caller_uid, caller_gid; + __le64 ino; /* use this ino for openc, mkdir, mknod, + etc. (if replaying) */ + union ceph_mds_request_args_ext args; + + __le32 ext_num_retry; /* new count retry attempts */ + __le32 ext_num_fwd; /* new count fwd attempts */ + + __le32 struct_len; /* to store size of struct ceph_mds_request_head */ + __le32 owner_uid, owner_gid; /* used for OPs which create inodes */ +} __attribute__ ((packed)); + /* cap/lease release record */ struct ceph_mds_request_release { __le64 ino, cap_id; /* ino and unique cap id */ diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h deleted file mode 100644 index 4c3e0648dc27..000000000000 --- a/include/linux/ceph/mdsmap.h +++ /dev/null @@ -1,72 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _FS_CEPH_MDSMAP_H -#define _FS_CEPH_MDSMAP_H - -#include <linux/bug.h> -#include <linux/ceph/types.h> - -/* - * mds map - describe servers in the mds cluster. - * - * we limit fields to those the client actually xcares about - */ -struct ceph_mds_info { - u64 global_id; - struct ceph_entity_addr addr; - s32 state; - int num_export_targets; - bool laggy; - u32 *export_targets; -}; - -struct ceph_mdsmap { - u32 m_epoch, m_client_epoch, m_last_failure; - u32 m_root; - u32 m_session_timeout; /* seconds */ - u32 m_session_autoclose; /* seconds */ - u64 m_max_file_size; - u64 m_max_xattr_size; /* maximum size for xattrs blob */ - u32 m_max_mds; /* expected up:active mds number */ - u32 m_num_active_mds; /* actual up:active mds number */ - u32 possible_max_rank; /* possible max rank index */ - struct ceph_mds_info *m_info; - - /* which object pools file data can be stored in */ - int m_num_data_pg_pools; - u64 *m_data_pg_pools; - u64 m_cas_pg_pool; - - bool m_enabled; - bool m_damaged; - int m_num_laggy; -}; - -static inline struct ceph_entity_addr * -ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w) -{ - if (w >= m->possible_max_rank) - return NULL; - return &m->m_info[w].addr; -} - -static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w) -{ - BUG_ON(w < 0); - if (w >= m->possible_max_rank) - return CEPH_MDS_STATE_DNE; - return m->m_info[w].state; -} - -static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w) -{ - if (w >= 0 && w < m->possible_max_rank) - return m->m_info[w].laggy; - return false; -} - -extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m); -struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2); -extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m); -extern bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m); - -#endif diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 99c1726be6ee..2eaaabbe98cb 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -17,6 +17,7 @@ struct ceph_msg; struct ceph_connection; +struct ceph_msg_data_cursor; /* * Ceph defines these callbacks for handling connection events. @@ -70,6 +71,30 @@ struct ceph_connection_operations { int used_proto, int result, const int *allowed_protos, int proto_cnt, const int *allowed_modes, int mode_cnt); + + /** + * sparse_read: read sparse data + * @con: connection we're reading from + * @cursor: data cursor for reading extents + * @buf: optional buffer to read into + * + * This should be called more than once, each time setting up to + * receive an extent into the current cursor position, and zeroing + * the holes between them. + * + * Returns amount of data to be read (in bytes), 0 if reading is + * complete, or -errno if there was an error. + * + * If @buf is set on a >0 return, then the data should be read into + * the provided buffer. Otherwise, it should be read into the cursor. + * + * The sparse read operation is expected to initialize the cursor + * with a length covering up to the end of the last extent. + */ + int (*sparse_read)(struct ceph_connection *con, + struct ceph_msg_data_cursor *cursor, + char **buf); + }; /* use format string %s%lld */ @@ -98,6 +123,7 @@ enum ceph_msg_data_type { CEPH_MSG_DATA_BIO, /* data source/destination is a bio list */ #endif /* CONFIG_BLOCK */ CEPH_MSG_DATA_BVECS, /* data source/destination is a bio_vec array */ + CEPH_MSG_DATA_ITER, /* data source/destination is an iov_iter */ }; #ifdef CONFIG_BLOCK @@ -199,6 +225,7 @@ struct ceph_msg_data { bool own_pages; }; struct ceph_pagelist *pagelist; + struct iov_iter iter; }; }; @@ -207,6 +234,7 @@ struct ceph_msg_data_cursor { struct ceph_msg_data *data; /* current data item */ size_t resid; /* bytes not yet consumed */ + int sr_resid; /* residual sparse_read len */ bool need_crc; /* crc update needed */ union { #ifdef CONFIG_BLOCK @@ -222,6 +250,10 @@ struct ceph_msg_data_cursor { struct page *page; /* page from list */ size_t offset; /* bytes from list */ }; + struct { + struct iov_iter iov_iter; + unsigned int lastlen; + }; }; }; @@ -251,6 +283,7 @@ struct ceph_msg { struct kref kref; bool more_to_follow; bool needs_out_seq; + bool sparse_read; int front_alloc_len; struct ceph_msgpool *pool; @@ -309,6 +342,10 @@ struct ceph_connection_v1_info { int in_base_pos; /* bytes read */ + /* sparse reads */ + struct kvec in_sr_kvec; /* current location to receive into */ + u64 in_sr_len; /* amount of data in this extent */ + /* message in temps */ u8 in_tag; /* protocol control byte */ struct ceph_msg_header in_hdr; @@ -395,6 +432,7 @@ struct ceph_connection_v2_info { void *conn_bufs[16]; int conn_buf_cnt; + int data_len_remain; struct kvec in_sign_kvecs[8]; struct kvec out_sign_kvecs[8]; @@ -573,6 +611,8 @@ void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos, #endif /* CONFIG_BLOCK */ void ceph_msg_data_add_bvecs(struct ceph_msg *msg, struct ceph_bvec_iter *bvec_pos); +void ceph_msg_data_add_iter(struct ceph_msg *msg, + struct iov_iter *iter); struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items, gfp_t flags, bool can_fail); diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h index b658961156a0..7a9a40163c0f 100644 --- a/include/linux/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h @@ -19,7 +19,7 @@ struct ceph_monmap { struct ceph_fsid fsid; u32 epoch; u32 num_mon; - struct ceph_entity_inst mon_inst[]; + struct ceph_entity_inst mon_inst[] __counted_by(num_mon); }; struct ceph_mon_client; diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index fb6be72104df..b8610e9d2471 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -29,14 +29,62 @@ typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *); #define CEPH_HOMELESS_OSD -1 -/* a given osd we're communicating with */ +/* + * A single extent in a SPARSE_READ reply. + * + * Note that these come from the OSD as little-endian values. On BE arches, + * we convert them in-place after receipt. + */ +struct ceph_sparse_extent { + u64 off; + u64 len; +} __packed; + +/* Sparse read state machine state values */ +enum ceph_sparse_read_state { + CEPH_SPARSE_READ_HDR = 0, + CEPH_SPARSE_READ_EXTENTS, + CEPH_SPARSE_READ_DATA_LEN, + CEPH_SPARSE_READ_DATA, +}; + +/* + * A SPARSE_READ reply is a 32-bit count of extents, followed by an array of + * 64-bit offset/length pairs, and then all of the actual file data + * concatenated after it (sans holes). + * + * Unfortunately, we don't know how long the extent array is until we've + * started reading the data section of the reply. The caller should send down + * a destination buffer for the array, but we'll alloc one if it's too small + * or if the caller doesn't. + */ +struct ceph_sparse_read { + enum ceph_sparse_read_state sr_state; /* state machine state */ + u64 sr_req_off; /* orig request offset */ + u64 sr_req_len; /* orig request length */ + u64 sr_pos; /* current pos in buffer */ + int sr_index; /* current extent index */ + __le32 sr_datalen; /* length of actual data */ + u32 sr_count; /* extent count in reply */ + int sr_ext_len; /* length of extent array */ + struct ceph_sparse_extent *sr_extent; /* extent array */ +}; + +/* + * A given osd we're communicating with. + * + * Note that the o_requests tree can be searched while holding the "lock" mutex + * or the "o_requests_lock" spinlock. Insertion or removal requires both! + */ struct ceph_osd { refcount_t o_ref; + int o_sparse_op_idx; struct ceph_osd_client *o_osdc; int o_osd; int o_incarnation; struct rb_node o_node; struct ceph_connection o_con; + spinlock_t o_requests_lock; struct rb_root o_requests; struct rb_root o_linger_requests; struct rb_root o_backoff_mappings; @@ -46,6 +94,7 @@ struct ceph_osd { unsigned long lru_ttl; struct list_head o_keepalive_item; struct mutex lock; + struct ceph_sparse_read o_sparse_read; }; #define CEPH_OSD_SLAB_OPS 2 @@ -59,6 +108,7 @@ enum ceph_osd_data_type { CEPH_OSD_DATA_TYPE_BIO, #endif /* CONFIG_BLOCK */ CEPH_OSD_DATA_TYPE_BVECS, + CEPH_OSD_DATA_TYPE_ITER, }; struct ceph_osd_data { @@ -82,6 +132,7 @@ struct ceph_osd_data { struct ceph_bvec_iter bvec_pos; u32 num_bvecs; }; + struct iov_iter iter; }; }; @@ -98,6 +149,8 @@ struct ceph_osd_req_op { u64 offset, length; u64 truncate_size; u32 truncate_seq; + int sparse_ext_cnt; + struct ceph_sparse_extent *sparse_ext; struct ceph_osd_data osd_data; } extent; struct { @@ -145,6 +198,9 @@ struct ceph_osd_req_op { u32 src_fadvise_flags; struct ceph_osd_data osd_data; } copy_from; + struct { + u64 ver; + } assert_ver; }; }; @@ -199,6 +255,7 @@ struct ceph_osd_request { struct ceph_osd_client *r_osdc; struct kref r_kref; bool r_mempool; + bool r_linger; /* don't resend on failure */ struct completion r_completion; /* private to osd_client.c */ ceph_osdc_callback_t r_callback; @@ -211,9 +268,9 @@ struct ceph_osd_request { struct ceph_snap_context *r_snapc; /* for writes */ struct timespec64 r_mtime; /* ditto */ u64 r_data_offset; /* ditto */ - bool r_linger; /* don't resend on failure */ /* internal */ + u64 r_version; /* data version sent in reply */ unsigned long r_stamp; /* jiffies, send or check time */ unsigned long r_start_stamp; /* jiffies */ ktime_t r_start_latency; /* ktime_t */ @@ -221,7 +278,7 @@ struct ceph_osd_request { int r_attempts; u32 r_map_dne_bound; - struct ceph_osd_req_op r_ops[]; + struct ceph_osd_req_op r_ops[] __counted_by(r_num_ops); }; struct ceph_request_redirect { @@ -450,6 +507,8 @@ void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req, void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req, unsigned int which, struct ceph_bvec_iter *bvec_pos); +void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req, + unsigned int which, struct iov_iter *iter); extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *, unsigned int which, @@ -504,6 +563,20 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, u32 truncate_seq, u64 truncate_size, bool use_mempool); +int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt); + +/* + * How big an extent array should we preallocate for a sparse read? This is + * just a starting value. If we get more than this back from the OSD, the + * receiver will reallocate. + */ +#define CEPH_SPARSE_EXT_ARRAY_INITIAL 16 + +static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op) +{ + return __ceph_alloc_sparse_ext_map(op, CEPH_SPARSE_EXT_ARRAY_INITIAL); +} + extern void ceph_osdc_get_request(struct ceph_osd_request *req); extern void ceph_osdc_put_request(struct ceph_osd_request *req); @@ -558,5 +631,19 @@ int ceph_osdc_list_watchers(struct ceph_osd_client *osdc, struct ceph_object_locator *oloc, struct ceph_watch_item **watchers, u32 *num_watchers); -#endif +/* Find offset into the buffer of the end of the extent map */ +static inline u64 ceph_sparse_ext_map_end(struct ceph_osd_req_op *op) +{ + struct ceph_sparse_extent *ext; + + /* No extents? No data */ + if (op->extent.sparse_ext_cnt == 0) + return 0; + + ext = &op->extent.sparse_ext[op->extent.sparse_ext_cnt - 1]; + + return ext->off + ext->len - op->extent.offset; +} + +#endif diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 43a7a1573b51..73c3efbec36c 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -524,6 +524,10 @@ struct ceph_osd_op { __le64 cookie; } __attribute__ ((packed)) notify; struct { + __le64 unused; + __le64 ver; + } __attribute__ ((packed)) assert_ver; + struct { __le64 offset, length; __le64 src_offset; } __attribute__ ((packed)) clonerange; diff --git a/include/linux/cfi.h b/include/linux/cfi.h index 5e134f4ce8b7..3552ec82b725 100644 --- a/include/linux/cfi.h +++ b/include/linux/cfi.h @@ -19,11 +19,13 @@ static inline enum bug_trap_type report_cfi_failure_noaddr(struct pt_regs *regs, { return report_cfi_failure(regs, addr, NULL, 0); } +#endif /* CONFIG_CFI_CLANG */ #ifdef CONFIG_ARCH_USES_CFI_TRAPS bool is_cfi_trap(unsigned long addr); +#else +static inline bool is_cfi_trap(unsigned long addr) { return false; } #endif -#endif /* CONFIG_CFI_CLANG */ #ifdef CONFIG_MODULES #ifdef CONFIG_ARCH_USES_CFI_TRAPS diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 8a0d5466c7be..4a6b6b77ccb6 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -115,6 +115,11 @@ enum { * Enable recursive subtree protection */ CGRP_ROOT_MEMORY_RECURSIVE_PROT = (1 << 18), + + /* + * Enable hugetlb accounting for the memory controller. + */ + CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING = (1 << 19), }; /* cftype->flags */ @@ -238,7 +243,7 @@ struct css_set { * Lists running through all tasks using this cgroup group. * mg_tasks lists tasks which belong to this cset but are in the * process of being migrated out or in. Protected by - * css_set_rwsem, but, during migration, once tasks are moved to + * css_set_lock, but, during migration, once tasks are moved to * mg_tasks, it can be read safely while holding cgroup_mutex. */ struct list_head tasks; @@ -342,6 +347,20 @@ struct cgroup_rstat_cpu { struct cgroup_base_stat last_bstat; /* + * This field is used to record the cumulative per-cpu time of + * the cgroup and its descendants. Currently it can be read via + * eBPF/drgn etc, and we are still trying to determine how to + * expose it in the cgroupfs interface. + */ + struct cgroup_base_stat subtree_bstat; + + /* + * Snapshots at the last reading. These are used to calculate the + * deltas to propagate to the per-cpu subtree_bstat. + */ + struct cgroup_base_stat last_subtree_bstat; + + /* * Child cgroups with stat updates on this cpu since the last read * are linked on the parent's ->updated_children through * ->updated_next. @@ -661,6 +680,8 @@ struct cgroup_subsys { void (*css_rstat_flush)(struct cgroup_subsys_state *css, int cpu); int (*css_extra_stat_show)(struct seq_file *seq, struct cgroup_subsys_state *css); + int (*css_local_stat_show)(struct seq_file *seq, + struct cgroup_subsys_state *css); int (*can_attach)(struct cgroup_taskset *tset); void (*cancel_attach)(struct cgroup_taskset *tset); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b307013b9c6c..0ef0af66080e 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -40,13 +40,11 @@ struct kernel_clone_args; #define CGROUP_WEIGHT_DFL 100 #define CGROUP_WEIGHT_MAX 10000 -/* walk only threadgroup leaders */ -#define CSS_TASK_ITER_PROCS (1U << 0) -/* walk all threaded css_sets in the domain */ -#define CSS_TASK_ITER_THREADED (1U << 1) - -/* internal flags */ -#define CSS_TASK_ITER_SKIPPED (1U << 16) +enum { + CSS_TASK_ITER_PROCS = (1U << 0), /* walk only threadgroup leaders */ + CSS_TASK_ITER_THREADED = (1U << 1), /* walk all threaded css_sets in the domain */ + CSS_TASK_ITER_SKIPPED = (1U << 16), /* internal flags */ +}; /* a css_task_iter should be treated as an opaque object */ struct css_task_iter { diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index 53f1a7a932b0..9f1a9c455b68 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -7,8 +7,9 @@ /* * DEFINE_FREE(name, type, free): * simple helper macro that defines the required wrapper for a __free() - * based cleanup function. @free is an expression using '_T' to access - * the variable. + * based cleanup function. @free is an expression using '_T' to access the + * variable. @free should typically include a NULL test before calling a + * function, see the example below. * * __free(name): * variable attribute to add a scoped based cleanup to the variable. @@ -17,6 +18,9 @@ * like a non-atomic xchg(var, NULL), such that the cleanup function will * be inhibited -- provided it sanely deals with a NULL value. * + * NOTE: this has __must_check semantics so that it is harder to accidentally + * leak the resource. + * * return_ptr(p): * returns p while inhibiting the __free(). * @@ -24,6 +28,8 @@ * * DEFINE_FREE(kfree, void *, if (_T) kfree(_T)) * + * void *alloc_obj(...) + * { * struct obj *p __free(kfree) = kmalloc(...); * if (!p) * return NULL; @@ -32,6 +38,24 @@ * return NULL; * * return_ptr(p); + * } + * + * NOTE: the DEFINE_FREE()'s @free expression includes a NULL test even though + * kfree() is fine to be called with a NULL value. This is on purpose. This way + * the compiler sees the end of our alloc_obj() function as: + * + * tmp = p; + * p = NULL; + * if (p) + * kfree(p); + * return tmp; + * + * And through the magic of value-propagation and dead-code-elimination, it + * eliminates the actual cleanup call and compiles into: + * + * return p; + * + * Without the NULL test it turns into a mess and the compiler can't help us. */ #define DEFINE_FREE(_name, _type, _free) \ @@ -39,8 +63,17 @@ #define __free(_name) __cleanup(__free_##_name) +#define __get_and_null_ptr(p) \ + ({ __auto_type __ptr = &(p); \ + __auto_type __val = *__ptr; \ + *__ptr = NULL; __val; }) + +static inline __must_check +const volatile void * __must_check_fn(const volatile void *val) +{ return val; } + #define no_free_ptr(p) \ - ({ __auto_type __ptr = (p); (p) = NULL; __ptr; }) + ((typeof(p)) __must_check_fn(__get_and_null_ptr(p))) #define return_ptr(p) return no_free_ptr(p) diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 0f0cd01906b4..ace3a4ce2fc9 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -74,7 +74,7 @@ void clk_hw_forward_rate_request(const struct clk_hw *core, unsigned long parent_rate); /** - * struct clk_duty - Struture encoding the duty cycle ratio of a clock + * struct clk_duty - Structure encoding the duty cycle ratio of a clock * * @num: Numerator of the duty cycle ratio * @den: Denominator of the duty cycle ratio @@ -129,7 +129,7 @@ struct clk_duty { * @restore_context: Restore the context of the clock after a restoration * of power. * - * @recalc_rate Recalculate the rate of this clock, by querying hardware. The + * @recalc_rate: Recalculate the rate of this clock, by querying hardware. The * parent rate is an input parameter. It is up to the caller to * ensure that the prepare_mutex is held across this call. If the * driver cannot figure out a rate for this clock, it must return @@ -456,7 +456,7 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, * clock with the clock framework * @dev: device that is registering this clock * @name: name of this clock - * @parent_name: name of clock's parent + * @parent_data: name of clock's parent * @flags: framework-specific flags * @fixed_rate: non-adjustable clock rate * @fixed_accuracy: non-adjustable clock accuracy @@ -471,7 +471,7 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, * the clock framework * @dev: device that is registering this clock * @name: name of this clock - * @parent_name: name of clock's parent + * @parent_data: name of clock's parent * @flags: framework-specific flags * @fixed_rate: non-adjustable clock rate */ @@ -649,7 +649,7 @@ struct clk_div_table { * Clock with an adjustable divider affecting its output frequency. Implements * .recalc_rate, .set_rate and .round_rate * - * Flags: + * @flags: * CLK_DIVIDER_ONE_BASED - by default the divisor is the value read from the * register plus one. If CLK_DIVIDER_ONE_BASED is set then the divider is * the raw value read from the register, with the value of zero considered @@ -1130,11 +1130,12 @@ struct clk_hw *clk_hw_register_fixed_factor_parent_hw(struct device *dev, * @mwidth: width of the numerator bit field * @nshift: shift to the denominator bit field * @nwidth: width of the denominator bit field + * @approximation: clk driver's callback for calculating the divider clock * @lock: register lock * * Clock with adjustable fractional divider affecting its output frequency. * - * Flags: + * @flags: * CLK_FRAC_DIVIDER_ZERO_BASED - by default the numerator and denominator * is the value read from the register. If CLK_FRAC_DIVIDER_ZERO_BASED * is set then the numerator and denominator are both the value read @@ -1191,7 +1192,7 @@ void clk_hw_unregister_fractional_divider(struct clk_hw *hw); * Clock with an adjustable multiplier affecting its output frequency. * Implements .recalc_rate, .set_rate and .round_rate * - * Flags: + * @flags: * CLK_MULTIPLIER_ZERO_BYPASS - By default, the multiplier is the value read * from the register, with 0 being a valid value effectively * zeroing the output clock rate. If CLK_MULTIPLIER_ZERO_BYPASS is @@ -1379,7 +1380,7 @@ struct clk_onecell_data { struct clk_hw_onecell_data { unsigned int num; - struct clk_hw *hws[]; + struct clk_hw *hws[] __counted_by(num); }; #define CLK_OF_DECLARE(name, compat, fn) \ diff --git a/include/linux/clk/mmp.h b/include/linux/clk/mmp.h deleted file mode 100644 index 445130460380..000000000000 --- a/include/linux/clk/mmp.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __CLK_MMP_H -#define __CLK_MMP_H - -#include <linux/types.h> - -extern void pxa168_clk_init(phys_addr_t mpmu_phys, - phys_addr_t apmu_phys, - phys_addr_t apbc_phys); -extern void pxa910_clk_init(phys_addr_t mpmu_phys, - phys_addr_t apmu_phys, - phys_addr_t apbc_phys, - phys_addr_t apbcp_phys); -extern void mmp2_clk_init(phys_addr_t mpmu_phys, - phys_addr_t apmu_phys, - phys_addr_t apbc_phys); - -#endif diff --git a/include/linux/closure.h b/include/linux/closure.h new file mode 100644 index 000000000000..c554c6a08768 --- /dev/null +++ b/include/linux/closure.h @@ -0,0 +1,415 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_CLOSURE_H +#define _LINUX_CLOSURE_H + +#include <linux/llist.h> +#include <linux/sched.h> +#include <linux/sched/task_stack.h> +#include <linux/workqueue.h> + +/* + * Closure is perhaps the most overused and abused term in computer science, but + * since I've been unable to come up with anything better you're stuck with it + * again. + * + * What are closures? + * + * They embed a refcount. The basic idea is they count "things that are in + * progress" - in flight bios, some other thread that's doing something else - + * anything you might want to wait on. + * + * The refcount may be manipulated with closure_get() and closure_put(). + * closure_put() is where many of the interesting things happen, when it causes + * the refcount to go to 0. + * + * Closures can be used to wait on things both synchronously and asynchronously, + * and synchronous and asynchronous use can be mixed without restriction. To + * wait synchronously, use closure_sync() - you will sleep until your closure's + * refcount hits 1. + * + * To wait asynchronously, use + * continue_at(cl, next_function, workqueue); + * + * passing it, as you might expect, the function to run when nothing is pending + * and the workqueue to run that function out of. + * + * continue_at() also, critically, requires a 'return' immediately following the + * location where this macro is referenced, to return to the calling function. + * There's good reason for this. + * + * To use safely closures asynchronously, they must always have a refcount while + * they are running owned by the thread that is running them. Otherwise, suppose + * you submit some bios and wish to have a function run when they all complete: + * + * foo_endio(struct bio *bio) + * { + * closure_put(cl); + * } + * + * closure_init(cl); + * + * do_stuff(); + * closure_get(cl); + * bio1->bi_endio = foo_endio; + * bio_submit(bio1); + * + * do_more_stuff(); + * closure_get(cl); + * bio2->bi_endio = foo_endio; + * bio_submit(bio2); + * + * continue_at(cl, complete_some_read, system_wq); + * + * If closure's refcount started at 0, complete_some_read() could run before the + * second bio was submitted - which is almost always not what you want! More + * importantly, it wouldn't be possible to say whether the original thread or + * complete_some_read()'s thread owned the closure - and whatever state it was + * associated with! + * + * So, closure_init() initializes a closure's refcount to 1 - and when a + * closure_fn is run, the refcount will be reset to 1 first. + * + * Then, the rule is - if you got the refcount with closure_get(), release it + * with closure_put() (i.e, in a bio->bi_endio function). If you have a refcount + * on a closure because you called closure_init() or you were run out of a + * closure - _always_ use continue_at(). Doing so consistently will help + * eliminate an entire class of particularly pernicious races. + * + * Lastly, you might have a wait list dedicated to a specific event, and have no + * need for specifying the condition - you just want to wait until someone runs + * closure_wake_up() on the appropriate wait list. In that case, just use + * closure_wait(). It will return either true or false, depending on whether the + * closure was already on a wait list or not - a closure can only be on one wait + * list at a time. + * + * Parents: + * + * closure_init() takes two arguments - it takes the closure to initialize, and + * a (possibly null) parent. + * + * If parent is non null, the new closure will have a refcount for its lifetime; + * a closure is considered to be "finished" when its refcount hits 0 and the + * function to run is null. Hence + * + * continue_at(cl, NULL, NULL); + * + * returns up the (spaghetti) stack of closures, precisely like normal return + * returns up the C stack. continue_at() with non null fn is better thought of + * as doing a tail call. + * + * All this implies that a closure should typically be embedded in a particular + * struct (which its refcount will normally control the lifetime of), and that + * struct can very much be thought of as a stack frame. + */ + +struct closure; +struct closure_syncer; +typedef void (closure_fn) (struct work_struct *); +extern struct dentry *bcache_debug; + +struct closure_waitlist { + struct llist_head list; +}; + +enum closure_state { + /* + * CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by + * the thread that owns the closure, and cleared by the thread that's + * waking up the closure. + * + * The rest are for debugging and don't affect behaviour: + * + * CLOSURE_RUNNING: Set when a closure is running (i.e. by + * closure_init() and when closure_put() runs then next function), and + * must be cleared before remaining hits 0. Primarily to help guard + * against incorrect usage and accidentally transferring references. + * continue_at() and closure_return() clear it for you, if you're doing + * something unusual you can use closure_set_dead() which also helps + * annotate where references are being transferred. + */ + + CLOSURE_BITS_START = (1U << 26), + CLOSURE_DESTRUCTOR = (1U << 26), + CLOSURE_WAITING = (1U << 28), + CLOSURE_RUNNING = (1U << 30), +}; + +#define CLOSURE_GUARD_MASK \ + ((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_RUNNING) << 1) + +#define CLOSURE_REMAINING_MASK (CLOSURE_BITS_START - 1) +#define CLOSURE_REMAINING_INITIALIZER (1|CLOSURE_RUNNING) + +struct closure { + union { + struct { + struct workqueue_struct *wq; + struct closure_syncer *s; + struct llist_node list; + closure_fn *fn; + }; + struct work_struct work; + }; + + struct closure *parent; + + atomic_t remaining; + bool closure_get_happened; + +#ifdef CONFIG_DEBUG_CLOSURES +#define CLOSURE_MAGIC_DEAD 0xc054dead +#define CLOSURE_MAGIC_ALIVE 0xc054a11e + + unsigned int magic; + struct list_head all; + unsigned long ip; + unsigned long waiting_on; +#endif +}; + +void closure_sub(struct closure *cl, int v); +void closure_put(struct closure *cl); +void __closure_wake_up(struct closure_waitlist *list); +bool closure_wait(struct closure_waitlist *list, struct closure *cl); +void __closure_sync(struct closure *cl); + +static inline unsigned closure_nr_remaining(struct closure *cl) +{ + return atomic_read(&cl->remaining) & CLOSURE_REMAINING_MASK; +} + +/** + * closure_sync - sleep until a closure a closure has nothing left to wait on + * + * Sleeps until the refcount hits 1 - the thread that's running the closure owns + * the last refcount. + */ +static inline void closure_sync(struct closure *cl) +{ +#ifdef CONFIG_DEBUG_CLOSURES + BUG_ON(closure_nr_remaining(cl) != 1 && !cl->closure_get_happened); +#endif + + if (cl->closure_get_happened) + __closure_sync(cl); +} + +#ifdef CONFIG_DEBUG_CLOSURES + +void closure_debug_create(struct closure *cl); +void closure_debug_destroy(struct closure *cl); + +#else + +static inline void closure_debug_create(struct closure *cl) {} +static inline void closure_debug_destroy(struct closure *cl) {} + +#endif + +static inline void closure_set_ip(struct closure *cl) +{ +#ifdef CONFIG_DEBUG_CLOSURES + cl->ip = _THIS_IP_; +#endif +} + +static inline void closure_set_ret_ip(struct closure *cl) +{ +#ifdef CONFIG_DEBUG_CLOSURES + cl->ip = _RET_IP_; +#endif +} + +static inline void closure_set_waiting(struct closure *cl, unsigned long f) +{ +#ifdef CONFIG_DEBUG_CLOSURES + cl->waiting_on = f; +#endif +} + +static inline void closure_set_stopped(struct closure *cl) +{ + atomic_sub(CLOSURE_RUNNING, &cl->remaining); +} + +static inline void set_closure_fn(struct closure *cl, closure_fn *fn, + struct workqueue_struct *wq) +{ + closure_set_ip(cl); + cl->fn = fn; + cl->wq = wq; +} + +static inline void closure_queue(struct closure *cl) +{ + struct workqueue_struct *wq = cl->wq; + /** + * Changes made to closure, work_struct, or a couple of other structs + * may cause work.func not pointing to the right location. + */ + BUILD_BUG_ON(offsetof(struct closure, fn) + != offsetof(struct work_struct, func)); + + if (wq) { + INIT_WORK(&cl->work, cl->work.func); + BUG_ON(!queue_work(wq, &cl->work)); + } else + cl->fn(&cl->work); +} + +/** + * closure_get - increment a closure's refcount + */ +static inline void closure_get(struct closure *cl) +{ + cl->closure_get_happened = true; + +#ifdef CONFIG_DEBUG_CLOSURES + BUG_ON((atomic_inc_return(&cl->remaining) & + CLOSURE_REMAINING_MASK) <= 1); +#else + atomic_inc(&cl->remaining); +#endif +} + +/** + * closure_init - Initialize a closure, setting the refcount to 1 + * @cl: closure to initialize + * @parent: parent of the new closure. cl will take a refcount on it for its + * lifetime; may be NULL. + */ +static inline void closure_init(struct closure *cl, struct closure *parent) +{ + cl->fn = NULL; + cl->parent = parent; + if (parent) + closure_get(parent); + + atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); + cl->closure_get_happened = false; + + closure_debug_create(cl); + closure_set_ip(cl); +} + +static inline void closure_init_stack(struct closure *cl) +{ + memset(cl, 0, sizeof(struct closure)); + atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); +} + +/** + * closure_wake_up - wake up all closures on a wait list, + * with memory barrier + */ +static inline void closure_wake_up(struct closure_waitlist *list) +{ + /* Memory barrier for the wait list */ + smp_mb(); + __closure_wake_up(list); +} + +#define CLOSURE_CALLBACK(name) void name(struct work_struct *ws) +#define closure_type(name, type, member) \ + struct closure *cl = container_of(ws, struct closure, work); \ + type *name = container_of(cl, type, member) + +/** + * continue_at - jump to another function with barrier + * + * After @cl is no longer waiting on anything (i.e. all outstanding refs have + * been dropped with closure_put()), it will resume execution at @fn running out + * of @wq (or, if @wq is NULL, @fn will be called by closure_put() directly). + * + * This is because after calling continue_at() you no longer have a ref on @cl, + * and whatever @cl owns may be freed out from under you - a running closure fn + * has a ref on its own closure which continue_at() drops. + * + * Note you are expected to immediately return after using this macro. + */ +#define continue_at(_cl, _fn, _wq) \ +do { \ + set_closure_fn(_cl, _fn, _wq); \ + closure_sub(_cl, CLOSURE_RUNNING + 1); \ +} while (0) + +/** + * closure_return - finish execution of a closure + * + * This is used to indicate that @cl is finished: when all outstanding refs on + * @cl have been dropped @cl's ref on its parent closure (as passed to + * closure_init()) will be dropped, if one was specified - thus this can be + * thought of as returning to the parent closure. + */ +#define closure_return(_cl) continue_at((_cl), NULL, NULL) + +/** + * continue_at_nobarrier - jump to another function without barrier + * + * Causes @fn to be executed out of @cl, in @wq context (or called directly if + * @wq is NULL). + * + * The ref the caller of continue_at_nobarrier() had on @cl is now owned by @fn, + * thus it's not safe to touch anything protected by @cl after a + * continue_at_nobarrier(). + */ +#define continue_at_nobarrier(_cl, _fn, _wq) \ +do { \ + set_closure_fn(_cl, _fn, _wq); \ + closure_queue(_cl); \ +} while (0) + +/** + * closure_return_with_destructor - finish execution of a closure, + * with destructor + * + * Works like closure_return(), except @destructor will be called when all + * outstanding refs on @cl have been dropped; @destructor may be used to safely + * free the memory occupied by @cl, and it is called with the ref on the parent + * closure still held - so @destructor could safely return an item to a + * freelist protected by @cl's parent. + */ +#define closure_return_with_destructor(_cl, _destructor) \ +do { \ + set_closure_fn(_cl, _destructor, NULL); \ + closure_sub(_cl, CLOSURE_RUNNING - CLOSURE_DESTRUCTOR + 1); \ +} while (0) + +/** + * closure_call - execute @fn out of a new, uninitialized closure + * + * Typically used when running out of one closure, and we want to run @fn + * asynchronously out of a new closure - @parent will then wait for @cl to + * finish. + */ +static inline void closure_call(struct closure *cl, closure_fn fn, + struct workqueue_struct *wq, + struct closure *parent) +{ + closure_init(cl, parent); + continue_at_nobarrier(cl, fn, wq); +} + +#define __closure_wait_event(waitlist, _cond) \ +do { \ + struct closure cl; \ + \ + closure_init_stack(&cl); \ + \ + while (1) { \ + closure_wait(waitlist, &cl); \ + if (_cond) \ + break; \ + closure_sync(&cl); \ + } \ + closure_wake_up(waitlist); \ + closure_sync(&cl); \ +} while (0) + +#define closure_wait_event(waitlist, _cond) \ +do { \ + if (!(_cond)) \ + __closure_wait_event(waitlist, _cond); \ +} while (0) + +#endif /* _LINUX_CLOSURE_H */ diff --git a/include/linux/comedi/comedi_8254.h b/include/linux/comedi/comedi_8254.h index d8264417e53c..d527f04400df 100644 --- a/include/linux/comedi/comedi_8254.h +++ b/include/linux/comedi/comedi_8254.h @@ -12,6 +12,8 @@ #define _COMEDI_8254_H #include <linux/types.h> +#include <linux/errno.h> +#include <linux/err.h> struct comedi_device; struct comedi_insn; @@ -57,10 +59,24 @@ struct comedi_subdevice; /* counter maps zero to 0x10000 */ #define I8254_MAX_COUNT 0x10000 +struct comedi_8254; + +/** + * typedef comedi_8254_iocb_fn - call-back function type for 8254 register access + * @i8254: pointer to struct comedi_8254 + * @dir: direction (0 = read, 1 = write) + * @reg: register number + * @val: value to write + * + * Return: Register value when reading, 0 when writing. + */ +typedef unsigned int comedi_8254_iocb_fn(struct comedi_8254 *i8254, int dir, + unsigned int reg, unsigned int val); + /** * struct comedi_8254 - private data used by this module - * @iobase: PIO base address of the registers (in/out) - * @mmio: MMIO base address of the registers (read/write) + * @iocb: I/O call-back function for register access + * @context: context for register access (e.g. a base address) * @iosize: I/O size used to access the registers (b/w/l) * @regshift: register gap shift * @osc_base: cascaded oscillator speed in ns @@ -76,8 +92,8 @@ struct comedi_subdevice; * @insn_config: driver specific (*insn_config) callback */ struct comedi_8254 { - unsigned long iobase; - void __iomem *mmio; + comedi_8254_iocb_fn *iocb; + unsigned long context; unsigned int iosize; unsigned int regshift; unsigned int osc_base; @@ -122,13 +138,24 @@ void comedi_8254_set_busy(struct comedi_8254 *i8254, void comedi_8254_subdevice_init(struct comedi_subdevice *s, struct comedi_8254 *i8254); -struct comedi_8254 *comedi_8254_init(unsigned long iobase, - unsigned int osc_base, - unsigned int iosize, - unsigned int regshift); -struct comedi_8254 *comedi_8254_mm_init(void __iomem *mmio, - unsigned int osc_base, - unsigned int iosize, - unsigned int regshift); +#ifdef CONFIG_HAS_IOPORT +struct comedi_8254 *comedi_8254_io_alloc(unsigned long iobase, + unsigned int osc_base, + unsigned int iosize, + unsigned int regshift); +#else +static inline struct comedi_8254 *comedi_8254_io_alloc(unsigned long iobase, + unsigned int osc_base, + unsigned int iosize, + unsigned int regshift) +{ + return ERR_PTR(-ENXIO); +} +#endif + +struct comedi_8254 *comedi_8254_mm_alloc(void __iomem *mmio, + unsigned int osc_base, + unsigned int iosize, + unsigned int regshift); #endif /* _COMEDI_8254_H */ diff --git a/include/linux/comedi/comedi_8255.h b/include/linux/comedi/comedi_8255.h index b2a5bc6b3a49..d24a69da389b 100644 --- a/include/linux/comedi/comedi_8255.h +++ b/include/linux/comedi/comedi_8255.h @@ -10,6 +10,8 @@ #ifndef _COMEDI_8255_H #define _COMEDI_8255_H +#include <linux/errno.h> + #define I8255_SIZE 0x04 #define I8255_DATA_A_REG 0x00 @@ -27,16 +29,26 @@ struct comedi_device; struct comedi_subdevice; -int subdev_8255_init(struct comedi_device *dev, struct comedi_subdevice *s, - int (*io)(struct comedi_device *dev, int dir, int port, - int data, unsigned long regbase), - unsigned long regbase); +#ifdef CONFIG_HAS_IOPORT +int subdev_8255_io_init(struct comedi_device *dev, struct comedi_subdevice *s, + unsigned long regbase); +#else +static inline int subdev_8255_io_init(struct comedi_device *dev, + struct comedi_subdevice *s, + unsigned long regbase) +{ + return -ENXIO; +} +#endif int subdev_8255_mm_init(struct comedi_device *dev, struct comedi_subdevice *s, - int (*io)(struct comedi_device *dev, int dir, int port, - int data, unsigned long regbase), unsigned long regbase); +int subdev_8255_cb_init(struct comedi_device *dev, struct comedi_subdevice *s, + int (*io)(struct comedi_device *dev, int dir, int port, + int data, unsigned long context), + unsigned long context); + unsigned long subdev_8255_regbase(struct comedi_subdevice *s); #endif diff --git a/include/linux/comedi/comedidev.h b/include/linux/comedi/comedidev.h index 0a1150900ef3..c08416a7364b 100644 --- a/include/linux/comedi/comedidev.h +++ b/include/linux/comedi/comedidev.h @@ -633,7 +633,7 @@ extern const struct comedi_lrange range_unknown; */ struct comedi_lrange { int length; - struct comedi_krange range[]; + struct comedi_krange range[] __counted_by(length); }; /** diff --git a/include/linux/compat.h b/include/linux/compat.h index 1cfa4f0f490a..233f61ec8afc 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -581,7 +581,6 @@ asmlinkage long compat_sys_io_pgetevents_time64(compat_aio_context_t ctx_id, struct io_event __user *events, struct __kernel_timespec __user *timeout, const struct __compat_aio_sigset __user *usig); -asmlinkage long compat_sys_lookup_dcookie(u32, u32, char __user *, compat_size_t); asmlinkage long compat_sys_epoll_pwait(int epfd, struct epoll_event __user *events, int maxevents, int timeout, diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 9b673fefcef8..ddab1ef22bee 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -14,11 +14,6 @@ #undef __cleanup #define __cleanup(func) __maybe_unused __attribute__((__cleanup__(func))) -/* same as gcc, this was present in clang-2.6 so we can assume it works - * with any version that can compile the kernel - */ -#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) - /* all clang versions usable with the kernel support KASAN ABI version 5 */ #define KASAN_ABI_VERSION 5 diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 7af9e34ec261..2ceba3fe4ec1 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -39,8 +39,6 @@ #define __noretpoline __attribute__((__indirect_branch__("keep"))) #endif -#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) - #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) #define __latent_entropy __attribute__((latent_entropy)) #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index d7779a18b24f..bb1339c7057b 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -177,10 +177,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, __asm__ ("" : "=r" (var) : "0" (var)) #endif -/* Not-quite-unique ID. */ -#ifndef __UNIQUE_ID -# define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__) -#endif +#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) /** * data_race - mark an expression as containing intentional data races @@ -231,6 +228,14 @@ static inline void *offset_to_ptr(const int *off) #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) /* + * This returns a constant expression while determining if an argument is + * a constant expression, most importantly without evaluating the argument. + * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de> + */ +#define __is_constexpr(x) \ + (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) + +/* * Whether 'type' is a signed type or an unsigned type. Supports scalar types, * bool and also pointer types. */ diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 00efa35c350f..28566624f008 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -95,6 +95,19 @@ #endif /* + * Optional: only supported since gcc >= 14 + * Optional: only supported since clang >= 18 + * + * gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896 + * clang: https://reviews.llvm.org/D148381 + */ +#if __has_attribute(__counted_by__) +# define __counted_by(member) __attribute__((__counted_by__(member))) +#else +# define __counted_by(member) +#endif + +/* * Optional: not supported by gcc * Optional: only supported since clang >= 14.0 * @@ -130,19 +143,6 @@ #endif /* - * Optional: only supported since gcc >= 14 - * Optional: only supported since clang >= 17 - * - * gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896 - * clang: https://reviews.llvm.org/D148381 - */ -#if __has_attribute(__element_count__) -# define __counted_by(member) __attribute__((__element_count__(#member))) -#else -# define __counted_by(member) -#endif - -/* * Optional: only supported since clang >= 14.0 * * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-error-function-attribute diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 547ea1ff806e..6f1ca49306d2 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -2,6 +2,15 @@ #ifndef __LINUX_COMPILER_TYPES_H #define __LINUX_COMPILER_TYPES_H +/* + * __has_builtin is supported on gcc >= 10, clang >= 3 and icc >= 21. + * In the meantime, to support gcc < 10, we implement __has_builtin + * by hand. + */ +#ifndef __has_builtin +#define __has_builtin(x) (0) +#endif + #ifndef __ASSEMBLY__ /* @@ -106,15 +115,32 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } #define __cold #endif -/* Builtins */ - /* - * __has_builtin is supported on gcc >= 10, clang >= 3 and icc >= 21. - * In the meantime, to support gcc < 10, we implement __has_builtin - * by hand. + * On x86-64 and arm64 targets, __preserve_most changes the calling convention + * of a function to make the code in the caller as unintrusive as possible. This + * convention behaves identically to the C calling convention on how arguments + * and return values are passed, but uses a different set of caller- and callee- + * saved registers. + * + * The purpose is to alleviates the burden of saving and recovering a large + * register set before and after the call in the caller. This is beneficial for + * rarely taken slow paths, such as error-reporting functions that may be called + * from hot paths. + * + * Note: This may conflict with instrumentation inserted on function entry which + * does not use __preserve_most or equivalent convention (if in assembly). Since + * function tracing assumes the normal C calling convention, where the attribute + * is supported, __preserve_most implies notrace. It is recommended to restrict + * use of the attribute to functions that should or already disable tracing. + * + * Optional: not supported by gcc. + * + * clang: https://clang.llvm.org/docs/AttributeReference.html#preserve-most */ -#ifndef __has_builtin -#define __has_builtin(x) (0) +#if __has_attribute(__preserve_most__) && (defined(CONFIG_X86_64) || defined(CONFIG_ARM64)) +# define __preserve_most notrace __attribute__((__preserve_most__)) +#else +# define __preserve_most #endif /* Compiler specific macros. */ @@ -324,6 +350,18 @@ struct ftrace_likely_data { # define __realloc_size(x, ...) #endif +/* + * When the size of an allocated object is needed, use the best available + * mechanism to find it. (For cases where sizeof() cannot be used.) + */ +#if __has_builtin(__builtin_dynamic_object_size) +#define __struct_size(p) __builtin_dynamic_object_size(p, 0) +#define __member_size(p) __builtin_dynamic_object_size(p, 1) +#else +#define __struct_size(p) __builtin_object_size(p, 0) +#define __member_size(p) __builtin_object_size(p, 1) +#endif + #ifndef asm_volatile_goto #define asm_volatile_goto(x...) asm goto(x) #endif diff --git a/include/linux/completion.h b/include/linux/completion.h index 62b32b19e0a8..fb2915676574 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -116,6 +116,7 @@ extern bool try_wait_for_completion(struct completion *x); extern bool completion_done(struct completion *x); extern void complete(struct completion *); +extern void complete_on_current_cpu(struct completion *x); extern void complete_all(struct completion *); #endif diff --git a/include/linux/connector.h b/include/linux/connector.h index 487350bb19c3..cec2d99ae902 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -90,13 +90,19 @@ void cn_del_callback(const struct cb_id *id); * If @group is not zero, then message will be delivered * to the specified group. * @gfp_mask: GFP mask. + * @filter: Filter function to be used at netlink layer. + * @filter_data:Filter data to be supplied to the filter function * * It can be safely called from softirq context, but may silently * fail under strong memory pressure. * * If there are no listeners for given group %-ESRCH can be returned. */ -int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, u32 group, gfp_t gfp_mask); +int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, + u32 group, gfp_t gfp_mask, + int (*filter)(struct sock *dsk, struct sk_buff *skb, + void *data), + void *filter_data); /** * cn_netlink_send - Sends message to the specified groups. diff --git a/include/linux/console.h b/include/linux/console.h index d3195664baa5..779d388af8a0 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -101,6 +101,13 @@ extern const struct consw dummy_con; /* dummy console buffer */ extern const struct consw vga_con; /* VGA text console */ extern const struct consw newport_con; /* SGI Newport console */ +struct screen_info; +#ifdef CONFIG_VGA_CONSOLE +void vgacon_register_screen(struct screen_info *si); +#else +static inline void vgacon_register_screen(struct screen_info *si) { } +#endif + int con_is_bound(const struct consw *csw); int do_unregister_con_driver(const struct consw *csw); int do_take_over_console(const struct consw *sw, int first, int last, int deflt); @@ -154,6 +161,10 @@ static inline int con_debug_leave(void) * receiving the printk spam for obvious reasons. * @CON_EXTENDED: The console supports the extended output format of * /dev/kmesg which requires a larger output buffer. + * @CON_SUSPENDED: Indicates if a console is suspended. If true, the + * printing callbacks must not be called. + * @CON_NBCON: Console can operate outside of the legacy style console_lock + * constraints. */ enum cons_flags { CON_PRINTBUFFER = BIT(0), @@ -163,6 +174,112 @@ enum cons_flags { CON_ANYTIME = BIT(4), CON_BRL = BIT(5), CON_EXTENDED = BIT(6), + CON_SUSPENDED = BIT(7), + CON_NBCON = BIT(8), +}; + +/** + * struct nbcon_state - console state for nbcon consoles + * @atom: Compound of the state fields for atomic operations + * + * @req_prio: The priority of a handover request + * @prio: The priority of the current owner + * @unsafe: Console is busy in a non takeover region + * @unsafe_takeover: A hostile takeover in an unsafe state happened in the + * past. The console cannot be safe until re-initialized. + * @cpu: The CPU on which the owner runs + * + * To be used for reading and preparing of the value stored in the nbcon + * state variable @console::nbcon_state. + * + * The @prio and @req_prio fields are particularly important to allow + * spin-waiting to timeout and give up without the risk of a waiter being + * assigned the lock after giving up. + */ +struct nbcon_state { + union { + unsigned int atom; + struct { + unsigned int prio : 2; + unsigned int req_prio : 2; + unsigned int unsafe : 1; + unsigned int unsafe_takeover : 1; + unsigned int cpu : 24; + }; + }; +}; + +/* + * The nbcon_state struct is used to easily create and interpret values that + * are stored in the @console::nbcon_state variable. Ensure this struct stays + * within the size boundaries of the atomic variable's underlying type in + * order to avoid any accidental truncation. + */ +static_assert(sizeof(struct nbcon_state) <= sizeof(int)); + +/** + * nbcon_prio - console owner priority for nbcon consoles + * @NBCON_PRIO_NONE: Unused + * @NBCON_PRIO_NORMAL: Normal (non-emergency) usage + * @NBCON_PRIO_EMERGENCY: Emergency output (WARN/OOPS...) + * @NBCON_PRIO_PANIC: Panic output + * @NBCON_PRIO_MAX: The number of priority levels + * + * A higher priority context can takeover the console when it is + * in the safe state. The final attempt to flush consoles in panic() + * can be allowed to do so even in an unsafe state (Hope and pray). + */ +enum nbcon_prio { + NBCON_PRIO_NONE = 0, + NBCON_PRIO_NORMAL, + NBCON_PRIO_EMERGENCY, + NBCON_PRIO_PANIC, + NBCON_PRIO_MAX, +}; + +struct console; +struct printk_buffers; + +/** + * struct nbcon_context - Context for console acquire/release + * @console: The associated console + * @spinwait_max_us: Limit for spin-wait acquire + * @prio: Priority of the context + * @allow_unsafe_takeover: Allow performing takeover even if unsafe. Can + * be used only with NBCON_PRIO_PANIC @prio. It + * might cause a system freeze when the console + * is used later. + * @backlog: Ringbuffer has pending records + * @pbufs: Pointer to the text buffer for this context + * @seq: The sequence number to print for this context + */ +struct nbcon_context { + /* members set by caller */ + struct console *console; + unsigned int spinwait_max_us; + enum nbcon_prio prio; + unsigned int allow_unsafe_takeover : 1; + + /* members set by emit */ + unsigned int backlog : 1; + + /* members set by acquire */ + struct printk_buffers *pbufs; + u64 seq; +}; + +/** + * struct nbcon_write_context - Context handed to the nbcon write callbacks + * @ctxt: The core console context + * @outbuf: Pointer to the text buffer for output + * @len: Length to write + * @unsafe_takeover: If a hostile takeover in an unsafe state has occurred + */ +struct nbcon_write_context { + struct nbcon_context __private ctxt; + char *outbuf; + unsigned int len; + bool unsafe_takeover; }; /** @@ -184,6 +301,11 @@ enum cons_flags { * @dropped: Number of unreported dropped ringbuffer records * @data: Driver private data * @node: hlist node for the console list + * + * @write_atomic: Write callback for atomic context + * @nbcon_state: State for nbcon consoles + * @nbcon_seq: Sequence number of the next record for nbcon to print + * @pbufs: Pointer to nbcon private buffer */ struct console { char name[16]; @@ -203,6 +325,13 @@ struct console { unsigned long dropped; void *data; struct hlist_node node; + + /* nbcon console specific members */ + bool (*write_atomic)(struct console *con, + struct nbcon_write_context *wctxt); + atomic_t __private nbcon_state; + atomic_long_t __private nbcon_seq; + struct printk_buffers *pbufs; }; #ifdef CONFIG_LOCKDEP @@ -329,6 +458,16 @@ static inline bool console_is_registered(const struct console *con) lockdep_assert_console_list_lock_held(); \ hlist_for_each_entry(con, &console_list, node) +#ifdef CONFIG_PRINTK +extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt); +extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt); +extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt); +#else +static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; } +static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; } +static inline bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; } +#endif + extern int console_set_on_cmdline; extern struct console *early_console; @@ -337,7 +476,7 @@ enum con_flush_mode { CONSOLE_REPLAY_ALL, }; -extern int add_preferred_console(char *name, int idx, char *options); +extern int add_preferred_console(const char *name, const short idx, char *options); extern void console_force_preferred_locked(struct console *con); extern void register_console(struct console *); extern int unregister_console(struct console *); diff --git a/include/linux/const.h b/include/linux/const.h index 435ddd72d2c4..81b8aae5a855 100644 --- a/include/linux/const.h +++ b/include/linux/const.h @@ -3,12 +3,4 @@ #include <vdso/const.h> -/* - * This returns a constant expression while determining if an argument is - * a constant expression, most importantly without evaluating the argument. - * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de> - */ -#define __is_constexpr(x) \ - (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) - #endif /* _LINUX_CONST_H */ diff --git a/include/linux/coresight.h b/include/linux/coresight.h index bf70987240e4..a269fffaf991 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -6,6 +6,8 @@ #ifndef _LINUX_CORESIGHT_H #define _LINUX_CORESIGHT_H +#include <linux/amba/bus.h> +#include <linux/clk.h> #include <linux/device.h> #include <linux/io.h> #include <linux/perf_event.h> @@ -386,6 +388,63 @@ static inline u32 csdev_access_relaxed_read32(struct csdev_access *csa, return csa->read(offset, true, false); } +#define CORESIGHT_CIDRn(i) (0xFF0 + ((i) * 4)) + +static inline u32 coresight_get_cid(void __iomem *base) +{ + u32 i, cid = 0; + + for (i = 0; i < 4; i++) + cid |= readl(base + CORESIGHT_CIDRn(i)) << (i * 8); + + return cid; +} + +static inline bool is_coresight_device(void __iomem *base) +{ + u32 cid = coresight_get_cid(base); + + return cid == CORESIGHT_CID; +} + +/* + * Attempt to find and enable "APB clock" for the given device + * + * Returns: + * + * clk - Clock is found and enabled + * NULL - clock is not found + * ERROR - Clock is found but failed to enable + */ +static inline struct clk *coresight_get_enable_apb_pclk(struct device *dev) +{ + struct clk *pclk; + int ret; + + pclk = clk_get(dev, "apb_pclk"); + if (IS_ERR(pclk)) + return NULL; + + ret = clk_prepare_enable(pclk); + if (ret) { + clk_put(pclk); + return ERR_PTR(ret); + } + return pclk; +} + +#define CORESIGHT_PIDRn(i) (0xFE0 + ((i) * 4)) + +static inline u32 coresight_get_pid(struct csdev_access *csa) +{ + u32 i, pid = 0; + + for (i = 0; i < 4; i++) + pid |= csdev_access_relaxed_read32(csa, CORESIGHT_PIDRn(i)) << (i * 8); + + return pid; +} + static inline u64 csdev_access_relaxed_read_pair(struct csdev_access *csa, u32 lo_offset, u32 hi_offset) { diff --git a/include/linux/counter.h b/include/linux/counter.h index b63746637de2..702e9108bbb4 100644 --- a/include/linux/counter.h +++ b/include/linux/counter.h @@ -399,7 +399,7 @@ struct counter_device { struct mutex ops_exist_lock; }; -void *counter_priv(const struct counter_device *const counter); +void *counter_priv(const struct counter_device *const counter) __attribute_const__; struct counter_device *counter_alloc(size_t sizeof_priv); void counter_put(struct counter_device *const counter); diff --git a/include/linux/cpu.h b/include/linux/cpu.h index e006c719182b..fc8094419084 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -18,6 +18,7 @@ #include <linux/compiler.h> #include <linux/cpumask.h> #include <linux/cpuhotplug.h> +#include <linux/cpu_smt.h> struct device; struct device_node; @@ -79,6 +80,8 @@ extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, const struct attribute_group **groups, const char *fmt, ...); +extern int arch_register_cpu(int cpu); +extern void arch_unregister_cpu(int cpu); #ifdef CONFIG_HOTPLUG_CPU extern void unregister_cpu(struct cpu *cpu); extern ssize_t arch_cpu_probe(const char *, size_t); @@ -152,6 +155,8 @@ static inline int remove_cpu(unsigned int cpu) { return -EPERM; } static inline void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { } #endif /* !CONFIG_HOTPLUG_CPU */ +DEFINE_LOCK_GUARD_0(cpus_read_lock, cpus_read_lock(), cpus_read_unlock()) + #ifdef CONFIG_PM_SLEEP_SMP extern int freeze_secondary_cpus(int primary); extern void thaw_secondary_cpus(void); @@ -194,7 +199,6 @@ void arch_cpu_finalize_init(void); static inline void arch_cpu_finalize_init(void) { } #endif -void cpu_set_state_online(int cpu); void play_idle_precise(u64 duration_ns, u64 latency_ns); static inline void play_idle(unsigned long duration_us) @@ -208,30 +212,6 @@ void cpuhp_report_idle_dead(void); static inline void cpuhp_report_idle_dead(void) { } #endif /* #ifdef CONFIG_HOTPLUG_CPU */ -enum cpuhp_smt_control { - CPU_SMT_ENABLED, - CPU_SMT_DISABLED, - CPU_SMT_FORCE_DISABLED, - CPU_SMT_NOT_SUPPORTED, - CPU_SMT_NOT_IMPLEMENTED, -}; - -#if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT) -extern enum cpuhp_smt_control cpu_smt_control; -extern void cpu_smt_disable(bool force); -extern void cpu_smt_check_topology(void); -extern bool cpu_smt_possible(void); -extern int cpuhp_smt_enable(void); -extern int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval); -#else -# define cpu_smt_control (CPU_SMT_NOT_IMPLEMENTED) -static inline void cpu_smt_disable(bool force) { } -static inline void cpu_smt_check_topology(void) { } -static inline bool cpu_smt_possible(void) { return false; } -static inline int cpuhp_smt_enable(void) { return 0; } -static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; } -#endif - extern bool cpu_mitigations_off(void); extern bool cpu_mitigations_auto_nosmt(void); diff --git a/include/linux/cpu_smt.h b/include/linux/cpu_smt.h new file mode 100644 index 000000000000..0c1664294b57 --- /dev/null +++ b/include/linux/cpu_smt.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_CPU_SMT_H_ +#define _LINUX_CPU_SMT_H_ + +enum cpuhp_smt_control { + CPU_SMT_ENABLED, + CPU_SMT_DISABLED, + CPU_SMT_FORCE_DISABLED, + CPU_SMT_NOT_SUPPORTED, + CPU_SMT_NOT_IMPLEMENTED, +}; + +#if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT) +extern enum cpuhp_smt_control cpu_smt_control; +extern unsigned int cpu_smt_num_threads; +extern void cpu_smt_disable(bool force); +extern void cpu_smt_set_num_threads(unsigned int num_threads, + unsigned int max_threads); +extern bool cpu_smt_possible(void); +extern int cpuhp_smt_enable(void); +extern int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval); +#else +# define cpu_smt_control (CPU_SMT_NOT_IMPLEMENTED) +# define cpu_smt_num_threads 1 +static inline void cpu_smt_disable(bool force) { } +static inline void cpu_smt_set_num_threads(unsigned int num_threads, + unsigned int max_threads) { } +static inline bool cpu_smt_possible(void) { return false; } +static inline int cpuhp_smt_enable(void) { return 0; } +static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; } +#endif + +#endif /* _LINUX_CPU_SMT_H_ */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 172ff51c1b2a..1c5ca92a0555 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -19,6 +19,7 @@ #include <linux/pm_qos.h> #include <linux/spinlock.h> #include <linux/sysfs.h> +#include <linux/minmax.h> /********************************************************************* * CPUFREQ INTERFACE * @@ -140,6 +141,9 @@ struct cpufreq_policy { */ bool dvfs_possible_from_any_cpu; + /* Per policy boost enabled flag. */ + bool boost_enabled; + /* Cached frequency lookup from cpufreq_driver_resolve_freq. */ unsigned int cached_target_freq; unsigned int cached_resolved_idx; @@ -370,7 +374,7 @@ struct cpufreq_driver { int (*target_intermediate)(struct cpufreq_policy *policy, unsigned int index); - /* should be defined, if possible */ + /* should be defined, if possible, return 0 on error */ unsigned int (*get)(unsigned int cpu); /* Called to update policy limits on firmware notifications. */ @@ -467,17 +471,8 @@ static inline void cpufreq_verify_within_limits(struct cpufreq_policy_data *poli unsigned int min, unsigned int max) { - if (policy->min < min) - policy->min = min; - if (policy->max < min) - policy->max = min; - if (policy->min > max) - policy->min = max; - if (policy->max > max) - policy->max = max; - if (policy->min > policy->max) - policy->min = policy->max; - return; + policy->max = clamp(policy->max, min, max); + policy->min = clamp(policy->min, min, policy->max); } static inline void @@ -1198,14 +1193,6 @@ static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_ } #endif -#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) -void sched_cpufreq_governor_change(struct cpufreq_policy *policy, - struct cpufreq_governor *old_gov); -#else -static inline void sched_cpufreq_governor_change(struct cpufreq_policy *policy, - struct cpufreq_governor *old_gov) { } -#endif - extern unsigned int arch_freq_get_on_cpu(int cpu); #ifndef arch_set_freq_scale diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 25b6e6e6ba6b..efc0c0b07efb 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -48,7 +48,7 @@ * same section. * * If neither #1 nor #2 apply, please use the dynamic state space when - * setting up a state by using CPUHP_PREPARE_DYN or CPUHP_PREPARE_ONLINE + * setting up a state by using CPUHP_BP_PREPARE_DYN or CPUHP_AP_ONLINE_DYN * for the @state argument of the setup function. * * See Documentation/core-api/cpu_hotplug.rst for further information and @@ -90,7 +90,6 @@ enum cpuhp_state { CPUHP_FS_BUFF_DEAD, CPUHP_PRINTK_DEAD, CPUHP_MM_MEMCQ_DEAD, - CPUHP_XFS_DEAD, CPUHP_PERCPU_CNT_DEAD, CPUHP_RADIX_DEAD, CPUHP_PAGE_ALLOC, @@ -173,6 +172,7 @@ enum cpuhp_state { CPUHP_AP_ARM_L2X0_STARTING, CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING, CPUHP_AP_ARM_ARCH_TIMER_STARTING, + CPUHP_AP_ARM_ARCH_TIMER_EVTSTRM_STARTING, CPUHP_AP_ARM_GLOBAL_TIMER_STARTING, CPUHP_AP_JCORE_TIMER_STARTING, CPUHP_AP_ARM_TWD_STARTING, @@ -190,10 +190,12 @@ enum cpuhp_state { /* Must be the last timer callback */ CPUHP_AP_DUMMY_TIMER_STARTING, CPUHP_AP_ARM_XEN_STARTING, + CPUHP_AP_ARM_XEN_RUNSTATE_STARTING, CPUHP_AP_ARM_CORESIGHT_STARTING, CPUHP_AP_ARM_CORESIGHT_CTI_STARTING, CPUHP_AP_ARM64_ISNDEP_STARTING, CPUHP_AP_SMPCFD_DYING, + CPUHP_AP_HRTIMERS_DYING, CPUHP_AP_X86_TBOOT_DYING, CPUHP_AP_ARM_CACHE_B15_RAC_DYING, CPUHP_AP_ONLINE, @@ -205,7 +207,6 @@ enum cpuhp_state { CPUHP_AP_KVM_ONLINE, CPUHP_AP_SCHED_WAIT_EMPTY, CPUHP_AP_SMPBOOT_THREADS, - CPUHP_AP_X86_VDSO_VMA_ONLINE, CPUHP_AP_IRQ_AFFINITY_ONLINE, CPUHP_AP_BLK_MQ_ONLINE, CPUHP_AP_ARM_MVEBU_SYNC_CLOCKS, diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index f10fb87d49db..cfb545841a2c 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -4,7 +4,7 @@ /* * Cpumasks provide a bitmap suitable for representing the - * set of CPU's in a system, one bit position per CPU number. In general, + * set of CPUs in a system, one bit position per CPU number. In general, * only nr_cpu_ids (<= NR_CPUS) bits are valid. */ #include <linux/kernel.h> @@ -97,7 +97,7 @@ static inline void set_nr_cpu_ids(unsigned int nr) * * If !CONFIG_HOTPLUG_CPU, present == possible, and active == online. * - * The cpu_possible_mask is fixed at boot time, as the set of CPU id's + * The cpu_possible_mask is fixed at boot time, as the set of CPU IDs * that it is possible might ever be plugged in at anytime during the * life of that system boot. The cpu_present_mask is dynamic(*), * representing which CPUs are currently plugged in. And @@ -112,7 +112,7 @@ static inline void set_nr_cpu_ids(unsigned int nr) * hotplug, it's a copy of cpu_possible_mask, hence fixed at boot. * * Subtleties: - * 1) UP arch's (NR_CPUS == 1, CONFIG_SMP not defined) hardcode + * 1) UP ARCHes (NR_CPUS == 1, CONFIG_SMP not defined) hardcode * assumption that their single CPU is online. The UP * cpu_{online,possible,present}_masks are placebos. Changing them * will have no useful affect on the following num_*_cpus() @@ -155,7 +155,7 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu) * cpumask_first - get the first cpu in a cpumask * @srcp: the cpumask pointer * - * Returns >= nr_cpu_ids if no cpus set. + * Return: >= nr_cpu_ids if no cpus set. */ static inline unsigned int cpumask_first(const struct cpumask *srcp) { @@ -166,7 +166,7 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) * cpumask_first_zero - get the first unset cpu in a cpumask * @srcp: the cpumask pointer * - * Returns >= nr_cpu_ids if all cpus are set. + * Return: >= nr_cpu_ids if all cpus are set. */ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) { @@ -178,7 +178,7 @@ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) * @srcp1: the first input * @srcp2: the second input * - * Returns >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and(). + * Return: >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and(). */ static inline unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask *srcp2) @@ -190,7 +190,7 @@ unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask * cpumask_last - get the last CPU in a cpumask * @srcp: - the cpumask pointer * - * Returns >= nr_cpumask_bits if no CPUs set. + * Return: >= nr_cpumask_bits if no CPUs set. */ static inline unsigned int cpumask_last(const struct cpumask *srcp) { @@ -199,10 +199,10 @@ static inline unsigned int cpumask_last(const struct cpumask *srcp) /** * cpumask_next - get the next cpu in a cpumask - * @n: the cpu prior to the place to search (ie. return will be > @n) + * @n: the cpu prior to the place to search (i.e. return will be > @n) * @srcp: the cpumask pointer * - * Returns >= nr_cpu_ids if no further cpus set. + * Return: >= nr_cpu_ids if no further cpus set. */ static inline unsigned int cpumask_next(int n, const struct cpumask *srcp) @@ -215,10 +215,10 @@ unsigned int cpumask_next(int n, const struct cpumask *srcp) /** * cpumask_next_zero - get the next unset cpu in a cpumask - * @n: the cpu prior to the place to search (ie. return will be > @n) + * @n: the cpu prior to the place to search (i.e. return will be > @n) * @srcp: the cpumask pointer * - * Returns >= nr_cpu_ids if no further cpus unset. + * Return: >= nr_cpu_ids if no further cpus unset. */ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) { @@ -254,11 +254,11 @@ unsigned int cpumask_any_distribute(const struct cpumask *srcp); /** * cpumask_next_and - get the next cpu in *src1p & *src2p - * @n: the cpu prior to the place to search (ie. return will be > @n) + * @n: the cpu prior to the place to search (i.e. return will be > @n) * @src1p: the first cpumask pointer * @src2p: the second cpumask pointer * - * Returns >= nr_cpu_ids if no further cpus set in both. + * Return: >= nr_cpu_ids if no further cpus set in both. */ static inline unsigned int cpumask_next_and(int n, const struct cpumask *src1p, @@ -373,7 +373,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta * @cpu: the cpu to ignore. * * Often used to find any cpu but smp_processor_id() in a mask. - * Returns >= nr_cpu_ids if no cpus set. + * Return: >= nr_cpu_ids if no cpus set. */ static inline unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) @@ -388,11 +388,11 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) } /** - * cpumask_nth - get the first cpu in a cpumask + * cpumask_nth - get the Nth cpu in a cpumask * @srcp: the cpumask pointer - * @cpu: the N'th cpu to find, starting from 0 + * @cpu: the Nth cpu to find, starting from 0 * - * Returns >= nr_cpu_ids if such cpu doesn't exist. + * Return: >= nr_cpu_ids if such cpu doesn't exist. */ static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp) { @@ -400,12 +400,12 @@ static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *s } /** - * cpumask_nth_and - get the first cpu in 2 cpumasks + * cpumask_nth_and - get the Nth cpu in 2 cpumasks * @srcp1: the cpumask pointer * @srcp2: the cpumask pointer - * @cpu: the N'th cpu to find, starting from 0 + * @cpu: the Nth cpu to find, starting from 0 * - * Returns >= nr_cpu_ids if such cpu doesn't exist. + * Return: >= nr_cpu_ids if such cpu doesn't exist. */ static inline unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1, @@ -416,12 +416,12 @@ unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1, } /** - * cpumask_nth_andnot - get the first cpu set in 1st cpumask, and clear in 2nd. + * cpumask_nth_andnot - get the Nth cpu set in 1st cpumask, and clear in 2nd. * @srcp1: the cpumask pointer * @srcp2: the cpumask pointer - * @cpu: the N'th cpu to find, starting from 0 + * @cpu: the Nth cpu to find, starting from 0 * - * Returns >= nr_cpu_ids if such cpu doesn't exist. + * Return: >= nr_cpu_ids if such cpu doesn't exist. */ static inline unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1, @@ -436,9 +436,9 @@ unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1, * @srcp1: the cpumask pointer * @srcp2: the cpumask pointer * @srcp3: the cpumask pointer - * @cpu: the N'th cpu to find, starting from 0 + * @cpu: the Nth cpu to find, starting from 0 * - * Returns >= nr_cpu_ids if such cpu doesn't exist. + * Return: >= nr_cpu_ids if such cpu doesn't exist. */ static __always_inline unsigned int cpumask_nth_and_andnot(unsigned int cpu, const struct cpumask *srcp1, @@ -497,7 +497,7 @@ static __always_inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp) * @cpu: cpu number (< nr_cpu_ids) * @cpumask: the cpumask pointer * - * Returns true if @cpu is set in @cpumask, else returns false + * Return: true if @cpu is set in @cpumask, else returns false */ static __always_inline bool cpumask_test_cpu(int cpu, const struct cpumask *cpumask) { @@ -509,9 +509,9 @@ static __always_inline bool cpumask_test_cpu(int cpu, const struct cpumask *cpum * @cpu: cpu number (< nr_cpu_ids) * @cpumask: the cpumask pointer * - * Returns true if @cpu is set in old bitmap of @cpumask, else returns false - * * test_and_set_bit wrapper for cpumasks. + * + * Return: true if @cpu is set in old bitmap of @cpumask, else returns false */ static __always_inline bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) { @@ -523,9 +523,9 @@ static __always_inline bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cp * @cpu: cpu number (< nr_cpu_ids) * @cpumask: the cpumask pointer * - * Returns true if @cpu is set in old bitmap of @cpumask, else returns false - * * test_and_clear_bit wrapper for cpumasks. + * + * Return: true if @cpu is set in old bitmap of @cpumask, else returns false */ static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask) { @@ -560,7 +560,7 @@ static inline void cpumask_clear(struct cpumask *dstp) * @src1p: the first input * @src2p: the second input * - * If *@dstp is empty, returns false, else returns true + * Return: false if *@dstp is empty, else returns true */ static inline bool cpumask_and(struct cpumask *dstp, const struct cpumask *src1p, @@ -603,7 +603,7 @@ static inline void cpumask_xor(struct cpumask *dstp, * @src1p: the first input * @src2p: the second input * - * If *@dstp is empty, returns false, else returns true + * Return: false if *@dstp is empty, else returns true */ static inline bool cpumask_andnot(struct cpumask *dstp, const struct cpumask *src1p, @@ -617,6 +617,8 @@ static inline bool cpumask_andnot(struct cpumask *dstp, * cpumask_equal - *src1p == *src2p * @src1p: the first input * @src2p: the second input + * + * Return: true if the cpumasks are equal, false if not */ static inline bool cpumask_equal(const struct cpumask *src1p, const struct cpumask *src2p) @@ -630,6 +632,9 @@ static inline bool cpumask_equal(const struct cpumask *src1p, * @src1p: the first input * @src2p: the second input * @src3p: the third input + * + * Return: true if first cpumask ORed with second cpumask == third cpumask, + * otherwise false */ static inline bool cpumask_or_equal(const struct cpumask *src1p, const struct cpumask *src2p, @@ -643,6 +648,9 @@ static inline bool cpumask_or_equal(const struct cpumask *src1p, * cpumask_intersects - (*src1p & *src2p) != 0 * @src1p: the first input * @src2p: the second input + * + * Return: true if first cpumask ANDed with second cpumask is non-empty, + * otherwise false */ static inline bool cpumask_intersects(const struct cpumask *src1p, const struct cpumask *src2p) @@ -656,7 +664,7 @@ static inline bool cpumask_intersects(const struct cpumask *src1p, * @src1p: the first input * @src2p: the second input * - * Returns true if *@src1p is a subset of *@src2p, else returns false + * Return: true if *@src1p is a subset of *@src2p, else returns false */ static inline bool cpumask_subset(const struct cpumask *src1p, const struct cpumask *src2p) @@ -668,6 +676,8 @@ static inline bool cpumask_subset(const struct cpumask *src1p, /** * cpumask_empty - *srcp == 0 * @srcp: the cpumask to that all cpus < nr_cpu_ids are clear. + * + * Return: true if srcp is empty (has no bits set), else false */ static inline bool cpumask_empty(const struct cpumask *srcp) { @@ -677,6 +687,8 @@ static inline bool cpumask_empty(const struct cpumask *srcp) /** * cpumask_full - *srcp == 0xFFFFFFFF... * @srcp: the cpumask to that all cpus < nr_cpu_ids are set. + * + * Return: true if srcp is full (has all bits set), else false */ static inline bool cpumask_full(const struct cpumask *srcp) { @@ -686,6 +698,8 @@ static inline bool cpumask_full(const struct cpumask *srcp) /** * cpumask_weight - Count of bits in *srcp * @srcp: the cpumask to count bits (< nr_cpu_ids) in. + * + * Return: count of bits set in *srcp */ static inline unsigned int cpumask_weight(const struct cpumask *srcp) { @@ -696,6 +710,8 @@ static inline unsigned int cpumask_weight(const struct cpumask *srcp) * cpumask_weight_and - Count of bits in (*srcp1 & *srcp2) * @srcp1: the cpumask to count bits (< nr_cpu_ids) in. * @srcp2: the cpumask to count bits (< nr_cpu_ids) in. + * + * Return: count of bits set in both *srcp1 and *srcp2 */ static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1, const struct cpumask *srcp2) @@ -744,7 +760,7 @@ static inline void cpumask_copy(struct cpumask *dstp, * cpumask_any - pick a "random" cpu from *srcp * @srcp: the input cpumask * - * Returns >= nr_cpu_ids if no cpus set. + * Return: >= nr_cpu_ids if no cpus set. */ #define cpumask_any(srcp) cpumask_first(srcp) @@ -753,7 +769,7 @@ static inline void cpumask_copy(struct cpumask *dstp, * @mask1: the first input cpumask * @mask2: the second input cpumask * - * Returns >= nr_cpu_ids if no cpus set. + * Return: >= nr_cpu_ids if no cpus set. */ #define cpumask_any_and(mask1, mask2) cpumask_first_and((mask1), (mask2)) @@ -769,7 +785,7 @@ static inline void cpumask_copy(struct cpumask *dstp, * @len: the length of the buffer * @dstp: the cpumask to set. * - * Returns -errno, or 0 for success. + * Return: -errno, or 0 for success. */ static inline int cpumask_parse_user(const char __user *buf, int len, struct cpumask *dstp) @@ -783,7 +799,7 @@ static inline int cpumask_parse_user(const char __user *buf, int len, * @len: the length of the buffer * @dstp: the cpumask to set. * - * Returns -errno, or 0 for success. + * Return: -errno, or 0 for success. */ static inline int cpumask_parselist_user(const char __user *buf, int len, struct cpumask *dstp) @@ -797,7 +813,7 @@ static inline int cpumask_parselist_user(const char __user *buf, int len, * @buf: the buffer to extract from * @dstp: the cpumask to set. * - * Returns -errno, or 0 for success. + * Return: -errno, or 0 for success. */ static inline int cpumask_parse(const char *buf, struct cpumask *dstp) { @@ -809,7 +825,7 @@ static inline int cpumask_parse(const char *buf, struct cpumask *dstp) * @buf: the buffer to extract from * @dstp: the cpumask to set. * - * Returns -errno, or 0 for success. + * Return: -errno, or 0 for success. */ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) { @@ -817,7 +833,9 @@ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) } /** - * cpumask_size - size to allocate for a 'struct cpumask' in bytes + * cpumask_size - calculate size to allocate for a 'struct cpumask' in bytes + * + * Return: size to allocate for a &struct cpumask in bytes */ static inline unsigned int cpumask_size(void) { @@ -831,7 +849,7 @@ static inline unsigned int cpumask_size(void) * little more difficult, we typedef cpumask_var_t to an array or a * pointer: doing &mask on an array is a noop, so it still works. * - * ie. + * i.e. * cpumask_var_t tmpmask; * if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL)) * return -ENOMEM; @@ -887,6 +905,8 @@ bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) * a nop returning a constant 1 (in <linux/cpumask.h>). * * See alloc_cpumask_var_node. + * + * Return: %true if allocation succeeded, %false if not */ static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) @@ -1025,7 +1045,7 @@ set_cpu_dying(unsigned int cpu, bool dying) } /** - * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * + * to_cpumask - convert a NR_CPUS bitmap to a struct cpumask * * @bitmap: the bitmap * * There are a few places where cpumask_var_t isn't appropriate and @@ -1068,6 +1088,8 @@ static inline const struct cpumask *get_cpu_mask(unsigned int cpu) * interface gives only a momentary snapshot and is not protected against * concurrent CPU hotplug operations unless invoked from a cpuhp_lock held * region. + * + * Return: momentary snapshot of the number of online CPUs */ static __always_inline unsigned int num_online_cpus(void) { @@ -1160,7 +1182,7 @@ static inline bool cpu_dying(unsigned int cpu) * @mask: the cpumask to copy * @buf: the buffer to copy into * - * Returns the length of the (null-terminated) @buf string, zero if + * Return: the length of the (null-terminated) @buf string, zero if * nothing is copied. */ static inline ssize_t @@ -1183,7 +1205,7 @@ cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask) * cpumask; Typically used by bin_attribute to export cpumask bitmask * ABI. * - * Returns the length of how many bytes have been copied, excluding + * Return: the length of how many bytes have been copied, excluding * terminating '\0'. */ static inline ssize_t @@ -1204,6 +1226,9 @@ cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask, * * Everything is same with the above cpumap_print_bitmask_to_buf() * except the print format. + * + * Return: the length of how many bytes have been copied, excluding + * terminating '\0'. */ static inline ssize_t cpumap_print_list_to_buf(char *buf, const struct cpumask *mask, diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index de62a722431e..5126a4fecb44 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -5,6 +5,14 @@ #include <linux/linkage.h> #include <linux/elfcore.h> #include <linux/elf.h> +#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION +#include <asm/crash_core.h> +#endif + +/* Location of a reserved region to hold the crash kernel. + */ +extern struct resource crashk_res; +extern struct resource crashk_low_res; #define CRASH_CORE_NOTE_NAME "CORE" #define CRASH_CORE_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4) @@ -28,6 +36,8 @@ VMCOREINFO_BYTES) typedef u32 note_buf_t[CRASH_CORE_NOTE_BYTES/4]; +/* Per cpu memory for storing cpu states in case of system crash. */ +extern note_buf_t __percpu *crash_notes; void crash_update_vmcoreinfo_safecopy(void *ptr); void crash_save_vmcoreinfo(void); @@ -77,11 +87,67 @@ Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, void *data, size_t data_len); void final_note(Elf_Word *buf); +#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION +#ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE +#define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20) +#endif +#endif + int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, - unsigned long long *crash_size, unsigned long long *crash_base); -int parse_crashkernel_high(char *cmdline, unsigned long long system_ram, - unsigned long long *crash_size, unsigned long long *crash_base); -int parse_crashkernel_low(char *cmdline, unsigned long long system_ram, - unsigned long long *crash_size, unsigned long long *crash_base); + unsigned long long *crash_size, unsigned long long *crash_base, + unsigned long long *low_size, bool *high); + +#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION +#ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE +#define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20) +#endif +#ifndef CRASH_ALIGN +#define CRASH_ALIGN SZ_2M +#endif +#ifndef CRASH_ADDR_LOW_MAX +#define CRASH_ADDR_LOW_MAX SZ_4G +#endif +#ifndef CRASH_ADDR_HIGH_MAX +#define CRASH_ADDR_HIGH_MAX memblock_end_of_DRAM() +#endif + +void __init reserve_crashkernel_generic(char *cmdline, + unsigned long long crash_size, + unsigned long long crash_base, + unsigned long long crash_low_size, + bool high); +#else +static inline void __init reserve_crashkernel_generic(char *cmdline, + unsigned long long crash_size, + unsigned long long crash_base, + unsigned long long crash_low_size, + bool high) +{} +#endif + +/* Alignment required for elf header segment */ +#define ELF_CORE_HEADER_ALIGN 4096 + +struct crash_mem { + unsigned int max_nr_ranges; + unsigned int nr_ranges; + struct range ranges[] __counted_by(max_nr_ranges); +}; + +extern int crash_exclude_mem_range(struct crash_mem *mem, + unsigned long long mstart, + unsigned long long mend); +extern int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, + void **addr, unsigned long *sz); + +struct kimage; +struct kexec_segment; + +#define KEXEC_CRASH_HP_NONE 0 +#define KEXEC_CRASH_HP_ADD_CPU 1 +#define KEXEC_CRASH_HP_REMOVE_CPU 2 +#define KEXEC_CRASH_HP_ADD_MEMORY 3 +#define KEXEC_CRASH_HP_REMOVE_MEMORY 4 +#define KEXEC_CRASH_HP_INVALID_CPU -1U #endif /* LINUX_CRASH_CORE_H */ diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 0f3a656293b0..acc55626afdc 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -50,6 +50,7 @@ void vmcore_cleanup(void); #define vmcore_elf64_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x)) #endif +#ifndef is_kdump_kernel /* * is_kdump_kernel() checks whether this kernel is booting after a panic of * previous kernel or not. This is determined by checking if previous kernel @@ -64,6 +65,7 @@ static inline bool is_kdump_kernel(void) { return elfcorehdr_addr != ELFCORE_ADDR_MAX; } +#endif /* is_vmcore_usable() checks if the kernel is booting after a panic and * the vmcore region is usable. @@ -75,7 +77,8 @@ static inline bool is_kdump_kernel(void) static inline int is_vmcore_usable(void) { - return is_kdump_kernel() && elfcorehdr_addr != ELFCORE_ADDR_ERR ? 1 : 0; + return elfcorehdr_addr != ELFCORE_ADDR_ERR && + elfcorehdr_addr != ELFCORE_ADDR_MAX ? 1 : 0; } /* vmcore_unusable() marks the vmcore as unusable, @@ -84,8 +87,7 @@ static inline int is_vmcore_usable(void) static inline void vmcore_unusable(void) { - if (is_kdump_kernel()) - elfcorehdr_addr = ELFCORE_ADDR_ERR; + elfcorehdr_addr = ELFCORE_ADDR_ERR; } /** diff --git a/include/linux/cred.h b/include/linux/cred.h index 9ed9232af934..2976f534a7a3 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -12,6 +12,7 @@ #include <linux/init.h> #include <linux/key.h> #include <linux/atomic.h> +#include <linux/refcount.h> #include <linux/uidgid.h> #include <linux/sched.h> #include <linux/sched/user.h> @@ -23,7 +24,7 @@ struct inode; * COW Supplementary groups list */ struct group_info { - atomic_t usage; + refcount_t usage; int ngroups; kgid_t gid[]; } __randomize_layout; @@ -39,7 +40,7 @@ struct group_info { */ static inline struct group_info *get_group_info(struct group_info *gi) { - atomic_inc(&gi->usage); + refcount_inc(&gi->usage); return gi; } @@ -49,7 +50,7 @@ static inline struct group_info *get_group_info(struct group_info *gi) */ #define put_group_info(group_info) \ do { \ - if (atomic_dec_and_test(&(group_info)->usage)) \ + if (refcount_dec_and_test(&(group_info)->usage)) \ groups_free(group_info); \ } while (0) @@ -108,14 +109,7 @@ static inline int groups_search(const struct group_info *group_info, kgid_t grp) * same context as task->real_cred. */ struct cred { - atomic_t usage; -#ifdef CONFIG_DEBUG_CREDENTIALS - atomic_t subscribers; /* number of processes subscribed */ - void *put_addr; - unsigned magic; -#define CRED_MAGIC 0x43736564 -#define CRED_MAGIC_DEAD 0x44656144 -#endif + atomic_long_t usage; kuid_t uid; /* real UID of the task */ kgid_t gid; /* real GID of the task */ kuid_t suid; /* saved UID of the task */ @@ -164,7 +158,6 @@ extern void abort_creds(struct cred *); extern const struct cred *override_creds(const struct cred *); extern void revert_creds(const struct cred *); extern struct cred *prepare_kernel_cred(struct task_struct *); -extern int change_create_files_as(struct cred *, struct inode *); extern int set_security_override(struct cred *, u32); extern int set_security_override_from_ctx(struct cred *, const char *); extern int set_create_files_as(struct cred *, struct inode *); @@ -172,46 +165,6 @@ extern int cred_fscmp(const struct cred *, const struct cred *); extern void __init cred_init(void); extern int set_cred_ucounts(struct cred *); -/* - * check for validity of credentials - */ -#ifdef CONFIG_DEBUG_CREDENTIALS -extern void __noreturn __invalid_creds(const struct cred *, const char *, unsigned); -extern void __validate_process_creds(struct task_struct *, - const char *, unsigned); - -extern bool creds_are_invalid(const struct cred *cred); - -static inline void __validate_creds(const struct cred *cred, - const char *file, unsigned line) -{ - if (unlikely(creds_are_invalid(cred))) - __invalid_creds(cred, file, line); -} - -#define validate_creds(cred) \ -do { \ - __validate_creds((cred), __FILE__, __LINE__); \ -} while(0) - -#define validate_process_creds() \ -do { \ - __validate_process_creds(current, __FILE__, __LINE__); \ -} while(0) - -extern void validate_creds_for_do_exit(struct task_struct *); -#else -static inline void validate_creds(const struct cred *cred) -{ -} -static inline void validate_creds_for_do_exit(struct task_struct *tsk) -{ -} -static inline void validate_process_creds(void) -{ -} -#endif - static inline bool cap_ambient_invariant_ok(const struct cred *cred) { return cap_issubset(cred->cap_ambient, @@ -220,6 +173,20 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred) } /** + * get_new_cred_many - Get references on a new set of credentials + * @cred: The new credentials to reference + * @nr: Number of references to acquire + * + * Get references on the specified set of new credentials. The caller must + * release all acquired references. + */ +static inline struct cred *get_new_cred_many(struct cred *cred, int nr) +{ + atomic_long_add(nr, &cred->usage); + return cred; +} + +/** * get_new_cred - Get a reference on a new set of credentials * @cred: The new credentials to reference * @@ -228,16 +195,16 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred) */ static inline struct cred *get_new_cred(struct cred *cred) { - atomic_inc(&cred->usage); - return cred; + return get_new_cred_many(cred, 1); } /** - * get_cred - Get a reference on a set of credentials + * get_cred_many - Get references on a set of credentials * @cred: The credentials to reference + * @nr: Number of references to acquire * - * Get a reference on the specified set of credentials. The caller must - * release the reference. If %NULL is passed, it is returned with no action. + * Get references on the specified set of credentials. The caller must release + * all acquired reference. If %NULL is passed, it is returned with no action. * * This is used to deal with a committed set of credentials. Although the * pointer is const, this will temporarily discard the const and increment the @@ -245,14 +212,27 @@ static inline struct cred *get_new_cred(struct cred *cred) * accidental alteration of a set of credentials that should be considered * immutable. */ -static inline const struct cred *get_cred(const struct cred *cred) +static inline const struct cred *get_cred_many(const struct cred *cred, int nr) { struct cred *nonconst_cred = (struct cred *) cred; if (!cred) return cred; - validate_creds(cred); nonconst_cred->non_rcu = 0; - return get_new_cred(nonconst_cred); + return get_new_cred_many(nonconst_cred, nr); +} + +/* + * get_cred - Get a reference on a set of credentials + * @cred: The credentials to reference + * + * Get a reference on the specified set of credentials. The caller must + * release the reference. If %NULL is passed, it is returned with no action. + * + * This is used to deal with a committed set of credentials. + */ +static inline const struct cred *get_cred(const struct cred *cred) +{ + return get_cred_many(cred, 1); } static inline const struct cred *get_cred_rcu(const struct cred *cred) @@ -260,9 +240,8 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred) struct cred *nonconst_cred = (struct cred *) cred; if (!cred) return NULL; - if (!atomic_inc_not_zero(&nonconst_cred->usage)) + if (!atomic_long_inc_not_zero(&nonconst_cred->usage)) return NULL; - validate_creds(cred); nonconst_cred->non_rcu = 0; return cred; } @@ -270,6 +249,7 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred) /** * put_cred - Release a reference to a set of credentials * @cred: The credentials to release + * @nr: Number of references to release * * Release a reference to a set of credentials, deleting them when the last ref * is released. If %NULL is passed, nothing is done. @@ -278,17 +258,28 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred) * on task_struct are attached by const pointers to prevent accidental * alteration of otherwise immutable credential sets. */ -static inline void put_cred(const struct cred *_cred) +static inline void put_cred_many(const struct cred *_cred, int nr) { struct cred *cred = (struct cred *) _cred; if (cred) { - validate_creds(cred); - if (atomic_dec_and_test(&(cred)->usage)) + if (atomic_long_sub_and_test(nr, &cred->usage)) __put_cred(cred); } } +/* + * put_cred - Release a reference to a set of credentials + * @cred: The credentials to release + * + * Release a reference to a set of credentials, deleting them when the last ref + * is released. If %NULL is passed, nothing is done. + */ +static inline void put_cred(const struct cred *cred) +{ + put_cred_many(cred, 1); +} + /** * current_cred - Access the current task's subjective credentials * diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 31f6fee0c36c..b164da5e129e 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -24,6 +24,7 @@ #define CRYPTO_ALG_TYPE_CIPHER 0x00000001 #define CRYPTO_ALG_TYPE_COMPRESS 0x00000002 #define CRYPTO_ALG_TYPE_AEAD 0x00000003 +#define CRYPTO_ALG_TYPE_LSKCIPHER 0x00000004 #define CRYPTO_ALG_TYPE_SKCIPHER 0x00000005 #define CRYPTO_ALG_TYPE_AKCIPHER 0x00000006 #define CRYPTO_ALG_TYPE_SIG 0x00000007 @@ -35,8 +36,6 @@ #define CRYPTO_ALG_TYPE_SHASH 0x0000000e #define CRYPTO_ALG_TYPE_AHASH 0x0000000f -#define CRYPTO_ALG_TYPE_HASH_MASK 0x0000000e -#define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000e #define CRYPTO_ALG_TYPE_ACOMPRESS_MASK 0x0000000e #define CRYPTO_ALG_LARVAL 0x00000010 @@ -111,7 +110,6 @@ * crypto_aead_walksize() (with the remainder going at the end), no chunk * can cross a page boundary or a scatterlist element boundary. * ahash: - * - The result buffer must be aligned to the algorithm's alignmask. * - crypto_ahash_finup() must not be used unless the algorithm implements * ->finup() natively. */ @@ -279,18 +277,20 @@ struct compress_alg { * @cra_ctxsize: Size of the operational context of the transformation. This * value informs the kernel crypto API about the memory size * needed to be allocated for the transformation context. - * @cra_alignmask: Alignment mask for the input and output data buffer. The data - * buffer containing the input data for the algorithm must be - * aligned to this alignment mask. The data buffer for the - * output data must be aligned to this alignment mask. Note that - * the Crypto API will do the re-alignment in software, but - * only under special conditions and there is a performance hit. - * The re-alignment happens at these occasions for different - * @cra_u types: cipher -- For both input data and output data - * buffer; ahash -- For output hash destination buf; shash -- - * For output hash destination buf. - * This is needed on hardware which is flawed by design and - * cannot pick data from arbitrary addresses. + * @cra_alignmask: For cipher, skcipher, lskcipher, and aead algorithms this is + * 1 less than the alignment, in bytes, that the algorithm + * implementation requires for input and output buffers. When + * the crypto API is invoked with buffers that are not aligned + * to this alignment, the crypto API automatically utilizes + * appropriately aligned temporary buffers to comply with what + * the algorithm needs. (For scatterlists this happens only if + * the algorithm uses the skcipher_walk helper functions.) This + * misalignment handling carries a performance penalty, so it is + * preferred that algorithms do not set a nonzero alignmask. + * Also, crypto API users may wish to allocate buffers aligned + * to the alignmask of the algorithm being used, in order to + * avoid the API having to realign them. Note: the alignmask is + * not supported for hash algorithms and is always 0 for them. * @cra_priority: Priority of this transformation implementation. In case * multiple transformations with same @cra_name are available to * the Crypto API, the kernel will use the one with highest diff --git a/include/linux/damon.h b/include/linux/damon.h index d5d4d19928e0..e00ddf1ed39c 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -40,9 +40,24 @@ struct damon_addr_range { * @ar: The address range of the region. * @sampling_addr: Address of the sample for the next access check. * @nr_accesses: Access frequency of this region. + * @nr_accesses_bp: @nr_accesses in basis point (0.01%) that updated for + * each sampling interval. * @list: List head for siblings. * @age: Age of this region. * + * @nr_accesses is reset to zero for every &damon_attrs->aggr_interval and be + * increased for every &damon_attrs->sample_interval if an access to the region + * during the last sampling interval is found. The update of this field should + * not be done with direct access but with the helper function, + * damon_update_region_access_rate(). + * + * @nr_accesses_bp is another representation of @nr_accesses in basis point + * (1 in 10,000) that updated for every &damon_attrs->sample_interval in a + * manner similar to moving sum. By the algorithm, this value becomes + * @nr_accesses * 10000 for every &struct damon_attrs->aggr_interval. This can + * be used when the aggregation interval is too huge and therefore cannot wait + * for it before getting the access monitoring results. + * * @age is initially zero, increased for each aggregation interval, and reset * to zero again if the access frequency is significantly changed. If two * regions are merged into a new region, both @nr_accesses and @age of the new @@ -52,6 +67,7 @@ struct damon_region { struct damon_addr_range ar; unsigned long sampling_addr; unsigned int nr_accesses; + unsigned int nr_accesses_bp; struct list_head list; unsigned int age; @@ -226,16 +242,26 @@ struct damos_stat { * enum damos_filter_type - Type of memory for &struct damos_filter * @DAMOS_FILTER_TYPE_ANON: Anonymous pages. * @DAMOS_FILTER_TYPE_MEMCG: Specific memcg's pages. + * @DAMOS_FILTER_TYPE_ADDR: Address range. + * @DAMOS_FILTER_TYPE_TARGET: Data Access Monitoring target. * @NR_DAMOS_FILTER_TYPES: Number of filter types. * - * The support of each filter type is up to running &struct damon_operations. - * &enum DAMON_OPS_PADDR is supporting all filter types, while - * &enum DAMON_OPS_VADDR and &enum DAMON_OPS_FVADDR are not supporting any - * filter types. + * The anon pages type and memcg type filters are handled by underlying + * &struct damon_operations as a part of scheme action trying, and therefore + * accounted as 'tried'. In contrast, other types are handled by core layer + * before trying of the action and therefore not accounted as 'tried'. + * + * The support of the filters that handled by &struct damon_operations depend + * on the running &struct damon_operations. + * &enum DAMON_OPS_PADDR supports both anon pages type and memcg type filters, + * while &enum DAMON_OPS_VADDR and &enum DAMON_OPS_FVADDR don't support any of + * the two types. */ enum damos_filter_type { DAMOS_FILTER_TYPE_ANON, DAMOS_FILTER_TYPE_MEMCG, + DAMOS_FILTER_TYPE_ADDR, + DAMOS_FILTER_TYPE_TARGET, NR_DAMOS_FILTER_TYPES, }; @@ -244,18 +270,24 @@ enum damos_filter_type { * @type: Type of the page. * @matching: If the matching page should filtered out or in. * @memcg_id: Memcg id of the question if @type is DAMOS_FILTER_MEMCG. + * @addr_range: Address range if @type is DAMOS_FILTER_TYPE_ADDR. + * @target_idx: Index of the &struct damon_target of + * &damon_ctx->adaptive_targets if @type is + * DAMOS_FILTER_TYPE_TARGET. * @list: List head for siblings. * * Before applying the &damos->action to a memory region, DAMOS checks if each * page of the region matches to this and avoid applying the action if so. - * Note that the check support is up to &struct damon_operations - * implementation. + * Support of each filter type depends on the running &struct damon_operations + * and the type. Refer to &enum damos_filter_type for more detai. */ struct damos_filter { enum damos_filter_type type; bool matching; union { unsigned short memcg_id; + struct damon_addr_range addr_range; + int target_idx; }; struct list_head list; }; @@ -282,24 +314,24 @@ struct damos_access_pattern { * struct damos - Represents a Data Access Monitoring-based Operation Scheme. * @pattern: Access pattern of target regions. * @action: &damo_action to be applied to the target regions. + * @apply_interval_us: The time between applying the @action. * @quota: Control the aggressiveness of this scheme. * @wmarks: Watermarks for automated (in)activation of this scheme. * @filters: Additional set of &struct damos_filter for &action. * @stat: Statistics of this scheme. * @list: List head for siblings. * - * For each aggregation interval, DAMON finds regions which fit in the + * For each @apply_interval_us, DAMON finds regions which fit in the * &pattern and applies &action to those. To avoid consuming too much * CPU time or IO resources for the &action, "a is used. * + * If @apply_interval_us is zero, &damon_attrs->aggr_interval is used instead. + * * To do the work only when needed, schemes can be activated for specific * system situations using &wmarks. If all schemes that registered to the * monitoring context are inactive, DAMON stops monitoring either, and just * repeatedly checks the watermarks. * - * If all schemes that registered to a &struct damon_ctx are inactive, DAMON - * stops monitoring and just repeatedly checks the watermarks. - * * Before applying the &action to a memory region, &struct damon_operations * implementation could check pages of the region and skip &action to respect * &filters @@ -311,6 +343,14 @@ struct damos_access_pattern { struct damos { struct damos_access_pattern pattern; enum damos_action action; + unsigned long apply_interval_us; +/* private: internal use only */ + /* + * number of sample intervals that should be passed before applying + * @action + */ + unsigned long next_apply_sis; +/* public: */ struct damos_quota quota; struct damos_watermarks wmarks; struct list_head filters; @@ -456,13 +496,14 @@ struct damon_callback { * regions. * * For each @sample_interval, DAMON checks whether each region is accessed or - * not. It aggregates and keeps the access information (number of accesses to - * each region) for @aggr_interval time. DAMON also checks whether the target - * memory regions need update (e.g., by ``mmap()`` calls from the application, - * in case of virtual memory monitoring) and applies the changes for each - * @ops_update_interval. All time intervals are in micro-seconds. - * Please refer to &struct damon_operations and &struct damon_callback for more - * detail. + * not during the last @sample_interval. If such access is found, DAMON + * aggregates the information by increasing &damon_region->nr_accesses for + * @aggr_interval time. For each @aggr_interval, the count is reset. DAMON + * also checks whether the target memory regions need update (e.g., by + * ``mmap()`` calls from the application, in case of virtual memory monitoring) + * and applies the changes for each @ops_update_interval. All time intervals + * are in micro-seconds. Please refer to &struct damon_operations and &struct + * damon_callback for more detail. */ struct damon_attrs { unsigned long sample_interval; @@ -506,8 +547,20 @@ struct damon_ctx { struct damon_attrs attrs; /* private: internal use only */ - struct timespec64 last_aggregation; - struct timespec64 last_ops_update; + /* number of sample intervals that passed since this context started */ + unsigned long passed_sample_intervals; + /* + * number of sample intervals that should be passed before next + * aggregation + */ + unsigned long next_aggregation_sis; + /* + * number of sample intervals that should be passed before next ops + * update + */ + unsigned long next_ops_update_sis; + /* for waiting until the execution of the kdamond_fn is started */ + struct completion kdamond_started; /* public: */ struct task_struct *kdamond; @@ -592,6 +645,8 @@ void damon_add_region(struct damon_region *r, struct damon_target *t); void damon_destroy_region(struct damon_region *r, struct damon_target *t); int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, unsigned int nr_ranges); +void damon_update_region_access_rate(struct damon_region *r, bool accessed, + struct damon_attrs *attrs); struct damos_filter *damos_new_filter(enum damos_filter_type type, bool matching); @@ -599,7 +654,9 @@ void damos_add_filter(struct damos *s, struct damos_filter *f); void damos_destroy_filter(struct damos_filter *f); struct damos *damon_new_scheme(struct damos_access_pattern *pattern, - enum damos_action action, struct damos_quota *quota, + enum damos_action action, + unsigned long apply_interval_us, + struct damos_quota *quota, struct damos_watermarks *wmarks); void damon_add_scheme(struct damon_ctx *ctx, struct damos *s); void damon_destroy_scheme(struct damos *s); @@ -626,6 +683,13 @@ static inline bool damon_target_has_pid(const struct damon_ctx *ctx) return ctx->ops.id == DAMON_OPS_VADDR || ctx->ops.id == DAMON_OPS_FVADDR; } +static inline unsigned int damon_max_nr_accesses(const struct damon_attrs *attrs) +{ + /* {aggr,sample}_interval are unsigned long, hence could overflow */ + return min(attrs->aggr_interval / attrs->sample_interval, + (unsigned long)UINT_MAX); +} + int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive); int damon_stop(struct damon_ctx **ctxs, int nr_ctxs); diff --git a/include/linux/dax.h b/include/linux/dax.h index 261944ec0887..b463502b16e1 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -159,8 +159,8 @@ int dax_writeback_mapping_range(struct address_space *mapping, struct page *dax_layout_busy_page(struct address_space *mapping); struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end); -dax_entry_t dax_lock_page(struct page *page); -void dax_unlock_page(struct page *page, dax_entry_t cookie); +dax_entry_t dax_lock_folio(struct folio *folio); +void dax_unlock_folio(struct folio *folio, dax_entry_t cookie); dax_entry_t dax_lock_mapping_entry(struct address_space *mapping, unsigned long index, struct page **page); void dax_unlock_mapping_entry(struct address_space *mapping, @@ -182,14 +182,14 @@ static inline int dax_writeback_mapping_range(struct address_space *mapping, return -EOPNOTSUPP; } -static inline dax_entry_t dax_lock_page(struct page *page) +static inline dax_entry_t dax_lock_folio(struct folio *folio) { - if (IS_DAX(page->mapping->host)) + if (IS_DAX(folio->mapping->host)) return ~0UL; return 0; } -static inline void dax_unlock_page(struct page *page, dax_entry_t cookie) +static inline void dax_unlock_folio(struct folio *folio, dax_entry_t cookie) { } @@ -241,10 +241,10 @@ void dax_flush(struct dax_device *dax_dev, void *addr, size_t size); ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops); -vm_fault_t dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, +vm_fault_t dax_iomap_fault(struct vm_fault *vmf, unsigned int order, pfn_t *pfnp, int *errp, const struct iomap_ops *ops); vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, - enum page_entry_size pe_size, pfn_t pfn); + unsigned int order, pfn_t pfn); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry_sync(struct address_space *mapping, pgoff_t index); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 6b351e009f59..3da2f0545d5d 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -251,6 +251,7 @@ extern struct dentry * d_make_root(struct inode *); /* <clickety>-<click> the ramfs-type tree */ extern void d_genocide(struct dentry *); +extern void d_mark_tmpfile(struct file *, struct inode *); extern void d_tmpfile(struct file *, struct inode *); extern struct dentry *d_find_alias(struct inode *); diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index ea2d919fd9c7..c9c65b132c0f 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -171,6 +171,25 @@ ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf, ssize_t debugfs_read_file_str(struct file *file, char __user *user_buf, size_t count, loff_t *ppos); +/** + * struct debugfs_cancellation - cancellation data + * @list: internal, for keeping track + * @cancel: callback to call + * @cancel_data: extra data for the callback to call + */ +struct debugfs_cancellation { + struct list_head list; + void (*cancel)(struct dentry *, void *); + void *cancel_data; +}; + +void __acquires(cancellation) +debugfs_enter_cancellation(struct file *file, + struct debugfs_cancellation *cancellation); +void __releases(cancellation) +debugfs_leave_cancellation(struct file *file, + struct debugfs_cancellation *cancellation); + #else #include <linux/err.h> diff --git a/include/linux/decompress/mm.h b/include/linux/decompress/mm.h index 9192986b1a73..ac862422df15 100644 --- a/include/linux/decompress/mm.h +++ b/include/linux/decompress/mm.h @@ -48,7 +48,7 @@ MALLOC_VISIBLE void *malloc(int size) if (!malloc_ptr) malloc_ptr = free_mem_ptr; - malloc_ptr = (malloc_ptr + 3) & ~3; /* Align */ + malloc_ptr = (malloc_ptr + 7) & ~7; /* Align */ p = (void *)malloc_ptr; malloc_ptr += size; diff --git a/include/linux/dev_printk.h b/include/linux/dev_printk.h index 8904063d4c9f..6bfe70decc9f 100644 --- a/include/linux/dev_printk.h +++ b/include/linux/dev_printk.h @@ -274,4 +274,6 @@ do { \ WARN_ONCE(condition, "%s %s: " format, \ dev_driver_string(dev), dev_name(dev), ## arg) +__printf(3, 4) int dev_err_probe(const struct device *dev, int err, const char *fmt, ...); + #endif /* _DEVICE_PRINTK_H_ */ diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 69d0435c7ebb..772ab4d74d94 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -165,6 +165,7 @@ void dm_error(const char *message); struct dm_dev { struct block_device *bdev; + struct bdev_handle *bdev_handle; struct dax_device *dax_dev; blk_mode_t mode; char name[16]; diff --git a/include/linux/device.h b/include/linux/device.h index bbaeabd04b0d..d7a72a8749ea 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -349,6 +349,7 @@ unsigned long devm_get_free_pages(struct device *dev, gfp_t gfp_mask, unsigned int order); void devm_free_pages(struct device *dev, unsigned long addr); +#ifdef CONFIG_HAS_IOMEM void __iomem *devm_ioremap_resource(struct device *dev, const struct resource *res); void __iomem *devm_ioremap_resource_wc(struct device *dev, @@ -357,14 +358,39 @@ void __iomem *devm_ioremap_resource_wc(struct device *dev, void __iomem *devm_of_iomap(struct device *dev, struct device_node *node, int index, resource_size_t *size); +#else + +static inline +void __iomem *devm_ioremap_resource(struct device *dev, + const struct resource *res) +{ + return ERR_PTR(-EINVAL); +} + +static inline +void __iomem *devm_ioremap_resource_wc(struct device *dev, + const struct resource *res) +{ + return ERR_PTR(-EINVAL); +} + +static inline +void __iomem *devm_of_iomap(struct device *dev, + struct device_node *node, int index, + resource_size_t *size) +{ + return ERR_PTR(-EINVAL); +} + +#endif /* allows to add/remove a custom action to devres stack */ void devm_remove_action(struct device *dev, void (*action)(void *), void *data); void devm_release_action(struct device *dev, void (*action)(void *), void *data); int __devm_add_action(struct device *dev, void (*action)(void *), void *data, const char *name); -#define devm_add_action(release, action, data) \ - __devm_add_action(release, action, data, #action) +#define devm_add_action(dev, action, data) \ + __devm_add_action(dev, action, data, #action) static inline int __devm_add_action_or_reset(struct device *dev, void (*action)(void *), void *data, const char *name) @@ -377,8 +403,8 @@ static inline int __devm_add_action_or_reset(struct device *dev, void (*action)( return ret; } -#define devm_add_action_or_reset(release, action, data) \ - __devm_add_action_or_reset(release, action, data, #action) +#define devm_add_action_or_reset(dev, action, data) \ + __devm_add_action_or_reset(dev, action, data, #action) /** * devm_alloc_percpu - Resource-managed alloc_percpu @@ -625,7 +651,10 @@ struct device_physical_location { * @dma_pools: Dma pools (if dma'ble device). * @dma_mem: Internal for coherent mem override. * @cma_area: Contiguous memory area for dma allocations - * @dma_io_tlb_mem: Pointer to the swiotlb pool used. Not for driver use. + * @dma_io_tlb_mem: Software IO TLB allocator. Not for driver use. + * @dma_io_tlb_pools: List of transient swiotlb memory pools. + * @dma_io_tlb_lock: Protects changes to the list of active pools. + * @dma_uses_io_tlb: %true if device has used the software IO TLB. * @archdata: For arch-specific additions. * @of_node: Associated device tree node. * @fwnode: Associated device node supplied by platform firmware. @@ -732,6 +761,11 @@ struct device { #ifdef CONFIG_SWIOTLB struct io_tlb_mem *dma_io_tlb_mem; #endif +#ifdef CONFIG_SWIOTLB_DYNAMIC + struct list_head dma_io_tlb_pools; + spinlock_t dma_io_tlb_lock; + bool dma_uses_io_tlb; +#endif /* arch specific additions */ struct dev_archdata archdata; @@ -1215,8 +1249,6 @@ void device_link_remove(void *consumer, struct device *supplier); void device_links_supplier_sync_state_pause(void); void device_links_supplier_sync_state_resume(void); -__printf(3, 4) int dev_err_probe(const struct device *dev, int err, const char *fmt, ...); - /* Create alias, so I can be autoloaded. */ #define MODULE_ALIAS_CHARDEV(major,minor) \ MODULE_ALIAS("char-major-" __stringify(major) "-" __stringify(minor)) diff --git a/include/linux/dlm_plock.h b/include/linux/dlm_plock.h index e6d76e8715a6..15fc856d198c 100644 --- a/include/linux/dlm_plock.h +++ b/include/linux/dlm_plock.h @@ -11,6 +11,8 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, int cmd, struct file_lock *fl); int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file, struct file_lock *fl); +int dlm_posix_cancel(dlm_lockspace_t *lockspace, u64 number, struct file *file, + struct file_lock *fl); int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file, struct file_lock *fl); #endif diff --git a/include/linux/dm-verity-loadpin.h b/include/linux/dm-verity-loadpin.h index 552b817ab102..3ac6dbaeaa37 100644 --- a/include/linux/dm-verity-loadpin.h +++ b/include/linux/dm-verity-loadpin.h @@ -12,7 +12,7 @@ extern struct list_head dm_verity_loadpin_trusted_root_digests; struct dm_verity_loadpin_trusted_root_digest { struct list_head node; unsigned int len; - u8 data[]; + u8 data[] __counted_by(len); }; #if IS_ENABLED(CONFIG_SECURITY_LOADPIN_VERITY) diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 0d678e9a7b24..b3772edca2e6 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -499,6 +499,21 @@ static inline bool dma_fence_is_later(struct dma_fence *f1, } /** + * dma_fence_is_later_or_same - return true if f1 is later or same as f2 + * @f1: the first fence from the same context + * @f2: the second fence from the same context + * + * Returns true if f1 is chronologically later than f2 or the same fence. Both + * fences must be from the same context, since a seqno is not re-used across + * contexts. + */ +static inline bool dma_fence_is_later_or_same(struct dma_fence *f1, + struct dma_fence *f2) +{ + return f1 == f2 || dma_fence_is_later(f1, f2); +} + +/** * dma_fence_later - return the chronologically later fence * @f1: the first fence from the same context * @f2: the second fence from the same context @@ -568,6 +583,25 @@ static inline void dma_fence_set_error(struct dma_fence *fence, fence->error = error; } +/** + * dma_fence_timestamp - helper to get the completion timestamp of a fence + * @fence: fence to get the timestamp from. + * + * After a fence is signaled the timestamp is updated with the signaling time, + * but setting the timestamp can race with tasks waiting for the signaling. This + * helper busy waits for the correct timestamp to appear. + */ +static inline ktime_t dma_fence_timestamp(struct dma_fence *fence) +{ + if (WARN_ON(!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))) + return ktime_get(); + + while (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags)) + cpu_relax(); + + return fence->timestamp; +} + signed long dma_fence_wait_timeout(struct dma_fence *, bool intr, signed long timeout); signed long dma_fence_wait_any_timeout(struct dma_fence **fences, diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 9bf19b5bf755..f2fc203fb8a1 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -169,12 +169,6 @@ static inline void dma_free_contiguous(struct device *dev, struct page *page, } #endif /* CONFIG_DMA_CMA*/ -#ifdef CONFIG_DMA_PERNUMA_CMA -void dma_pernuma_cma_reserve(void); -#else -static inline void dma_pernuma_cma_reserve(void) { } -#endif /* CONFIG_DMA_PERNUMA_CMA */ - #ifdef CONFIG_DMA_DECLARE_COHERENT int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, dma_addr_t device_addr, size_t size); @@ -343,6 +337,12 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs); +#ifdef CONFIG_ARCH_HAS_DMA_SET_MASK +void arch_dma_set_mask(struct device *dev, u64 mask); +#else +#define arch_dma_set_mask(dev, mask) do { } while (0) +#endif + #ifdef CONFIG_MMU /* * Page protection so that devices that can't snoop CPU caches can use the diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index e13050eb9777..4a658de44ee9 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -144,6 +144,7 @@ bool dma_pci_p2pdma_supported(struct device *dev); int dma_set_mask(struct device *dev, u64 mask); int dma_set_coherent_mask(struct device *dev, u64 mask); u64 dma_get_required_mask(struct device *dev); +bool dma_addressing_limited(struct device *dev); size_t dma_max_mapping_size(struct device *dev); size_t dma_opt_mapping_size(struct device *dev); bool dma_need_sync(struct device *dev, dma_addr_t dma_addr); @@ -264,6 +265,10 @@ static inline u64 dma_get_required_mask(struct device *dev) { return 0; } +static inline bool dma_addressing_limited(struct device *dev) +{ + return false; +} static inline size_t dma_max_mapping_size(struct device *dev) { return 0; @@ -418,6 +423,8 @@ static inline void dma_sync_sgtable_for_device(struct device *dev, #define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, 0) #define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, 0) +bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size); + static inline void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { @@ -463,20 +470,6 @@ static inline int dma_coerce_mask_and_coherent(struct device *dev, u64 mask) return dma_set_mask_and_coherent(dev, mask); } -/** - * dma_addressing_limited - return if the device is addressing limited - * @dev: device to check - * - * Return %true if the devices DMA mask is too small to address all memory in - * the system, else %false. Lack of addressing bits is the prime reason for - * bounce buffering, but might not be the only one. - */ -static inline bool dma_addressing_limited(struct device *dev) -{ - return min_not_zero(dma_get_mask(dev), dev->bus_dma_limit) < - dma_get_required_mask(dev); -} - static inline unsigned int dma_get_max_seg_size(struct device *dev) { if (dev->dma_parms && dev->dma_parms->max_segment_size) diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index c3656e590213..3df70d6131c8 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -517,8 +517,6 @@ static inline const char *dma_chan_name(struct dma_chan *chan) return dev_name(&chan->dev->device); } -void dma_chan_cleanup(struct kref *kref); - /** * typedef dma_filter_fn - callback filter for dma_request_channel * @chan: channel to be reviewed diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 27dbd4c64860..e34b601b71fd 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -106,8 +106,6 @@ static inline bool dmar_rcu_check(void) extern int dmar_table_init(void); extern int dmar_dev_scope_init(void); extern void dmar_register_bus_notifier(void); -extern int dmar_parse_dev_scope(void *start, void *end, int *cnt, - struct dmar_dev_scope **devices, u16 segment); extern void *dmar_alloc_dev_scope(void *start, void *end, int *cnt); extern void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt); extern int dmar_insert_dev_scope(struct dmar_pci_notify_info *info, diff --git a/include/linux/dnotify.h b/include/linux/dnotify.h index b1d26f9f1c9f..9f183a679277 100644 --- a/include/linux/dnotify.h +++ b/include/linux/dnotify.h @@ -30,7 +30,7 @@ struct dnotify_struct { FS_MOVED_FROM | FS_MOVED_TO) extern void dnotify_flush(struct file *, fl_owner_t); -extern int fcntl_dirnotify(int, struct file *, unsigned long); +extern int fcntl_dirnotify(int, struct file *, unsigned int); #else @@ -38,7 +38,7 @@ static inline void dnotify_flush(struct file *filp, fl_owner_t id) { } -static inline int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) +static inline int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg) { return -EINVAL; } diff --git a/include/linux/dpll.h b/include/linux/dpll.h new file mode 100644 index 000000000000..578fc5fa3750 --- /dev/null +++ b/include/linux/dpll.h @@ -0,0 +1,170 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates + * Copyright (c) 2023 Intel and affiliates + */ + +#ifndef __DPLL_H__ +#define __DPLL_H__ + +#include <uapi/linux/dpll.h> +#include <linux/device.h> +#include <linux/netlink.h> + +struct dpll_device; +struct dpll_pin; + +struct dpll_device_ops { + int (*mode_get)(const struct dpll_device *dpll, void *dpll_priv, + enum dpll_mode *mode, struct netlink_ext_ack *extack); + bool (*mode_supported)(const struct dpll_device *dpll, void *dpll_priv, + const enum dpll_mode mode, + struct netlink_ext_ack *extack); + int (*lock_status_get)(const struct dpll_device *dpll, void *dpll_priv, + enum dpll_lock_status *status, + struct netlink_ext_ack *extack); + int (*temp_get)(const struct dpll_device *dpll, void *dpll_priv, + s32 *temp, struct netlink_ext_ack *extack); +}; + +struct dpll_pin_ops { + int (*frequency_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + const u64 frequency, + struct netlink_ext_ack *extack); + int (*frequency_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + u64 *frequency, struct netlink_ext_ack *extack); + int (*direction_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + const enum dpll_pin_direction direction, + struct netlink_ext_ack *extack); + int (*direction_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + enum dpll_pin_direction *direction, + struct netlink_ext_ack *extack); + int (*state_on_pin_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_pin *parent_pin, + void *parent_pin_priv, + enum dpll_pin_state *state, + struct netlink_ext_ack *extack); + int (*state_on_dpll_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, enum dpll_pin_state *state, + struct netlink_ext_ack *extack); + int (*state_on_pin_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_pin *parent_pin, + void *parent_pin_priv, + const enum dpll_pin_state state, + struct netlink_ext_ack *extack); + int (*state_on_dpll_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, + const enum dpll_pin_state state, + struct netlink_ext_ack *extack); + int (*prio_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + u32 *prio, struct netlink_ext_ack *extack); + int (*prio_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + const u32 prio, struct netlink_ext_ack *extack); + int (*phase_offset_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + s64 *phase_offset, + struct netlink_ext_ack *extack); + int (*phase_adjust_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + s32 *phase_adjust, + struct netlink_ext_ack *extack); + int (*phase_adjust_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + const s32 phase_adjust, + struct netlink_ext_ack *extack); +}; + +struct dpll_pin_frequency { + u64 min; + u64 max; +}; + +#define DPLL_PIN_FREQUENCY_RANGE(_min, _max) \ + { \ + .min = _min, \ + .max = _max, \ + } + +#define DPLL_PIN_FREQUENCY(_val) DPLL_PIN_FREQUENCY_RANGE(_val, _val) +#define DPLL_PIN_FREQUENCY_1PPS \ + DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_1_HZ) +#define DPLL_PIN_FREQUENCY_10MHZ \ + DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_10_MHZ) +#define DPLL_PIN_FREQUENCY_IRIG_B \ + DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_10_KHZ) +#define DPLL_PIN_FREQUENCY_DCF77 \ + DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_77_5_KHZ) + +struct dpll_pin_phase_adjust_range { + s32 min; + s32 max; +}; + +struct dpll_pin_properties { + const char *board_label; + const char *panel_label; + const char *package_label; + enum dpll_pin_type type; + unsigned long capabilities; + u32 freq_supported_num; + struct dpll_pin_frequency *freq_supported; + struct dpll_pin_phase_adjust_range phase_range; +}; + +#if IS_ENABLED(CONFIG_DPLL) +size_t dpll_msg_pin_handle_size(struct dpll_pin *pin); +int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin); +#else +static inline size_t dpll_msg_pin_handle_size(struct dpll_pin *pin) +{ + return 0; +} + +static inline int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin) +{ + return 0; +} +#endif + +struct dpll_device * +dpll_device_get(u64 clock_id, u32 dev_driver_id, struct module *module); + +void dpll_device_put(struct dpll_device *dpll); + +int dpll_device_register(struct dpll_device *dpll, enum dpll_type type, + const struct dpll_device_ops *ops, void *priv); + +void dpll_device_unregister(struct dpll_device *dpll, + const struct dpll_device_ops *ops, void *priv); + +struct dpll_pin * +dpll_pin_get(u64 clock_id, u32 dev_driver_id, struct module *module, + const struct dpll_pin_properties *prop); + +int dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv); + +void dpll_pin_unregister(struct dpll_device *dpll, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv); + +void dpll_pin_put(struct dpll_pin *pin); + +int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv); + +void dpll_pin_on_pin_unregister(struct dpll_pin *parent, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv); + +int dpll_device_change_ntf(struct dpll_device *dpll); + +int dpll_pin_change_ntf(struct dpll_pin *pin); + +#endif diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index c177322f793d..b9dd35d4b8f5 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -28,7 +28,7 @@ /* Source and Destination MAC of follow-up meta frames. * Whereas the choice of SMAC only affects the unique identification of the * switch as sender of meta frames, the DMAC must be an address that is present - * in the DSA master port's multicast MAC filter. + * in the DSA conduit port's multicast MAC filter. * 01-80-C2-00-00-0E is a good choice for this, as all profiles of IEEE 1588 * over L2 use this address for some purpose already. */ diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index 061dd84d09f3..4fcbf4d4fd0a 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -37,10 +37,12 @@ struct _ddebug { #define _DPRINTK_FLAGS_INCL_FUNCNAME (1<<2) #define _DPRINTK_FLAGS_INCL_LINENO (1<<3) #define _DPRINTK_FLAGS_INCL_TID (1<<4) +#define _DPRINTK_FLAGS_INCL_SOURCENAME (1<<5) #define _DPRINTK_FLAGS_INCL_ANY \ (_DPRINTK_FLAGS_INCL_MODNAME | _DPRINTK_FLAGS_INCL_FUNCNAME |\ - _DPRINTK_FLAGS_INCL_LINENO | _DPRINTK_FLAGS_INCL_TID) + _DPRINTK_FLAGS_INCL_LINENO | _DPRINTK_FLAGS_INCL_TID |\ + _DPRINTK_FLAGS_INCL_SOURCENAME) #if defined DEBUG #define _DPRINTK_FLAGS_DEFAULT _DPRINTK_FLAGS_PRINT diff --git a/include/linux/efi.h b/include/linux/efi.h index ab088c662e88..9cc5bf32f6f2 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -24,10 +24,11 @@ #include <linux/range.h> #include <linux/reboot.h> #include <linux/uuid.h> -#include <linux/screen_info.h> #include <asm/page.h> +struct screen_info; + #define EFI_SUCCESS 0 #define EFI_LOAD_ERROR ( 1 | (1UL << (BITS_PER_LONG-1))) #define EFI_INVALID_PARAMETER ( 2 | (1UL << (BITS_PER_LONG-1))) @@ -357,13 +358,10 @@ void efi_native_runtime_setup(void); * where the UEFI SPEC breaks the line. */ #define NULL_GUID EFI_GUID(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00) -#define MPS_TABLE_GUID EFI_GUID(0xeb9d2d2f, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define ACPI_TABLE_GUID EFI_GUID(0xeb9d2d30, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define ACPI_20_TABLE_GUID EFI_GUID(0x8868e871, 0xe4f1, 0x11d3, 0xbc, 0x22, 0x00, 0x80, 0xc7, 0x3c, 0x88, 0x81) #define SMBIOS_TABLE_GUID EFI_GUID(0xeb9d2d31, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define SMBIOS3_TABLE_GUID EFI_GUID(0xf2fd1544, 0x9794, 0x4a2c, 0x99, 0x2e, 0xe5, 0xbb, 0xcf, 0x20, 0xe3, 0x94) -#define SAL_SYSTEM_TABLE_GUID EFI_GUID(0xeb9d2d32, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) -#define HCDP_TABLE_GUID EFI_GUID(0xf951938d, 0x620b, 0x42ef, 0x82, 0x79, 0xa8, 0x4b, 0x79, 0x61, 0x78, 0x98) #define UGA_IO_PROTOCOL_GUID EFI_GUID(0x61a4d49e, 0x6f68, 0x4f1b, 0xb9, 0x22, 0xa8, 0x6e, 0xed, 0x0b, 0x07, 0xa2) #define EFI_GLOBAL_VARIABLE_GUID EFI_GUID(0x8be4df61, 0x93ca, 0x11d2, 0xaa, 0x0d, 0x00, 0xe0, 0x98, 0x03, 0x2b, 0x8c) #define UV_SYSTEM_TABLE_GUID EFI_GUID(0x3b13a7d4, 0x633e, 0x11dd, 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93) @@ -726,7 +724,6 @@ static inline efi_status_t efi_query_variable_store(u32 attributes, return EFI_SUCCESS; } #endif -extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr); extern int __init __efi_memmap_init(struct efi_memory_map_data *data); extern int __init efi_memmap_init_early(struct efi_memory_map_data *data); @@ -851,10 +848,6 @@ static inline int efi_range_is_wc(unsigned long start, unsigned long len) return 1; } -#ifdef CONFIG_EFI_PCDP -extern int __init efi_setup_pcdp_console(char *); -#endif - /* * We play games with efi_enabled so that the compiler will, if * possible, remove EFI-related code altogether. @@ -1130,7 +1123,7 @@ extern bool efi_runtime_disabled(void); static inline bool efi_runtime_disabled(void) { return true; } #endif -extern void efi_call_virt_check_flags(unsigned long flags, const char *call); +extern void efi_call_virt_check_flags(unsigned long flags, const void *caller); extern unsigned long efi_call_virt_save_flags(void); enum efi_secureboot_mode { @@ -1171,8 +1164,7 @@ static inline void efi_check_for_embedded_firmwares(void) { } #define arch_efi_call_virt(p, f, args...) ((p)->f(args)) /* - * Arch code can implement the following three template macros, avoiding - * reptition for the void/non-void return cases of {__,}efi_call_virt(): + * Arch code must implement the following three routines: * * * arch_efi_call_virt_setup() * @@ -1181,9 +1173,8 @@ static inline void efi_check_for_embedded_firmwares(void) { } * * * arch_efi_call_virt() * - * Performs the call. The last expression in the macro must be the call - * itself, allowing the logic to be shared by the void and non-void - * cases. + * Performs the call. This routine takes a variable number of arguments so + * it must be implemented as a variadic preprocessor macro. * * * arch_efi_call_virt_teardown() * @@ -1192,33 +1183,20 @@ static inline void efi_check_for_embedded_firmwares(void) { } #define efi_call_virt_pointer(p, f, args...) \ ({ \ - efi_status_t __s; \ + typeof((p)->f(args)) __s; \ unsigned long __flags; \ \ arch_efi_call_virt_setup(); \ \ __flags = efi_call_virt_save_flags(); \ __s = arch_efi_call_virt(p, f, args); \ - efi_call_virt_check_flags(__flags, __stringify(f)); \ + efi_call_virt_check_flags(__flags, NULL); \ \ arch_efi_call_virt_teardown(); \ \ __s; \ }) -#define __efi_call_virt_pointer(p, f, args...) \ -({ \ - unsigned long __flags; \ - \ - arch_efi_call_virt_setup(); \ - \ - __flags = efi_call_virt_save_flags(); \ - arch_efi_call_virt(p, f, args); \ - efi_call_virt_check_flags(__flags, __stringify(f)); \ - \ - arch_efi_call_virt_teardown(); \ -}) - #define EFI_RANDOM_SEED_SIZE 32U // BLAKE2S_HASH_SIZE struct linux_efi_random_seed { @@ -1244,6 +1222,10 @@ extern int efi_tpm_final_log_size; extern unsigned long rci2_table_phys; +efi_status_t +efi_call_acpi_prm_handler(efi_status_t (__efiapi *handler_addr)(u64, void *), + u64 param_buffer_addr, void *context); + /* * efi_runtime_service() function identifiers. * "NONE" is used by efi_recover_from_page_fault() to check if the page @@ -1263,25 +1245,26 @@ enum efi_rts_ids { EFI_RESET_SYSTEM, EFI_UPDATE_CAPSULE, EFI_QUERY_CAPSULE_CAPS, + EFI_ACPI_PRM_HANDLER, }; +union efi_rts_args; + /* * efi_runtime_work: Details of EFI Runtime Service work - * @arg<1-5>: EFI Runtime Service function arguments + * @args: Pointer to union describing the arguments * @status: Status of executing EFI Runtime Service * @efi_rts_id: EFI Runtime Service function identifier * @efi_rts_comp: Struct used for handling completions + * @caller: The caller of the runtime service */ struct efi_runtime_work { - void *arg1; - void *arg2; - void *arg3; - void *arg4; - void *arg5; - efi_status_t status; - struct work_struct work; - enum efi_rts_ids efi_rts_id; - struct completion efi_rts_comp; + union efi_rts_args *args; + efi_status_t status; + struct work_struct work; + enum efi_rts_ids efi_rts_id; + struct completion efi_rts_comp; + const void *caller; }; extern struct efi_runtime_work efi_rts_work; diff --git a/include/linux/elf-fdpic.h b/include/linux/elf-fdpic.h index 3bea95a1af53..e533f4513194 100644 --- a/include/linux/elf-fdpic.h +++ b/include/linux/elf-fdpic.h @@ -10,13 +10,25 @@ #include <uapi/linux/elf-fdpic.h> +#if ELF_CLASS == ELFCLASS32 +#define Elf_Sword Elf32_Sword +#define elf_fdpic_loadseg elf32_fdpic_loadseg +#define elf_fdpic_loadmap elf32_fdpic_loadmap +#define ELF_FDPIC_LOADMAP_VERSION ELF32_FDPIC_LOADMAP_VERSION +#else +#define Elf_Sword Elf64_Sxword +#define elf_fdpic_loadmap elf64_fdpic_loadmap +#define elf_fdpic_loadseg elf64_fdpic_loadseg +#define ELF_FDPIC_LOADMAP_VERSION ELF64_FDPIC_LOADMAP_VERSION +#endif + /* * binfmt binary parameters structure */ struct elf_fdpic_params { struct elfhdr hdr; /* ref copy of ELF header */ struct elf_phdr *phdrs; /* ref copy of PT_PHDR table */ - struct elf32_fdpic_loadmap *loadmap; /* loadmap to be passed to userspace */ + struct elf_fdpic_loadmap *loadmap; /* loadmap to be passed to userspace */ unsigned long elfhdr_addr; /* mapped ELF header user address */ unsigned long ph_addr; /* mapped PT_PHDR user address */ unsigned long map_addr; /* mapped loadmap user address */ diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 62b61527bcc4..689028257fcc 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1045,11 +1045,30 @@ static inline int ethtool_mm_frag_size_min_to_add(u32 val_min, u32 *val_add, /** * ethtool_sprintf - Write formatted string to ethtool string data - * @data: Pointer to start of string to update + * @data: Pointer to a pointer to the start of string to update * @fmt: Format of string to write * - * Write formatted string to data. Update data to point at start of + * Write formatted string to *data. Update *data to point at start of * next string. */ extern __printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...); + +/* Link mode to forced speed capabilities maps */ +struct ethtool_forced_speed_map { + u32 speed; + __ETHTOOL_DECLARE_LINK_MODE_MASK(caps); + + const u32 *cap_arr; + u32 arr_size; +}; + +#define ETHTOOL_FORCED_SPEED_MAP(prefix, value) \ +{ \ + .speed = SPEED_##value, \ + .cap_arr = prefix##_##value, \ + .arr_size = ARRAY_SIZE(prefix##_##value), \ +} + +void +ethtool_forced_speed_maps_init(struct ethtool_forced_speed_map *maps, u32 size); #endif /* _LINUX_ETHTOOL_H */ diff --git a/include/linux/evm.h b/include/linux/evm.h index 7dc1ee74169f..01fc495a83e2 100644 --- a/include/linux/evm.h +++ b/include/linux/evm.h @@ -56,9 +56,10 @@ static inline void evm_inode_post_set_acl(struct dentry *dentry, { return evm_inode_post_setxattr(dentry, acl_name, NULL, 0); } -extern int evm_inode_init_security(struct inode *inode, - const struct xattr *xattr_array, - struct xattr *evm); + +int evm_inode_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr, struct xattr *xattrs, + int *xattr_count); extern bool evm_revalidate_status(const char *xattr_name); extern int evm_protected_xattr_if_enabled(const char *req_xattr_name); extern int evm_read_protected_xattrs(struct dentry *dentry, u8 *buffer, @@ -157,9 +158,10 @@ static inline void evm_inode_post_set_acl(struct dentry *dentry, return; } -static inline int evm_inode_init_security(struct inode *inode, - const struct xattr *xattr_array, - struct xattr *evm) +static inline int evm_inode_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr, + struct xattr *xattrs, + int *xattr_count) { return 0; } diff --git a/include/linux/export-internal.h b/include/linux/export-internal.h index 1c849db953a5..69501e0ec239 100644 --- a/include/linux/export-internal.h +++ b/include/linux/export-internal.h @@ -50,8 +50,8 @@ " .previous" "\n" \ ) -#ifdef CONFIG_IA64 -#define KSYM_FUNC(name) @fptr(name) +#if defined(CONFIG_PARISC) && defined(CONFIG_64BIT) +#define KSYM_FUNC(name) P%name #else #define KSYM_FUNC(name) name #endif diff --git a/include/linux/export.h b/include/linux/export.h index beed8387e0a4..9911508a9604 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -50,7 +50,7 @@ extern struct module __this_module; __EXPORT_SYMBOL_REF(sym) ASM_NL \ .previous -#if !defined(CONFIG_MODULES) || defined(__DISABLE_EXPORTS) +#if defined(__DISABLE_EXPORTS) /* * Allow symbol exports to be disabled completely so that C code may @@ -75,7 +75,7 @@ extern struct module __this_module; __ADDRESSABLE(sym) \ asm(__stringify(___EXPORT_SYMBOL(sym, license, ns))) -#endif /* CONFIG_MODULES */ +#endif #ifdef DEFAULT_SYMBOL_NAMESPACE #define _EXPORT_SYMBOL(sym, license) __EXPORT_SYMBOL(sym, license, __stringify(DEFAULT_SYMBOL_NAMESPACE)) diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 11fbd0ee1370..bb37ad5cc954 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -99,12 +99,29 @@ enum fid_type { FILEID_FAT_WITH_PARENT = 0x72, /* + * 64 bit inode number, 32 bit generation number. + */ + FILEID_INO64_GEN = 0x81, + + /* + * 64 bit inode number, 32 bit generation number, + * 64 bit parent inode number, 32 bit parent generation. + */ + FILEID_INO64_GEN_PARENT = 0x82, + + /* * 128 bit child FID (struct lu_fid) * 128 bit parent FID (struct lu_fid) */ FILEID_LUSTRE = 0x97, /* + * 64 bit inode number, 32 bit subvolume, 32 bit generation number: + */ + FILEID_BCACHEFS_WITHOUT_PARENT = 0xb1, + FILEID_BCACHEFS_WITH_PARENT = 0xb2, + + /* * 64 bit unique kernfs id */ FILEID_KERNFS = 0xfe, @@ -123,7 +140,11 @@ struct fid { u32 parent_ino; u32 parent_gen; } i32; - struct { + struct { + u64 ino; + u32 gen; + } __packed i64; + struct { u32 block; u16 partref; u16 parent_partref; @@ -224,15 +245,56 @@ struct export_operations { atomic attribute updates */ #define EXPORT_OP_FLUSH_ON_CLOSE (0x20) /* fs flushes file data on close */ +#define EXPORT_OP_ASYNC_LOCK (0x40) /* fs can do async lock request */ unsigned long flags; }; +/** + * exportfs_lock_op_is_async() - export op supports async lock operation + * @export_ops: the nfs export operations to check + * + * Returns true if the nfs export_operations structure has + * EXPORT_OP_ASYNC_LOCK in their flags set + */ +static inline bool +exportfs_lock_op_is_async(const struct export_operations *export_ops) +{ + return export_ops->flags & EXPORT_OP_ASYNC_LOCK; +} + extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, int *max_len, struct inode *parent, int flags); extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len, int flags); +static inline bool exportfs_can_encode_fid(const struct export_operations *nop) +{ + return !nop || nop->encode_fh; +} + +static inline bool exportfs_can_decode_fh(const struct export_operations *nop) +{ + return nop && nop->fh_to_dentry; +} + +static inline bool exportfs_can_encode_fh(const struct export_operations *nop, + int fh_flags) +{ + /* + * If a non-decodeable file handle was requested, we only need to make + * sure that filesystem did not opt-out of encoding fid. + */ + if (fh_flags & EXPORT_FH_FID) + return exportfs_can_encode_fid(nop); + + /* + * If a decodeable file handle was requested, we need to make sure that + * filesystem can also decode file handles. + */ + return exportfs_can_decode_fh(nop); +} + static inline int exportfs_encode_fid(struct inode *inode, struct fid *fid, int *max_len) { @@ -252,10 +314,12 @@ extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, /* * Generic helpers for filesystems. */ -extern struct dentry *generic_fh_to_dentry(struct super_block *sb, +int generic_encode_ino32_fh(struct inode *inode, __u32 *fh, int *max_len, + struct inode *parent); +struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type, struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen)); -extern struct dentry *generic_fh_to_parent(struct super_block *sb, +struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type, struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen)); diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 3c45c3846fe9..e596a0abcb27 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -328,16 +328,4 @@ struct extcon_specific_cable_nb { struct extcon_dev *edev; unsigned long previous_value; }; - -static inline int extcon_register_interest(struct extcon_specific_cable_nb *obj, - const char *extcon_name, const char *cable_name, - struct notifier_block *nb) -{ - return -EINVAL; -} - -static inline int extcon_unregister_interest(struct extcon_specific_cable_nb *obj) -{ - return -EINVAL; -} #endif /* __LINUX_EXTCON_H__ */ diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index a82a4bb6ce68..039fe0ce8d83 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -13,10 +13,10 @@ #define F2FS_SUPER_OFFSET 1024 /* byte-size offset */ #define F2FS_MIN_LOG_SECTOR_SIZE 9 /* 9 bits for 512 bytes */ -#define F2FS_MAX_LOG_SECTOR_SIZE 12 /* 12 bits for 4096 bytes */ -#define F2FS_LOG_SECTORS_PER_BLOCK 3 /* log number for sector/blk */ -#define F2FS_BLKSIZE 4096 /* support only 4KB block */ -#define F2FS_BLKSIZE_BITS 12 /* bits for F2FS_BLKSIZE */ +#define F2FS_MAX_LOG_SECTOR_SIZE PAGE_SHIFT /* Max is Block Size */ +#define F2FS_LOG_SECTORS_PER_BLOCK (PAGE_SHIFT - 9) /* log number for sector/blk */ +#define F2FS_BLKSIZE PAGE_SIZE /* support only block == page */ +#define F2FS_BLKSIZE_BITS PAGE_SHIFT /* bits for F2FS_BLKSIZE */ #define F2FS_MAX_EXTENSION 64 /* # of extension entries */ #define F2FS_EXTENSION_LEN 8 /* max size of extension */ #define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) >> F2FS_BLKSIZE_BITS) @@ -104,6 +104,7 @@ enum f2fs_error { ERROR_CORRUPTED_VERITY_XATTR, ERROR_CORRUPTED_XATTR, ERROR_INVALID_NODE_REFERENCE, + ERROR_INCONSISTENT_NAT, ERROR_MAX, }; @@ -210,14 +211,14 @@ struct f2fs_checkpoint { unsigned char sit_nat_version_bitmap[]; } __packed; -#define CP_CHKSUM_OFFSET 4092 /* default chksum offset in checkpoint */ +#define CP_CHKSUM_OFFSET (F2FS_BLKSIZE - sizeof(__le32)) /* default chksum offset in checkpoint */ #define CP_MIN_CHKSUM_OFFSET \ (offsetof(struct f2fs_checkpoint, sit_nat_version_bitmap)) /* * For orphan inode management */ -#define F2FS_ORPHANS_PER_BLOCK 1020 +#define F2FS_ORPHANS_PER_BLOCK ((F2FS_BLKSIZE - 4 * sizeof(__le32)) / sizeof(__le32)) #define GET_ORPHAN_BLOCKS(n) (((n) + F2FS_ORPHANS_PER_BLOCK - 1) / \ F2FS_ORPHANS_PER_BLOCK) @@ -243,14 +244,31 @@ struct f2fs_extent { #define F2FS_NAME_LEN 255 /* 200 bytes for inline xattrs by default */ #define DEFAULT_INLINE_XATTR_ADDRS 50 -#define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ + +#define OFFSET_OF_END_OF_I_EXT 360 +#define SIZE_OF_I_NID 20 + +struct node_footer { + __le32 nid; /* node id */ + __le32 ino; /* inode number */ + __le32 flag; /* include cold/fsync/dentry marks and offset */ + __le64 cp_ver; /* checkpoint version */ + __le32 next_blkaddr; /* next node page block address */ +} __packed; + +/* Address Pointers in an Inode */ +#define DEF_ADDRS_PER_INODE ((F2FS_BLKSIZE - OFFSET_OF_END_OF_I_EXT \ + - SIZE_OF_I_NID \ + - sizeof(struct node_footer)) / sizeof(__le32)) #define CUR_ADDRS_PER_INODE(inode) (DEF_ADDRS_PER_INODE - \ get_extra_isize(inode)) #define DEF_NIDS_PER_INODE 5 /* Node IDs in an Inode */ #define ADDRS_PER_INODE(inode) addrs_per_inode(inode) -#define DEF_ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ +/* Address Pointers in a Direct Block */ +#define DEF_ADDRS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32)) #define ADDRS_PER_BLOCK(inode) addrs_per_block(inode) -#define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */ +/* Node IDs in an Indirect Block */ +#define NIDS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32)) #define ADDRS_PER_PAGE(page, inode) \ (IS_INODE(page) ? ADDRS_PER_INODE(inode) : ADDRS_PER_BLOCK(inode)) @@ -342,14 +360,6 @@ enum { #define OFFSET_BIT_MASK GENMASK(OFFSET_BIT_SHIFT - 1, 0) -struct node_footer { - __le32 nid; /* node id */ - __le32 ino; /* inode number */ - __le32 flag; /* include cold/fsync/dentry marks and offset */ - __le64 cp_ver; /* checkpoint version */ - __le32 next_blkaddr; /* next node page block address */ -} __packed; - struct f2fs_node { /* can be one of three types: inode, direct, and indirect types */ union { @@ -363,7 +373,7 @@ struct f2fs_node { /* * For NAT entries */ -#define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry)) +#define NAT_ENTRY_PER_BLOCK (F2FS_BLKSIZE / sizeof(struct f2fs_nat_entry)) struct f2fs_nat_entry { __u8 version; /* latest version of cached nat entry */ @@ -378,12 +388,13 @@ struct f2fs_nat_block { /* * For SIT entries * - * Each segment is 2MB in size by default so that a bitmap for validity of - * there-in blocks should occupy 64 bytes, 512 bits. + * A validity bitmap of 64 bytes covers 512 blocks of area. For a 4K page size, + * this results in a segment size of 2MB. For 16k pages, the default segment size + * is 8MB. * Not allow to change this. */ #define SIT_VBLOCK_MAP_SIZE 64 -#define SIT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_sit_entry)) +#define SIT_ENTRY_PER_BLOCK (F2FS_BLKSIZE / sizeof(struct f2fs_sit_entry)) /* * F2FS uses 4 bytes to represent block address. As a result, supported size of @@ -418,7 +429,7 @@ struct f2fs_sit_block { * For segment summary * * One summary block contains exactly 512 summary entries, which represents - * exactly 2MB segment by default. Not allow to change the basic units. + * exactly one segment by default. Not allow to change the basic units. * * NOTE: For initializing fields, you must use set_summary * @@ -429,12 +440,12 @@ struct f2fs_sit_block { * from node's page's beginning to get a data block address. * ex) data_blkaddr = (block_t)(nodepage_start_address + ofs_in_node) */ -#define ENTRIES_IN_SUM 512 +#define ENTRIES_IN_SUM (F2FS_BLKSIZE / 8) #define SUMMARY_SIZE (7) /* sizeof(struct summary) */ #define SUM_FOOTER_SIZE (5) /* sizeof(struct summary_footer) */ #define SUM_ENTRY_SIZE (SUMMARY_SIZE * ENTRIES_IN_SUM) -/* a summary entry for a 4KB-sized block in a segment */ +/* a summary entry for a block in a segment */ struct f2fs_summary { __le32 nid; /* parent node id */ union { @@ -518,7 +529,7 @@ struct f2fs_journal { }; } __packed; -/* 4KB-sized summary block structure */ +/* Block-sized summary block structure */ struct f2fs_summary_block { struct f2fs_summary entries[ENTRIES_IN_SUM]; struct f2fs_journal journal; @@ -559,11 +570,14 @@ typedef __le32 f2fs_hash_t; * Note: there are more reserved space in inline dentry than in regular * dentry, when converting inline dentry we should handle this carefully. */ -#define NR_DENTRY_IN_BLOCK 214 /* the number of dentry in a block */ + +/* the number of dentry in a block */ +#define NR_DENTRY_IN_BLOCK ((BITS_PER_BYTE * F2FS_BLKSIZE) / \ + ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * BITS_PER_BYTE + 1)) #define SIZE_OF_DIR_ENTRY 11 /* by byte */ #define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \ BITS_PER_BYTE) -#define SIZE_OF_RESERVED (PAGE_SIZE - ((SIZE_OF_DIR_ENTRY + \ +#define SIZE_OF_RESERVED (F2FS_BLKSIZE - ((SIZE_OF_DIR_ENTRY + \ F2FS_SLOT_LEN) * \ NR_DENTRY_IN_BLOCK + SIZE_OF_DENTRY_BITMAP)) #define MIN_INLINE_DENTRY_SIZE 40 /* just include '.' and '..' entries */ @@ -576,7 +590,7 @@ struct f2fs_dir_entry { __u8 file_type; /* file type */ } __packed; -/* 4KB-sized directory entry block */ +/* Block-sized directory entry block */ struct f2fs_dentry_block { /* validity bitmap for directory entries in each block */ __u8 dentry_bitmap[SIZE_OF_DENTRY_BITMAP]; diff --git a/include/linux/fb.h b/include/linux/fb.h index ce7d588edc3e..94e2c44c6569 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -383,7 +383,6 @@ struct fb_tile_ops { #endif /* CONFIG_FB_TILEBLITTING */ /* FBINFO_* = fb_info.flags bit flags */ -#define FBINFO_DEFAULT 0 #define FBINFO_HWACCEL_DISABLED 0x0002 /* When FBINFO_HWACCEL_DISABLED is set: * Hardware acceleration is turned off. Software implementations @@ -481,7 +480,9 @@ struct fb_info { const struct fb_ops *fbops; struct device *device; /* This is the parent */ +#if defined(CONFIG_FB_DEVICE) struct device *dev; /* This is this fb device */ +#endif int class_flag; /* private sysfs flags */ #ifdef CONFIG_FB_TILEBLITTING struct fb_tile_ops *tileops; /* Tile Blitting */ @@ -502,8 +503,6 @@ struct fb_info { bool skip_vt_switch; /* no VT switch on suspend/resume required */ }; -#define FBINFO_FLAG_DEFAULT FBINFO_DEFAULT - /* This will go away * fbset currently hacks in FB_ACCELF_TEXT into var.accel_flags * when it wants to turn the acceleration engine on. This is @@ -527,7 +526,7 @@ extern int fb_pan_display(struct fb_info *info, struct fb_var_screeninfo *var); extern int fb_blank(struct fb_info *info, int blank); /* - * Drawing operations where framebuffer is in I/O memory + * Helpers for framebuffers in I/O memory */ extern void cfb_fillrect(struct fb_info *info, const struct fb_fillrect *rect); @@ -538,29 +537,25 @@ extern ssize_t fb_io_read(struct fb_info *info, char __user *buf, extern ssize_t fb_io_write(struct fb_info *info, const char __user *buf, size_t count, loff_t *ppos); -/* - * Initializes struct fb_ops for framebuffers in I/O memory. - */ - -#define __FB_DEFAULT_IO_OPS_RDWR \ +#define __FB_DEFAULT_IOMEM_OPS_RDWR \ .fb_read = fb_io_read, \ .fb_write = fb_io_write -#define __FB_DEFAULT_IO_OPS_DRAW \ +#define __FB_DEFAULT_IOMEM_OPS_DRAW \ .fb_fillrect = cfb_fillrect, \ .fb_copyarea = cfb_copyarea, \ .fb_imageblit = cfb_imageblit -#define __FB_DEFAULT_IO_OPS_MMAP \ +#define __FB_DEFAULT_IOMEM_OPS_MMAP \ .fb_mmap = NULL /* default implementation */ -#define FB_DEFAULT_IO_OPS \ - __FB_DEFAULT_IO_OPS_RDWR, \ - __FB_DEFAULT_IO_OPS_DRAW, \ - __FB_DEFAULT_IO_OPS_MMAP +#define FB_DEFAULT_IOMEM_OPS \ + __FB_DEFAULT_IOMEM_OPS_RDWR, \ + __FB_DEFAULT_IOMEM_OPS_DRAW, \ + __FB_DEFAULT_IOMEM_OPS_MMAP /* - * Drawing operations where framebuffer is in system RAM + * Helpers for framebuffers in system memory */ extern void sys_fillrect(struct fb_info *info, const struct fb_fillrect *rect); @@ -571,32 +566,31 @@ extern ssize_t fb_sys_read(struct fb_info *info, char __user *buf, extern ssize_t fb_sys_write(struct fb_info *info, const char __user *buf, size_t count, loff_t *ppos); +#define __FB_DEFAULT_SYSMEM_OPS_RDWR \ + .fb_read = fb_sys_read, \ + .fb_write = fb_sys_write + +#define __FB_DEFAULT_SYSMEM_OPS_DRAW \ + .fb_fillrect = sys_fillrect, \ + .fb_copyarea = sys_copyarea, \ + .fb_imageblit = sys_imageblit + /* - * Initializes struct fb_ops for framebuffers in system memory. + * Helpers for framebuffers in DMA-able memory */ -#define __FB_DEFAULT_SYS_OPS_RDWR \ +#define __FB_DEFAULT_DMAMEM_OPS_RDWR \ .fb_read = fb_sys_read, \ .fb_write = fb_sys_write -#define __FB_DEFAULT_SYS_OPS_DRAW \ +#define __FB_DEFAULT_DMAMEM_OPS_DRAW \ .fb_fillrect = sys_fillrect, \ .fb_copyarea = sys_copyarea, \ .fb_imageblit = sys_imageblit -#define __FB_DEFAULT_SYS_OPS_MMAP \ - .fb_mmap = NULL /* default implementation */ - -#define FB_DEFAULT_SYS_OPS \ - __FB_DEFAULT_SYS_OPS_RDWR, \ - __FB_DEFAULT_SYS_OPS_DRAW, \ - __FB_DEFAULT_SYS_OPS_MMAP - -/* drivers/video/fbmem.c */ +/* fbmem.c */ extern int register_framebuffer(struct fb_info *fb_info); extern void unregister_framebuffer(struct fb_info *fb_info); -extern int fb_prepare_logo(struct fb_info *fb_info, int rotate); -extern int fb_show_logo(struct fb_info *fb_info, int rotate); extern char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size); extern void fb_pad_unaligned_buffer(u8 *dst, u32 d_pitch, u8 *src, u32 idx, u32 height, u32 shift_high, u32 shift_low, u32 mod); @@ -607,10 +601,6 @@ extern int fb_get_color_depth(struct fb_var_screeninfo *var, extern int fb_get_options(const char *name, char **option); extern int fb_new_modelist(struct fb_info *info); -extern bool fb_center_logo; -extern int fb_logo_count; -extern struct class *fb_class; - static inline void lock_fb_info(struct fb_info *info) { mutex_lock(&info->lock); @@ -636,7 +626,7 @@ static inline void __fb_pad_aligned_buffer(u8 *dst, u32 d_pitch, } } -/* drivers/video/fb_defio.c */ +/* fb_defio.c */ int fb_deferred_io_mmap(struct fb_info *info, struct vm_area_struct *vma); extern int fb_deferred_io_init(struct fb_info *info); extern void fb_deferred_io_open(struct fb_info *info, @@ -687,11 +677,11 @@ extern int fb_deferred_io_fsync(struct file *file, loff_t start, __damage_area(info, image->dx, image->dy, image->width, image->height); \ } -#define FB_GEN_DEFAULT_DEFERRED_IO_OPS(__prefix, __damage_range, __damage_area) \ +#define FB_GEN_DEFAULT_DEFERRED_IOMEM_OPS(__prefix, __damage_range, __damage_area) \ __FB_GEN_DEFAULT_DEFERRED_OPS_RDWR(__prefix, __damage_range, io) \ __FB_GEN_DEFAULT_DEFERRED_OPS_DRAW(__prefix, __damage_area, cfb) -#define FB_GEN_DEFAULT_DEFERRED_SYS_OPS(__prefix, __damage_range, __damage_area) \ +#define FB_GEN_DEFAULT_DEFERRED_SYSMEM_OPS(__prefix, __damage_range, __damage_area) \ __FB_GEN_DEFAULT_DEFERRED_OPS_RDWR(__prefix, __damage_range, sys) \ __FB_GEN_DEFAULT_DEFERRED_OPS_DRAW(__prefix, __damage_area, sys) @@ -735,14 +725,11 @@ static inline bool fb_be_math(struct fb_info *info) #endif /* CONFIG_FB_FOREIGN_ENDIAN */ } -/* drivers/video/fbsysfs.c */ extern struct fb_info *framebuffer_alloc(size_t size, struct device *dev); extern void framebuffer_release(struct fb_info *info); -extern int fb_init_device(struct fb_info *fb_info); -extern void fb_cleanup_device(struct fb_info *head); extern void fb_bl_default_curve(struct fb_info *fb_info, u8 off, u8 min, u8 max); -/* drivers/video/fbmon.c */ +/* fbmon.c */ #define FB_MAXTIMINGS 0 #define FB_VSYNCTIMINGS 1 #define FB_HSYNCTIMINGS 2 @@ -776,7 +763,7 @@ extern int of_get_fb_videomode(struct device_node *np, extern int fb_videomode_from_videomode(const struct videomode *vm, struct fb_videomode *fbmode); -/* drivers/video/modedb.c */ +/* modedb.c */ #define VESA_MODEDB_SIZE 43 #define DMT_SIZE 0x50 @@ -802,7 +789,7 @@ extern void fb_videomode_to_modelist(const struct fb_videomode *modedb, int num, extern const struct fb_videomode *fb_find_best_display(const struct fb_monspecs *specs, struct list_head *head); -/* drivers/video/fbcmap.c */ +/* fbcmap.c */ extern int fb_alloc_cmap(struct fb_cmap *cmap, int len, int transp); extern int fb_alloc_cmap_gfp(struct fb_cmap *cmap, int len, int transp, gfp_t flags); extern void fb_dealloc_cmap(struct fb_cmap *cmap); diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index e066816f3519..bc4c3287a65e 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -98,20 +98,9 @@ static inline struct file *files_lookup_fd_locked(struct files_struct *files, un return files_lookup_fd_raw(files, fd); } -static inline struct file *files_lookup_fd_rcu(struct files_struct *files, unsigned int fd) -{ - RCU_LOCKDEP_WARN(!rcu_read_lock_held(), - "suspicious rcu_dereference_check() usage"); - return files_lookup_fd_raw(files, fd); -} - -static inline struct file *lookup_fd_rcu(unsigned int fd) -{ - return files_lookup_fd_rcu(current->files, fd); -} - -struct file *task_lookup_fd_rcu(struct task_struct *task, unsigned int fd); -struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *fd); +struct file *lookup_fdget_rcu(unsigned int fd); +struct file *task_lookup_fdget_rcu(struct task_struct *task, unsigned int fd); +struct file *task_lookup_next_fdget_rcu(struct task_struct *task, unsigned int *fd); struct task_struct; diff --git a/include/linux/filelock.h b/include/linux/filelock.h index efcdd1631d9b..95e868e09e29 100644 --- a/include/linux/filelock.h +++ b/include/linux/filelock.h @@ -144,7 +144,7 @@ int fcntl_setlk64(unsigned int, struct file *, unsigned int, struct flock64 *); #endif -int fcntl_setlease(unsigned int fd, struct file *filp, long arg); +int fcntl_setlease(unsigned int fd, struct file *filp, int arg); int fcntl_getlease(struct file *filp); /* fs/locks.c */ @@ -167,8 +167,8 @@ bool vfs_inode_has_locks(struct inode *inode); int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl); int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); void lease_get_mtime(struct inode *, struct timespec64 *time); -int generic_setlease(struct file *, long, struct file_lock **, void **priv); -int vfs_setlease(struct file *, long, struct file_lock **, void **); +int generic_setlease(struct file *, int, struct file_lock **, void **priv); +int vfs_setlease(struct file *, int, struct file_lock **, void **); int lease_modify(struct file_lock *, int, struct list_head *); struct notifier_block; @@ -213,7 +213,7 @@ static inline int fcntl_setlk64(unsigned int fd, struct file *file, return -EACCES; } #endif -static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg) +static inline int fcntl_setlease(unsigned int fd, struct file *filp, int arg) { return -EINVAL; } @@ -306,13 +306,13 @@ static inline void lease_get_mtime(struct inode *inode, return; } -static inline int generic_setlease(struct file *filp, long arg, +static inline int generic_setlease(struct file *filp, int arg, struct file_lock **flp, void **priv) { return -EINVAL; } -static inline int vfs_setlease(struct file *filp, long arg, +static inline int vfs_setlease(struct file *filp, int arg, struct file_lock **lease, void **priv) { return -EINVAL; diff --git a/include/linux/filter.h b/include/linux/filter.h index f69114083ec7..a4953fafc8cb 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -69,6 +69,9 @@ struct ctl_table_header; /* unused opcode to mark special load instruction. Same as BPF_ABS */ #define BPF_PROBE_MEM 0x20 +/* unused opcode to mark special ldsx instruction. Same as BPF_IND */ +#define BPF_PROBE_MEMSX 0x40 + /* unused opcode to mark call to interpreter with arguments */ #define BPF_CALL_ARGS 0xe0 @@ -90,39 +93,49 @@ struct ctl_table_header; /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ -#define BPF_ALU64_REG(OP, DST, SRC) \ +#define BPF_ALU64_REG_OFF(OP, DST, SRC, OFF) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ - .off = 0, \ + .off = OFF, \ .imm = 0 }) -#define BPF_ALU32_REG(OP, DST, SRC) \ +#define BPF_ALU64_REG(OP, DST, SRC) \ + BPF_ALU64_REG_OFF(OP, DST, SRC, 0) + +#define BPF_ALU32_REG_OFF(OP, DST, SRC, OFF) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_OP(OP) | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ - .off = 0, \ + .off = OFF, \ .imm = 0 }) +#define BPF_ALU32_REG(OP, DST, SRC) \ + BPF_ALU32_REG_OFF(OP, DST, SRC, 0) + /* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ -#define BPF_ALU64_IMM(OP, DST, IMM) \ +#define BPF_ALU64_IMM_OFF(OP, DST, IMM, OFF) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ - .off = 0, \ + .off = OFF, \ .imm = IMM }) +#define BPF_ALU64_IMM(OP, DST, IMM) \ + BPF_ALU64_IMM_OFF(OP, DST, IMM, 0) -#define BPF_ALU32_IMM(OP, DST, IMM) \ +#define BPF_ALU32_IMM_OFF(OP, DST, IMM, OFF) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ - .off = 0, \ + .off = OFF, \ .imm = IMM }) +#define BPF_ALU32_IMM(OP, DST, IMM) \ + BPF_ALU32_IMM_OFF(OP, DST, IMM, 0) /* Endianess conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */ @@ -134,6 +147,16 @@ struct ctl_table_header; .off = 0, \ .imm = LEN }) +/* Byte Swap, bswap16/32/64 */ + +#define BPF_BSWAP(DST, LEN) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_END | BPF_SRC(BPF_TO_LE), \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = LEN }) + /* Short form of mov, dst_reg = src_reg */ #define BPF_MOV64_REG(DST, SRC) \ @@ -170,6 +193,24 @@ struct ctl_table_header; .off = 0, \ .imm = IMM }) +/* Short form of movsx, dst_reg = (s8,s16,s32)src_reg */ + +#define BPF_MOVSX64_REG(DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +#define BPF_MOVSX32_REG(DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Special form of mov32, used for doing explicit zero extension on dst. */ #define BPF_ZEXT_REG(DST) \ ((struct bpf_insn) { \ @@ -254,6 +295,16 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) .off = OFF, \ .imm = 0 }) +/* Memory load, dst_reg = *(signed size *) (src_reg + off16) */ + +#define BPF_LDX_MEMSX(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEMSX, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Memory store, *(uint *) (dst_reg + off16) = src_reg */ #define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ @@ -685,7 +736,7 @@ static inline void bpf_compute_and_save_data_end( cb->data_end = skb->data + skb_headlen(skb); } -/* Restore data saved by bpf_compute_data_pointers(). */ +/* Restore data saved by bpf_compute_and_save_data_end(). */ static inline void bpf_restore_data_end( struct sk_buff *skb, void *saved_data_end) { @@ -765,23 +816,6 @@ DECLARE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key); u32 xdp_master_redirect(struct xdp_buff *xdp); -static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, - struct xdp_buff *xdp) -{ - /* Driver XDP hooks are invoked within a single NAPI poll cycle and thus - * under local_bh_disable(), which provides the needed RCU protection - * for accessing map entries. - */ - u32 act = __bpf_prog_run(prog, xdp, BPF_DISPATCHER_FUNC(xdp)); - - if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) { - if (act == XDP_TX && netif_is_bond_slave(xdp->rxq->dev)) - act = xdp_master_redirect(xdp); - } - - return act; -} - void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog); static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog) @@ -920,6 +954,8 @@ bool bpf_jit_needs_zext(void); bool bpf_jit_supports_subprog_tailcalls(void); bool bpf_jit_supports_kfunc_call(void); bool bpf_jit_supports_far_kfunc_call(void); +bool bpf_jit_supports_exceptions(void); +void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie); bool bpf_helper_changes_pkt_data(void *func); static inline bool bpf_dump_raw_ok(const struct cred *cred) @@ -989,12 +1025,6 @@ int xdp_do_redirect_frame(struct net_device *dev, struct bpf_prog *prog); void xdp_do_flush(void); -/* The xdp_do_flush_map() helper has been renamed to drop the _map suffix, as - * it is no longer only flushing maps. Keep this define for compatibility - * until all drivers are updated - do not use xdp_do_flush_map() in new code! - */ -#define xdp_do_flush_map xdp_do_flush - void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog, u32 act); #ifdef CONFIG_INET @@ -1135,6 +1165,7 @@ const char *__bpf_address_lookup(unsigned long addr, unsigned long *size, bool is_bpf_text_address(unsigned long addr); int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *sym); +struct bpf_prog *bpf_prog_ksym_find(unsigned long addr); static inline const char * bpf_address_lookup(unsigned long addr, unsigned long *size, @@ -1202,6 +1233,11 @@ static inline int bpf_get_kallsym(unsigned int symnum, unsigned long *value, return -ERANGE; } +static inline struct bpf_prog *bpf_prog_ksym_find(unsigned long addr) +{ + return NULL; +} + static inline const char * bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char **modname, char *sym) @@ -1293,6 +1329,7 @@ struct bpf_sock_addr_kern { */ u64 tmp_reg; void *t_ctx; /* Attach type specific context. */ + u32 uaddrlen; }; struct bpf_sock_ops_kern { @@ -1580,10 +1617,9 @@ static inline void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len) return NULL; } -static inline void *bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, void *buf, - unsigned long len, bool flush) +static inline void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, void *buf, + unsigned long len, bool flush) { - return NULL; } #endif /* CONFIG_NET */ diff --git a/include/linux/firmware/imx/dsp.h b/include/linux/firmware/imx/dsp.h index 4f7895a3b73c..1f176a2683fe 100644 --- a/include/linux/firmware/imx/dsp.h +++ b/include/linux/firmware/imx/dsp.h @@ -37,17 +37,11 @@ struct imx_dsp_ipc { static inline void imx_dsp_set_data(struct imx_dsp_ipc *ipc, void *data) { - if (!ipc) - return; - ipc->private_data = data; } static inline void *imx_dsp_get_data(struct imx_dsp_ipc *ipc) { - if (!ipc) - return NULL; - return ipc->private_data; } diff --git a/include/linux/firmware/imx/sci.h b/include/linux/firmware/imx/sci.h index 5cc63fe7e84d..df17196df5ff 100644 --- a/include/linux/firmware/imx/sci.h +++ b/include/linux/firmware/imx/sci.h @@ -21,31 +21,37 @@ int imx_scu_enable_general_irq_channel(struct device *dev); int imx_scu_irq_register_notifier(struct notifier_block *nb); int imx_scu_irq_unregister_notifier(struct notifier_block *nb); int imx_scu_irq_group_enable(u8 group, u32 mask, u8 enable); +int imx_scu_irq_get_status(u8 group, u32 *irq_status); int imx_scu_soc_init(struct device *dev); #else static inline int imx_scu_soc_init(struct device *dev) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int imx_scu_enable_general_irq_channel(struct device *dev) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int imx_scu_irq_register_notifier(struct notifier_block *nb) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int imx_scu_irq_unregister_notifier(struct notifier_block *nb) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int imx_scu_irq_group_enable(u8 group, u32 mask, u8 enable) { - return -ENOTSUPP; + return -EOPNOTSUPP; +} + +static inline int imx_scu_irq_get_status(u8 group, u32 *irq_status) +{ + return -EOPNOTSUPP; } #endif #endif /* _SC_SCI_H */ diff --git a/include/linux/firmware/intel/stratix10-smc.h b/include/linux/firmware/intel/stratix10-smc.h index a718f853d457..ee80ca4bb0d0 100644 --- a/include/linux/firmware/intel/stratix10-smc.h +++ b/include/linux/firmware/intel/stratix10-smc.h @@ -467,6 +467,31 @@ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_COMPLETED_WRITE) INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FIRMWARE_VERSION) /** + * SMC call protocol for Mailbox, starting FUNCID from 60 + * + * Call register usage: + * a0 INTEL_SIP_SMC_MBOX_SEND_CMD + * a1 mailbox command code + * a2 physical address that contain mailbox command data (not include header) + * a3 mailbox command data size in word + * a4 set to 0 for CASUAL, set to 1 for URGENT + * a5 physical address for secure firmware to put response data + * (not include header) + * a6 maximum size in word of physical address to store response data + * a7 not used + * + * Return status + * a0 INTEL_SIP_SMC_STATUS_OK, INTEL_SIP_SMC_STATUS_REJECTED or + * INTEL_SIP_SMC_STATUS_ERROR + * a1 mailbox error code + * a2 response data length in word + * a3 not used + */ +#define INTEL_SIP_SMC_FUNCID_MBOX_SEND_CMD 60 + #define INTEL_SIP_SMC_MBOX_SEND_CMD \ + INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_MBOX_SEND_CMD) + +/** * Request INTEL_SIP_SMC_SVC_VERSION * * Sync call used to query the SIP SMC API Version diff --git a/include/linux/firmware/intel/stratix10-svc-client.h b/include/linux/firmware/intel/stratix10-svc-client.h index 0c16037fd08d..60ed82112680 100644 --- a/include/linux/firmware/intel/stratix10-svc-client.h +++ b/include/linux/firmware/intel/stratix10-svc-client.h @@ -118,6 +118,9 @@ struct stratix10_svc_chan; * @COMMAND_SMC_SVC_VERSION: Non-mailbox SMC SVC API Version, * return status is SVC_STATUS_OK * + * @COMMAND_MBOX_SEND_CMD: send generic mailbox command, return status is + * SVC_STATUS_OK or SVC_STATUS_ERROR + * * @COMMAND_RSU_DCMF_STATUS: query firmware for the DCMF status * return status is SVC_STATUS_OK or SVC_STATUS_ERROR * @@ -164,6 +167,8 @@ enum stratix10_svc_command_code { COMMAND_FCS_RANDOM_NUMBER_GEN, /* for general status poll */ COMMAND_POLL_SERVICE_STATUS = 40, + /* for generic mailbox send command */ + COMMAND_MBOX_SEND_CMD = 100, /* Non-mailbox SMC Call */ COMMAND_SMC_SVC_VERSION = 200, }; diff --git a/include/linux/firmware/mediatek/mtk-adsp-ipc.h b/include/linux/firmware/mediatek/mtk-adsp-ipc.h index 28fd313340b8..5b1d16fa3f56 100644 --- a/include/linux/firmware/mediatek/mtk-adsp-ipc.h +++ b/include/linux/firmware/mediatek/mtk-adsp-ipc.h @@ -46,17 +46,11 @@ struct mtk_adsp_ipc { static inline void mtk_adsp_ipc_set_data(struct mtk_adsp_ipc *ipc, void *data) { - if (!ipc) - return; - ipc->private_data = data; } static inline void *mtk_adsp_ipc_get_data(struct mtk_adsp_ipc *ipc) { - if (!ipc) - return NULL; - return ipc->private_data; } diff --git a/include/linux/firmware/meson/meson_sm.h b/include/linux/firmware/meson/meson_sm.h index 95b0da2326a9..8eaf8922ab02 100644 --- a/include/linux/firmware/meson/meson_sm.h +++ b/include/linux/firmware/meson/meson_sm.h @@ -19,7 +19,7 @@ enum { struct meson_sm_firmware; int meson_sm_call(struct meson_sm_firmware *fw, unsigned int cmd_index, - u32 *ret, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4); + s32 *ret, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4); int meson_sm_call_write(struct meson_sm_firmware *fw, void *buffer, unsigned int b_size, unsigned int cmd_index, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4); diff --git a/include/linux/firmware/qcom/qcom_qseecom.h b/include/linux/firmware/qcom/qcom_qseecom.h new file mode 100644 index 000000000000..5c28298a98be --- /dev/null +++ b/include/linux/firmware/qcom/qcom_qseecom.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Driver for Qualcomm Secure Execution Environment (SEE) interface (QSEECOM). + * Responsible for setting up and managing QSEECOM client devices. + * + * Copyright (C) 2023 Maximilian Luz <luzmaximilian@gmail.com> + */ + +#ifndef __QCOM_QSEECOM_H +#define __QCOM_QSEECOM_H + +#include <linux/auxiliary_bus.h> +#include <linux/types.h> + +#include <linux/firmware/qcom/qcom_scm.h> + +/** + * struct qseecom_client - QSEECOM client device. + * @aux_dev: Underlying auxiliary device. + * @app_id: ID of the loaded application. + */ +struct qseecom_client { + struct auxiliary_device aux_dev; + u32 app_id; +}; + +/** + * qcom_qseecom_app_send() - Send to and receive data from a given QSEE app. + * @client: The QSEECOM client associated with the target app. + * @req: Request buffer sent to the app (must be DMA-mappable). + * @req_size: Size of the request buffer. + * @rsp: Response buffer, written to by the app (must be DMA-mappable). + * @rsp_size: Size of the response buffer. + * + * Sends a request to the QSEE app associated with the given client and read + * back its response. The caller must provide two DMA memory regions, one for + * the request and one for the response, and fill out the @req region with the + * respective (app-specific) request data. The QSEE app reads this and returns + * its response in the @rsp region. + * + * Note: This is a convenience wrapper around qcom_scm_qseecom_app_send(). + * Clients should prefer to use this wrapper. + * + * Return: Zero on success, nonzero on failure. + */ +static inline int qcom_qseecom_app_send(struct qseecom_client *client, void *req, size_t req_size, + void *rsp, size_t rsp_size) +{ + return qcom_scm_qseecom_app_send(client->app_id, req, req_size, rsp, rsp_size); +} + +#endif /* __QCOM_QSEECOM_H */ diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h index 250ea4efb7cb..ccaf28846054 100644 --- a/include/linux/firmware/qcom/qcom_scm.h +++ b/include/linux/firmware/qcom/qcom_scm.h @@ -59,12 +59,12 @@ enum qcom_scm_ice_cipher { #define QCOM_SCM_PERM_RW (QCOM_SCM_PERM_READ | QCOM_SCM_PERM_WRITE) #define QCOM_SCM_PERM_RWX (QCOM_SCM_PERM_RW | QCOM_SCM_PERM_EXEC) -extern bool qcom_scm_is_available(void); +bool qcom_scm_is_available(void); -extern int qcom_scm_set_cold_boot_addr(void *entry); -extern int qcom_scm_set_warm_boot_addr(void *entry); -extern void qcom_scm_cpu_power_down(u32 flags); -extern int qcom_scm_set_remote_state(u32 state, u32 id); +int qcom_scm_set_cold_boot_addr(void *entry); +int qcom_scm_set_warm_boot_addr(void *entry); +void qcom_scm_cpu_power_down(u32 flags); +int qcom_scm_set_remote_state(u32 state, u32 id); struct qcom_scm_pas_metadata { void *ptr; @@ -72,54 +72,69 @@ struct qcom_scm_pas_metadata { ssize_t size; }; -extern int qcom_scm_pas_init_image(u32 peripheral, const void *metadata, - size_t size, - struct qcom_scm_pas_metadata *ctx); +int qcom_scm_pas_init_image(u32 peripheral, const void *metadata, size_t size, + struct qcom_scm_pas_metadata *ctx); void qcom_scm_pas_metadata_release(struct qcom_scm_pas_metadata *ctx); -extern int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr, - phys_addr_t size); -extern int qcom_scm_pas_auth_and_reset(u32 peripheral); -extern int qcom_scm_pas_shutdown(u32 peripheral); -extern bool qcom_scm_pas_supported(u32 peripheral); - -extern int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val); -extern int qcom_scm_io_writel(phys_addr_t addr, unsigned int val); - -extern bool qcom_scm_restore_sec_cfg_available(void); -extern int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); -extern int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); -extern int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); -extern int qcom_scm_iommu_set_cp_pool_size(u32 spare, u32 size); -extern int qcom_scm_mem_protect_video_var(u32 cp_start, u32 cp_size, - u32 cp_nonpixel_start, - u32 cp_nonpixel_size); -extern int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz, - u64 *src, - const struct qcom_scm_vmperm *newvm, - unsigned int dest_cnt); - -extern bool qcom_scm_ocmem_lock_available(void); -extern int qcom_scm_ocmem_lock(enum qcom_scm_ocmem_client id, u32 offset, - u32 size, u32 mode); -extern int qcom_scm_ocmem_unlock(enum qcom_scm_ocmem_client id, u32 offset, - u32 size); - -extern bool qcom_scm_ice_available(void); -extern int qcom_scm_ice_invalidate_key(u32 index); -extern int qcom_scm_ice_set_key(u32 index, const u8 *key, u32 key_size, - enum qcom_scm_ice_cipher cipher, - u32 data_unit_size); - -extern bool qcom_scm_hdcp_available(void); -extern int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, - u32 *resp); - -extern int qcom_scm_iommu_set_pt_format(u32 sec_id, u32 ctx_num, u32 pt_fmt); -extern int qcom_scm_qsmmu500_wait_safe_toggle(bool en); - -extern int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, - u64 limit_node, u32 node_id, u64 version); -extern int qcom_scm_lmh_profile_change(u32 profile_id); -extern bool qcom_scm_lmh_dcvsh_available(void); +int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr, phys_addr_t size); +int qcom_scm_pas_auth_and_reset(u32 peripheral); +int qcom_scm_pas_shutdown(u32 peripheral); +bool qcom_scm_pas_supported(u32 peripheral); + +int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val); +int qcom_scm_io_writel(phys_addr_t addr, unsigned int val); + +bool qcom_scm_restore_sec_cfg_available(void); +int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); +int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); +int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); +int qcom_scm_iommu_set_cp_pool_size(u32 spare, u32 size); +int qcom_scm_mem_protect_video_var(u32 cp_start, u32 cp_size, + u32 cp_nonpixel_start, u32 cp_nonpixel_size); +int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz, u64 *src, + const struct qcom_scm_vmperm *newvm, + unsigned int dest_cnt); + +bool qcom_scm_ocmem_lock_available(void); +int qcom_scm_ocmem_lock(enum qcom_scm_ocmem_client id, u32 offset, u32 size, + u32 mode); +int qcom_scm_ocmem_unlock(enum qcom_scm_ocmem_client id, u32 offset, u32 size); + +bool qcom_scm_ice_available(void); +int qcom_scm_ice_invalidate_key(u32 index); +int qcom_scm_ice_set_key(u32 index, const u8 *key, u32 key_size, + enum qcom_scm_ice_cipher cipher, u32 data_unit_size); + +bool qcom_scm_hdcp_available(void); +int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, u32 *resp); + +int qcom_scm_iommu_set_pt_format(u32 sec_id, u32 ctx_num, u32 pt_fmt); +int qcom_scm_qsmmu500_wait_safe_toggle(bool en); + +int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, + u64 limit_node, u32 node_id, u64 version); +int qcom_scm_lmh_profile_change(u32 profile_id); +bool qcom_scm_lmh_dcvsh_available(void); + +#ifdef CONFIG_QCOM_QSEECOM + +int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id); +int qcom_scm_qseecom_app_send(u32 app_id, void *req, size_t req_size, void *rsp, + size_t rsp_size); + +#else /* CONFIG_QCOM_QSEECOM */ + +static inline int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id) +{ + return -EINVAL; +} + +static inline int qcom_scm_qseecom_app_send(u32 app_id, void *req, + size_t req_size, void *rsp, + size_t rsp_size) +{ + return -EINVAL; +} + +#endif /* CONFIG_QCOM_QSEECOM */ #endif diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 9dda7d9898ff..d1ea3898564c 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -34,6 +34,19 @@ /* PM API versions */ #define PM_API_VERSION_2 2 +#define PM_PINCTRL_PARAM_SET_VERSION 2 + +#define ZYNQMP_FAMILY_CODE 0x23 +#define VERSAL_FAMILY_CODE 0x26 + +/* When all subfamily of platform need to support */ +#define ALL_SUB_FAMILY_CODE 0x00 +#define VERSAL_SUB_FAMILY_CODE 0x01 +#define VERSALNET_SUB_FAMILY_CODE 0x03 + +#define FAMILY_CODE_MASK GENMASK(27, 21) +#define SUB_FAMILY_CODE_MASK GENMASK(20, 19) + /* ATF only commands */ #define TF_A_PM_REGISTER_SGI 0xa04 #define PM_GET_TRUSTZONE_VERSION 0xa03 @@ -87,6 +100,18 @@ #define SD_ITAPDLY 0xFF180314 #define SD_OTAPDLYSEL 0xFF180318 +/** + * XPM_EVENT_ERROR_MASK_DDRMC_CR: Error event mask for DDRMC MC Correctable ECC Error. + */ +#define XPM_EVENT_ERROR_MASK_DDRMC_CR BIT(18) + +/** + * XPM_EVENT_ERROR_MASK_DDRMC_NCR: Error event mask for DDRMC MC Non-Correctable ECC Error. + */ +#define XPM_EVENT_ERROR_MASK_DDRMC_NCR BIT(19) +#define XPM_EVENT_ERROR_MASK_NOC_NCR BIT(13) +#define XPM_EVENT_ERROR_MASK_NOC_CR BIT(12) + enum pm_api_cb_id { PM_INIT_SUSPEND_CB = 30, PM_ACKNOWLEDGE_CB = 31, diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index da51a83b2829..79ef6ac4c021 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -93,13 +93,9 @@ extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) #if __has_builtin(__builtin_dynamic_object_size) #define POS __pass_dynamic_object_size(1) #define POS0 __pass_dynamic_object_size(0) -#define __struct_size(p) __builtin_dynamic_object_size(p, 0) -#define __member_size(p) __builtin_dynamic_object_size(p, 1) #else #define POS __pass_object_size(1) #define POS0 __pass_object_size(0) -#define __struct_size(p) __builtin_object_size(p, 0) -#define __member_size(p) __builtin_object_size(p, 1) #endif #define __compiletime_lessthan(bounds, length) ( \ @@ -643,7 +639,7 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, __q_size_field, #op), \ #op ": detected field-spanning write (size %zu) of single %s (size %zu)\n", \ __fortify_size, \ - "field \"" #p "\" at " __FILE__ ":" __stringify(__LINE__), \ + "field \"" #p "\" at " FILE_LINE, \ __p_size_field); \ __underlying_##op(p, q, __fortify_size); \ }) diff --git a/include/linux/freelist.h b/include/linux/freelist.h deleted file mode 100644 index fc1842b96469..000000000000 --- a/include/linux/freelist.h +++ /dev/null @@ -1,129 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ -#ifndef FREELIST_H -#define FREELIST_H - -#include <linux/atomic.h> - -/* - * Copyright: cameron@moodycamel.com - * - * A simple CAS-based lock-free free list. Not the fastest thing in the world - * under heavy contention, but simple and correct (assuming nodes are never - * freed until after the free list is destroyed), and fairly speedy under low - * contention. - * - * Adapted from: https://moodycamel.com/blog/2014/solving-the-aba-problem-for-lock-free-free-lists - */ - -struct freelist_node { - atomic_t refs; - struct freelist_node *next; -}; - -struct freelist_head { - struct freelist_node *head; -}; - -#define REFS_ON_FREELIST 0x80000000 -#define REFS_MASK 0x7FFFFFFF - -static inline void __freelist_add(struct freelist_node *node, struct freelist_head *list) -{ - /* - * Since the refcount is zero, and nobody can increase it once it's - * zero (except us, and we run only one copy of this method per node at - * a time, i.e. the single thread case), then we know we can safely - * change the next pointer of the node; however, once the refcount is - * back above zero, then other threads could increase it (happens under - * heavy contention, when the refcount goes to zero in between a load - * and a refcount increment of a node in try_get, then back up to - * something non-zero, then the refcount increment is done by the other - * thread) -- so if the CAS to add the node to the actual list fails, - * decrese the refcount and leave the add operation to the next thread - * who puts the refcount back to zero (which could be us, hence the - * loop). - */ - struct freelist_node *head = READ_ONCE(list->head); - - for (;;) { - WRITE_ONCE(node->next, head); - atomic_set_release(&node->refs, 1); - - if (!try_cmpxchg_release(&list->head, &head, node)) { - /* - * Hmm, the add failed, but we can only try again when - * the refcount goes back to zero. - */ - if (atomic_fetch_add_release(REFS_ON_FREELIST - 1, &node->refs) == 1) - continue; - } - return; - } -} - -static inline void freelist_add(struct freelist_node *node, struct freelist_head *list) -{ - /* - * We know that the should-be-on-freelist bit is 0 at this point, so - * it's safe to set it using a fetch_add. - */ - if (!atomic_fetch_add_release(REFS_ON_FREELIST, &node->refs)) { - /* - * Oh look! We were the last ones referencing this node, and we - * know we want to add it to the free list, so let's do it! - */ - __freelist_add(node, list); - } -} - -static inline struct freelist_node *freelist_try_get(struct freelist_head *list) -{ - struct freelist_node *prev, *next, *head = smp_load_acquire(&list->head); - unsigned int refs; - - while (head) { - prev = head; - refs = atomic_read(&head->refs); - if ((refs & REFS_MASK) == 0 || - !atomic_try_cmpxchg_acquire(&head->refs, &refs, refs+1)) { - head = smp_load_acquire(&list->head); - continue; - } - - /* - * Good, reference count has been incremented (it wasn't at - * zero), which means we can read the next and not worry about - * it changing between now and the time we do the CAS. - */ - next = READ_ONCE(head->next); - if (try_cmpxchg_acquire(&list->head, &head, next)) { - /* - * Yay, got the node. This means it was on the list, - * which means should-be-on-freelist must be false no - * matter the refcount (because nobody else knows it's - * been taken off yet, it can't have been put back on). - */ - WARN_ON_ONCE(atomic_read(&head->refs) & REFS_ON_FREELIST); - - /* - * Decrease refcount twice, once for our ref, and once - * for the list's ref. - */ - atomic_fetch_add(-2, &head->refs); - - return head; - } - - /* - * OK, the head must have changed on us, but we still need to decrement - * the refcount we increased. - */ - refs = atomic_fetch_add(-1, &prev->refs); - if (refs == REFS_ON_FREELIST + 1) - __freelist_add(prev, list); - } - - return NULL; -} - -#endif /* FREELIST_H */ diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h deleted file mode 100644 index eaa0ac5f9003..000000000000 --- a/include/linux/frontswap.h +++ /dev/null @@ -1,91 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_FRONTSWAP_H -#define _LINUX_FRONTSWAP_H - -#include <linux/swap.h> -#include <linux/mm.h> -#include <linux/bitops.h> -#include <linux/jump_label.h> - -struct frontswap_ops { - void (*init)(unsigned); /* this swap type was just swapon'ed */ - int (*store)(unsigned, pgoff_t, struct page *); /* store a page */ - int (*load)(unsigned, pgoff_t, struct page *, bool *); /* load a page */ - void (*invalidate_page)(unsigned, pgoff_t); /* page no longer needed */ - void (*invalidate_area)(unsigned); /* swap type just swapoff'ed */ -}; - -int frontswap_register_ops(const struct frontswap_ops *ops); - -extern void frontswap_init(unsigned type, unsigned long *map); -extern int __frontswap_store(struct page *page); -extern int __frontswap_load(struct page *page); -extern void __frontswap_invalidate_page(unsigned, pgoff_t); -extern void __frontswap_invalidate_area(unsigned); - -#ifdef CONFIG_FRONTSWAP -extern struct static_key_false frontswap_enabled_key; - -static inline bool frontswap_enabled(void) -{ - return static_branch_unlikely(&frontswap_enabled_key); -} - -static inline void frontswap_map_set(struct swap_info_struct *p, - unsigned long *map) -{ - p->frontswap_map = map; -} - -static inline unsigned long *frontswap_map_get(struct swap_info_struct *p) -{ - return p->frontswap_map; -} -#else -/* all inline routines become no-ops and all externs are ignored */ - -static inline bool frontswap_enabled(void) -{ - return false; -} - -static inline void frontswap_map_set(struct swap_info_struct *p, - unsigned long *map) -{ -} - -static inline unsigned long *frontswap_map_get(struct swap_info_struct *p) -{ - return NULL; -} -#endif - -static inline int frontswap_store(struct page *page) -{ - if (frontswap_enabled()) - return __frontswap_store(page); - - return -1; -} - -static inline int frontswap_load(struct page *page) -{ - if (frontswap_enabled()) - return __frontswap_load(page); - - return -1; -} - -static inline void frontswap_invalidate_page(unsigned type, pgoff_t offset) -{ - if (frontswap_enabled()) - __frontswap_invalidate_page(type, offset); -} - -static inline void frontswap_invalidate_area(unsigned type) -{ - if (frontswap_enabled()) - __frontswap_invalidate_area(type); -} - -#endif /* _LINUX_FRONTSWAP_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 562f2623c9c9..98b7a7a8c42e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -67,7 +67,7 @@ struct swap_info_struct; struct seq_file; struct workqueue_struct; struct iov_iter; -struct fscrypt_info; +struct fscrypt_inode_info; struct fscrypt_operations; struct fsverity_info; struct fsverity_operations; @@ -338,6 +338,20 @@ enum rw_hint { #define IOCB_NOIO (1 << 20) /* can use bio alloc cache */ #define IOCB_ALLOC_CACHE (1 << 21) +/* + * IOCB_DIO_CALLER_COMP can be set by the iocb owner, to indicate that the + * iocb completion can be passed back to the owner for execution from a safe + * context rather than needing to be punted through a workqueue. If this + * flag is set, the bio completion handling may set iocb->dio_complete to a + * handler function and iocb->private to context information for that handler. + * The issuer should call the handler with that context information from task + * context to complete the processing of the iocb. Note that while this + * provides a task context for the dio_complete() callback, it should only be + * used on the completion side for non-IO generating completions. It's fine to + * call blocking functions from this callback, but they should not wait for + * unrelated IO (like cache flushing, new IO generation, etc). + */ +#define IOCB_DIO_CALLER_COMP (1 << 22) /* for use in trace events */ #define TRACE_IOCB_STRINGS \ @@ -351,7 +365,8 @@ enum rw_hint { { IOCB_WRITE, "WRITE" }, \ { IOCB_WAITQ, "WAITQ" }, \ { IOCB_NOIO, "NOIO" }, \ - { IOCB_ALLOC_CACHE, "ALLOC_CACHE" } + { IOCB_ALLOC_CACHE, "ALLOC_CACHE" }, \ + { IOCB_DIO_CALLER_COMP, "CALLER_COMP" } struct kiocb { struct file *ki_filp; @@ -360,7 +375,23 @@ struct kiocb { void *private; int ki_flags; u16 ki_ioprio; /* See linux/ioprio.h */ - struct wait_page_queue *ki_waitq; /* for async buffered IO */ + union { + /* + * Only used for async buffered reads, where it denotes the + * page waitqueue associated with completing the read. Valid + * IFF IOCB_WAITQ is set. + */ + struct wait_page_queue *ki_waitq; + /* + * Can be used for O_DIRECT IO, where the completion handling + * is punted back to the issuer of the IO. May only be set + * if IOCB_DIO_CALLER_COMP is set by the issuer, and the issuer + * must then check for presence of this handler when ki_complete + * is invoked. The data passed in to this handler must be + * assigned to ->private when dio_complete is assigned. + */ + ssize_t (*dio_complete)(void *data); + }; }; static inline bool is_sync_kiocb(struct kiocb *kiocb) @@ -423,7 +454,7 @@ extern const struct address_space_operations empty_aops; * It is also used to block modification of page cache contents through * memory mappings. * @gfp_mask: Memory allocation flags to use for allocating pages. - * @i_mmap_writable: Number of VM_SHARED mappings. + * @i_mmap_writable: Number of VM_SHARED, VM_MAYWRITE mappings. * @nr_thps: Number of THPs in the pagecache (non-shmem only). * @i_mmap: Tree of private and shared mappings. * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable. @@ -447,11 +478,11 @@ struct address_space { atomic_t nr_thps; #endif struct rb_root_cached i_mmap; - struct rw_semaphore i_mmap_rwsem; unsigned long nrpages; pgoff_t writeback_index; const struct address_space_operations *a_ops; unsigned long flags; + struct rw_semaphore i_mmap_rwsem; errseq_t wb_err; spinlock_t private_lock; struct list_head private_list; @@ -526,7 +557,7 @@ static inline int mapping_mapped(struct address_space *mapping) /* * Might pages of this file have been modified in userspace? - * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap + * Note that i_mmap_writable counts all VM_SHARED, VM_MAYWRITE vmas: do_mmap * marks vma as VM_SHARED if it is shared, and the file was opened for * writing i.e. vma may be mprotected writable even if now readonly. * @@ -640,9 +671,9 @@ struct inode { }; dev_t i_rdev; loff_t i_size; - struct timespec64 i_atime; - struct timespec64 i_mtime; - struct timespec64 i_ctime; + struct timespec64 __i_atime; + struct timespec64 __i_mtime; + struct timespec64 __i_ctime; /* use inode_*_ctime accessors! */ spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; u8 i_blkbits; @@ -707,7 +738,7 @@ struct inode { #endif #ifdef CONFIG_FS_ENCRYPTION - struct fscrypt_info *i_crypt_info; + struct fscrypt_inode_info *i_crypt_info; #endif #ifdef CONFIG_FS_VERITY @@ -1011,7 +1042,10 @@ static inline struct file *get_file(struct file *f) atomic_long_inc(&f->f_count); return f; } -#define get_file_rcu(x) atomic_long_inc_not_zero(&(x)->f_count) + +struct file *get_file_rcu(struct file __rcu **f); +struct file *get_file_active(struct file **f); + #define file_count(x) atomic_long_read(&(x)->f_count) #define MAX_NON_LFS ((1UL<<31) - 1) @@ -1069,7 +1103,7 @@ extern void fasync_free(struct fasync_struct *); extern void kill_fasync(struct fasync_struct **, int, int); extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force); -extern int f_setown(struct file *filp, unsigned long arg, int force); +extern int f_setown(struct file *filp, int who, int force); extern void f_delown(struct file *filp); extern pid_t f_getown(struct file *filp); extern int send_sigurg(struct fown_struct *fown); @@ -1088,13 +1122,15 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_NOATIME BIT(10) /* Do not update access times. */ #define SB_NODIRATIME BIT(11) /* Do not update directory access times */ #define SB_SILENT BIT(15) -#define SB_POSIXACL BIT(16) /* VFS does not apply the umask */ +#define SB_POSIXACL BIT(16) /* Supports POSIX ACLs */ #define SB_INLINECRYPT BIT(17) /* Use blk-crypto for encrypted files */ #define SB_KERNMOUNT BIT(22) /* this is a kern_mount call */ #define SB_I_VERSION BIT(23) /* Update inode I_version field */ #define SB_LAZYTIME BIT(25) /* Update the on-disk [acm]times lazily */ /* These sb flags are internal to the kernel */ +#define SB_DEAD BIT(21) +#define SB_DYING BIT(24) #define SB_SUBMOUNT BIT(26) #define SB_FORCE BIT(27) #define SB_NOSEC BIT(28) @@ -1133,6 +1169,7 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_I_PERSB_BDI 0x00000200 /* has a per-sb bdi */ #define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */ #define SB_I_RETIRED 0x00000800 /* superblock shouldn't be reused */ +#define SB_I_NOUMASK 0x00001000 /* VFS does not apply umask */ /* Possible states of 'frozen' field */ enum { @@ -1147,7 +1184,8 @@ enum { #define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1) struct sb_writers { - int frozen; /* Is sb frozen? */ + unsigned short frozen; /* Is sb frozen? */ + unsigned short freeze_holders; /* Who froze fs? */ struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS]; }; @@ -1172,7 +1210,7 @@ struct super_block { #ifdef CONFIG_SECURITY void *s_security; #endif - const struct xattr_handler **s_xattr; + const struct xattr_handler * const *s_xattr; #ifdef CONFIG_FS_ENCRYPTION const struct fscrypt_operations *s_cop; struct fscrypt_keyring *s_master_keys; /* master crypto keys in use */ @@ -1187,6 +1225,7 @@ struct super_block { struct hlist_bl_head s_roots; /* alternate root dentries for NFS */ struct list_head s_mounts; /* list of mounts; _not_ for fs use */ struct block_device *s_bdev; + struct bdev_handle *s_bdev_handle; struct backing_dev_info *s_bdi; struct mtd_info *s_mtd; struct hlist_node s_instances; @@ -1231,7 +1270,7 @@ struct super_block { const struct dentry_operations *s_d_op; /* default d_op for dentries */ - struct shrinker s_shrink; /* per-sb shrinker handle */ + struct shrinker *s_shrink; /* per-sb shrinker handle */ /* Number of inodes with nlink == 0 but still referenced */ atomic_long_t s_remove_count; @@ -1474,7 +1513,109 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb, kgid_has_mapping(fs_userns, kgid); } -extern struct timespec64 current_time(struct inode *inode); +struct timespec64 current_time(struct inode *inode); +struct timespec64 inode_set_ctime_current(struct inode *inode); + +static inline time64_t inode_get_atime_sec(const struct inode *inode) +{ + return inode->__i_atime.tv_sec; +} + +static inline long inode_get_atime_nsec(const struct inode *inode) +{ + return inode->__i_atime.tv_nsec; +} + +static inline struct timespec64 inode_get_atime(const struct inode *inode) +{ + return inode->__i_atime; +} + +static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode, + struct timespec64 ts) +{ + inode->__i_atime = ts; + return ts; +} + +static inline struct timespec64 inode_set_atime(struct inode *inode, + time64_t sec, long nsec) +{ + struct timespec64 ts = { .tv_sec = sec, + .tv_nsec = nsec }; + return inode_set_atime_to_ts(inode, ts); +} + +static inline time64_t inode_get_mtime_sec(const struct inode *inode) +{ + return inode->__i_mtime.tv_sec; +} + +static inline long inode_get_mtime_nsec(const struct inode *inode) +{ + return inode->__i_mtime.tv_nsec; +} + +static inline struct timespec64 inode_get_mtime(const struct inode *inode) +{ + return inode->__i_mtime; +} + +static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode, + struct timespec64 ts) +{ + inode->__i_mtime = ts; + return ts; +} + +static inline struct timespec64 inode_set_mtime(struct inode *inode, + time64_t sec, long nsec) +{ + struct timespec64 ts = { .tv_sec = sec, + .tv_nsec = nsec }; + return inode_set_mtime_to_ts(inode, ts); +} + +static inline time64_t inode_get_ctime_sec(const struct inode *inode) +{ + return inode->__i_ctime.tv_sec; +} + +static inline long inode_get_ctime_nsec(const struct inode *inode) +{ + return inode->__i_ctime.tv_nsec; +} + +static inline struct timespec64 inode_get_ctime(const struct inode *inode) +{ + return inode->__i_ctime; +} + +static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode, + struct timespec64 ts) +{ + inode->__i_ctime = ts; + return ts; +} + +/** + * inode_set_ctime - set the ctime in the inode + * @inode: inode in which to set the ctime + * @sec: tv_sec value to set + * @nsec: tv_nsec value to set + * + * Set the ctime in @inode to { @sec, @nsec } + */ +static inline struct timespec64 inode_set_ctime(struct inode *inode, + time64_t sec, long nsec) +{ + struct timespec64 ts = { .tv_sec = sec, + .tv_nsec = nsec }; + + return inode_set_ctime_to_ts(inode, ts); +} + +struct timespec64 simple_inode_init_ts(struct inode *inode); /* * Snapshotting support. @@ -1770,6 +1911,7 @@ struct dir_context { struct iov_iter; struct io_uring_cmd; +struct offset_ctx; struct file_operations { struct module *owner; @@ -1798,7 +1940,7 @@ struct file_operations { ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); void (*splice_eof)(struct file *file); - int (*setlease)(struct file *, long, struct file_lock **, void **); + int (*setlease)(struct file *, int, struct file_lock **, void **); long (*fallocate)(struct file *file, int mode, loff_t offset, loff_t len); void (*show_fdinfo)(struct seq_file *m, struct file *f); @@ -1850,7 +1992,7 @@ struct inode_operations { ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); - int (*update_time)(struct inode *, struct timespec64 *, int); + int (*update_time)(struct inode *, int); int (*atomic_open)(struct inode *, struct dentry *, struct file *, unsigned open_flag, umode_t create_mode); @@ -1863,6 +2005,7 @@ struct inode_operations { int (*fileattr_set)(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa); + struct offset_ctx *(*get_offset_ctx)(struct inode *inode); } ____cacheline_aligned; static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio, @@ -1908,6 +2051,10 @@ extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, struct file *dst_file, loff_t dst_pos, loff_t len, unsigned int remap_flags); +enum freeze_holder { + FREEZE_HOLDER_KERNEL = (1U << 0), + FREEZE_HOLDER_USERSPACE = (1U << 1), +}; struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); @@ -1920,9 +2067,9 @@ struct super_operations { void (*evict_inode) (struct inode *); void (*put_super) (struct super_block *); int (*sync_fs)(struct super_block *sb, int wait); - int (*freeze_super) (struct super_block *); + int (*freeze_super) (struct super_block *, enum freeze_holder who); int (*freeze_fs) (struct super_block *); - int (*thaw_super) (struct super_block *); + int (*thaw_super) (struct super_block *, enum freeze_holder who); int (*unfreeze_fs) (struct super_block *); int (*statfs) (struct dentry *, struct kstatfs *); int (*remount_fs) (struct super_block *, int *, char *); @@ -1998,7 +2145,12 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) + +#ifdef CONFIG_FS_POSIX_ACL #define IS_POSIXACL(inode) __IS_FLG(inode, SB_POSIXACL) +#else +#define IS_POSIXACL(inode) 0 +#endif #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) @@ -2200,7 +2352,7 @@ enum file_time_flags { extern bool atime_needs_update(const struct path *, struct inode *); extern void touch_atime(const struct path *); -int inode_update_time(struct inode *inode, struct timespec64 *time, int flags); +int inode_update_time(struct inode *inode, int flags); static inline void file_accessed(struct file *file) { @@ -2273,6 +2425,7 @@ struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), int (*set)(struct super_block *,void *), int flags, void *data); +struct super_block *sget_dev(struct fs_context *fc, dev_t dev); /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ #define fops_get(fops) \ @@ -2296,8 +2449,8 @@ extern int unregister_filesystem(struct file_system_type *); extern int vfs_statfs(const struct path *, struct kstatfs *); extern int user_statfs(const char __user *, struct kstatfs *); extern int fd_statfs(int, struct kstatfs *); -extern int freeze_super(struct super_block *super); -extern int thaw_super(struct super_block *super); +int freeze_super(struct super_block *super, enum freeze_holder who); +int thaw_super(struct super_block *super, enum freeze_holder who); extern __printf(2, 3) int super_setup_bdi_name(struct super_block *sb, char *fmt, ...); extern int super_setup_bdi(struct super_block *sb); @@ -2306,7 +2459,8 @@ extern int current_umask(void); extern void ihold(struct inode * inode); extern void iput(struct inode *); -extern int generic_update_time(struct inode *, struct timespec64 *, int); +int inode_update_timestamps(struct inode *inode, int flags); +int generic_update_time(struct inode *, int); /* /sys/fs */ extern struct kobject *fs_kobj; @@ -2318,13 +2472,13 @@ struct audit_names; struct filename { const char *name; /* pointer to actual string */ const __user char *uptr; /* original userland pointer */ - int refcnt; + atomic_t refcnt; struct audit_names *aname; const char iname[]; }; static_assert(offsetof(struct filename, iname) % sizeof(long) == 0); -static inline struct mnt_idmap *file_mnt_idmap(struct file *file) +static inline struct mnt_idmap *file_mnt_idmap(const struct file *file) { return mnt_idmap(file->f_path.mnt); } @@ -2363,24 +2517,24 @@ struct file *dentry_open(const struct path *path, int flags, const struct cred *creds); struct file *dentry_create(const struct path *path, int flags, umode_t mode, const struct cred *cred); -struct file *backing_file_open(const struct path *path, int flags, +struct file *backing_file_open(const struct path *user_path, int flags, const struct path *real_path, const struct cred *cred); -struct path *backing_file_real_path(struct file *f); +struct path *backing_file_user_path(struct file *f); /* - * file_real_path - get the path corresponding to f_inode + * file_user_path - get the path to display for memory mapped file * - * When opening a backing file for a stackable filesystem (e.g., - * overlayfs) f_path may be on the stackable filesystem and f_inode on - * the underlying filesystem. When the path associated with f_inode is - * needed, this helper should be used instead of accessing f_path - * directly. -*/ -static inline const struct path *file_real_path(struct file *f) + * When mmapping a file on a stackable filesystem (e.g., overlayfs), the file + * stored in ->vm_file is a backing file whose f_inode is on the underlying + * filesystem. When the mapped file path is displayed to user (e.g. via + * /proc/<pid>/maps), this helper should be used to get the path to display + * to the user, which is the path of the fd that user has requested to map. + */ +static inline const struct path *file_user_path(struct file *f) { if (unlikely(f->f_mode & FMODE_BACKING)) - return backing_file_real_path(f); + return backing_file_user_path(f); return &f->f_path; } @@ -2545,6 +2699,13 @@ static inline bool inode_wrong_type(const struct inode *inode, umode_t mode) return (inode->i_mode ^ mode) & S_IFMT; } +/** + * file_start_write - get write access to a superblock for regular file io + * @file: the file we want to write to + * + * This is a variant of sb_start_write() which is a noop on non-regualr file. + * Should be matched with a call to file_end_write(). + */ static inline void file_start_write(struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) @@ -2559,11 +2720,53 @@ static inline bool file_start_write_trylock(struct file *file) return sb_start_write_trylock(file_inode(file)->i_sb); } +/** + * file_end_write - drop write access to a superblock of a regular file + * @file: the file we wrote to + * + * Should be matched with a call to file_start_write(). + */ static inline void file_end_write(struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) return; - __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE); + sb_end_write(file_inode(file)->i_sb); +} + +/** + * kiocb_start_write - get write access to a superblock for async file io + * @iocb: the io context we want to submit the write with + * + * This is a variant of sb_start_write() for async io submission. + * Should be matched with a call to kiocb_end_write(). + */ +static inline void kiocb_start_write(struct kiocb *iocb) +{ + struct inode *inode = file_inode(iocb->ki_filp); + + sb_start_write(inode->i_sb); + /* + * Fool lockdep by telling it the lock got released so that it + * doesn't complain about the held lock when we return to userspace. + */ + __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE); +} + +/** + * kiocb_end_write - drop write access to a superblock after async file io + * @iocb: the io context we sumbitted the write with + * + * Should be matched with a call to kiocb_start_write(). + */ +static inline void kiocb_end_write(struct kiocb *iocb) +{ + struct inode *inode = file_inode(iocb->ki_filp); + + /* + * Tell lockdep we inherited freeze protection from submission thread. + */ + __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE); + sb_end_write(inode->i_sb); } /* @@ -2613,8 +2816,7 @@ static inline bool inode_is_open_for_write(const struct inode *inode) #if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING) static inline void i_readcount_dec(struct inode *inode) { - BUG_ON(!atomic_read(&inode->i_readcount)); - atomic_dec(&inode->i_readcount); + BUG_ON(atomic_dec_return(&inode->i_readcount) < 0); } static inline void i_readcount_inc(struct inode *inode) { @@ -2880,7 +3082,7 @@ extern void page_put_link(void *); extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; extern void kfree_link(void *); -void generic_fillattr(struct mnt_idmap *, struct inode *, struct kstat *); +void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *); void generic_fill_statx_attr(struct inode *inode, struct kstat *stat); extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int); extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int); @@ -2919,7 +3121,6 @@ extern int vfs_readlink(struct dentry *, char __user *, int); extern struct file_system_type *get_filesystem(struct file_system_type *fs); extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); -extern struct super_block *get_super(struct block_device *); extern struct super_block *get_active_super(struct block_device *bdev); extern void drop_super(struct super_block *sb); extern void drop_super_exclusive(struct super_block *sb); @@ -2940,6 +3141,8 @@ extern int simple_open(struct inode *inode, struct file *file); extern int simple_link(struct dentry *, struct inode *, struct dentry *); extern int simple_unlink(struct inode *, struct dentry *); extern int simple_rmdir(struct inode *, struct dentry *); +void simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry); extern int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); extern int simple_rename(struct mnt_idmap *, struct inode *, @@ -2956,7 +3159,7 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping, extern const struct address_space_operations ram_aops; extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); -extern int simple_nosetlease(struct file *, long, struct file_lock **, void **); +extern int simple_nosetlease(struct file *, int, struct file_lock **, void **); extern const struct dentry_operations simple_dentry_operations; extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags); @@ -2977,6 +3180,22 @@ extern ssize_t simple_read_from_buffer(void __user *to, size_t count, extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, const void __user *from, size_t count); +struct offset_ctx { + struct xarray xa; + u32 next_offset; +}; + +void simple_offset_init(struct offset_ctx *octx); +int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry); +void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry); +int simple_offset_rename_exchange(struct inode *old_dir, + struct dentry *old_dentry, + struct inode *new_dir, + struct dentry *new_dentry); +void simple_offset_destroy(struct offset_ctx *octx); + +extern const struct file_operations simple_offset_dir_operations; + extern int __generic_file_fsync(struct file *, loff_t, loff_t, int); extern int generic_file_fsync(struct file *, loff_t, loff_t, int); diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index ff6341e09925..c13e99cbbf81 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -109,6 +109,7 @@ struct fs_context { bool need_free:1; /* Need to call ops->free() */ bool global:1; /* Goes into &init_user_ns */ bool oldapi:1; /* Coming from mount(2) */ + bool exclusive:1; /* create new superblock, reject existing one */ }; struct fs_context_operations { @@ -135,6 +136,8 @@ extern struct fs_context *vfs_dup_fs_context(struct fs_context *fc); extern int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param); extern int vfs_parse_fs_string(struct fs_context *fc, const char *key, const char *value, size_t v_size); +int vfs_parse_monolithic_sep(struct fs_context *fc, void *data, + char *(*sep)(char **)); extern int generic_parse_monolithic(struct fs_context *fc, void *data); extern int vfs_get_tree(struct fs_context *fc); extern void put_fs_context(struct fs_context *fc); @@ -150,14 +153,13 @@ extern int get_tree_nodev(struct fs_context *fc, extern int get_tree_single(struct fs_context *fc, int (*fill_super)(struct super_block *sb, struct fs_context *fc)); -extern int get_tree_single_reconf(struct fs_context *fc, - int (*fill_super)(struct super_block *sb, - struct fs_context *fc)); extern int get_tree_keyed(struct fs_context *fc, int (*fill_super)(struct super_block *sb, struct fs_context *fc), void *key); +int setup_bdev_super(struct super_block *sb, int sb_flags, + struct fs_context *fc); extern int get_tree_bdev(struct fs_context *fc, int (*fill_super)(struct super_block *sb, struct fs_context *fc)); diff --git a/include/linux/fs_enet_pd.h b/include/linux/fs_enet_pd.h deleted file mode 100644 index 77d783f71527..000000000000 --- a/include/linux/fs_enet_pd.h +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Platform information definitions for the - * universal Freescale Ethernet driver. - * - * Copyright (c) 2003 Intracom S.A. - * by Pantelis Antoniou <panto@intracom.gr> - * - * 2005 (c) MontaVista Software, Inc. - * Vitaly Bordug <vbordug@ru.mvista.com> - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - */ - -#ifndef FS_ENET_PD_H -#define FS_ENET_PD_H - -#include <linux/clk.h> -#include <linux/string.h> -#include <linux/of_mdio.h> -#include <linux/if_ether.h> -#include <asm/types.h> - -#define FS_ENET_NAME "fs_enet" - -enum fs_id { - fsid_fec1, - fsid_fec2, - fsid_fcc1, - fsid_fcc2, - fsid_fcc3, - fsid_scc1, - fsid_scc2, - fsid_scc3, - fsid_scc4, -}; - -#define FS_MAX_INDEX 9 - -static inline int fs_get_fec_index(enum fs_id id) -{ - if (id >= fsid_fec1 && id <= fsid_fec2) - return id - fsid_fec1; - return -1; -} - -static inline int fs_get_fcc_index(enum fs_id id) -{ - if (id >= fsid_fcc1 && id <= fsid_fcc3) - return id - fsid_fcc1; - return -1; -} - -static inline int fs_get_scc_index(enum fs_id id) -{ - if (id >= fsid_scc1 && id <= fsid_scc4) - return id - fsid_scc1; - return -1; -} - -static inline int fs_fec_index2id(int index) -{ - int id = fsid_fec1 + index - 1; - if (id >= fsid_fec1 && id <= fsid_fec2) - return id; - return FS_MAX_INDEX; - } - -static inline int fs_fcc_index2id(int index) -{ - int id = fsid_fcc1 + index - 1; - if (id >= fsid_fcc1 && id <= fsid_fcc3) - return id; - return FS_MAX_INDEX; -} - -static inline int fs_scc_index2id(int index) -{ - int id = fsid_scc1 + index - 1; - if (id >= fsid_scc1 && id <= fsid_scc4) - return id; - return FS_MAX_INDEX; -} - -enum fs_mii_method { - fsmii_fixed, - fsmii_fec, - fsmii_bitbang, -}; - -enum fs_ioport { - fsiop_porta, - fsiop_portb, - fsiop_portc, - fsiop_portd, - fsiop_porte, -}; - -struct fs_mii_bit { - u32 offset; - u8 bit; - u8 polarity; -}; -struct fs_mii_bb_platform_info { - struct fs_mii_bit mdio_dir; - struct fs_mii_bit mdio_dat; - struct fs_mii_bit mdc_dat; - int delay; /* delay in us */ - int irq[32]; /* irqs per phy's */ -}; - -struct fs_platform_info { - - void(*init_ioports)(struct fs_platform_info *); - /* device specific information */ - int fs_no; /* controller index */ - char fs_type[4]; /* controller type */ - - u32 cp_page; /* CPM page */ - u32 cp_block; /* CPM sblock */ - u32 cp_command; /* CPM page/sblock/mcn */ - - u32 clk_trx; /* some stuff for pins & mux configuration*/ - u32 clk_rx; - u32 clk_tx; - u32 clk_route; - u32 clk_mask; - - u32 mem_offset; - u32 dpram_offset; - u32 fcc_regs_c; - - u32 device_flags; - - struct device_node *phy_node; - const struct fs_mii_bus_info *bus_info; - - int rx_ring, tx_ring; /* number of buffers on rx */ - __u8 macaddr[ETH_ALEN]; /* mac address */ - int rx_copybreak; /* limit we copy small frames */ - int napi_weight; /* NAPI weight */ - - int use_rmii; /* use RMII mode */ - int has_phy; /* if the network is phy container as well...*/ - - struct clk *clk_per; /* 'per' clock for register access */ -}; -struct fs_mii_fec_platform_info { - u32 irq[32]; - u32 mii_speed; -}; - -static inline int fs_get_id(struct fs_platform_info *fpi) -{ - if(strstr(fpi->fs_type, "SCC")) - return fs_scc_index2id(fpi->fs_no); - if(strstr(fpi->fs_type, "FCC")) - return fs_fcc_index2id(fpi->fs_no); - if(strstr(fpi->fs_type, "FEC")) - return fs_fec_index2id(fpi->fs_no); - return fpi->fs_no; -} - -#endif diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h index 54210a42c30d..2b1f74b24070 100644 --- a/include/linux/fs_stack.h +++ b/include/linux/fs_stack.h @@ -16,15 +16,15 @@ extern void fsstack_copy_inode_size(struct inode *dst, struct inode *src); static inline void fsstack_copy_attr_atime(struct inode *dest, const struct inode *src) { - dest->i_atime = src->i_atime; + inode_set_atime_to_ts(dest, inode_get_atime(src)); } static inline void fsstack_copy_attr_times(struct inode *dest, const struct inode *src) { - dest->i_atime = src->i_atime; - dest->i_mtime = src->i_mtime; - dest->i_ctime = src->i_ctime; + inode_set_atime_to_ts(dest, inode_get_atime(src)); + inode_set_mtime_to_ts(dest, inode_get_mtime(src)); + inode_set_ctime_to_ts(dest, inode_get_ctime(src)); } #endif /* _LINUX_FS_STACK_H */ diff --git a/include/linux/fs_uart_pd.h b/include/linux/fs_uart_pd.h deleted file mode 100644 index 36b61ff39277..000000000000 --- a/include/linux/fs_uart_pd.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Platform information definitions for the CPM Uart driver. - * - * 2006 (c) MontaVista Software, Inc. - * Vitaly Bordug <vbordug@ru.mvista.com> - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - */ - -#ifndef FS_UART_PD_H -#define FS_UART_PD_H - -#include <asm/types.h> - -enum fs_uart_id { - fsid_smc1_uart, - fsid_smc2_uart, - fsid_scc1_uart, - fsid_scc2_uart, - fsid_scc3_uart, - fsid_scc4_uart, - fs_uart_nr, -}; - -static inline int fs_uart_id_scc2fsid(int id) -{ - return fsid_scc1_uart + id - 1; -} - -static inline int fs_uart_id_fsid2scc(int id) -{ - return id - fsid_scc1_uart + 1; -} - -static inline int fs_uart_id_smc2fsid(int id) -{ - return fsid_smc1_uart + id - 1; -} - -static inline int fs_uart_id_fsid2smc(int id) -{ - return id - fsid_smc1_uart + 1; -} - -struct fs_uart_platform_info { - void(*init_ioports)(struct fs_uart_platform_info *); - /* device specific information */ - int fs_no; /* controller index */ - char fs_type[4]; /* controller type */ - u32 uart_clk; - u8 tx_num_fifo; - u8 tx_buf_size; - u8 rx_num_fifo; - u8 rx_buf_size; - u8 brg; - u8 clk_rx; - u8 clk_tx; -}; - -static inline int fs_uart_get_id(struct fs_uart_platform_info *fpi) -{ - if(strstr(fpi->fs_type, "SMC")) - return fs_uart_id_smc2fsid(fpi->fs_no); - if(strstr(fpi->fs_type, "SCC")) - return fs_uart_id_scc2fsid(fpi->fs_no); - return fpi->fs_no; -} - -#endif diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index c895b12737a1..12f9e455d569 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -31,7 +31,7 @@ #define FSCRYPT_CONTENTS_ALIGNMENT 16 union fscrypt_policy; -struct fscrypt_info; +struct fscrypt_inode_info; struct fs_parameter; struct seq_file; @@ -59,26 +59,55 @@ struct fscrypt_name { #ifdef CONFIG_FS_ENCRYPTION -/* - * If set, the fscrypt bounce page pool won't be allocated (unless another - * filesystem needs it). Set this if the filesystem always uses its own bounce - * pages for writes and therefore won't need the fscrypt bounce page pool. - */ -#define FS_CFLG_OWN_PAGES (1U << 1) - /* Crypto operations for filesystems */ struct fscrypt_operations { - /* Set of optional flags; see above for allowed flags */ - unsigned int flags; + /* + * If set, then fs/crypto/ will allocate a global bounce page pool the + * first time an encryption key is set up for a file. The bounce page + * pool is required by the following functions: + * + * - fscrypt_encrypt_pagecache_blocks() + * - fscrypt_zeroout_range() for files not using inline crypto + * + * If the filesystem doesn't use those, it doesn't need to set this. + */ + unsigned int needs_bounce_pages : 1; /* - * If set, this is a filesystem-specific key description prefix that - * will be accepted for "logon" keys for v1 fscrypt policies, in - * addition to the generic prefix "fscrypt:". This functionality is - * deprecated, so new filesystems shouldn't set this field. + * If set, then fs/crypto/ will allow the use of encryption settings + * that assume inode numbers fit in 32 bits (i.e. + * FSCRYPT_POLICY_FLAG_IV_INO_LBLK_{32,64}), provided that the other + * prerequisites for these settings are also met. This is only useful + * if the filesystem wants to support inline encryption hardware that is + * limited to 32-bit or 64-bit data unit numbers and where programming + * keyslots is very slow. */ - const char *key_prefix; + unsigned int has_32bit_inodes : 1; + + /* + * If set, then fs/crypto/ will allow users to select a crypto data unit + * size that is less than the filesystem block size. This is done via + * the log2_data_unit_size field of the fscrypt policy. This flag is + * not compatible with filesystems that encrypt variable-length blocks + * (i.e. blocks that aren't all equal to filesystem's block size), for + * example as a result of compression. It's also not compatible with + * the fscrypt_encrypt_block_inplace() and + * fscrypt_decrypt_block_inplace() functions. + */ + unsigned int supports_subblock_data_units : 1; + + /* + * This field exists only for backwards compatibility reasons and should + * only be set by the filesystems that are setting it already. It + * contains the filesystem-specific key description prefix that is + * accepted for "logon" keys for v1 fscrypt policies. This + * functionality is deprecated in favor of the generic prefix + * "fscrypt:", which itself is deprecated in favor of the filesystem + * keyring ioctls such as FS_IOC_ADD_ENCRYPTION_KEY. Filesystems that + * are newly adding fscrypt support should not set this field. + */ + const char *legacy_key_prefix; /* * Get the fscrypt context of the given inode. @@ -146,21 +175,6 @@ struct fscrypt_operations { bool (*has_stable_inodes)(struct super_block *sb); /* - * Get the number of bits that the filesystem uses to represent inode - * numbers and file logical block numbers. - * - * By default, both of these are assumed to be 64-bit. This function - * can be implemented to declare that either or both of these numbers is - * shorter, which may allow the use of the - * FSCRYPT_POLICY_FLAG_IV_INO_LBLK_{32,64} flags and/or the use of - * inline crypto hardware whose maximum DUN length is less than 64 bits - * (e.g., eMMC v5.2 spec compliant hardware). This function only needs - * to be implemented if support for one of these features is needed. - */ - void (*get_ino_and_lblk_bits)(struct super_block *sb, - int *ino_bits_ret, int *lblk_bits_ret); - - /* * Return an array of pointers to the block devices to which the * filesystem may write encrypted file contents, NULL if the filesystem * only has a single such block device, or an ERR_PTR() on error. @@ -178,7 +192,8 @@ struct fscrypt_operations { unsigned int *num_devs); }; -static inline struct fscrypt_info *fscrypt_get_info(const struct inode *inode) +static inline struct fscrypt_inode_info * +fscrypt_get_inode_info(const struct inode *inode) { /* * Pairs with the cmpxchg_release() in fscrypt_setup_encryption_info(). @@ -390,7 +405,8 @@ static inline void fscrypt_set_ops(struct super_block *sb, } #else /* !CONFIG_FS_ENCRYPTION */ -static inline struct fscrypt_info *fscrypt_get_info(const struct inode *inode) +static inline struct fscrypt_inode_info * +fscrypt_get_inode_info(const struct inode *inode) { return NULL; } @@ -868,7 +884,7 @@ static inline bool fscrypt_inode_uses_fs_layer_crypto(const struct inode *inode) */ static inline bool fscrypt_has_encryption_key(const struct inode *inode) { - return fscrypt_get_info(inode) != NULL; + return fscrypt_get_inode_info(inode) != NULL; } /** diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index ed48e4f1e755..bcb6609b54b3 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -96,8 +96,7 @@ static inline int fsnotify_file(struct file *file, __u32 mask) if (file->f_mode & FMODE_NONOTIFY) return 0; - /* Overlayfs internal files have fake f_path */ - path = file_real_path(file); + path = &file->f_path; return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH); } diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index d7d96c806bff..c0892d75ce33 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -760,9 +760,6 @@ extern void fsnotify_init_mark(struct fsnotify_mark *mark, /* Find mark belonging to given group in the list of marks */ extern struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp, struct fsnotify_group *group); -/* Get cached fsid of filesystem containing object */ -extern int fsnotify_get_conn_fsid(const struct fsnotify_mark_connector *conn, - __kernel_fsid_t *fsid); /* attach the mark to the object */ extern int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp, unsigned int obj_type, diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index aad9cf8876b5..e8921871ef9a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -862,13 +862,8 @@ extern int skip_trace(unsigned long ip); extern void ftrace_module_init(struct module *mod); extern void ftrace_module_enable(struct module *mod); extern void ftrace_release_mod(struct module *mod); - -extern void ftrace_disable_daemon(void); -extern void ftrace_enable_daemon(void); #else /* CONFIG_DYNAMIC_FTRACE */ static inline int skip_trace(unsigned long ip) { return 0; } -static inline void ftrace_disable_daemon(void) { } -static inline void ftrace_enable_daemon(void) { } static inline void ftrace_module_init(struct module *mod) { } static inline void ftrace_module_enable(struct module *mod) { } static inline void ftrace_release_mod(struct module *mod) { } diff --git a/include/linux/fw_table.h b/include/linux/fw_table.h new file mode 100644 index 000000000000..ca49947f0a77 --- /dev/null +++ b/include/linux/fw_table.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * fw_tables.h - Parsing support for ACPI and ACPI-like tables provided by + * platform or device firmware + * + * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> + * Copyright (C) 2023 Intel Corp. + */ +#ifndef _FW_TABLE_H_ +#define _FW_TABLE_H_ + +union acpi_subtable_headers; + +typedef int (*acpi_tbl_entry_handler)(union acpi_subtable_headers *header, + const unsigned long end); + +typedef int (*acpi_tbl_entry_handler_arg)(union acpi_subtable_headers *header, + void *arg, const unsigned long end); + +struct acpi_subtable_proc { + int id; + acpi_tbl_entry_handler handler; + acpi_tbl_entry_handler_arg handler_arg; + void *arg; + int count; +}; + +union acpi_subtable_headers { + struct acpi_subtable_header common; + struct acpi_hmat_structure hmat; + struct acpi_prmt_module_header prmt; + struct acpi_cedt_header cedt; +}; + +int acpi_parse_entries_array(char *id, unsigned long table_size, + struct acpi_table_header *table_header, + struct acpi_subtable_proc *proc, + int proc_num, unsigned int max_entries); + +#endif diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 5700451b300f..2a72f55d26eb 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -41,6 +41,8 @@ struct device; struct fwnode_handle { struct fwnode_handle *secondary; const struct fwnode_operations *ops; + + /* The below is used solely by device links, don't use otherwise */ struct device *dev; struct list_head suppliers; struct list_head consumers; diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h index 107613f7d792..847413164738 100644 --- a/include/linux/generic-radix-tree.h +++ b/include/linux/generic-radix-tree.h @@ -38,6 +38,7 @@ #include <asm/page.h> #include <linux/bug.h> +#include <linux/limits.h> #include <linux/log2.h> #include <linux/math.h> #include <linux/types.h> @@ -116,6 +117,11 @@ static inline size_t __idx_to_offset(size_t idx, size_t obj_size) #define __genradix_cast(_radix) (typeof((_radix)->type[0]) *) #define __genradix_obj_size(_radix) sizeof((_radix)->type[0]) +#define __genradix_objs_per_page(_radix) \ + (PAGE_SIZE / sizeof((_radix)->type[0])) +#define __genradix_page_remainder(_radix) \ + (PAGE_SIZE % sizeof((_radix)->type[0])) + #define __genradix_idx_to_offset(_radix, _idx) \ __idx_to_offset(_idx, __genradix_obj_size(_radix)) @@ -179,11 +185,35 @@ void *__genradix_iter_peek(struct genradix_iter *, struct __genradix *, size_t); #define genradix_iter_peek(_iter, _radix) \ (__genradix_cast(_radix) \ __genradix_iter_peek(_iter, &(_radix)->tree, \ - PAGE_SIZE / __genradix_obj_size(_radix))) + __genradix_objs_per_page(_radix))) + +void *__genradix_iter_peek_prev(struct genradix_iter *, struct __genradix *, + size_t, size_t); + +/** + * genradix_iter_peek_prev - get first entry at or below iterator's current + * position + * @_iter: a genradix_iter + * @_radix: genradix being iterated over + * + * If no more entries exist at or below @_iter's current position, returns NULL + */ +#define genradix_iter_peek_prev(_iter, _radix) \ + (__genradix_cast(_radix) \ + __genradix_iter_peek_prev(_iter, &(_radix)->tree, \ + __genradix_objs_per_page(_radix), \ + __genradix_obj_size(_radix) + \ + __genradix_page_remainder(_radix))) static inline void __genradix_iter_advance(struct genradix_iter *iter, size_t obj_size) { + if (iter->offset + obj_size < iter->offset) { + iter->offset = SIZE_MAX; + iter->pos = SIZE_MAX; + return; + } + iter->offset += obj_size; if (!is_power_of_2(obj_size) && @@ -196,6 +226,25 @@ static inline void __genradix_iter_advance(struct genradix_iter *iter, #define genradix_iter_advance(_iter, _radix) \ __genradix_iter_advance(_iter, __genradix_obj_size(_radix)) +static inline void __genradix_iter_rewind(struct genradix_iter *iter, + size_t obj_size) +{ + if (iter->offset == 0 || + iter->offset == SIZE_MAX) { + iter->offset = SIZE_MAX; + return; + } + + if ((iter->offset & (PAGE_SIZE - 1)) == 0) + iter->offset -= PAGE_SIZE % obj_size; + + iter->offset -= obj_size; + iter->pos--; +} + +#define genradix_iter_rewind(_iter, _radix) \ + __genradix_iter_rewind(_iter, __genradix_obj_size(_radix)) + #define genradix_for_each_from(_radix, _iter, _p, _start) \ for (_iter = genradix_iter_init(_radix, _start); \ (_p = genradix_iter_peek(&_iter, _radix)) != NULL; \ @@ -213,6 +262,23 @@ static inline void __genradix_iter_advance(struct genradix_iter *iter, #define genradix_for_each(_radix, _iter, _p) \ genradix_for_each_from(_radix, _iter, _p, 0) +#define genradix_last_pos(_radix) \ + (SIZE_MAX / PAGE_SIZE * __genradix_objs_per_page(_radix) - 1) + +/** + * genradix_for_each_reverse - iterate over entry in a genradix, reverse order + * @_radix: genradix to iterate over + * @_iter: a genradix_iter to track current position + * @_p: pointer to genradix entry type + * + * On every iteration, @_p will point to the current entry, and @_iter.pos + * will be the current entry's index. + */ +#define genradix_for_each_reverse(_radix, _iter, _p) \ + for (_iter = genradix_iter_init(_radix, genradix_last_pos(_radix));\ + (_p = genradix_iter_peek_prev(&_iter, _radix)) != NULL;\ + genradix_iter_rewind(&_iter, _radix)) + int __genradix_prealloc(struct __genradix *, size_t, gfp_t); /** diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 2984b0cb24b1..d4da060b7532 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -2,6 +2,7 @@ #ifndef GENL_MAGIC_FUNC_H #define GENL_MAGIC_FUNC_H +#include <linux/args.h> #include <linux/build_bug.h> #include <linux/genl_magic_struct.h> @@ -23,7 +24,7 @@ #define GENL_struct(tag_name, tag_number, s_name, s_fields) \ [tag_name] = { .type = NLA_NESTED }, -static struct nla_policy CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy)[] = { +static struct nla_policy CONCATENATE(GENL_MAGIC_FAMILY, _tla_nl_policy)[] = { #include GENL_MAGIC_INCLUDE_FILE }; @@ -209,7 +210,7 @@ static int s_name ## _from_attrs_for_change(struct s_name *s, \ * Magic: define op number to op name mapping {{{1 * {{{2 */ -static const char *CONCAT_(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd) +static const char *CONCATENATE(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd) { switch (cmd) { #undef GENL_op @@ -235,7 +236,7 @@ static const char *CONCAT_(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd) .cmd = op_name, \ }, -#define ZZZ_genl_ops CONCAT_(GENL_MAGIC_FAMILY, _genl_ops) +#define ZZZ_genl_ops CONCATENATE(GENL_MAGIC_FAMILY, _genl_ops) static struct genl_ops ZZZ_genl_ops[] __read_mostly = { #include GENL_MAGIC_INCLUDE_FILE }; @@ -248,32 +249,32 @@ static struct genl_ops ZZZ_genl_ops[] __read_mostly = { * and provide register/unregister functions. * {{{2 */ -#define ZZZ_genl_family CONCAT_(GENL_MAGIC_FAMILY, _genl_family) +#define ZZZ_genl_family CONCATENATE(GENL_MAGIC_FAMILY, _genl_family) static struct genl_family ZZZ_genl_family; /* * Magic: define multicast groups * Magic: define multicast group registration helper */ -#define ZZZ_genl_mcgrps CONCAT_(GENL_MAGIC_FAMILY, _genl_mcgrps) +#define ZZZ_genl_mcgrps CONCATENATE(GENL_MAGIC_FAMILY, _genl_mcgrps) static const struct genl_multicast_group ZZZ_genl_mcgrps[] = { #undef GENL_mc_group #define GENL_mc_group(group) { .name = #group, }, #include GENL_MAGIC_INCLUDE_FILE }; -enum CONCAT_(GENL_MAGIC_FAMILY, group_ids) { +enum CONCATENATE(GENL_MAGIC_FAMILY, group_ids) { #undef GENL_mc_group -#define GENL_mc_group(group) CONCAT_(GENL_MAGIC_FAMILY, _group_ ## group), +#define GENL_mc_group(group) CONCATENATE(GENL_MAGIC_FAMILY, _group_ ## group), #include GENL_MAGIC_INCLUDE_FILE }; #undef GENL_mc_group #define GENL_mc_group(group) \ -static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)( \ +static int CONCATENATE(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)( \ struct sk_buff *skb, gfp_t flags) \ { \ unsigned int group_id = \ - CONCAT_(GENL_MAGIC_FAMILY, _group_ ## group); \ + CONCATENATE(GENL_MAGIC_FAMILY, _group_ ## group); \ return genlmsg_multicast(&ZZZ_genl_family, skb, 0, \ group_id, flags); \ } @@ -289,8 +290,8 @@ static struct genl_family ZZZ_genl_family __ro_after_init = { #ifdef GENL_MAGIC_FAMILY_HDRSZ .hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ), #endif - .maxattr = ARRAY_SIZE(CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy))-1, - .policy = CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy), + .maxattr = ARRAY_SIZE(CONCATENATE(GENL_MAGIC_FAMILY, _tla_nl_policy))-1, + .policy = CONCATENATE(GENL_MAGIC_FAMILY, _tla_nl_policy), .ops = ZZZ_genl_ops, .n_ops = ARRAY_SIZE(ZZZ_genl_ops), .mcgrps = ZZZ_genl_mcgrps, @@ -299,12 +300,12 @@ static struct genl_family ZZZ_genl_family __ro_after_init = { .module = THIS_MODULE, }; -int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void) +int CONCATENATE(GENL_MAGIC_FAMILY, _genl_register)(void) { return genl_register_family(&ZZZ_genl_family); } -void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void) +void CONCATENATE(GENL_MAGIC_FAMILY, _genl_unregister)(void) { genl_unregister_family(&ZZZ_genl_family); } diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index f81d48987528..a419d93789ff 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -14,14 +14,12 @@ # error "you need to define GENL_MAGIC_INCLUDE_FILE before inclusion" #endif +#include <linux/args.h> #include <linux/genetlink.h> #include <linux/types.h> -#define CONCAT__(a,b) a ## b -#define CONCAT_(a,b) CONCAT__(a,b) - -extern int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void); -extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void); +extern int CONCATENATE(GENL_MAGIC_FAMILY, _genl_register)(void); +extern void CONCATENATE(GENL_MAGIC_FAMILY, _genl_unregister)(void); /* * Extension of genl attribute validation policies {{{2 diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 665f06675c83..de292a007138 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -8,6 +8,7 @@ #include <linux/topology.h> struct vm_area_struct; +struct mempolicy; /* Convert GFP flags to their corresponding migrate type */ #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) @@ -262,7 +263,9 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, #ifdef CONFIG_NUMA struct page *alloc_pages(gfp_t gfp, unsigned int order); -struct folio *folio_alloc(gfp_t gfp, unsigned order); +struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order, + struct mempolicy *mpol, pgoff_t ilx, int nid); +struct folio *folio_alloc(gfp_t gfp, unsigned int order); struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma, unsigned long addr, bool hugepage); #else @@ -270,6 +273,11 @@ static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order) { return alloc_pages_node(numa_node_id(), gfp_mask, order); } +static inline struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order, + struct mempolicy *mpol, pgoff_t ilx, int nid) +{ + return alloc_pages(gfp, order); +} static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order) { return __folio_alloc_node(gfp, order, numa_node_id()); @@ -320,11 +328,13 @@ extern void page_frag_free(void *addr); #define free_page(addr) free_pages((addr), 0) void page_alloc_init_cpuhp(void); +int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp); void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp); void drain_all_pages(struct zone *zone); void drain_local_pages(struct zone *zone); void page_alloc_init_late(void); +void setup_pcp_cacheinfo(void); /* * gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 1c4385a00f88..db2dfbae8edb 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -159,7 +159,6 @@ int gpiod_set_raw_array_value_cansleep(unsigned int array_size, int gpiod_set_config(struct gpio_desc *desc, unsigned long config); int gpiod_set_debounce(struct gpio_desc *desc, unsigned int debounce); -int gpiod_set_transitory(struct gpio_desc *desc, bool transitory); void gpiod_toggle_active_low(struct gpio_desc *desc); int gpiod_is_active_low(const struct gpio_desc *desc); @@ -494,13 +493,6 @@ static inline int gpiod_set_debounce(struct gpio_desc *desc, unsigned int deboun return -ENOSYS; } -static inline int gpiod_set_transitory(struct gpio_desc *desc, bool transitory) -{ - /* GPIO can never have been requested */ - WARN_ON(desc); - return -ENOSYS; -} - static inline void gpiod_toggle_active_low(struct gpio_desc *desc) { /* GPIO can never have been requested */ @@ -614,8 +606,6 @@ void acpi_dev_remove_driver_gpios(struct acpi_device *adev); int devm_acpi_dev_add_driver_gpios(struct device *dev, const struct acpi_gpio_mapping *gpios); -struct gpio_desc *acpi_get_and_request_gpiod(char *path, unsigned int pin, char *label); - #else /* CONFIG_GPIOLIB && CONFIG_ACPI */ #include <linux/err.h> @@ -633,12 +623,6 @@ static inline int devm_acpi_dev_add_driver_gpios(struct device *dev, return -ENXIO; } -static inline struct gpio_desc *acpi_get_and_request_gpiod(char *path, unsigned int pin, - char *label) -{ - return ERR_PTR(-ENOSYS); -} - #endif /* CONFIG_GPIOLIB && CONFIG_ACPI */ diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 4f0c5d62c8f3..0aed62f0c633 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -3,6 +3,8 @@ #define __LINUX_GPIO_DRIVER_H #include <linux/bits.h> +#include <linux/cleanup.h> +#include <linux/err.h> #include <linux/irqchip/chained_irq.h> #include <linux/irqdomain.h> #include <linux/irqhandler.h> @@ -529,8 +531,7 @@ struct gpio_chip { #endif /* CONFIG_OF_GPIO */ }; -extern const char *gpiochip_is_requested(struct gpio_chip *gc, - unsigned int offset); +const char *gpiochip_is_requested(struct gpio_chip *gc, unsigned int offset); /** * for_each_requested_gpio_in_range - iterates over requested GPIOs in a given range @@ -549,9 +550,9 @@ extern const char *gpiochip_is_requested(struct gpio_chip *gc, for_each_requested_gpio_in_range(chip, i, 0, chip->ngpio, label) /* add/remove chips */ -extern int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, - struct lock_class_key *lock_key, - struct lock_class_key *request_key); +int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, + struct lock_class_key *lock_key, + struct lock_class_key *request_key); /** * gpiochip_add_data() - register a gpio_chip @@ -599,13 +600,23 @@ static inline int gpiochip_add(struct gpio_chip *gc) { return gpiochip_add_data(gc, NULL); } -extern void gpiochip_remove(struct gpio_chip *gc); -extern int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc, void *data, - struct lock_class_key *lock_key, - struct lock_class_key *request_key); +void gpiochip_remove(struct gpio_chip *gc); +int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc, + void *data, struct lock_class_key *lock_key, + struct lock_class_key *request_key); + +struct gpio_device *gpio_device_find(void *data, + int (*match)(struct gpio_chip *gc, void *data)); +struct gpio_device *gpio_device_find_by_label(const char *label); +struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode); + +struct gpio_device *gpio_device_get(struct gpio_device *gdev); +void gpio_device_put(struct gpio_device *gdev); + +DEFINE_FREE(gpio_device_put, struct gpio_device *, + if (IS_ERR_OR_NULL(_T)) gpio_device_put(_T)); -extern struct gpio_chip *gpiochip_find(void *data, - int (*match)(struct gpio_chip *gc, void *data)); +struct device *gpio_device_to_device(struct gpio_device *gdev); bool gpiochip_line_is_irq(struct gpio_chip *gc, unsigned int offset); int gpiochip_reqres_irq(struct gpio_chip *gc, unsigned int offset); @@ -758,14 +769,23 @@ struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *gc, enum gpiod_flags dflags); void gpiochip_free_own_desc(struct gpio_desc *desc); +struct gpio_desc *gpiochip_get_desc(struct gpio_chip *gc, unsigned int hwnum); +struct gpio_desc * +gpio_device_get_desc(struct gpio_device *gdev, unsigned int hwnum); + +struct gpio_chip *gpio_device_get_chip(struct gpio_device *gdev); + #ifdef CONFIG_GPIOLIB /* lock/unlock as IRQ */ int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset); void gpiochip_unlock_as_irq(struct gpio_chip *gc, unsigned int offset); - struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc); +struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc); + +/* struct gpio_device getters */ +int gpio_device_get_base(struct gpio_device *gdev); #else /* CONFIG_GPIOLIB */ diff --git a/include/linux/greybus/svc.h b/include/linux/greybus/svc.h index 5afaf5f06856..da547fb9071b 100644 --- a/include/linux/greybus/svc.h +++ b/include/linux/greybus/svc.h @@ -100,7 +100,4 @@ bool gb_svc_watchdog_enabled(struct gb_svc *svc); int gb_svc_watchdog_enable(struct gb_svc *svc); int gb_svc_watchdog_disable(struct gb_svc *svc); -int gb_svc_protocol_init(void); -void gb_svc_protocol_exit(void); - #endif /* __SVC_H */ diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h new file mode 100644 index 000000000000..86ea7c63a0d2 --- /dev/null +++ b/include/linux/habanalabs/cpucp_if.h @@ -0,0 +1,1417 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2020-2023 HabanaLabs, Ltd. + * All Rights Reserved. + * + */ + +#ifndef CPUCP_IF_H +#define CPUCP_IF_H + +#include <linux/types.h> +#include <linux/if_ether.h> + +#include "hl_boot_if.h" + +#define NUM_HBM_PSEUDO_CH 2 +#define NUM_HBM_CH_PER_DEV 8 +#define CPUCP_PKT_HBM_ECC_INFO_WR_PAR_SHIFT 0 +#define CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK 0x00000001 +#define CPUCP_PKT_HBM_ECC_INFO_RD_PAR_SHIFT 1 +#define CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK 0x00000002 +#define CPUCP_PKT_HBM_ECC_INFO_CA_PAR_SHIFT 2 +#define CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK 0x00000004 +#define CPUCP_PKT_HBM_ECC_INFO_DERR_SHIFT 3 +#define CPUCP_PKT_HBM_ECC_INFO_DERR_MASK 0x00000008 +#define CPUCP_PKT_HBM_ECC_INFO_SERR_SHIFT 4 +#define CPUCP_PKT_HBM_ECC_INFO_SERR_MASK 0x00000010 +#define CPUCP_PKT_HBM_ECC_INFO_TYPE_SHIFT 5 +#define CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK 0x00000020 +#define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_SHIFT 6 +#define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK 0x000007C0 + +#define PLL_MAP_MAX_BITS 128 +#define PLL_MAP_LEN (PLL_MAP_MAX_BITS / 8) + +enum eq_event_id { + EQ_EVENT_NIC_STS_REQUEST = 0, + EQ_EVENT_PWR_MODE_0, + EQ_EVENT_PWR_MODE_1, + EQ_EVENT_PWR_MODE_2, + EQ_EVENT_PWR_MODE_3, + EQ_EVENT_PWR_BRK_ENTRY, + EQ_EVENT_PWR_BRK_EXIT, + EQ_EVENT_HEARTBEAT, +}; + +/* + * info of the pkt queue pointers in the first async occurrence + */ +struct cpucp_pkt_sync_err { + __le32 pi; + __le32 ci; +}; + +struct hl_eq_hbm_ecc_data { + /* SERR counter */ + __le32 sec_cnt; + /* DERR counter */ + __le32 dec_cnt; + /* Supplemental Information according to the mask bits */ + __le32 hbm_ecc_info; + /* Address in hbm where the ecc happened */ + __le32 first_addr; + /* SERR continuous address counter */ + __le32 sec_cont_cnt; + __le32 pad; +}; + +/* + * EVENT QUEUE + */ + +struct hl_eq_header { + __le32 reserved; + __le32 ctl; +}; + +struct hl_eq_ecc_data { + __le64 ecc_address; + __le64 ecc_syndrom; + __u8 memory_wrapper_idx; + __u8 is_critical; + __le16 block_id; + __u8 pad[4]; +}; + +enum hl_sm_sei_cause { + SM_SEI_SO_OVERFLOW, + SM_SEI_LBW_4B_UNALIGNED, + SM_SEI_AXI_RESPONSE_ERR +}; + +struct hl_eq_sm_sei_data { + __le32 sei_log; + /* enum hl_sm_sei_cause */ + __u8 sei_cause; + __u8 pad[3]; +}; + +enum hl_fw_alive_severity { + FW_ALIVE_SEVERITY_MINOR, + FW_ALIVE_SEVERITY_CRITICAL +}; + +struct hl_eq_fw_alive { + __le64 uptime_seconds; + __le32 process_id; + __le32 thread_id; + /* enum hl_fw_alive_severity */ + __u8 severity; + __u8 pad[7]; +}; + +struct hl_eq_intr_cause { + __le64 intr_cause_data; +}; + +struct hl_eq_pcie_drain_ind_data { + struct hl_eq_intr_cause intr_cause; + __le64 drain_wr_addr_lbw; + __le64 drain_rd_addr_lbw; + __le64 drain_wr_addr_hbw; + __le64 drain_rd_addr_hbw; +}; + +struct hl_eq_razwi_lbw_info_regs { + __le32 rr_aw_razwi_reg; + __le32 rr_aw_razwi_id_reg; + __le32 rr_ar_razwi_reg; + __le32 rr_ar_razwi_id_reg; +}; + +struct hl_eq_razwi_hbw_info_regs { + __le32 rr_aw_razwi_hi_reg; + __le32 rr_aw_razwi_lo_reg; + __le32 rr_aw_razwi_id_reg; + __le32 rr_ar_razwi_hi_reg; + __le32 rr_ar_razwi_lo_reg; + __le32 rr_ar_razwi_id_reg; +}; + +/* razwi_happened masks */ +#define RAZWI_HAPPENED_HBW 0x1 +#define RAZWI_HAPPENED_LBW 0x2 +#define RAZWI_HAPPENED_AW 0x4 +#define RAZWI_HAPPENED_AR 0x8 + +struct hl_eq_razwi_info { + __le32 razwi_happened_mask; + union { + struct hl_eq_razwi_lbw_info_regs lbw; + struct hl_eq_razwi_hbw_info_regs hbw; + }; + __le32 pad; +}; + +struct hl_eq_razwi_with_intr_cause { + struct hl_eq_razwi_info razwi_info; + struct hl_eq_intr_cause intr_cause; +}; + +#define HBM_CA_ERR_CMD_LIFO_LEN 8 +#define HBM_RD_ERR_DATA_LIFO_LEN 8 +#define HBM_WR_PAR_CMD_LIFO_LEN 11 + +enum hl_hbm_sei_cause { + /* Command/address parity error event is split into 2 events due to + * size limitation: ODD suffix for odd HBM CK_t cycles and EVEN suffix + * for even HBM CK_t cycles + */ + HBM_SEI_CMD_PARITY_EVEN, + HBM_SEI_CMD_PARITY_ODD, + /* Read errors can be reflected as a combination of SERR/DERR/parity + * errors. Therefore, we define one event for all read error types. + * LKD will perform further proccessing. + */ + HBM_SEI_READ_ERR, + HBM_SEI_WRITE_DATA_PARITY_ERR, + HBM_SEI_CATTRIP, + HBM_SEI_MEM_BIST_FAIL, + HBM_SEI_DFI, + HBM_SEI_INV_TEMP_READ_OUT, + HBM_SEI_BIST_FAIL, +}; + +/* Masks for parsing hl_hbm_sei_headr fields */ +#define HBM_ECC_SERR_CNTR_MASK 0xFF +#define HBM_ECC_DERR_CNTR_MASK 0xFF00 +#define HBM_RD_PARITY_CNTR_MASK 0xFF0000 + +/* HBM index and MC index are known by the event_id */ +struct hl_hbm_sei_header { + union { + /* relevant only in case of HBM read error */ + struct { + __u8 ecc_serr_cnt; + __u8 ecc_derr_cnt; + __u8 read_par_cnt; + __u8 reserved; + }; + /* All other cases */ + __le32 cnt; + }; + __u8 sei_cause; /* enum hl_hbm_sei_cause */ + __u8 mc_channel; /* range: 0-3 */ + __u8 mc_pseudo_channel; /* range: 0-7 */ + __u8 is_critical; +}; + +#define HBM_RD_ADDR_SID_SHIFT 0 +#define HBM_RD_ADDR_SID_MASK 0x1 +#define HBM_RD_ADDR_BG_SHIFT 1 +#define HBM_RD_ADDR_BG_MASK 0x6 +#define HBM_RD_ADDR_BA_SHIFT 3 +#define HBM_RD_ADDR_BA_MASK 0x18 +#define HBM_RD_ADDR_COL_SHIFT 5 +#define HBM_RD_ADDR_COL_MASK 0x7E0 +#define HBM_RD_ADDR_ROW_SHIFT 11 +#define HBM_RD_ADDR_ROW_MASK 0x3FFF800 + +struct hbm_rd_addr { + union { + /* bit fields are only for FW use */ + struct { + u32 dbg_rd_err_addr_sid:1; + u32 dbg_rd_err_addr_bg:2; + u32 dbg_rd_err_addr_ba:2; + u32 dbg_rd_err_addr_col:6; + u32 dbg_rd_err_addr_row:15; + u32 reserved:6; + }; + __le32 rd_addr_val; + }; +}; + +#define HBM_RD_ERR_BEAT_SHIFT 2 +/* dbg_rd_err_misc fields: */ +/* Read parity is calculated per DW on every beat */ +#define HBM_RD_ERR_PAR_ERR_BEAT0_SHIFT 0 +#define HBM_RD_ERR_PAR_ERR_BEAT0_MASK 0x3 +#define HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT 8 +#define HBM_RD_ERR_PAR_DATA_BEAT0_MASK 0x300 +/* ECC is calculated per PC on every beat */ +#define HBM_RD_ERR_SERR_BEAT0_SHIFT 16 +#define HBM_RD_ERR_SERR_BEAT0_MASK 0x10000 +#define HBM_RD_ERR_DERR_BEAT0_SHIFT 24 +#define HBM_RD_ERR_DERR_BEAT0_MASK 0x100000 + +struct hl_eq_hbm_sei_read_err_intr_info { + /* DFI_RD_ERR_REP_ADDR */ + struct hbm_rd_addr dbg_rd_err_addr; + /* DFI_RD_ERR_REP_ERR */ + union { + struct { + /* bit fields are only for FW use */ + u32 dbg_rd_err_par:8; + u32 dbg_rd_err_par_data:8; + u32 dbg_rd_err_serr:4; + u32 dbg_rd_err_derr:4; + u32 reserved:8; + }; + __le32 dbg_rd_err_misc; + }; + /* DFI_RD_ERR_REP_DM */ + __le32 dbg_rd_err_dm; + /* DFI_RD_ERR_REP_SYNDROME */ + __le32 dbg_rd_err_syndrome; + /* DFI_RD_ERR_REP_DATA */ + __le32 dbg_rd_err_data[HBM_RD_ERR_DATA_LIFO_LEN]; +}; + +struct hl_eq_hbm_sei_ca_par_intr_info { + /* 14 LSBs */ + __le16 dbg_row[HBM_CA_ERR_CMD_LIFO_LEN]; + /* 18 LSBs */ + __le32 dbg_col[HBM_CA_ERR_CMD_LIFO_LEN]; +}; + +#define WR_PAR_LAST_CMD_COL_SHIFT 0 +#define WR_PAR_LAST_CMD_COL_MASK 0x3F +#define WR_PAR_LAST_CMD_BG_SHIFT 6 +#define WR_PAR_LAST_CMD_BG_MASK 0xC0 +#define WR_PAR_LAST_CMD_BA_SHIFT 8 +#define WR_PAR_LAST_CMD_BA_MASK 0x300 +#define WR_PAR_LAST_CMD_SID_SHIFT 10 +#define WR_PAR_LAST_CMD_SID_MASK 0x400 + +/* Row address isn't latched */ +struct hbm_sei_wr_cmd_address { + /* DFI_DERR_LAST_CMD */ + union { + struct { + /* bit fields are only for FW use */ + u32 col:6; + u32 bg:2; + u32 ba:2; + u32 sid:1; + u32 reserved:21; + }; + __le32 dbg_wr_cmd_addr; + }; +}; + +struct hl_eq_hbm_sei_wr_par_intr_info { + /* entry 0: WR command address from the 1st cycle prior to the error + * entry 1: WR command address from the 2nd cycle prior to the error + * and so on... + */ + struct hbm_sei_wr_cmd_address dbg_last_wr_cmds[HBM_WR_PAR_CMD_LIFO_LEN]; + /* derr[0:1] - 1st HBM cycle DERR output + * derr[2:3] - 2nd HBM cycle DERR output + */ + __u8 dbg_derr; + /* extend to reach 8B */ + __u8 pad[3]; +}; + +/* + * this struct represents the following sei causes: + * command parity, ECC double error, ECC single error, dfi error, cattrip, + * temperature read-out, read parity error and write parity error. + * some only use the header while some have extra data. + */ +struct hl_eq_hbm_sei_data { + struct hl_hbm_sei_header hdr; + union { + struct hl_eq_hbm_sei_ca_par_intr_info ca_parity_even_info; + struct hl_eq_hbm_sei_ca_par_intr_info ca_parity_odd_info; + struct hl_eq_hbm_sei_read_err_intr_info read_err_info; + struct hl_eq_hbm_sei_wr_par_intr_info wr_parity_info; + }; +}; + +/* Engine/farm arc interrupt type */ +enum hl_engine_arc_interrupt_type { + /* Qman/farm ARC DCCM QUEUE FULL interrupt type */ + ENGINE_ARC_DCCM_QUEUE_FULL_IRQ = 1 +}; + +/* Data structure specifies details of payload of DCCM QUEUE FULL interrupt */ +struct hl_engine_arc_dccm_queue_full_irq { + /* Queue index value which caused DCCM QUEUE FULL */ + __le32 queue_index; + __le32 pad; +}; + +/* Data structure specifies details of QM/FARM ARC interrupt */ +struct hl_eq_engine_arc_intr_data { + /* ARC engine id e.g. DCORE0_TPC0_QM_ARC, DCORE0_TCP1_QM_ARC */ + __le32 engine_id; + __le32 intr_type; /* enum hl_engine_arc_interrupt_type */ + /* More info related to the interrupt e.g. queue index + * incase of DCCM_QUEUE_FULL interrupt. + */ + __le64 payload; + __le64 pad[5]; +}; + +#define ADDR_DEC_ADDRESS_COUNT_MAX 4 + +/* Data structure specifies details of ADDR_DEC interrupt */ +struct hl_eq_addr_dec_intr_data { + struct hl_eq_intr_cause intr_cause; + __le64 addr[ADDR_DEC_ADDRESS_COUNT_MAX]; + __u8 addr_cnt; + __u8 pad[7]; +}; + +struct hl_eq_entry { + struct hl_eq_header hdr; + union { + __le64 data_placeholder; + struct hl_eq_ecc_data ecc_data; + struct hl_eq_hbm_ecc_data hbm_ecc_data; /* Obsolete */ + struct hl_eq_sm_sei_data sm_sei_data; + struct cpucp_pkt_sync_err pkt_sync_err; + struct hl_eq_fw_alive fw_alive; + struct hl_eq_intr_cause intr_cause; + struct hl_eq_pcie_drain_ind_data pcie_drain_ind_data; + struct hl_eq_razwi_info razwi_info; + struct hl_eq_razwi_with_intr_cause razwi_with_intr_cause; + struct hl_eq_hbm_sei_data sei_data; /* Gaudi2 HBM */ + struct hl_eq_engine_arc_intr_data arc_data; + struct hl_eq_addr_dec_intr_data addr_dec; + __le64 data[7]; + }; +}; + +#define HL_EQ_ENTRY_SIZE sizeof(struct hl_eq_entry) + +#define EQ_CTL_READY_SHIFT 31 +#define EQ_CTL_READY_MASK 0x80000000 + +#define EQ_CTL_EVENT_TYPE_SHIFT 16 +#define EQ_CTL_EVENT_TYPE_MASK 0x0FFF0000 + +#define EQ_CTL_INDEX_SHIFT 0 +#define EQ_CTL_INDEX_MASK 0x0000FFFF + +enum pq_init_status { + PQ_INIT_STATUS_NA = 0, + PQ_INIT_STATUS_READY_FOR_CP, + PQ_INIT_STATUS_READY_FOR_HOST, + PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI, + PQ_INIT_STATUS_LEN_NOT_POWER_OF_TWO_ERR, + PQ_INIT_STATUS_ILLEGAL_Q_ADDR_ERR +}; + +/* + * CpuCP Primary Queue Packets + * + * During normal operation, the host's kernel driver needs to send various + * messages to CpuCP, usually either to SET some value into a H/W periphery or + * to GET the current value of some H/W periphery. For example, SET the + * frequency of MME/TPC and GET the value of the thermal sensor. + * + * These messages can be initiated either by the User application or by the + * host's driver itself, e.g. power management code. In either case, the + * communication from the host's driver to CpuCP will *always* be in + * synchronous mode, meaning that the host will send a single message and poll + * until the message was acknowledged and the results are ready (if results are + * needed). + * + * This means that only a single message can be sent at a time and the host's + * driver must wait for its result before sending the next message. Having said + * that, because these are control messages which are sent in a relatively low + * frequency, this limitation seems acceptable. It's important to note that + * in case of multiple devices, messages to different devices *can* be sent + * at the same time. + * + * The message, inputs/outputs (if relevant) and fence object will be located + * on the device DDR at an address that will be determined by the host's driver. + * During device initialization phase, the host will pass to CpuCP that address. + * Most of the message types will contain inputs/outputs inside the message + * itself. The common part of each message will contain the opcode of the + * message (its type) and a field representing a fence object. + * + * When the host's driver wishes to send a message to CPU CP, it will write the + * message contents to the device DDR, clear the fence object and then write to + * the PSOC_ARC1_AUX_SW_INTR, to issue interrupt 121 to ARC Management CPU. + * + * Upon receiving the interrupt (#121), CpuCP will read the message from the + * DDR. In case the message is a SET operation, CpuCP will first perform the + * operation and then write to the fence object on the device DDR. In case the + * message is a GET operation, CpuCP will first fill the results section on the + * device DDR and then write to the fence object. If an error occurred, CpuCP + * will fill the rc field with the right error code. + * + * In the meantime, the host's driver will poll on the fence object. Once the + * host sees that the fence object is signaled, it will read the results from + * the device DDR (if relevant) and resume the code execution in the host's + * driver. + * + * To use QMAN packets, the opcode must be the QMAN opcode, shifted by 8 + * so the value being put by the host's driver matches the value read by CpuCP + * + * Non-QMAN packets should be limited to values 1 through (2^8 - 1) + * + * Detailed description: + * + * CPUCP_PACKET_DISABLE_PCI_ACCESS - + * After receiving this packet the embedded CPU must NOT issue PCI + * transactions (read/write) towards the Host CPU. This also include + * sending MSI-X interrupts. + * This packet is usually sent before the device is moved to D3Hot state. + * + * CPUCP_PACKET_ENABLE_PCI_ACCESS - + * After receiving this packet the embedded CPU is allowed to issue PCI + * transactions towards the Host CPU, including sending MSI-X interrupts. + * This packet is usually send after the device is moved to D0 state. + * + * CPUCP_PACKET_TEMPERATURE_GET - + * Fetch the current temperature / Max / Max Hyst / Critical / + * Critical Hyst of a specified thermal sensor. The packet's + * arguments specify the desired sensor and the field to get. + * + * CPUCP_PACKET_VOLTAGE_GET - + * Fetch the voltage / Max / Min of a specified sensor. The packet's + * arguments specify the sensor and type. + * + * CPUCP_PACKET_CURRENT_GET - + * Fetch the current / Max / Min of a specified sensor. The packet's + * arguments specify the sensor and type. + * + * CPUCP_PACKET_FAN_SPEED_GET - + * Fetch the speed / Max / Min of a specified fan. The packet's + * arguments specify the sensor and type. + * + * CPUCP_PACKET_PWM_GET - + * Fetch the pwm value / mode of a specified pwm. The packet's + * arguments specify the sensor and type. + * + * CPUCP_PACKET_PWM_SET - + * Set the pwm value / mode of a specified pwm. The packet's + * arguments specify the sensor, type and value. + * + * CPUCP_PACKET_FREQUENCY_SET - + * Set the frequency of a specified PLL. The packet's arguments specify + * the PLL and the desired frequency. The actual frequency in the device + * might differ from the requested frequency. + * + * CPUCP_PACKET_FREQUENCY_GET - + * Fetch the frequency of a specified PLL. The packet's arguments specify + * the PLL. + * + * CPUCP_PACKET_LED_SET - + * Set the state of a specified led. The packet's arguments + * specify the led and the desired state. + * + * CPUCP_PACKET_I2C_WR - + * Write 32-bit value to I2C device. The packet's arguments specify the + * I2C bus, address and value. + * + * CPUCP_PACKET_I2C_RD - + * Read 32-bit value from I2C device. The packet's arguments specify the + * I2C bus and address. + * + * CPUCP_PACKET_INFO_GET - + * Fetch information from the device as specified in the packet's + * structure. The host's driver passes the max size it allows the CpuCP to + * write to the structure, to prevent data corruption in case of + * mismatched driver/FW versions. + * + * CPUCP_PACKET_FLASH_PROGRAM_REMOVED - this packet was removed + * + * CPUCP_PACKET_UNMASK_RAZWI_IRQ - + * Unmask the given IRQ. The IRQ number is specified in the value field. + * The packet is sent after receiving an interrupt and printing its + * relevant information. + * + * CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY - + * Unmask the given IRQs. The IRQs numbers are specified in an array right + * after the cpucp_packet structure, where its first element is the array + * length. The packet is sent after a soft reset was done in order to + * handle any interrupts that were sent during the reset process. + * + * CPUCP_PACKET_TEST - + * Test packet for CpuCP connectivity. The CPU will put the fence value + * in the result field. + * + * CPUCP_PACKET_FREQUENCY_CURR_GET - + * Fetch the current frequency of a specified PLL. The packet's arguments + * specify the PLL. + * + * CPUCP_PACKET_MAX_POWER_GET - + * Fetch the maximal power of the device. + * + * CPUCP_PACKET_MAX_POWER_SET - + * Set the maximal power of the device. The packet's arguments specify + * the power. + * + * CPUCP_PACKET_EEPROM_DATA_GET - + * Get EEPROM data from the CpuCP kernel. The buffer is specified in the + * addr field. The CPU will put the returned data size in the result + * field. In addition, the host's driver passes the max size it allows the + * CpuCP to write to the structure, to prevent data corruption in case of + * mismatched driver/FW versions. + * + * CPUCP_PACKET_NIC_INFO_GET - + * Fetch information from the device regarding the NIC. the host's driver + * passes the max size it allows the CpuCP to write to the structure, to + * prevent data corruption in case of mismatched driver/FW versions. + * + * CPUCP_PACKET_TEMPERATURE_SET - + * Set the value of the offset property of a specified thermal sensor. + * The packet's arguments specify the desired sensor and the field to + * set. + * + * CPUCP_PACKET_VOLTAGE_SET - + * Trigger the reset_history property of a specified voltage sensor. + * The packet's arguments specify the desired sensor and the field to + * set. + * + * CPUCP_PACKET_CURRENT_SET - + * Trigger the reset_history property of a specified current sensor. + * The packet's arguments specify the desired sensor and the field to + * set. + * + * CPUCP_PACKET_PCIE_THROUGHPUT_GET - + * Get throughput of PCIe. + * The packet's arguments specify the transaction direction (TX/RX). + * The window measurement is 10[msec], and the return value is in KB/sec. + * + * CPUCP_PACKET_PCIE_REPLAY_CNT_GET + * Replay count measures number of "replay" events, which is basicly + * number of retries done by PCIe. + * + * CPUCP_PACKET_TOTAL_ENERGY_GET - + * Total Energy is measurement of energy from the time FW Linux + * is loaded. It is calculated by multiplying the average power + * by time (passed from armcp start). The units are in MilliJouls. + * + * CPUCP_PACKET_PLL_INFO_GET - + * Fetch frequencies of PLL from the required PLL IP. + * The packet's arguments specify the device PLL type + * Pll type is the PLL from device pll_index enum. + * The result is composed of 4 outputs, each is 16-bit + * frequency in MHz. + * + * CPUCP_PACKET_POWER_GET - + * Fetch the present power consumption of the device (Current * Voltage). + * + * CPUCP_PACKET_NIC_PFC_SET - + * Enable/Disable the NIC PFC feature. The packet's arguments specify the + * NIC port, relevant lanes to configure and one bit indication for + * enable/disable. + * + * CPUCP_PACKET_NIC_FAULT_GET - + * Fetch the current indication for local/remote faults from the NIC MAC. + * The result is 32-bit value of the relevant register. + * + * CPUCP_PACKET_NIC_LPBK_SET - + * Enable/Disable the MAC loopback feature. The packet's arguments specify + * the NIC port, relevant lanes to configure and one bit indication for + * enable/disable. + * + * CPUCP_PACKET_NIC_MAC_INIT - + * Configure the NIC MAC channels. The packet's arguments specify the + * NIC port and the speed. + * + * CPUCP_PACKET_MSI_INFO_SET - + * set the index number for each supported msi type going from + * host to device + * + * CPUCP_PACKET_NIC_XPCS91_REGS_GET - + * Fetch the un/correctable counters values from the NIC MAC. + * + * CPUCP_PACKET_NIC_STAT_REGS_GET - + * Fetch various NIC MAC counters from the NIC STAT. + * + * CPUCP_PACKET_NIC_STAT_REGS_CLR - + * Clear the various NIC MAC counters in the NIC STAT. + * + * CPUCP_PACKET_NIC_STAT_REGS_ALL_GET - + * Fetch all NIC MAC counters from the NIC STAT. + * + * CPUCP_PACKET_IS_IDLE_CHECK - + * Check if the device is IDLE in regard to the DMA/compute engines + * and QMANs. The f/w will return a bitmask where each bit represents + * a different engine or QMAN according to enum cpucp_idle_mask. + * The bit will be 1 if the engine is NOT idle. + * + * CPUCP_PACKET_HBM_REPLACED_ROWS_INFO_GET - + * Fetch all HBM replaced-rows and prending to be replaced rows data. + * + * CPUCP_PACKET_HBM_PENDING_ROWS_STATUS - + * Fetch status of HBM rows pending replacement and need a reboot to + * be replaced. + * + * CPUCP_PACKET_POWER_SET - + * Resets power history of device to 0 + * + * CPUCP_PACKET_ENGINE_CORE_ASID_SET - + * Packet to perform engine core ASID configuration + * + * CPUCP_PACKET_SEC_ATTEST_GET - + * Get the attestaion data that is collected during various stages of the + * boot sequence. the attestation data is also hashed with some unique + * number (nonce) provided by the host to prevent replay attacks. + * public key and certificate also provided as part of the FW response. + * + * CPUCP_PACKET_MONITOR_DUMP_GET - + * Get monitors registers dump from the CpuCP kernel. + * The CPU will put the registers dump in the a buffer allocated by the driver + * which address is passed via the CpuCp packet. In addition, the host's driver + * passes the max size it allows the CpuCP to write to the structure, to prevent + * data corruption in case of mismatched driver/FW versions. + * Obsolete. + * + * CPUCP_PACKET_GENERIC_PASSTHROUGH - + * Generic opcode for all firmware info that is only passed to host + * through the LKD, without getting parsed there. + * + * CPUCP_PACKET_ACTIVE_STATUS_SET - + * LKD sends FW indication whether device is free or in use, this indication is reported + * also to the BMC. + * + * CPUCP_PACKET_SOFT_RESET - + * Packet to perform soft-reset. + * + * CPUCP_PACKET_INTS_REGISTER - + * Packet to inform FW that queues have been established and LKD is ready to receive + * EQ events. + */ + +enum cpucp_packet_id { + CPUCP_PACKET_DISABLE_PCI_ACCESS = 1, /* internal */ + CPUCP_PACKET_ENABLE_PCI_ACCESS, /* internal */ + CPUCP_PACKET_TEMPERATURE_GET, /* sysfs */ + CPUCP_PACKET_VOLTAGE_GET, /* sysfs */ + CPUCP_PACKET_CURRENT_GET, /* sysfs */ + CPUCP_PACKET_FAN_SPEED_GET, /* sysfs */ + CPUCP_PACKET_PWM_GET, /* sysfs */ + CPUCP_PACKET_PWM_SET, /* sysfs */ + CPUCP_PACKET_FREQUENCY_SET, /* sysfs */ + CPUCP_PACKET_FREQUENCY_GET, /* sysfs */ + CPUCP_PACKET_LED_SET, /* debugfs */ + CPUCP_PACKET_I2C_WR, /* debugfs */ + CPUCP_PACKET_I2C_RD, /* debugfs */ + CPUCP_PACKET_INFO_GET, /* IOCTL */ + CPUCP_PACKET_FLASH_PROGRAM_REMOVED, + CPUCP_PACKET_UNMASK_RAZWI_IRQ, /* internal */ + CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY, /* internal */ + CPUCP_PACKET_TEST, /* internal */ + CPUCP_PACKET_FREQUENCY_CURR_GET, /* sysfs */ + CPUCP_PACKET_MAX_POWER_GET, /* sysfs */ + CPUCP_PACKET_MAX_POWER_SET, /* sysfs */ + CPUCP_PACKET_EEPROM_DATA_GET, /* sysfs */ + CPUCP_PACKET_NIC_INFO_GET, /* internal */ + CPUCP_PACKET_TEMPERATURE_SET, /* sysfs */ + CPUCP_PACKET_VOLTAGE_SET, /* sysfs */ + CPUCP_PACKET_CURRENT_SET, /* sysfs */ + CPUCP_PACKET_PCIE_THROUGHPUT_GET, /* internal */ + CPUCP_PACKET_PCIE_REPLAY_CNT_GET, /* internal */ + CPUCP_PACKET_TOTAL_ENERGY_GET, /* internal */ + CPUCP_PACKET_PLL_INFO_GET, /* internal */ + CPUCP_PACKET_NIC_STATUS, /* internal */ + CPUCP_PACKET_POWER_GET, /* internal */ + CPUCP_PACKET_NIC_PFC_SET, /* internal */ + CPUCP_PACKET_NIC_FAULT_GET, /* internal */ + CPUCP_PACKET_NIC_LPBK_SET, /* internal */ + CPUCP_PACKET_NIC_MAC_CFG, /* internal */ + CPUCP_PACKET_MSI_INFO_SET, /* internal */ + CPUCP_PACKET_NIC_XPCS91_REGS_GET, /* internal */ + CPUCP_PACKET_NIC_STAT_REGS_GET, /* internal */ + CPUCP_PACKET_NIC_STAT_REGS_CLR, /* internal */ + CPUCP_PACKET_NIC_STAT_REGS_ALL_GET, /* internal */ + CPUCP_PACKET_IS_IDLE_CHECK, /* internal */ + CPUCP_PACKET_HBM_REPLACED_ROWS_INFO_GET,/* internal */ + CPUCP_PACKET_HBM_PENDING_ROWS_STATUS, /* internal */ + CPUCP_PACKET_POWER_SET, /* internal */ + CPUCP_PACKET_RESERVED, /* not used */ + CPUCP_PACKET_ENGINE_CORE_ASID_SET, /* internal */ + CPUCP_PACKET_RESERVED2, /* not used */ + CPUCP_PACKET_SEC_ATTEST_GET, /* internal */ + CPUCP_PACKET_RESERVED3, /* not used */ + CPUCP_PACKET_RESERVED4, /* not used */ + CPUCP_PACKET_MONITOR_DUMP_GET, /* debugfs */ + CPUCP_PACKET_RESERVED5, /* not used */ + CPUCP_PACKET_RESERVED6, /* not used */ + CPUCP_PACKET_RESERVED7, /* not used */ + CPUCP_PACKET_GENERIC_PASSTHROUGH, /* IOCTL */ + CPUCP_PACKET_RESERVED8, /* not used */ + CPUCP_PACKET_ACTIVE_STATUS_SET, /* internal */ + CPUCP_PACKET_RESERVED9, /* not used */ + CPUCP_PACKET_RESERVED10, /* not used */ + CPUCP_PACKET_RESERVED11, /* not used */ + CPUCP_PACKET_RESERVED12, /* internal */ + CPUCP_PACKET_RESERVED13, /* internal */ + CPUCP_PACKET_SOFT_RESET, /* internal */ + CPUCP_PACKET_INTS_REGISTER, /* internal */ + CPUCP_PACKET_ID_MAX /* must be last */ +}; + +#define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5 + +#define CPUCP_PKT_CTL_RC_SHIFT 12 +#define CPUCP_PKT_CTL_RC_MASK 0x0000F000 + +#define CPUCP_PKT_CTL_OPCODE_SHIFT 16 +#define CPUCP_PKT_CTL_OPCODE_MASK 0x1FFF0000 + +#define CPUCP_PKT_RES_PLL_OUT0_SHIFT 0 +#define CPUCP_PKT_RES_PLL_OUT0_MASK 0x000000000000FFFFull +#define CPUCP_PKT_RES_PLL_OUT1_SHIFT 16 +#define CPUCP_PKT_RES_PLL_OUT1_MASK 0x00000000FFFF0000ull +#define CPUCP_PKT_RES_PLL_OUT2_SHIFT 32 +#define CPUCP_PKT_RES_PLL_OUT2_MASK 0x0000FFFF00000000ull +#define CPUCP_PKT_RES_PLL_OUT3_SHIFT 48 +#define CPUCP_PKT_RES_PLL_OUT3_MASK 0xFFFF000000000000ull + +#define CPUCP_PKT_RES_EEPROM_OUT0_SHIFT 0 +#define CPUCP_PKT_RES_EEPROM_OUT0_MASK 0x000000000000FFFFull +#define CPUCP_PKT_RES_EEPROM_OUT1_SHIFT 16 +#define CPUCP_PKT_RES_EEPROM_OUT1_MASK 0x0000000000FF0000ull + +#define CPUCP_PKT_VAL_PFC_IN1_SHIFT 0 +#define CPUCP_PKT_VAL_PFC_IN1_MASK 0x0000000000000001ull +#define CPUCP_PKT_VAL_PFC_IN2_SHIFT 1 +#define CPUCP_PKT_VAL_PFC_IN2_MASK 0x000000000000001Eull + +#define CPUCP_PKT_VAL_LPBK_IN1_SHIFT 0 +#define CPUCP_PKT_VAL_LPBK_IN1_MASK 0x0000000000000001ull +#define CPUCP_PKT_VAL_LPBK_IN2_SHIFT 1 +#define CPUCP_PKT_VAL_LPBK_IN2_MASK 0x000000000000001Eull + +#define CPUCP_PKT_VAL_MAC_CNT_IN1_SHIFT 0 +#define CPUCP_PKT_VAL_MAC_CNT_IN1_MASK 0x0000000000000001ull +#define CPUCP_PKT_VAL_MAC_CNT_IN2_SHIFT 1 +#define CPUCP_PKT_VAL_MAC_CNT_IN2_MASK 0x00000000FFFFFFFEull + +/* heartbeat status bits */ +#define CPUCP_PKT_HB_STATUS_EQ_FAULT_SHIFT 0 +#define CPUCP_PKT_HB_STATUS_EQ_FAULT_MASK 0x00000001 + +struct cpucp_packet { + union { + __le64 value; /* For SET packets */ + __le64 result; /* For GET packets */ + __le64 addr; /* For PQ */ + }; + + __le32 ctl; + + __le32 fence; /* Signal to host that message is completed */ + + union { + struct {/* For temperature/current/voltage/fan/pwm get/set */ + __le16 sensor_index; + __le16 type; + }; + + struct { /* For I2C read/write */ + __u8 i2c_bus; + __u8 i2c_addr; + __u8 i2c_reg; + /* + * In legacy implemetations, i2c_len was not present, + * was unused and just added as pad. + * So if i2c_len is 0, it is treated as legacy + * and r/w 1 Byte, else if i2c_len is specified, + * its treated as new multibyte r/w support. + */ + __u8 i2c_len; + }; + + struct {/* For PLL info fetch */ + __le16 pll_type; + /* TODO pll_reg is kept temporary before removal */ + __le16 pll_reg; + }; + + /* For any general request */ + __le32 index; + + /* For frequency get/set */ + __le32 pll_index; + + /* For led set */ + __le32 led_index; + + /* For get CpuCP info/EEPROM data/NIC info */ + __le32 data_max_size; + + /* + * For any general status bitmask. Shall be used whenever the + * result cannot be used to hold general purpose data. + */ + __le32 status_mask; + + /* random, used once number, for security packets */ + __le32 nonce; + }; + + union { + /* For NIC requests */ + __le32 port_index; + + /* For Generic packet sub index */ + __le32 pkt_subidx; + }; +}; + +struct cpucp_unmask_irq_arr_packet { + struct cpucp_packet cpucp_pkt; + __le32 length; + __le32 irqs[]; +}; + +struct cpucp_nic_status_packet { + struct cpucp_packet cpucp_pkt; + __le32 length; + __le32 data[]; +}; + +struct cpucp_array_data_packet { + struct cpucp_packet cpucp_pkt; + __le32 length; + __le32 data[]; +}; + +enum cpucp_led_index { + CPUCP_LED0_INDEX = 0, + CPUCP_LED1_INDEX, + CPUCP_LED2_INDEX, + CPUCP_LED_MAX_INDEX = CPUCP_LED2_INDEX +}; + +/* + * enum cpucp_packet_rc - Error return code + * @cpucp_packet_success -> in case of success. + * @cpucp_packet_invalid -> this is to support first generation platforms. + * @cpucp_packet_fault -> in case of processing error like failing to + * get device binding or semaphore etc. + * @cpucp_packet_invalid_pkt -> when cpucp packet is un-supported. + * @cpucp_packet_invalid_params -> when checking parameter like length of buffer + * or attribute value etc. + * @cpucp_packet_rc_max -> It indicates size of enum so should be at last. + */ +enum cpucp_packet_rc { + cpucp_packet_success, + cpucp_packet_invalid, + cpucp_packet_fault, + cpucp_packet_invalid_pkt, + cpucp_packet_invalid_params, + cpucp_packet_rc_max +}; + +/* + * cpucp_temp_type should adhere to hwmon_temp_attributes + * defined in Linux kernel hwmon.h file + */ +enum cpucp_temp_type { + cpucp_temp_input, + cpucp_temp_min = 4, + cpucp_temp_min_hyst, + cpucp_temp_max = 6, + cpucp_temp_max_hyst, + cpucp_temp_crit, + cpucp_temp_crit_hyst, + cpucp_temp_offset = 19, + cpucp_temp_lowest = 21, + cpucp_temp_highest = 22, + cpucp_temp_reset_history = 23, + cpucp_temp_warn = 24, + cpucp_temp_max_crit = 25, + cpucp_temp_max_warn = 26, +}; + +enum cpucp_in_attributes { + cpucp_in_input, + cpucp_in_min, + cpucp_in_max, + cpucp_in_lowest = 6, + cpucp_in_highest = 7, + cpucp_in_reset_history, + cpucp_in_intr_alarm_a, + cpucp_in_intr_alarm_b, +}; + +enum cpucp_curr_attributes { + cpucp_curr_input, + cpucp_curr_min, + cpucp_curr_max, + cpucp_curr_lowest = 6, + cpucp_curr_highest = 7, + cpucp_curr_reset_history +}; + +enum cpucp_fan_attributes { + cpucp_fan_input, + cpucp_fan_min = 2, + cpucp_fan_max +}; + +enum cpucp_pwm_attributes { + cpucp_pwm_input, + cpucp_pwm_enable +}; + +enum cpucp_pcie_throughput_attributes { + cpucp_pcie_throughput_tx, + cpucp_pcie_throughput_rx +}; + +/* TODO temporary kept before removal */ +enum cpucp_pll_reg_attributes { + cpucp_pll_nr_reg, + cpucp_pll_nf_reg, + cpucp_pll_od_reg, + cpucp_pll_div_factor_reg, + cpucp_pll_div_sel_reg +}; + +/* TODO temporary kept before removal */ +enum cpucp_pll_type_attributes { + cpucp_pll_cpu, + cpucp_pll_pci, +}; + +/* + * cpucp_power_type aligns with hwmon_power_attributes + * defined in Linux kernel hwmon.h file + */ +enum cpucp_power_type { + CPUCP_POWER_INPUT = 8, + CPUCP_POWER_INPUT_HIGHEST = 9, + CPUCP_POWER_RESET_INPUT_HISTORY = 11 +}; + +/* + * MSI type enumeration table for all ASICs and future SW versions. + * For future ASIC-LKD compatibility, we can only add new enumerations. + * at the end of the table (before CPUCP_NUM_OF_MSI_TYPES). + * Changing the order of entries or removing entries is not allowed. + */ +enum cpucp_msi_type { + CPUCP_EVENT_QUEUE_MSI_TYPE, + CPUCP_NIC_PORT1_MSI_TYPE, + CPUCP_NIC_PORT3_MSI_TYPE, + CPUCP_NIC_PORT5_MSI_TYPE, + CPUCP_NIC_PORT7_MSI_TYPE, + CPUCP_NIC_PORT9_MSI_TYPE, + CPUCP_EVENT_QUEUE_ERR_MSI_TYPE, + CPUCP_NUM_OF_MSI_TYPES +}; + +/* + * PLL enumeration table used for all ASICs and future SW versions. + * For future ASIC-LKD compatibility, we can only add new enumerations. + * at the end of the table. + * Changing the order of entries or removing entries is not allowed. + */ +enum pll_index { + CPU_PLL = 0, + PCI_PLL = 1, + NIC_PLL = 2, + DMA_PLL = 3, + MESH_PLL = 4, + MME_PLL = 5, + TPC_PLL = 6, + IF_PLL = 7, + SRAM_PLL = 8, + NS_PLL = 9, + HBM_PLL = 10, + MSS_PLL = 11, + DDR_PLL = 12, + VID_PLL = 13, + BANK_PLL = 14, + MMU_PLL = 15, + IC_PLL = 16, + MC_PLL = 17, + EMMC_PLL = 18, + D2D_PLL = 19, + CS_PLL = 20, + C2C_PLL = 21, + NCH_PLL = 22, + C2M_PLL = 23, + PLL_MAX +}; + +enum rl_index { + TPC_RL = 0, + MME_RL, + EDMA_RL, +}; + +enum pvt_index { + PVT_SW, + PVT_SE, + PVT_NW, + PVT_NE +}; + +/* Event Queue Packets */ + +struct eq_generic_event { + __le64 data[7]; +}; + +/* + * CpuCP info + */ + +#define CARD_NAME_MAX_LEN 16 +#define CPUCP_MAX_SENSORS 128 +#define CPUCP_MAX_NICS 128 +#define CPUCP_LANES_PER_NIC 4 +#define CPUCP_NIC_QSFP_EEPROM_MAX_LEN 1024 +#define CPUCP_MAX_NIC_LANES (CPUCP_MAX_NICS * CPUCP_LANES_PER_NIC) +#define CPUCP_NIC_MASK_ARR_LEN ((CPUCP_MAX_NICS + 63) / 64) +#define CPUCP_NIC_POLARITY_ARR_LEN ((CPUCP_MAX_NIC_LANES + 63) / 64) +#define CPUCP_HBM_ROW_REPLACE_MAX 32 + +struct cpucp_sensor { + __le32 type; + __le32 flags; +}; + +/** + * struct cpucp_card_types - ASIC card type. + * @cpucp_card_type_pci: PCI card. + * @cpucp_card_type_pmc: PCI Mezzanine Card. + */ +enum cpucp_card_types { + cpucp_card_type_pci, + cpucp_card_type_pmc +}; + +#define CPUCP_SEC_CONF_ENABLED_SHIFT 0 +#define CPUCP_SEC_CONF_ENABLED_MASK 0x00000001 + +#define CPUCP_SEC_CONF_FLASH_WP_SHIFT 1 +#define CPUCP_SEC_CONF_FLASH_WP_MASK 0x00000002 + +#define CPUCP_SEC_CONF_EEPROM_WP_SHIFT 2 +#define CPUCP_SEC_CONF_EEPROM_WP_MASK 0x00000004 + +/** + * struct cpucp_security_info - Security information. + * @config: configuration bit field + * @keys_num: number of stored keys + * @revoked_keys: revoked keys bit field + * @min_svn: minimal security version + */ +struct cpucp_security_info { + __u8 config; + __u8 keys_num; + __u8 revoked_keys; + __u8 min_svn; +}; + +/** + * struct cpucp_info - Info from CpuCP that is necessary to the host's driver + * @sensors: available sensors description. + * @kernel_version: CpuCP linux kernel version. + * @reserved: reserved field. + * @card_type: card configuration type. + * @card_location: in a server, each card has different connections topology + * depending on its location (relevant for PMC card type) + * @cpld_version: CPLD programmed F/W version. + * @infineon_version: Infineon main DC-DC version. + * @fuse_version: silicon production FUSE information. + * @thermal_version: thermald S/W version. + * @cpucp_version: CpuCP S/W version. + * @infineon_second_stage_version: Infineon 2nd stage DC-DC version. + * @dram_size: available DRAM size. + * @card_name: card name that will be displayed in HWMON subsystem on the host + * @tpc_binning_mask: TPC binning mask, 1 bit per TPC instance + * (0 = functional, 1 = binned) + * @decoder_binning_mask: Decoder binning mask, 1 bit per decoder instance + * (0 = functional, 1 = binned), maximum 1 per dcore + * @sram_binning: Categorize SRAM functionality + * (0 = fully functional, 1 = lower-half is not functional, + * 2 = upper-half is not functional) + * @sec_info: security information + * @pll_map: Bit map of supported PLLs for current ASIC version. + * @mme_binning_mask: MME binning mask, + * bits [0:6] <==> dcore0 mme fma + * bits [7:13] <==> dcore1 mme fma + * bits [14:20] <==> dcore0 mme ima + * bits [21:27] <==> dcore1 mme ima + * For each group, if the 6th bit is set then first 5 bits + * represent the col's idx [0-31], otherwise these bits are + * ignored, and col idx 32 is binned. 7th bit is don't care. + * @dram_binning_mask: DRAM binning mask, 1 bit per dram instance + * (0 = functional 1 = binned) + * @memory_repair_flag: eFuse flag indicating memory repair + * @edma_binning_mask: EDMA binning mask, 1 bit per EDMA instance + * (0 = functional 1 = binned) + * @xbar_binning_mask: Xbar binning mask, 1 bit per Xbar instance + * (0 = functional 1 = binned) + * @interposer_version: Interposer version programmed in eFuse + * @substrate_version: Substrate version programmed in eFuse + * @eq_health_check_supported: eq health check feature supported in FW. + * @fw_hbm_region_size: Size in bytes of FW reserved region in HBM. + * @fw_os_version: Firmware OS Version + */ +struct cpucp_info { + struct cpucp_sensor sensors[CPUCP_MAX_SENSORS]; + __u8 kernel_version[VERSION_MAX_LEN]; + __le32 reserved; + __le32 card_type; + __le32 card_location; + __le32 cpld_version; + __le32 infineon_version; + __u8 fuse_version[VERSION_MAX_LEN]; + __u8 thermal_version[VERSION_MAX_LEN]; + __u8 cpucp_version[VERSION_MAX_LEN]; + __le32 infineon_second_stage_version; + __le64 dram_size; + char card_name[CARD_NAME_MAX_LEN]; + __le64 tpc_binning_mask; + __le64 decoder_binning_mask; + __u8 sram_binning; + __u8 dram_binning_mask; + __u8 memory_repair_flag; + __u8 edma_binning_mask; + __u8 xbar_binning_mask; + __u8 interposer_version; + __u8 substrate_version; + __u8 eq_health_check_supported; + struct cpucp_security_info sec_info; + __le32 fw_hbm_region_size; + __u8 pll_map[PLL_MAP_LEN]; + __le64 mme_binning_mask; + __u8 fw_os_version[VERSION_MAX_LEN]; +}; + +struct cpucp_mac_addr { + __u8 mac_addr[ETH_ALEN]; +}; + +enum cpucp_serdes_type { + TYPE_1_SERDES_TYPE, + TYPE_2_SERDES_TYPE, + HLS1_SERDES_TYPE, + HLS1H_SERDES_TYPE, + HLS2_SERDES_TYPE, + HLS2_TYPE_1_SERDES_TYPE, + MAX_NUM_SERDES_TYPE, /* number of types */ + UNKNOWN_SERDES_TYPE = 0xFFFF /* serdes_type is u16 */ +}; + +struct cpucp_nic_info { + struct cpucp_mac_addr mac_addrs[CPUCP_MAX_NICS]; + __le64 link_mask[CPUCP_NIC_MASK_ARR_LEN]; + __le64 pol_tx_mask[CPUCP_NIC_POLARITY_ARR_LEN]; + __le64 pol_rx_mask[CPUCP_NIC_POLARITY_ARR_LEN]; + __le64 link_ext_mask[CPUCP_NIC_MASK_ARR_LEN]; + __u8 qsfp_eeprom[CPUCP_NIC_QSFP_EEPROM_MAX_LEN]; + __le64 auto_neg_mask[CPUCP_NIC_MASK_ARR_LEN]; + __le16 serdes_type; /* enum cpucp_serdes_type */ + __le16 tx_swap_map[CPUCP_MAX_NICS]; + __u8 reserved[6]; +}; + +#define PAGE_DISCARD_MAX 64 + +struct page_discard_info { + __u8 num_entries; + __u8 reserved[7]; + __le32 mmu_page_idx[PAGE_DISCARD_MAX]; +}; + +/* + * struct frac_val - fracture value represented by "integer.frac". + * @integer: the integer part of the fracture value; + * @frac: the fracture part of the fracture value. + */ +struct frac_val { + union { + struct { + __le16 integer; + __le16 frac; + }; + __le32 val; + }; +}; + +/* + * struct ser_val - the SER (symbol error rate) value is represented by "integer * 10 ^ -exp". + * @integer: the integer part of the SER value; + * @exp: the exponent part of the SER value. + */ +struct ser_val { + __le16 integer; + __le16 exp; +}; + +/* + * struct cpucp_nic_status - describes the status of a NIC port. + * @port: NIC port index. + * @bad_format_cnt: e.g. CRC. + * @responder_out_of_sequence_psn_cnt: e.g NAK. + * @high_ber_reinit_cnt: link reinit due to high BER. + * @correctable_err_cnt: e.g. bit-flip. + * @uncorrectable_err_cnt: e.g. MAC errors. + * @retraining_cnt: re-training counter. + * @up: is port up. + * @pcs_link: has PCS link. + * @phy_ready: is PHY ready. + * @auto_neg: is Autoneg enabled. + * @timeout_retransmission_cnt: timeout retransmission events. + * @high_ber_cnt: high ber events. + * @pre_fec_ser: pre FEC SER value. + * @post_fec_ser: post FEC SER value. + * @throughput: measured throughput. + * @latency: measured latency. + */ +struct cpucp_nic_status { + __le32 port; + __le32 bad_format_cnt; + __le32 responder_out_of_sequence_psn_cnt; + __le32 high_ber_reinit; + __le32 correctable_err_cnt; + __le32 uncorrectable_err_cnt; + __le32 retraining_cnt; + __u8 up; + __u8 pcs_link; + __u8 phy_ready; + __u8 auto_neg; + __le32 timeout_retransmission_cnt; + __le32 high_ber_cnt; + struct ser_val pre_fec_ser; + struct ser_val post_fec_ser; + struct frac_val bandwidth; + struct frac_val lat; +}; + +enum cpucp_hbm_row_replace_cause { + REPLACE_CAUSE_DOUBLE_ECC_ERR, + REPLACE_CAUSE_MULTI_SINGLE_ECC_ERR, +}; + +struct cpucp_hbm_row_info { + __u8 hbm_idx; + __u8 pc; + __u8 sid; + __u8 bank_idx; + __le16 row_addr; + __u8 replaced_row_cause; /* enum cpucp_hbm_row_replace_cause */ + __u8 pad; +}; + +struct cpucp_hbm_row_replaced_rows_info { + __le16 num_replaced_rows; + __u8 pad[6]; + struct cpucp_hbm_row_info replaced_rows[CPUCP_HBM_ROW_REPLACE_MAX]; +}; + +enum cpu_reset_status { + CPU_RST_STATUS_NA = 0, + CPU_RST_STATUS_SOFT_RST_DONE = 1, +}; + +#define SEC_PCR_DATA_BUF_SZ 256 +#define SEC_PCR_QUOTE_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ +#define SEC_SIGNATURE_BUF_SZ 255 /* (256 - 1) 1 byte used for size */ +#define SEC_PUB_DATA_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ +#define SEC_CERTIFICATE_BUF_SZ 2046 /* (2048 - 2) 2 bytes used for size */ + +/* + * struct cpucp_sec_attest_info - attestation report of the boot + * @pcr_data: raw values of the PCR registers + * @pcr_num_reg: number of PCR registers in the pcr_data array + * @pcr_reg_len: length of each PCR register in the pcr_data array (bytes) + * @nonce: number only used once. random number provided by host. this also + * passed to the quote command as a qualifying data. + * @pcr_quote_len: length of the attestation quote data (bytes) + * @pcr_quote: attestation report data structure + * @quote_sig_len: length of the attestation report signature (bytes) + * @quote_sig: signature structure of the attestation report + * @pub_data_len: length of the public data (bytes) + * @public_data: public key for the signed attestation + * (outPublic + name + qualifiedName) + * @certificate_len: length of the certificate (bytes) + * @certificate: certificate for the attestation signing key + */ +struct cpucp_sec_attest_info { + __u8 pcr_data[SEC_PCR_DATA_BUF_SZ]; + __u8 pcr_num_reg; + __u8 pcr_reg_len; + __le16 pad0; + __le32 nonce; + __le16 pcr_quote_len; + __u8 pcr_quote[SEC_PCR_QUOTE_BUF_SZ]; + __u8 quote_sig_len; + __u8 quote_sig[SEC_SIGNATURE_BUF_SZ]; + __le16 pub_data_len; + __u8 public_data[SEC_PUB_DATA_BUF_SZ]; + __le16 certificate_len; + __u8 certificate[SEC_CERTIFICATE_BUF_SZ]; +}; + +/* + * struct cpucp_dev_info_signed - device information signed by a secured device + * @info: device information structure as defined above + * @nonce: number only used once. random number provided by host. this number is + * hashed and signed along with the device information. + * @info_sig_len: length of the attestation signature (bytes) + * @info_sig: signature of the info + nonce data. + * @pub_data_len: length of the public data (bytes) + * @public_data: public key info signed info data + * (outPublic + name + qualifiedName) + * @certificate_len: length of the certificate (bytes) + * @certificate: certificate for the signing key + */ +struct cpucp_dev_info_signed { + struct cpucp_info info; /* assumed to be 64bit aligned */ + __le32 nonce; + __le32 pad0; + __u8 info_sig_len; + __u8 info_sig[SEC_SIGNATURE_BUF_SZ]; + __le16 pub_data_len; + __u8 public_data[SEC_PUB_DATA_BUF_SZ]; + __le16 certificate_len; + __u8 certificate[SEC_CERTIFICATE_BUF_SZ]; +}; + +#define DCORE_MON_REGS_SZ 512 +/* + * struct dcore_monitor_regs_data - DCORE monitor regs data. + * the structure follows sync manager block layout. Obsolete. + * @mon_pay_addrl: array of payload address low bits. + * @mon_pay_addrh: array of payload address high bits. + * @mon_pay_data: array of payload data. + * @mon_arm: array of monitor arm. + * @mon_status: array of monitor status. + */ +struct dcore_monitor_regs_data { + __le32 mon_pay_addrl[DCORE_MON_REGS_SZ]; + __le32 mon_pay_addrh[DCORE_MON_REGS_SZ]; + __le32 mon_pay_data[DCORE_MON_REGS_SZ]; + __le32 mon_arm[DCORE_MON_REGS_SZ]; + __le32 mon_status[DCORE_MON_REGS_SZ]; +}; + +/* contains SM data for each SYNC_MNGR (Obsolete) */ +struct cpucp_monitor_dump { + struct dcore_monitor_regs_data sync_mngr_w_s; + struct dcore_monitor_regs_data sync_mngr_e_s; + struct dcore_monitor_regs_data sync_mngr_w_n; + struct dcore_monitor_regs_data sync_mngr_e_n; +}; + +/* + * The Type of the generic request (and other input arguments) will be fetched from user by reading + * from "pkt_subidx" field in struct cpucp_packet. + * + * HL_PASSTHROUGHT_VERSIONS - Fetch all firmware versions. + */ +enum hl_passthrough_type { + HL_PASSTHROUGH_VERSIONS, +}; + +#endif /* CPUCP_IF_H */ diff --git a/include/linux/habanalabs/hl_boot_if.h b/include/linux/habanalabs/hl_boot_if.h new file mode 100644 index 000000000000..93366d5621fd --- /dev/null +++ b/include/linux/habanalabs/hl_boot_if.h @@ -0,0 +1,792 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2018-2020 HabanaLabs, Ltd. + * All Rights Reserved. + * + */ + +#ifndef HL_BOOT_IF_H +#define HL_BOOT_IF_H + +#define LKD_HARD_RESET_MAGIC 0xED7BD694 /* deprecated - do not use */ +#define HL_POWER9_HOST_MAGIC 0x1DA30009 + +#define BOOT_FIT_SRAM_OFFSET 0x200000 + +#define VERSION_MAX_LEN 128 + +enum cpu_boot_err { + CPU_BOOT_ERR_DRAM_INIT_FAIL = 0, + CPU_BOOT_ERR_FIT_CORRUPTED = 1, + CPU_BOOT_ERR_TS_INIT_FAIL = 2, + CPU_BOOT_ERR_DRAM_SKIPPED = 3, + CPU_BOOT_ERR_BMC_WAIT_SKIPPED = 4, + CPU_BOOT_ERR_NIC_DATA_NOT_RDY = 5, + CPU_BOOT_ERR_NIC_FW_FAIL = 6, + CPU_BOOT_ERR_SECURITY_NOT_RDY = 7, + CPU_BOOT_ERR_SECURITY_FAIL = 8, + CPU_BOOT_ERR_EFUSE_FAIL = 9, + CPU_BOOT_ERR_PRI_IMG_VER_FAIL = 10, + CPU_BOOT_ERR_SEC_IMG_VER_FAIL = 11, + CPU_BOOT_ERR_PLL_FAIL = 12, + CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL = 13, + CPU_BOOT_ERR_BOOT_FW_CRIT_ERR = 18, + CPU_BOOT_ERR_BINNING_FAIL = 19, + CPU_BOOT_ERR_TPM_FAIL = 20, + CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL = 21, + CPU_BOOT_ERR_EEPROM_FAIL = 22, + CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL = 23, + CPU_BOOT_ERR_ENABLED = 31, + CPU_BOOT_ERR_SCND_EN = 63, + CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */ +}; + +/* + * Mask for fatal failures + * This mask contains all possible fatal failures, and a dynamic code + * will clear the non-relevant ones. + */ +#define CPU_BOOT_ERR_FATAL_MASK \ + ((1 << CPU_BOOT_ERR_DRAM_INIT_FAIL) | \ + (1 << CPU_BOOT_ERR_PLL_FAIL) | \ + (1 << CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL) | \ + (1 << CPU_BOOT_ERR_BINNING_FAIL) | \ + (1 << CPU_BOOT_ERR_DRAM_SKIPPED) | \ + (1 << CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL) | \ + (1 << CPU_BOOT_ERR_EEPROM_FAIL)) + +/* + * CPU error bits in BOOT_ERROR registers + * + * CPU_BOOT_ERR0_DRAM_INIT_FAIL DRAM initialization failed. + * DRAM is not reliable to use. + * + * CPU_BOOT_ERR0_FIT_CORRUPTED FIT data integrity verification of the + * image provided by the host has failed. + * + * CPU_BOOT_ERR0_TS_INIT_FAIL Thermal Sensor initialization failed. + * Boot continues as usual, but keep in + * mind this is a warning. + * + * CPU_BOOT_ERR0_DRAM_SKIPPED DRAM initialization has been skipped. + * Skipping DRAM initialization has been + * requested (e.g. strap, command, etc.) + * and FW skipped the DRAM initialization. + * Host can initialize the DRAM. + * + * CPU_BOOT_ERR0_BMC_WAIT_SKIPPED Waiting for BMC data will be skipped. + * Meaning the BMC data might not be + * available until reset. + * + * CPU_BOOT_ERR0_NIC_DATA_NOT_RDY NIC data from BMC is not ready. + * BMC has not provided the NIC data yet. + * Once provided this bit will be cleared. + * + * CPU_BOOT_ERR0_NIC_FW_FAIL NIC FW loading failed. + * The NIC FW loading and initialization + * failed. This means NICs are not usable. + * + * CPU_BOOT_ERR0_SECURITY_NOT_RDY Chip security initialization has been + * started, but is not ready yet - chip + * cannot be accessed. + * + * CPU_BOOT_ERR0_SECURITY_FAIL Security related tasks have failed. + * The tasks are security init (root of + * trust), boot authentication (chain of + * trust), data packets authentication. + * + * CPU_BOOT_ERR0_EFUSE_FAIL Reading from eFuse failed. + * The PCI device ID might be wrong. + * + * CPU_BOOT_ERR0_PRI_IMG_VER_FAIL Verification of primary image failed. + * It mean that ppboot checksum + * verification for the preboot primary + * image has failed to match expected + * checksum. Trying to program image again + * might solve this. + * + * CPU_BOOT_ERR0_SEC_IMG_VER_FAIL Verification of secondary image failed. + * It mean that ppboot checksum + * verification for the preboot secondary + * image has failed to match expected + * checksum. Trying to program image again + * might solve this. + * + * CPU_BOOT_ERR0_PLL_FAIL PLL settings failed, meaning that one + * of the PLLs remains in REF_CLK + * + * CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL Device is unusable and customer support + * should be contacted. + * + * CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR Critical error was detected during + * the execution of ppboot or preboot. + * for example: stack overflow. + * + * CPU_BOOT_ERR0_BINNING_FAIL Binning settings failed, meaning + * malfunctioning components might still be + * in use. + * + * CPU_BOOT_ERR0_TPM_FAIL TPM verification flow failed. + * + * CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL Failed to set threshold for tmperature + * sensor. + * + * CPU_BOOT_ERR_EEPROM_FAIL Failed reading EEPROM data. Defaults + * are used. + * + * CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL Failed scrubbing the Engines/ARCFarm + * memories. Boot disabled until reset. + * + * CPU_BOOT_ERR0_ENABLED Error registers enabled. + * This is a main indication that the + * running FW populates the error + * registers. Meaning the error bits are + * not garbage, but actual error statuses. + */ +#define CPU_BOOT_ERR0_DRAM_INIT_FAIL (1 << CPU_BOOT_ERR_DRAM_INIT_FAIL) +#define CPU_BOOT_ERR0_FIT_CORRUPTED (1 << CPU_BOOT_ERR_FIT_CORRUPTED) +#define CPU_BOOT_ERR0_TS_INIT_FAIL (1 << CPU_BOOT_ERR_TS_INIT_FAIL) +#define CPU_BOOT_ERR0_DRAM_SKIPPED (1 << CPU_BOOT_ERR_DRAM_SKIPPED) +#define CPU_BOOT_ERR0_BMC_WAIT_SKIPPED (1 << CPU_BOOT_ERR_BMC_WAIT_SKIPPED) +#define CPU_BOOT_ERR0_NIC_DATA_NOT_RDY (1 << CPU_BOOT_ERR_NIC_DATA_NOT_RDY) +#define CPU_BOOT_ERR0_NIC_FW_FAIL (1 << CPU_BOOT_ERR_NIC_FW_FAIL) +#define CPU_BOOT_ERR0_SECURITY_NOT_RDY (1 << CPU_BOOT_ERR_SECURITY_NOT_RDY) +#define CPU_BOOT_ERR0_SECURITY_FAIL (1 << CPU_BOOT_ERR_SECURITY_FAIL) +#define CPU_BOOT_ERR0_EFUSE_FAIL (1 << CPU_BOOT_ERR_EFUSE_FAIL) +#define CPU_BOOT_ERR0_PRI_IMG_VER_FAIL (1 << CPU_BOOT_ERR_PRI_IMG_VER_FAIL) +#define CPU_BOOT_ERR0_SEC_IMG_VER_FAIL (1 << CPU_BOOT_ERR_SEC_IMG_VER_FAIL) +#define CPU_BOOT_ERR0_PLL_FAIL (1 << CPU_BOOT_ERR_PLL_FAIL) +#define CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL (1 << CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL) +#define CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR (1 << CPU_BOOT_ERR_BOOT_FW_CRIT_ERR) +#define CPU_BOOT_ERR0_BINNING_FAIL (1 << CPU_BOOT_ERR_BINNING_FAIL) +#define CPU_BOOT_ERR0_TPM_FAIL (1 << CPU_BOOT_ERR_TPM_FAIL) +#define CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL (1 << CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL) +#define CPU_BOOT_ERR0_EEPROM_FAIL (1 << CPU_BOOT_ERR_EEPROM_FAIL) +#define CPU_BOOT_ERR0_ENG_ARC_MEM_SCRUB_FAIL (1 << CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL) +#define CPU_BOOT_ERR0_ENABLED (1 << CPU_BOOT_ERR_ENABLED) +#define CPU_BOOT_ERR1_ENABLED (1 << CPU_BOOT_ERR_ENABLED) + +enum cpu_boot_dev_sts { + CPU_BOOT_DEV_STS_SECURITY_EN = 0, + CPU_BOOT_DEV_STS_DEBUG_EN = 1, + CPU_BOOT_DEV_STS_WATCHDOG_EN = 2, + CPU_BOOT_DEV_STS_DRAM_INIT_EN = 3, + CPU_BOOT_DEV_STS_BMC_WAIT_EN = 4, + CPU_BOOT_DEV_STS_E2E_CRED_EN = 5, + CPU_BOOT_DEV_STS_HBM_CRED_EN = 6, + CPU_BOOT_DEV_STS_RL_EN = 7, + CPU_BOOT_DEV_STS_SRAM_SCR_EN = 8, + CPU_BOOT_DEV_STS_DRAM_SCR_EN = 9, + CPU_BOOT_DEV_STS_FW_HARD_RST_EN = 10, + CPU_BOOT_DEV_STS_PLL_INFO_EN = 11, + CPU_BOOT_DEV_STS_SP_SRAM_EN = 12, + CPU_BOOT_DEV_STS_CLK_GATE_EN = 13, + CPU_BOOT_DEV_STS_HBM_ECC_EN = 14, + CPU_BOOT_DEV_STS_PKT_PI_ACK_EN = 15, + CPU_BOOT_DEV_STS_FW_LD_COM_EN = 16, + CPU_BOOT_DEV_STS_FW_IATU_CONF_EN = 17, + CPU_BOOT_DEV_STS_FW_NIC_MAC_EN = 18, + CPU_BOOT_DEV_STS_DYN_PLL_EN = 19, + CPU_BOOT_DEV_STS_GIC_PRIVILEGED_EN = 20, + CPU_BOOT_DEV_STS_EQ_INDEX_EN = 21, + CPU_BOOT_DEV_STS_MULTI_IRQ_POLL_EN = 22, + CPU_BOOT_DEV_STS_FW_NIC_STAT_XPCS91_EN = 23, + CPU_BOOT_DEV_STS_FW_NIC_STAT_EXT_EN = 24, + CPU_BOOT_DEV_STS_IS_IDLE_CHECK_EN = 25, + CPU_BOOT_DEV_STS_MAP_HWMON_EN = 26, + CPU_BOOT_DEV_STS_ENABLED = 31, + CPU_BOOT_DEV_STS_SCND_EN = 63, + CPU_BOOT_DEV_STS_LAST = 64 /* we have 2 registers of 32 bits */ +}; + +/* + * BOOT DEVICE STATUS bits in BOOT_DEVICE_STS registers + * + * CPU_BOOT_DEV_STS0_SECURITY_EN Security is Enabled. + * This is an indication for security + * enabled in FW, which means that + * all conditions for security are met: + * device is indicated as security enabled, + * registers are protected, and device + * uses keys for image verification. + * Initialized in: preboot + * + * CPU_BOOT_DEV_STS0_DEBUG_EN Debug is enabled. + * Enabled when JTAG or DEBUG is enabled + * in FW. + * Initialized in: preboot + * + * CPU_BOOT_DEV_STS0_WATCHDOG_EN Watchdog is enabled. + * Watchdog is enabled in FW. + * Initialized in: preboot + * + * CPU_BOOT_DEV_STS0_DRAM_INIT_EN DRAM initialization is enabled. + * DRAM initialization has been done in FW. + * Initialized in: u-boot + * + * CPU_BOOT_DEV_STS0_BMC_WAIT_EN Waiting for BMC data enabled. + * If set, it means that during boot, + * FW waited for BMC data. + * Initialized in: u-boot + * + * CPU_BOOT_DEV_STS0_E2E_CRED_EN E2E credits initialized. + * FW initialized E2E credits. + * Initialized in: u-boot + * + * CPU_BOOT_DEV_STS0_HBM_CRED_EN HBM credits initialized. + * FW initialized HBM credits. + * Initialized in: u-boot + * + * CPU_BOOT_DEV_STS0_RL_EN Rate limiter initialized. + * FW initialized rate limiter. + * Initialized in: u-boot + * + * CPU_BOOT_DEV_STS0_SRAM_SCR_EN SRAM scrambler enabled. + * FW initialized SRAM scrambler. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_DRAM_SCR_EN DRAM scrambler enabled. + * FW initialized DRAM scrambler. + * Initialized in: u-boot + * + * CPU_BOOT_DEV_STS0_FW_HARD_RST_EN FW hard reset procedure is enabled. + * FW has the hard reset procedure + * implemented. This means that FW will + * perform hard reset procedure on + * receiving the halt-machine event. + * Initialized in: preboot, u-boot, linux + * + * CPU_BOOT_DEV_STS0_PLL_INFO_EN FW retrieval of PLL info is enabled. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_SP_SRAM_EN SP SRAM is initialized and available + * for use. + * Initialized in: preboot + * + * CPU_BOOT_DEV_STS0_CLK_GATE_EN Clock Gating enabled. + * FW initialized Clock Gating. + * Initialized in: preboot + * + * CPU_BOOT_DEV_STS0_HBM_ECC_EN HBM ECC handling Enabled. + * FW handles HBM ECC indications. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN Packets ack value used in the armcpd + * is set to the PI counter. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_FW_LD_COM_EN Flexible FW loading communication + * protocol is enabled. + * Initialized in: preboot + * + * CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN FW iATU configuration is enabled. + * This bit if set, means the iATU has been + * configured and is ready for use. + * Initialized in: ppboot + * + * CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN NIC MAC channels init is done by FW and + * any access to them is done via the FW. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_DYN_PLL_EN Dynamic PLL configuration is enabled. + * FW sends to host a bitmap of supported + * PLLs. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN GIC access permission only from + * previleged entity. FW sets this status + * bit for host. If this bit is set then + * GIC can not be accessed from host. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_EQ_INDEX_EN Event Queue (EQ) index is a running + * index for each new event sent to host. + * This is used as a method in host to + * identify that the waiting event in + * queue is actually a new event which + * was not served before. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN Use multiple scratchpad interfaces to + * prevent IRQs overriding each other. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN + * NIC STAT and XPCS91 access is restricted + * and is done via FW only. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN + * NIC STAT get all is supported. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN + * F/W checks if the device is idle by reading defined set + * of registers. It returns a bitmask of all the engines, + * where a bit is set if the engine is not idle. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_MAP_HWMON_EN + * If set, means f/w supports proprietary + * HWMON enum mapping to cpucp enums. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_ENABLED Device status register enabled. + * This is a main indication that the + * running FW populates the device status + * register. Meaning the device status + * bits are not garbage, but actual + * statuses. + * Initialized in: preboot + * + */ +#define CPU_BOOT_DEV_STS0_SECURITY_EN (1 << CPU_BOOT_DEV_STS_SECURITY_EN) +#define CPU_BOOT_DEV_STS0_DEBUG_EN (1 << CPU_BOOT_DEV_STS_DEBUG_EN) +#define CPU_BOOT_DEV_STS0_WATCHDOG_EN (1 << CPU_BOOT_DEV_STS_WATCHDOG_EN) +#define CPU_BOOT_DEV_STS0_DRAM_INIT_EN (1 << CPU_BOOT_DEV_STS_DRAM_INIT_EN) +#define CPU_BOOT_DEV_STS0_BMC_WAIT_EN (1 << CPU_BOOT_DEV_STS_BMC_WAIT_EN) +#define CPU_BOOT_DEV_STS0_E2E_CRED_EN (1 << CPU_BOOT_DEV_STS_E2E_CRED_EN) +#define CPU_BOOT_DEV_STS0_HBM_CRED_EN (1 << CPU_BOOT_DEV_STS_HBM_CRED_EN) +#define CPU_BOOT_DEV_STS0_RL_EN (1 << CPU_BOOT_DEV_STS_RL_EN) +#define CPU_BOOT_DEV_STS0_SRAM_SCR_EN (1 << CPU_BOOT_DEV_STS_SRAM_SCR_EN) +#define CPU_BOOT_DEV_STS0_DRAM_SCR_EN (1 << CPU_BOOT_DEV_STS_DRAM_SCR_EN) +#define CPU_BOOT_DEV_STS0_FW_HARD_RST_EN (1 << CPU_BOOT_DEV_STS_FW_HARD_RST_EN) +#define CPU_BOOT_DEV_STS0_PLL_INFO_EN (1 << CPU_BOOT_DEV_STS_PLL_INFO_EN) +#define CPU_BOOT_DEV_STS0_SP_SRAM_EN (1 << CPU_BOOT_DEV_STS_SP_SRAM_EN) +#define CPU_BOOT_DEV_STS0_CLK_GATE_EN (1 << CPU_BOOT_DEV_STS_CLK_GATE_EN) +#define CPU_BOOT_DEV_STS0_HBM_ECC_EN (1 << CPU_BOOT_DEV_STS_HBM_ECC_EN) +#define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN (1 << CPU_BOOT_DEV_STS_PKT_PI_ACK_EN) +#define CPU_BOOT_DEV_STS0_FW_LD_COM_EN (1 << CPU_BOOT_DEV_STS_FW_LD_COM_EN) +#define CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN (1 << CPU_BOOT_DEV_STS_FW_IATU_CONF_EN) +#define CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN (1 << CPU_BOOT_DEV_STS_FW_NIC_MAC_EN) +#define CPU_BOOT_DEV_STS0_DYN_PLL_EN (1 << CPU_BOOT_DEV_STS_DYN_PLL_EN) +#define CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN (1 << CPU_BOOT_DEV_STS_GIC_PRIVILEGED_EN) +#define CPU_BOOT_DEV_STS0_EQ_INDEX_EN (1 << CPU_BOOT_DEV_STS_EQ_INDEX_EN) +#define CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN (1 << CPU_BOOT_DEV_STS_MULTI_IRQ_POLL_EN) +#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN (1 << CPU_BOOT_DEV_STS_FW_NIC_STAT_XPCS91_EN) +#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN (1 << CPU_BOOT_DEV_STS_FW_NIC_STAT_EXT_EN) +#define CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN (1 << CPU_BOOT_DEV_STS_IS_IDLE_CHECK_EN) +#define CPU_BOOT_DEV_STS0_MAP_HWMON_EN (1 << CPU_BOOT_DEV_STS_MAP_HWMON_EN) +#define CPU_BOOT_DEV_STS0_ENABLED (1 << CPU_BOOT_DEV_STS_ENABLED) +#define CPU_BOOT_DEV_STS1_ENABLED (1 << CPU_BOOT_DEV_STS_ENABLED) + +enum cpu_boot_status { + CPU_BOOT_STATUS_NA = 0, /* Default value after reset of chip */ + CPU_BOOT_STATUS_IN_WFE = 1, + CPU_BOOT_STATUS_DRAM_RDY = 2, + CPU_BOOT_STATUS_SRAM_AVAIL = 3, + CPU_BOOT_STATUS_IN_BTL = 4, /* BTL is H/W FSM */ + CPU_BOOT_STATUS_IN_PREBOOT = 5, + CPU_BOOT_STATUS_IN_SPL, /* deprecated - not reported */ + CPU_BOOT_STATUS_IN_UBOOT = 7, + CPU_BOOT_STATUS_DRAM_INIT_FAIL, /* deprecated - will be removed */ + CPU_BOOT_STATUS_FIT_CORRUPTED, /* deprecated - will be removed */ + /* U-Boot console prompt activated, commands are not processed */ + CPU_BOOT_STATUS_UBOOT_NOT_READY = 10, + /* Finished NICs init, reported after DRAM and NICs */ + CPU_BOOT_STATUS_NIC_FW_RDY = 11, + CPU_BOOT_STATUS_TS_INIT_FAIL, /* deprecated - will be removed */ + CPU_BOOT_STATUS_DRAM_SKIPPED, /* deprecated - will be removed */ + CPU_BOOT_STATUS_BMC_WAITING_SKIPPED, /* deprecated - will be removed */ + /* Last boot loader progress status, ready to receive commands */ + CPU_BOOT_STATUS_READY_TO_BOOT = 15, + /* Internal Boot finished, ready for boot-fit */ + CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT = 16, + /* Internal Security has been initialized, device can be accessed */ + CPU_BOOT_STATUS_SECURITY_READY = 17, + /* FW component is preparing to shutdown and communication with host is not available */ + CPU_BOOT_STATUS_FW_SHUTDOWN_PREP = 18, +}; + +enum kmd_msg { + KMD_MSG_NA = 0, + KMD_MSG_GOTO_WFE, + KMD_MSG_FIT_RDY, + KMD_MSG_SKIP_BMC, + RESERVED, + KMD_MSG_RST_DEV, + KMD_MSG_LAST +}; + +enum cpu_msg_status { + CPU_MSG_CLR = 0, + CPU_MSG_OK, + CPU_MSG_ERR, +}; + +/* communication registers mapping - consider ABI when changing */ +struct cpu_dyn_regs { + __le32 cpu_pq_base_addr_low; + __le32 cpu_pq_base_addr_high; + __le32 cpu_pq_length; + __le32 cpu_pq_init_status; + __le32 cpu_eq_base_addr_low; + __le32 cpu_eq_base_addr_high; + __le32 cpu_eq_length; + __le32 cpu_eq_ci; + __le32 cpu_cq_base_addr_low; + __le32 cpu_cq_base_addr_high; + __le32 cpu_cq_length; + __le32 cpu_pf_pq_pi; + __le32 cpu_boot_dev_sts0; + __le32 cpu_boot_dev_sts1; + __le32 cpu_boot_err0; + __le32 cpu_boot_err1; + __le32 cpu_boot_status; + __le32 fw_upd_sts; + __le32 fw_upd_cmd; + __le32 fw_upd_pending_sts; + __le32 fuse_ver_offset; + __le32 preboot_ver_offset; + __le32 uboot_ver_offset; + __le32 hw_state; + __le32 kmd_msg_to_cpu; + __le32 cpu_cmd_status_to_host; + __le32 gic_host_pi_upd_irq; + __le32 gic_tpc_qm_irq_ctrl; + __le32 gic_mme_qm_irq_ctrl; + __le32 gic_dma_qm_irq_ctrl; + __le32 gic_nic_qm_irq_ctrl; + __le32 gic_dma_core_irq_ctrl; + __le32 gic_host_halt_irq; + __le32 gic_host_ints_irq; + __le32 gic_host_soft_rst_irq; + __le32 gic_rot_qm_irq_ctrl; + __le32 cpu_rst_status; + __le32 eng_arc_irq_ctrl; + __le32 reserved1[20]; /* reserve for future use */ +}; + +/* TODO: remove the desc magic after the code is updated to use message */ +/* HCDM - Habana Communications Descriptor Magic */ +#define HL_COMMS_DESC_MAGIC 0x4843444D +#define HL_COMMS_DESC_VER 3 + +/* HCMv - Habana Communications Message + header version */ +#define HL_COMMS_MSG_MAGIC_VALUE 0x48434D00 +#define HL_COMMS_MSG_MAGIC_MASK 0xFFFFFF00 +#define HL_COMMS_MSG_MAGIC_VER_MASK 0xFF + +#define HL_COMMS_MSG_MAGIC_VER(ver) (HL_COMMS_MSG_MAGIC_VALUE | \ + ((ver) & HL_COMMS_MSG_MAGIC_VER_MASK)) +#define HL_COMMS_MSG_MAGIC_V0 HL_COMMS_DESC_MAGIC +#define HL_COMMS_MSG_MAGIC_V1 HL_COMMS_MSG_MAGIC_VER(1) +#define HL_COMMS_MSG_MAGIC_V2 HL_COMMS_MSG_MAGIC_VER(2) +#define HL_COMMS_MSG_MAGIC_V3 HL_COMMS_MSG_MAGIC_VER(3) + +#define HL_COMMS_MSG_MAGIC HL_COMMS_MSG_MAGIC_V3 + +#define HL_COMMS_MSG_MAGIC_VALIDATE_MAGIC(magic) \ + (((magic) & HL_COMMS_MSG_MAGIC_MASK) == \ + HL_COMMS_MSG_MAGIC_VALUE) + +#define HL_COMMS_MSG_MAGIC_VALIDATE_VERSION(magic, ver) \ + (((magic) & HL_COMMS_MSG_MAGIC_VER_MASK) >= \ + ((ver) & HL_COMMS_MSG_MAGIC_VER_MASK)) + +#define HL_COMMS_MSG_MAGIC_VALIDATE(magic, ver) \ + (HL_COMMS_MSG_MAGIC_VALIDATE_MAGIC((magic)) && \ + HL_COMMS_MSG_MAGIC_VALIDATE_VERSION((magic), (ver))) + +enum comms_msg_type { + HL_COMMS_DESC_TYPE = 0, + HL_COMMS_RESET_CAUSE_TYPE = 1, + HL_COMMS_FW_CFG_SKIP_TYPE = 2, + HL_COMMS_BINNING_CONF_TYPE = 3, +}; + +/* + * Binning information shared between LKD and FW + * @tpc_mask_l - TPC binning information lower 64 bit + * @dec_mask - Decoder binning information + * @dram_mask - DRAM binning information + * @edma_mask - EDMA binning information + * @mme_mask_l - MME binning information lower 32 + * @mme_mask_h - MME binning information upper 32 + * @rot_mask - Rotator binning information + * @xbar_mask - xBAR binning information + * @reserved - reserved field for future binning info w/o ABI change + * @tpc_mask_h - TPC binning information upper 64 bit + * @nic_mask - NIC binning information + */ +struct lkd_fw_binning_info { + __le64 tpc_mask_l; + __le32 dec_mask; + __le32 dram_mask; + __le32 edma_mask; + __le32 mme_mask_l; + __le32 mme_mask_h; + __le32 rot_mask; + __le32 xbar_mask; + __le32 reserved0; + __le64 tpc_mask_h; + __le64 nic_mask; + __le32 reserved1[8]; +}; + +/* TODO: remove this struct after the code is updated to use message */ +/* this is the comms descriptor header - meta data */ +struct comms_desc_header { + __le32 magic; /* magic for validation */ + __le32 crc32; /* CRC32 of the descriptor w/o header */ + __le16 size; /* size of the descriptor w/o header */ + __u8 version; /* descriptor version */ + __u8 reserved[5]; /* pad to 64 bit */ +}; + +/* this is the comms message header - meta data */ +struct comms_msg_header { + __le32 magic; /* magic for validation */ + __le32 crc32; /* CRC32 of the message w/o header */ + __le16 size; /* size of the message w/o header */ + __u8 version; /* message payload version */ + __u8 type; /* message type */ + __u8 reserved[4]; /* pad to 64 bit */ +}; + +enum lkd_fw_ascii_msg_lvls { + LKD_FW_ASCII_MSG_ERR = 0, + LKD_FW_ASCII_MSG_WRN = 1, + LKD_FW_ASCII_MSG_INF = 2, + LKD_FW_ASCII_MSG_DBG = 3, +}; + +#define LKD_FW_ASCII_MSG_MAX_LEN 128 +#define LKD_FW_ASCII_MSG_MAX 4 /* consider ABI when changing */ + +struct lkd_fw_ascii_msg { + __u8 valid; + __u8 msg_lvl; + __u8 reserved[6]; + char msg[LKD_FW_ASCII_MSG_MAX_LEN]; +}; + +/* this is the main FW descriptor - consider ABI when changing */ +struct lkd_fw_comms_desc { + struct comms_desc_header header; + struct cpu_dyn_regs cpu_dyn_regs; + char fuse_ver[VERSION_MAX_LEN]; + char cur_fw_ver[VERSION_MAX_LEN]; + /* can be used for 1 more version w/o ABI change */ + char reserved0[VERSION_MAX_LEN]; + __le64 img_addr; /* address for next FW component load */ + struct lkd_fw_binning_info binning_info; + struct lkd_fw_ascii_msg ascii_msg[LKD_FW_ASCII_MSG_MAX]; + __le32 rsvd_mem_size_mb; /* reserved memory size [MB] for FW/SVE */ + char reserved1[4]; +}; + +enum comms_reset_cause { + HL_RESET_CAUSE_UNKNOWN = 0, + HL_RESET_CAUSE_HEARTBEAT = 1, + HL_RESET_CAUSE_TDR = 2, +}; + +/* TODO: remove define after struct name is aligned on all projects */ +#define lkd_msg_comms lkd_fw_comms_msg + +/* this is the comms message descriptor */ +struct lkd_fw_comms_msg { + struct comms_msg_header header; + /* union for future expantions of new messages */ + union { + struct { + struct cpu_dyn_regs cpu_dyn_regs; + char fuse_ver[VERSION_MAX_LEN]; + char cur_fw_ver[VERSION_MAX_LEN]; + /* can be used for 1 more version w/o ABI change */ + char reserved0[VERSION_MAX_LEN]; + /* address for next FW component load */ + __le64 img_addr; + struct lkd_fw_binning_info binning_info; + struct lkd_fw_ascii_msg ascii_msg[LKD_FW_ASCII_MSG_MAX]; + /* reserved memory size [MB] for FW/SVE */ + __le32 rsvd_mem_size_mb; + char reserved1[4]; + }; + struct { + __u8 reset_cause; + }; + struct { + __u8 fw_cfg_skip; /* 1 - skip, 0 - don't skip */ + }; + struct lkd_fw_binning_info binning_conf; + }; +}; + +/* + * LKD commands: + * + * COMMS_NOOP Used to clear the command register and no actual + * command is send. + * + * COMMS_CLR_STS Clear status command - FW should clear the + * status register. Used for synchronization + * between the commands as part of the race free + * protocol. + * + * COMMS_RST_STATE Reset the current communication state which is + * kept by FW for proper responses. + * Should be used in the beginning of the + * communication cycle to clean any leftovers from + * previous communication attempts. + * + * COMMS_PREP_DESC Prepare descriptor for setting up the + * communication and other dynamic data: + * struct lkd_fw_comms_desc. + * This command has a parameter stating the next FW + * component size, so the FW can actually prepare a + * space for it and in the status response provide + * the descriptor offset. The Offset of the next FW + * data component is a part of the descriptor + * structure. + * + * COMMS_DATA_RDY The FW data has been uploaded and is ready for + * validation. + * + * COMMS_EXEC Execute the next FW component. + * + * COMMS_RST_DEV Reset the device. + * + * COMMS_GOTO_WFE Execute WFE command. Allowed only on non-secure + * devices. + * + * COMMS_SKIP_BMC Perform actions required for BMC-less servers. + * Do not wait for BMC response. + * + * COMMS_PREP_DESC_ELBI Same as COMMS_PREP_DESC only that the memory + * space is allocated in a ELBI access only + * address range. + * + */ +enum comms_cmd { + COMMS_NOOP = 0, + COMMS_CLR_STS = 1, + COMMS_RST_STATE = 2, + COMMS_PREP_DESC = 3, + COMMS_DATA_RDY = 4, + COMMS_EXEC = 5, + COMMS_RST_DEV = 6, + COMMS_GOTO_WFE = 7, + COMMS_SKIP_BMC = 8, + COMMS_PREP_DESC_ELBI = 10, + COMMS_INVLD_LAST +}; + +#define COMMS_COMMAND_SIZE_SHIFT 0 +#define COMMS_COMMAND_SIZE_MASK 0x1FFFFFF +#define COMMS_COMMAND_CMD_SHIFT 27 +#define COMMS_COMMAND_CMD_MASK 0xF8000000 + +/* + * LKD command to FW register structure + * @size - FW component size + * @cmd - command from enum comms_cmd + */ +struct comms_command { + union { /* bit fields are only for FW use */ + struct { + u32 size :25; /* 32MB max. */ + u32 reserved :2; + enum comms_cmd cmd :5; /* 32 commands */ + }; + __le32 val; + }; +}; + +/* + * FW status + * + * COMMS_STS_NOOP Used to clear the status register and no actual + * status is provided. + * + * COMMS_STS_ACK Command has been received and recognized. + * + * COMMS_STS_OK Command execution has finished successfully. + * + * COMMS_STS_ERR Command execution was unsuccessful and resulted + * in error. + * + * COMMS_STS_VALID_ERR FW validation has failed. + * + * COMMS_STS_TIMEOUT_ERR Command execution has timed out. + */ +enum comms_sts { + COMMS_STS_NOOP = 0, + COMMS_STS_ACK = 1, + COMMS_STS_OK = 2, + COMMS_STS_ERR = 3, + COMMS_STS_VALID_ERR = 4, + COMMS_STS_TIMEOUT_ERR = 5, + COMMS_STS_INVLD_LAST +}; + +/* RAM types for FW components loading - defines the base address */ +enum comms_ram_types { + COMMS_SRAM = 0, + COMMS_DRAM = 1, +}; + +#define COMMS_STATUS_OFFSET_SHIFT 0 +#define COMMS_STATUS_OFFSET_MASK 0x03FFFFFF +#define COMMS_STATUS_OFFSET_ALIGN_SHIFT 2 +#define COMMS_STATUS_RAM_TYPE_SHIFT 26 +#define COMMS_STATUS_RAM_TYPE_MASK 0x0C000000 +#define COMMS_STATUS_STATUS_SHIFT 28 +#define COMMS_STATUS_STATUS_MASK 0xF0000000 + +/* + * FW status to LKD register structure + * @offset - an offset from the base of the ram_type shifted right by + * 2 bits (always aligned to 32 bits). + * Allows a maximum addressable offset of 256MB from RAM base. + * Example: for real offset in RAM of 0x800000 (8MB), the value + * in offset field is (0x800000 >> 2) = 0x200000. + * @ram_type - the RAM type that should be used for offset from + * enum comms_ram_types + * @status - status from enum comms_sts + */ +struct comms_status { + union { /* bit fields are only for FW use */ + struct { + u32 offset :26; + enum comms_ram_types ram_type :2; + enum comms_sts status :4; /* 16 statuses */ + }; + __le32 val; + }; +}; + +#define NAME_MAX_LEN 32 /* bytes */ +struct hl_module_data { + __u8 name[NAME_MAX_LEN]; + __u8 version[VERSION_MAX_LEN]; +}; + +/** + * struct hl_component_versions - versions associated with hl component. + * @struct_size: size of all the struct (including dynamic size of modules). + * @modules_offset: offset of the modules field in this struct. + * @component: version of the component itself. + * @fw_os: Firmware OS Version. + * @comp_name: Name of the component. + * @modules_counter: number of set bits in modules_mask. + * @reserved: reserved for future use. + * @modules: versions of the component's modules. Elborated explanation in + * struct cpucp_versions. + */ +struct hl_component_versions { + __le16 struct_size; + __le16 modules_offset; + __u8 component[VERSION_MAX_LEN]; + __u8 fw_os[VERSION_MAX_LEN]; + __u8 comp_name[NAME_MAX_LEN]; + __u8 modules_counter; + __u8 reserved[3]; + struct hl_module_data modules[]; +}; + +/* Max size of fit size */ +#define HL_FW_VERSIONS_FIT_SIZE 4096 + +#endif /* HL_BOOT_IF_H */ diff --git a/include/linux/hid-roccat.h b/include/linux/hid-roccat.h index 3214fb0815fc..753654fff07f 100644 --- a/include/linux/hid-roccat.h +++ b/include/linux/hid-roccat.h @@ -16,7 +16,7 @@ #ifdef __KERNEL__ -int roccat_connect(struct class *klass, struct hid_device *hid, +int roccat_connect(const struct class *klass, struct hid_device *hid, int report_size); void roccat_disconnect(int minor); int roccat_report_event(int minor, u8 const *data); diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h index 13b1e65fbdcc..6730ee900ee1 100644 --- a/include/linux/hid-sensor-ids.h +++ b/include/linux/hid-sensor-ids.h @@ -21,6 +21,10 @@ #define HID_USAGE_SENSOR_ALS 0x200041 #define HID_USAGE_SENSOR_DATA_LIGHT 0x2004d0 #define HID_USAGE_SENSOR_LIGHT_ILLUM 0x2004d1 +#define HID_USAGE_SENSOR_LIGHT_COLOR_TEMPERATURE 0x2004d2 +#define HID_USAGE_SENSOR_LIGHT_CHROMATICITY 0x2004d3 +#define HID_USAGE_SENSOR_LIGHT_CHROMATICITY_X 0x2004d4 +#define HID_USAGE_SENSOR_LIGHT_CHROMATICITY_Y 0x2004d5 /* PROX (200011) */ #define HID_USAGE_SENSOR_PROX 0x200011 diff --git a/include/linux/hid.h b/include/linux/hid.h index 39e21e3815ad..bf43f3ff6664 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -341,6 +341,29 @@ struct hid_item { */ #define MAX_USBHID_BOOT_QUIRKS 4 +/** + * DOC: HID quirks + * | @HID_QUIRK_NOTOUCH: + * | @HID_QUIRK_IGNORE: ignore this device + * | @HID_QUIRK_NOGET: + * | @HID_QUIRK_HIDDEV_FORCE: + * | @HID_QUIRK_BADPAD: + * | @HID_QUIRK_MULTI_INPUT: + * | @HID_QUIRK_HIDINPUT_FORCE: + * | @HID_QUIRK_ALWAYS_POLL: + * | @HID_QUIRK_INPUT_PER_APP: + * | @HID_QUIRK_X_INVERT: + * | @HID_QUIRK_Y_INVERT: + * | @HID_QUIRK_SKIP_OUTPUT_REPORTS: + * | @HID_QUIRK_SKIP_OUTPUT_REPORT_ID: + * | @HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP: + * | @HID_QUIRK_HAVE_SPECIAL_DRIVER: + * | @HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE: + * | @HID_QUIRK_FULLSPEED_INTERVAL: + * | @HID_QUIRK_NO_INIT_REPORTS: + * | @HID_QUIRK_NO_IGNORE: + * | @HID_QUIRK_NO_INPUT_SYNC: + */ /* BIT(0) reserved for backward compatibility, was HID_QUIRK_INVERT */ #define HID_QUIRK_NOTOUCH BIT(1) #define HID_QUIRK_IGNORE BIT(2) @@ -360,6 +383,7 @@ struct hid_item { #define HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP BIT(18) #define HID_QUIRK_HAVE_SPECIAL_DRIVER BIT(19) #define HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE BIT(20) +#define HID_QUIRK_NOINVERT BIT(21) #define HID_QUIRK_FULLSPEED_INTERVAL BIT(28) #define HID_QUIRK_NO_INIT_REPORTS BIT(29) #define HID_QUIRK_NO_IGNORE BIT(30) @@ -555,9 +579,9 @@ struct hid_input { struct hid_report *report; struct input_dev *input; const char *name; - bool registered; struct list_head reports; /* the list of reports */ unsigned int application; /* application usage for this input */ + bool registered; }; enum hid_type { @@ -655,6 +679,7 @@ struct hid_device { /* device report descriptor */ struct list_head debug_list; spinlock_t debug_list_lock; wait_queue_head_t debug_wait; + struct kref ref; unsigned int id; /* system unique id */ @@ -663,6 +688,8 @@ struct hid_device { /* device report descriptor */ #endif /* CONFIG_BPF */ }; +void hiddev_free(struct kref *ref); + #define to_hid_device(pdev) \ container_of(pdev, struct hid_device, dev) @@ -809,11 +836,11 @@ struct hid_driver { void (*feature_mapping)(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage); -#ifdef CONFIG_PM + int (*suspend)(struct hid_device *hdev, pm_message_t message); int (*resume)(struct hid_device *hdev); int (*reset_resume)(struct hid_device *hdev); -#endif + /* private: */ struct device_driver driver; }; diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 68da30625a6c..be20cff4ba73 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -439,6 +439,50 @@ static inline void memzero_page(struct page *page, size_t offset, size_t len) kunmap_local(addr); } +static inline void memcpy_from_folio(char *to, struct folio *folio, + size_t offset, size_t len) +{ + VM_BUG_ON(offset + len > folio_size(folio)); + + do { + const char *from = kmap_local_folio(folio, offset); + size_t chunk = len; + + if (folio_test_highmem(folio) && + chunk > PAGE_SIZE - offset_in_page(offset)) + chunk = PAGE_SIZE - offset_in_page(offset); + memcpy(to, from, chunk); + kunmap_local(from); + + to += chunk; + offset += chunk; + len -= chunk; + } while (len > 0); +} + +static inline void memcpy_to_folio(struct folio *folio, size_t offset, + const char *from, size_t len) +{ + VM_BUG_ON(offset + len > folio_size(folio)); + + do { + char *to = kmap_local_folio(folio, offset); + size_t chunk = len; + + if (folio_test_highmem(folio) && + chunk > PAGE_SIZE - offset_in_page(offset)) + chunk = PAGE_SIZE - offset_in_page(offset); + memcpy(to, from, chunk); + kunmap_local(to); + + from += chunk; + offset += chunk; + len -= chunk; + } while (len > 0); + + flush_dcache_folio(folio); +} + /** * memcpy_from_file_folio - Copy some bytes from a file folio. * @to: The destination buffer. @@ -507,10 +551,24 @@ static inline void folio_zero_range(struct folio *folio, zero_user_segments(&folio->page, start, start + length, 0, 0); } -static inline void unmap_and_put_page(struct page *page, void *addr) +/** + * folio_release_kmap - Unmap a folio and drop a refcount. + * @folio: The folio to release. + * @addr: The address previously returned by a call to kmap_local_folio(). + * + * It is common, eg in directory handling to kmap a folio. This function + * unmaps the folio and drops the refcount that was being held to keep the + * folio alive while we accessed it. + */ +static inline void folio_release_kmap(struct folio *folio, void *addr) { kunmap_local(addr); - put_page(page); + folio_put(folio); +} + +static inline void unmap_and_put_page(struct page *page, void *addr) +{ + folio_release_kmap(page_folio(page), addr); } #endif /* _LINUX_HIGHMEM_H */ diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index a7d54d4d41fd..ddc7ebb70523 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -104,7 +104,7 @@ enum qm_stop_reason { QM_NORMAL, QM_SOFT_RESET, - QM_FLR, + QM_DOWN, }; enum qm_state { @@ -144,6 +144,13 @@ enum qm_vf_state { QM_NOT_READY, }; +enum qm_misc_ctl_bits { + QM_DRIVER_REMOVING = 0x0, + QM_RST_SCHED, + QM_RESETTING, + QM_MODULE_PARAM, +}; + enum qm_cap_bits { QM_SUPPORT_DB_ISOLATION = 0x0, QM_SUPPORT_FUNC_QOS, @@ -269,6 +276,7 @@ struct hisi_qm_poll_data { struct hisi_qm *qm; struct work_struct work; u16 *qp_finish_id; + u16 eqe_num; }; /** @@ -285,6 +293,18 @@ struct qm_err_isolate { struct list_head qm_hw_errs; }; +struct qm_rsv_buf { + struct qm_sqc *sqc; + struct qm_cqc *cqc; + struct qm_eqc *eqc; + struct qm_aeqc *aeqc; + dma_addr_t sqc_dma; + dma_addr_t cqc_dma; + dma_addr_t eqc_dma; + dma_addr_t aeqc_dma; + struct qm_dma qcdma; +}; + struct hisi_qm { enum qm_hw_ver ver; enum qm_fun_type fun_type; @@ -317,6 +337,7 @@ struct hisi_qm { dma_addr_t cqc_dma; dma_addr_t eqe_dma; dma_addr_t aeqe_dma; + struct qm_rsv_buf xqc_buf; struct hisi_qm_status status; const struct hisi_qm_err_ini *err_ini; @@ -471,6 +492,20 @@ static inline void hisi_qm_init_list(struct hisi_qm_list *qm_list) mutex_init(&qm_list->lock); } +static inline void hisi_qm_add_list(struct hisi_qm *qm, struct hisi_qm_list *qm_list) +{ + mutex_lock(&qm_list->lock); + list_add_tail(&qm->list, &qm_list->list); + mutex_unlock(&qm_list->lock); +} + +static inline void hisi_qm_del_list(struct hisi_qm *qm, struct hisi_qm_list *qm_list) +{ + mutex_lock(&qm_list->lock); + list_del(&qm->list); + mutex_unlock(&qm_list->lock); +} + int hisi_qm_init(struct hisi_qm *qm); void hisi_qm_uninit(struct hisi_qm *qm); int hisi_qm_start(struct hisi_qm *qm); @@ -516,8 +551,8 @@ int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num, void hisi_qm_free_qps(struct hisi_qp **qps, int qp_num); void hisi_qm_dev_shutdown(struct pci_dev *pdev); void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list); -int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list); -void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list); +int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list, int guard); +void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list, int guard); int hisi_qm_resume(struct device *dev); int hisi_qm_suspend(struct device *dev); void hisi_qm_pm_uninit(struct hisi_qm *qm); diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 0ee140176f10..f2044d5a652b 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -531,9 +531,9 @@ extern void sysrq_timer_list_show(void); int hrtimers_prepare_cpu(unsigned int cpu); #ifdef CONFIG_HOTPLUG_CPU -int hrtimers_dead_cpu(unsigned int cpu); +int hrtimers_cpu_dying(unsigned int cpu); #else -#define hrtimers_dead_cpu NULL +#define hrtimers_cpu_dying NULL #endif #endif diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index e718dbe928ba..fa0350b0812a 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -140,9 +140,7 @@ bool hugepage_vma_check(struct vm_area_struct *vma, unsigned long vm_flags, unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); -void prep_transhuge_page(struct page *page); -void free_transhuge_page(struct page *page); - +void folio_prep_large_rmappable(struct folio *folio); bool can_split_folio(struct folio *folio, int *pextra_pins); int split_huge_page_to_list(struct page *page, struct list_head *list); static inline int split_huge_page(struct page *page) @@ -282,7 +280,7 @@ static inline bool hugepage_vma_check(struct vm_area_struct *vma, return false; } -static inline void prep_transhuge_page(struct page *page) {} +static inline void folio_prep_large_rmappable(struct folio *folio) {} #define transparent_hugepage_flags 0UL diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index ca3c8e10f24a..236ec7b63c54 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -26,9 +26,11 @@ typedef struct { unsigned long pd; } hugepd_t; #define __hugepd(x) ((hugepd_t) { (x) }) #endif +void free_huge_folio(struct folio *folio); + #ifdef CONFIG_HUGETLB_PAGE -#include <linux/mempolicy.h> +#include <linux/pagemap.h> #include <linux/shm.h> #include <asm/tlbflush.h> @@ -58,6 +60,7 @@ struct resv_map { long adds_in_progress; struct list_head region_cache; long region_cache_count; + struct rw_semaphore rw_sema; #ifdef CONFIG_CGROUP_HUGETLB /* * On private mappings, the counter to uncharge reservations is stored @@ -131,14 +134,12 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma, int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *, struct vm_area_struct *); struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma, - unsigned long address, unsigned int flags); -long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, - struct page **, unsigned long *, unsigned long *, - long, unsigned int, int *); + unsigned long address, unsigned int flags, + unsigned int *page_mask); void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long, struct page *, zap_flags_t); -void __unmap_hugepage_range_final(struct mmu_gather *tlb, +void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page, zap_flags_t zap_flags); @@ -167,7 +168,6 @@ int get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared); void folio_putback_active_hugetlb(struct folio *folio); void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason); -void free_huge_page(struct page *page); void hugetlb_fix_reserve_counts(struct inode *inode); extern struct mutex *hugetlb_fault_mutex_table; u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx); @@ -246,6 +246,25 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, unsigned long *start, unsigned long *end); +extern void __hugetlb_zap_begin(struct vm_area_struct *vma, + unsigned long *begin, unsigned long *end); +extern void __hugetlb_zap_end(struct vm_area_struct *vma, + struct zap_details *details); + +static inline void hugetlb_zap_begin(struct vm_area_struct *vma, + unsigned long *start, unsigned long *end) +{ + if (is_vm_hugetlb_page(vma)) + __hugetlb_zap_begin(vma, start, end); +} + +static inline void hugetlb_zap_end(struct vm_area_struct *vma, + struct zap_details *details) +{ + if (is_vm_hugetlb_page(vma)) + __hugetlb_zap_end(vma, details); +} + void hugetlb_vma_lock_read(struct vm_area_struct *vma); void hugetlb_vma_unlock_read(struct vm_area_struct *vma); void hugetlb_vma_lock_write(struct vm_area_struct *vma); @@ -261,6 +280,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long cp_flags); bool is_hugetlb_entry_migration(pte_t pte); +bool is_hugetlb_entry_hwpoisoned(pte_t pte); void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); #else /* !CONFIG_HUGETLB_PAGE */ @@ -297,19 +317,23 @@ static inline void adjust_range_if_pmd_sharing_possible( { } -static inline struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma, - unsigned long address, unsigned int flags) +static inline void hugetlb_zap_begin( + struct vm_area_struct *vma, + unsigned long *start, unsigned long *end) { - BUILD_BUG(); /* should never be compiled in if !CONFIG_HUGETLB_PAGE*/ } -static inline long follow_hugetlb_page(struct mm_struct *mm, - struct vm_area_struct *vma, struct page **pages, - unsigned long *position, unsigned long *nr_pages, - long i, unsigned int flags, int *nonblocking) +static inline void hugetlb_zap_end( + struct vm_area_struct *vma, + struct zap_details *details) { - BUG(); - return 0; +} + +static inline struct page *hugetlb_follow_page_mask( + struct vm_area_struct *vma, unsigned long address, unsigned int flags, + unsigned int *page_mask) +{ + BUILD_BUG(); /* should never be compiled in if !CONFIG_HUGETLB_PAGE*/ } static inline int copy_hugetlb_page_range(struct mm_struct *dst, @@ -450,7 +474,7 @@ static inline long hugetlb_change_protection( return 0; } -static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb, +static inline void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page, zap_flags_t zap_flags) @@ -521,7 +545,6 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) } struct hugetlbfs_inode_info { - struct shared_policy policy; struct inode vfs_inode; unsigned int seals; }; @@ -725,8 +748,6 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve); struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask); -struct folio *alloc_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *vma, - unsigned long address); int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping, pgoff_t idx); void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, @@ -821,6 +842,12 @@ static inline unsigned int blocks_per_huge_page(struct hstate *h) return huge_page_size(h) / 512; } +static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h, + struct address_space *mapping, pgoff_t idx) +{ + return filemap_lock_folio(mapping, idx << huge_page_order(h)); +} + #include <asm/hugetlb.h> #ifndef is_hugepage_only_range @@ -851,11 +878,6 @@ static inline struct hstate *folio_hstate(struct folio *folio) return size_to_hstate(folio_size(folio)); } -static inline struct hstate *page_hstate(struct page *page) -{ - return folio_hstate(page_folio(page)); -} - static inline unsigned hstate_index_to_shift(unsigned index) { return hstates[index].order + PAGE_SHIFT; @@ -998,7 +1020,9 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t old_pte, pte_t pte) { - set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + unsigned long psize = huge_page_size(hstate_vma(vma)); + + set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize); } #endif @@ -1007,6 +1031,11 @@ void hugetlb_register_node(struct node *node); void hugetlb_unregister_node(struct node *node); #endif +/* + * Check if a given raw @page in a hugepage is HWPOISON. + */ +bool is_raw_hwpoison_page_in_hugepage(struct page *page); + #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; @@ -1015,6 +1044,12 @@ static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio return NULL; } +static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h, + struct address_space *mapping, pgoff_t idx) +{ + return NULL; +} + static inline int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list) { @@ -1035,13 +1070,6 @@ alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, return NULL; } -static inline struct folio *alloc_hugetlb_folio_vma(struct hstate *h, - struct vm_area_struct *vma, - unsigned long address) -{ - return NULL; -} - static inline int __alloc_bootmem_huge_page(struct hstate *h) { return 0; @@ -1067,11 +1095,6 @@ static inline struct hstate *folio_hstate(struct folio *folio) return NULL; } -static inline struct hstate *page_hstate(struct page *page) -{ - return NULL; -} - static inline struct hstate *size_to_hstate(unsigned long size) { return NULL; @@ -1187,7 +1210,7 @@ static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, } static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) + pte_t *ptep, pte_t pte, unsigned long sz) { } @@ -1245,6 +1268,8 @@ static inline bool __vma_shareable_lock(struct vm_area_struct *vma) return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data; } +bool __vma_private_lock(struct vm_area_struct *vma); + /* * Safe version of huge_pte_offset() to check the locks. See comments * above huge_pte_offset(). diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index 3d82d91f49ac..e5d64b8b59c2 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -22,13 +22,6 @@ struct resv_map; struct file_region; #ifdef CONFIG_CGROUP_HUGETLB -/* - * Minimum page order trackable by hugetlb cgroup. - * At least 3 pages are necessary for all the tracking information. - * The second tail page contains all of the hugetlb-specific fields. - */ -#define HUGETLB_CGROUP_MIN_ORDER order_base_2(__NR_USED_SUBPAGE) - enum hugetlb_memory_event { HUGETLB_MAX, HUGETLB_NR_MEMORY_EVENTS, @@ -68,8 +61,6 @@ static inline struct hugetlb_cgroup * __hugetlb_cgroup_from_folio(struct folio *folio, bool rsvd) { VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio); - if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER) - return NULL; if (rsvd) return folio->_hugetlb_cgroup_rsvd; else @@ -91,8 +82,6 @@ static inline void __set_hugetlb_cgroup(struct folio *folio, struct hugetlb_cgroup *h_cg, bool rsvd) { VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio); - if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER) - return; if (rsvd) folio->_hugetlb_cgroup_rsvd = h_cg; else diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 7fbb45911273..db199d653dd1 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -90,9 +90,6 @@ extern int dbg_reserve_bp_slot(struct perf_event *bp); extern int dbg_release_bp_slot(struct perf_event *bp); extern int reserve_bp_slot(struct perf_event *bp); extern void release_bp_slot(struct perf_event *bp); -int arch_reserve_bp_slot(struct perf_event *bp); -void arch_release_bp_slot(struct perf_event *bp); -void arch_unregister_hw_breakpoint(struct perf_event *bp); extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk); diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index 8a3115516a1b..136e9842120e 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -63,5 +63,6 @@ extern void hwrng_unregister(struct hwrng *rng); extern void devm_hwrng_unregister(struct device *dve, struct hwrng *rng); extern long hwrng_msleep(struct hwrng *rng, unsigned int msecs); +extern long hwrng_yield(struct hwrng *rng); #endif /* LINUX_HWRANDOM_H_ */ diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 3ac3974b3c78..2b00faf98017 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -348,7 +348,7 @@ struct vmtransfer_page_packet_header { u8 sender_owns_set; u8 reserved; u32 range_cnt; - struct vmtransfer_page_range ranges[1]; + struct vmtransfer_page_range ranges[]; } __packed; struct vmgpadl_packet_header { @@ -665,8 +665,8 @@ struct vmbus_channel_initiate_contact { u64 interrupt_page; struct { u8 msg_sint; - u8 padding1[3]; - u32 padding2; + u8 msg_vtl; + u8 reserved[6]; }; }; u64 monitor_page1; diff --git a/include/linux/i2c-atr.h b/include/linux/i2c-atr.h new file mode 100644 index 000000000000..4d5da161c225 --- /dev/null +++ b/include/linux/i2c-atr.h @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * I2C Address Translator + * + * Copyright (c) 2019,2022 Luca Ceresoli <luca@lucaceresoli.net> + * Copyright (c) 2022,2023 Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> + * + * Based on i2c-mux.h + */ + +#ifndef _LINUX_I2C_ATR_H +#define _LINUX_I2C_ATR_H + +#include <linux/i2c.h> +#include <linux/types.h> + +struct device; +struct fwnode_handle; +struct i2c_atr; + +/** + * struct i2c_atr_ops - Callbacks from ATR to the device driver. + * @attach_client: Notify the driver of a new device connected on a child + * bus, with the alias assigned to it. The driver must + * configure the hardware to use the alias. + * @detach_client: Notify the driver of a device getting disconnected. The + * driver must configure the hardware to stop using the + * alias. + * + * All these functions return 0 on success, a negative error code otherwise. + */ +struct i2c_atr_ops { + int (*attach_client)(struct i2c_atr *atr, u32 chan_id, + const struct i2c_client *client, u16 alias); + void (*detach_client)(struct i2c_atr *atr, u32 chan_id, + const struct i2c_client *client); +}; + +/** + * i2c_atr_new() - Allocate and initialize an I2C ATR helper. + * @parent: The parent (upstream) adapter + * @dev: The device acting as an ATR + * @ops: Driver-specific callbacks + * @max_adapters: Maximum number of child adapters + * + * The new ATR helper is connected to the parent adapter but has no child + * adapters. Call i2c_atr_add_adapter() to add some. + * + * Call i2c_atr_delete() to remove. + * + * Return: pointer to the new ATR helper object, or ERR_PTR + */ +struct i2c_atr *i2c_atr_new(struct i2c_adapter *parent, struct device *dev, + const struct i2c_atr_ops *ops, int max_adapters); + +/** + * i2c_atr_delete - Delete an I2C ATR helper. + * @atr: I2C ATR helper to be deleted. + * + * Precondition: all the adapters added with i2c_atr_add_adapter() must be + * removed by calling i2c_atr_del_adapter(). + */ +void i2c_atr_delete(struct i2c_atr *atr); + +/** + * i2c_atr_add_adapter - Create a child ("downstream") I2C bus. + * @atr: The I2C ATR + * @chan_id: Index of the new adapter (0 .. max_adapters-1). This value is + * passed to the callbacks in `struct i2c_atr_ops`. + * @adapter_parent: The device used as the parent of the new i2c adapter, or NULL + * to use the i2c-atr device as the parent. + * @bus_handle: The fwnode handle that points to the adapter's i2c + * peripherals, or NULL. + * + * After calling this function a new i2c bus will appear. Adding and removing + * devices on the downstream bus will result in calls to the + * &i2c_atr_ops->attach_client and &i2c_atr_ops->detach_client callbacks for the + * driver to assign an alias to the device. + * + * The adapter's fwnode is set to @bus_handle, or if @bus_handle is NULL the + * function looks for a child node whose 'reg' property matches the chan_id + * under the i2c-atr device's 'i2c-atr' node. + * + * Call i2c_atr_del_adapter() to remove the adapter. + * + * Return: 0 on success, a negative error code otherwise. + */ +int i2c_atr_add_adapter(struct i2c_atr *atr, u32 chan_id, + struct device *adapter_parent, + struct fwnode_handle *bus_handle); + +/** + * i2c_atr_del_adapter - Remove a child ("downstream") I2C bus added by + * i2c_atr_add_adapter(). If no I2C bus has been added + * this function is a no-op. + * @atr: The I2C ATR + * @chan_id: Index of the adapter to be removed (0 .. max_adapters-1) + */ +void i2c_atr_del_adapter(struct i2c_atr *atr, u32 chan_id); + +/** + * i2c_atr_set_driver_data - Set private driver data to the i2c-atr instance. + * @atr: The I2C ATR + * @data: Pointer to the data to store + */ +void i2c_atr_set_driver_data(struct i2c_atr *atr, void *data); + +/** + * i2c_atr_get_driver_data - Get the stored drive data. + * @atr: The I2C ATR + * + * Return: Pointer to the stored data + */ +void *i2c_atr_get_driver_data(struct i2c_atr *atr); + +#endif /* _LINUX_I2C_ATR_H */ diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 3430cc2b05a6..0dae9db27538 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -237,7 +237,6 @@ enum i2c_driver_flags { * struct i2c_driver - represent an I2C device driver * @class: What kind of i2c device we instantiate (for detect) * @probe: Callback for device binding - * @probe_new: Transitional callback for device binding - do not use * @remove: Callback for device unbinding * @shutdown: Callback for device shutdown * @alert: Alert callback, for example for the SMBus alert protocol @@ -272,16 +271,8 @@ enum i2c_driver_flags { struct i2c_driver { unsigned int class; - union { /* Standard driver model interfaces */ - int (*probe)(struct i2c_client *client); - /* - * Legacy callback that was part of a conversion of .probe(). - * Today it has the same semantic as .probe(). Don't use for new - * code. - */ - int (*probe_new)(struct i2c_client *client); - }; + int (*probe)(struct i2c_client *client); void (*remove)(struct i2c_client *client); diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h index 90fa83464f00..84ed77c04940 100644 --- a/include/linux/i3c/device.h +++ b/include/linux/i3c/device.h @@ -96,7 +96,7 @@ enum i3c_dcr { /** * struct i3c_device_info - I3C device information - * @pid: Provisional ID + * @pid: Provisioned ID * @bcr: Bus Characteristic Register * @dcr: Device Characteristic Register * @static_addr: static/I2C address diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index 0b52da4f2346..24c1863b86e2 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -24,6 +24,12 @@ struct i2c_client; +/* notifier actions. notifier call data is the struct i3c_bus */ +enum { + I3C_NOTIFY_BUS_ADD, + I3C_NOTIFY_BUS_REMOVE, +}; + struct i3c_master_controller; struct i3c_bus; struct i3c_device; @@ -129,6 +135,7 @@ struct i3c_ibi_slot { * rejected by the master * @num_slots: number of IBI slots reserved for this device * @enabled: reflect the IBI status + * @wq: workqueue used to execute IBI handlers. * @handler: IBI handler specified at i3c_device_request_ibi() call time. This * handler will be called from the controller workqueue, and as such * is allowed to sleep (though it is recommended to process the IBI @@ -151,6 +158,7 @@ struct i3c_device_ibi_info { unsigned int max_payload_len; unsigned int num_slots; unsigned int enabled; + struct workqueue_struct *wq; void (*handler)(struct i3c_device *dev, const struct i3c_ibi_payload *payload); }; @@ -166,7 +174,7 @@ struct i3c_device_ibi_info { * assigned a dynamic address by the master. Will be used during * bus initialization to assign it a specific dynamic address * before starting DAA (Dynamic Address Assignment) - * @pid: I3C Provisional ID exposed by the device. This is a unique identifier + * @pid: I3C Provisioned ID exposed by the device. This is a unique identifier * that may be used to attach boardinfo to i3c_dev_desc when the device * does not have a static address * @of_node: optional DT node in case the device has been described in the DT @@ -469,7 +477,7 @@ struct i3c_master_controller_ops { * @boardinfo.i2c: list of I2C boardinfo objects * @boardinfo: board-level information attached to devices connected on the bus * @bus: I3C bus exposed by this master - * @wq: workqueue used to execute IBI handlers. Can also be used by master + * @wq: workqueue which can be used by master * drivers if they need to postpone operations that need to take place * in a thread context. Typical examples are Hot Join processing which * requires taking the bus lock in maintenance, which in turn, can only @@ -652,4 +660,9 @@ void i3c_master_queue_ibi(struct i3c_dev_desc *dev, struct i3c_ibi_slot *slot); struct i3c_ibi_slot *i3c_master_get_free_ibi_slot(struct i3c_dev_desc *dev); +void i3c_for_each_bus_locked(int (*fn)(struct i3c_bus *bus, void *data), + void *data); +int i3c_register_notifier(struct notifier_block *nb); +int i3c_unregister_notifier(struct notifier_block *nb); + #endif /* I3C_MASTER_H */ diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index db0f4fcfdaf4..e3b3b0fa2a8f 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -85,12 +85,10 @@ extern void icmpv6_param_prob_reason(struct sk_buff *skb, struct flowi6; struct in6_addr; -extern void icmpv6_flow_init(struct sock *sk, - struct flowi6 *fl6, - u8 type, - const struct in6_addr *saddr, - const struct in6_addr *daddr, - int oif); + +void icmpv6_flow_init(const struct sock *sk, struct flowi6 *fl6, u8 type, + const struct in6_addr *saddr, + const struct in6_addr *daddr, int oif); static inline void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) { diff --git a/include/linux/idr.h b/include/linux/idr.h index a0dce14090a9..da5f5fa4a3a6 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -200,7 +200,7 @@ static inline void idr_preload_end(void) */ #define idr_for_each_entry_ul(idr, entry, tmp, id) \ for (tmp = 0, id = 0; \ - tmp <= id && ((entry) = idr_get_next_ul(idr, &(id))) != NULL; \ + ((entry) = tmp <= id ? idr_get_next_ul(idr, &(id)) : NULL) != NULL; \ tmp = id, ++id) /** @@ -224,10 +224,12 @@ static inline void idr_preload_end(void) * @id: Entry ID. * * Continue to iterate over entries, continuing after the current position. + * After normal termination @entry is left with the value NULL. This + * is convenient for a "not found" value. */ #define idr_for_each_entry_continue_ul(idr, entry, tmp, id) \ for (tmp = id; \ - tmp <= id && ((entry) = idr_get_next_ul(idr, &(id))) != NULL; \ + ((entry) = tmp <= id ? idr_get_next_ul(idr, &(id)) : NULL) != NULL; \ tmp = id, ++id) /* diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 4b998090898e..2b5e500bf093 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -307,6 +307,13 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2) #define IEEE80211_TRIGGER_TYPE_BQRP 0x6 #define IEEE80211_TRIGGER_TYPE_NFRP 0x7 +/* UL-bandwidth within common_info of trigger frame */ +#define IEEE80211_TRIGGER_ULBW_MASK 0xc0000 +#define IEEE80211_TRIGGER_ULBW_20MHZ 0x0 +#define IEEE80211_TRIGGER_ULBW_40MHZ 0x1 +#define IEEE80211_TRIGGER_ULBW_80MHZ 0x2 +#define IEEE80211_TRIGGER_ULBW_160_80P80MHZ 0x3 + struct ieee80211_hdr { __le16 frame_control; __le16 duration_id; @@ -836,9 +843,14 @@ enum ieee80211_preq_target_flags { }; /** - * struct ieee80211_quiet_ie + * struct ieee80211_quiet_ie - Quiet element + * @count: Quiet Count + * @period: Quiet Period + * @duration: Quiet Duration + * @offset: Quiet Offset * - * This structure refers to "Quiet information element" + * This structure represents the payload of the "Quiet element" as + * described in IEEE Std 802.11-2020 section 9.4.2.22. */ struct ieee80211_quiet_ie { u8 count; @@ -848,9 +860,15 @@ struct ieee80211_quiet_ie { } __packed; /** - * struct ieee80211_msrment_ie + * struct ieee80211_msrment_ie - Measurement element + * @token: Measurement Token + * @mode: Measurement Report Mode + * @type: Measurement Type + * @request: Measurement Request or Measurement Report * - * This structure refers to "Measurement Request/Report information element" + * This structure represents the payload of both the "Measurement + * Request element" and the "Measurement Report element" as described + * in IEEE Std 802.11-2020 sections 9.4.2.20 and 9.4.2.21. */ struct ieee80211_msrment_ie { u8 token; @@ -860,9 +878,14 @@ struct ieee80211_msrment_ie { } __packed; /** - * struct ieee80211_channel_sw_ie + * struct ieee80211_channel_sw_ie - Channel Switch Announcement element + * @mode: Channel Switch Mode + * @new_ch_num: New Channel Number + * @count: Channel Switch Count * - * This structure refers to "Channel Switch Announcement information element" + * This structure represents the payload of the "Channel Switch + * Announcement element" as described in IEEE Std 802.11-2020 section + * 9.4.2.18. */ struct ieee80211_channel_sw_ie { u8 mode; @@ -871,9 +894,14 @@ struct ieee80211_channel_sw_ie { } __packed; /** - * struct ieee80211_ext_chansw_ie + * struct ieee80211_ext_chansw_ie - Extended Channel Switch Announcement element + * @mode: Channel Switch Mode + * @new_operating_class: New Operating Class + * @new_ch_num: New Channel Number + * @count: Channel Switch Count * - * This structure represents the "Extended Channel Switch Announcement element" + * This structure represents the "Extended Channel Switch Announcement + * element" as described in IEEE Std 802.11-2020 section 9.4.2.52. */ struct ieee80211_ext_chansw_ie { u8 mode; @@ -894,8 +922,14 @@ struct ieee80211_sec_chan_offs_ie { /** * struct ieee80211_mesh_chansw_params_ie - mesh channel switch parameters IE + * @mesh_ttl: Time To Live + * @mesh_flags: Flags + * @mesh_reason: Reason Code + * @mesh_pre_value: Precedence Value * - * This structure represents the "Mesh Channel Switch Paramters element" + * This structure represents the payload of the "Mesh Channel Switch + * Parameters element" as described in IEEE Std 802.11-2020 section + * 9.4.2.102. */ struct ieee80211_mesh_chansw_params_ie { u8 mesh_ttl; @@ -906,6 +940,13 @@ struct ieee80211_mesh_chansw_params_ie { /** * struct ieee80211_wide_bw_chansw_ie - wide bandwidth channel switch IE + * @new_channel_width: New Channel Width + * @new_center_freq_seg0: New Channel Center Frequency Segment 0 + * @new_center_freq_seg1: New Channel Center Frequency Segment 1 + * + * This structure represents the payload of the "Wide Bandwidth + * Channel Switch element" as described in IEEE Std 802.11-2020 + * section 9.4.2.160. */ struct ieee80211_wide_bw_chansw_ie { u8 new_channel_width; @@ -913,22 +954,42 @@ struct ieee80211_wide_bw_chansw_ie { } __packed; /** - * struct ieee80211_tim + * struct ieee80211_tim_ie - Traffic Indication Map information element + * @dtim_count: DTIM Count + * @dtim_period: DTIM Period + * @bitmap_ctrl: Bitmap Control + * @required_octet: "Syntatic sugar" to force the struct size to the + * minimum valid size when carried in a non-S1G PPDU + * @virtual_map: Partial Virtual Bitmap * - * This structure refers to "Traffic Indication Map information element" + * This structure represents the payload of the "TIM element" as + * described in IEEE Std 802.11-2020 section 9.4.2.5. Note that this + * definition is only applicable when the element is carried in a + * non-S1G PPDU. When the TIM is carried in an S1G PPDU, the Bitmap + * Control and Partial Virtual Bitmap may not be present. */ struct ieee80211_tim_ie { u8 dtim_count; u8 dtim_period; u8 bitmap_ctrl; - /* variable size: 1 - 251 bytes */ - u8 virtual_map[1]; + union { + u8 required_octet; + DECLARE_FLEX_ARRAY(u8, virtual_map); + }; } __packed; /** - * struct ieee80211_meshconf_ie + * struct ieee80211_meshconf_ie - Mesh Configuration element + * @meshconf_psel: Active Path Selection Protocol Identifier + * @meshconf_pmetric: Active Path Selection Metric Identifier + * @meshconf_congest: Congestion Control Mode Identifier + * @meshconf_synch: Synchronization Method Identifier + * @meshconf_auth: Authentication Protocol Identifier + * @meshconf_form: Mesh Formation Info + * @meshconf_cap: Mesh Capability (see &enum mesh_config_capab_flags) * - * This structure refers to "Mesh Configuration information element" + * This structure represents the payload of the "Mesh Configuration + * element" as described in IEEE Std 802.11-2020 section 9.4.2.97. */ struct ieee80211_meshconf_ie { u8 meshconf_psel; @@ -950,6 +1011,9 @@ struct ieee80211_meshconf_ie { * is ongoing * @IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL: STA is in deep sleep mode or has * neighbors in deep sleep mode + * + * Enumerates the "Mesh Capability" as described in IEEE Std + * 802.11-2020 section 9.4.2.97.7. */ enum mesh_config_capab_flags { IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS = 0x01, @@ -960,7 +1024,7 @@ enum mesh_config_capab_flags { #define IEEE80211_MESHCONF_FORM_CONNECTED_TO_GATE 0x1 -/** +/* * mesh channel switch parameters element's flag indicator * */ @@ -969,9 +1033,17 @@ enum mesh_config_capab_flags { #define WLAN_EID_CHAN_SWITCH_PARAM_REASON BIT(2) /** - * struct ieee80211_rann_ie + * struct ieee80211_rann_ie - RANN (root announcement) element + * @rann_flags: Flags + * @rann_hopcount: Hop Count + * @rann_ttl: Element TTL + * @rann_addr: Root Mesh STA Address + * @rann_seq: HWMP Sequence Number + * @rann_interval: Interval + * @rann_metric: Metric * - * This structure refers to "Root Announcement information element" + * This structure represents the payload of the "RANN element" as + * described in IEEE Std 802.11-2020 section 9.4.2.111. */ struct ieee80211_rann_ie { u8 rann_flags; @@ -993,7 +1065,7 @@ enum ieee80211_ht_chanwidth_values { }; /** - * enum ieee80211_opmode_bits - VHT operating mode field bits + * enum ieee80211_vht_opmode_bits - VHT operating mode field bits * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK: channel width mask * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ: 20 MHz channel width * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ: 40 MHz channel width @@ -1042,9 +1114,12 @@ enum ieee80211_s1g_chanwidth { #define WLAN_USER_POSITION_LEN 16 /** - * struct ieee80211_tpc_report_ie + * struct ieee80211_tpc_report_ie - TPC Report element + * @tx_power: Transmit Power + * @link_margin: Link Margin * - * This structure refers to "TPC Report element" + * This structure represents the payload of the "TPC Report element" as + * described in IEEE Std 802.11-2020 section 9.4.2.16. */ struct ieee80211_tpc_report_ie { u8 tx_power; @@ -1062,9 +1137,14 @@ struct ieee80211_addba_ext_ie { } __packed; /** - * struct ieee80211_s1g_bcn_compat_ie + * struct ieee80211_s1g_bcn_compat_ie - S1G Beacon Compatibility element + * @compat_info: Compatibility Information + * @beacon_int: Beacon Interval + * @tsf_completion: TSF Completion * - * S1G Beacon Compatibility element + * This structure represents the payload of the "S1G Beacon + * Compatibility element" as described in IEEE Std 802.11-2020 section + * 9.4.2.196. */ struct ieee80211_s1g_bcn_compat_ie { __le16 compat_info; @@ -1073,9 +1153,15 @@ struct ieee80211_s1g_bcn_compat_ie { } __packed; /** - * struct ieee80211_s1g_oper_ie + * struct ieee80211_s1g_oper_ie - S1G Operation element + * @ch_width: S1G Operation Information Channel Width + * @oper_class: S1G Operation Information Operating Class + * @primary_ch: S1G Operation Information Primary Channel Number + * @oper_ch: S1G Operation Information Channel Center Frequency + * @basic_mcs_nss: Basic S1G-MCS and NSS Set * - * S1G Operation element + * This structure represents the payload of the "S1G Operation + * element" as described in IEEE Std 802.11-2020 section 9.4.2.212. */ struct ieee80211_s1g_oper_ie { u8 ch_width; @@ -1086,9 +1172,13 @@ struct ieee80211_s1g_oper_ie { } __packed; /** - * struct ieee80211_aid_response_ie + * struct ieee80211_aid_response_ie - AID Response element + * @aid: AID/Group AID + * @switch_count: AID Switch Count + * @response_int: AID Response Interval * - * AID Response element + * This structure represents the payload of the "AID Response element" + * as described in IEEE Std 802.11-2020 section 9.4.2.194. */ struct ieee80211_aid_response_ie { __le16 aid; @@ -1163,6 +1253,30 @@ struct ieee80211_twt_setup { u8 params[]; } __packed; +#define IEEE80211_TTLM_MAX_CNT 2 +#define IEEE80211_TTLM_CONTROL_DIRECTION 0x03 +#define IEEE80211_TTLM_CONTROL_DEF_LINK_MAP 0x04 +#define IEEE80211_TTLM_CONTROL_SWITCH_TIME_PRESENT 0x08 +#define IEEE80211_TTLM_CONTROL_EXPECTED_DUR_PRESENT 0x10 +#define IEEE80211_TTLM_CONTROL_LINK_MAP_SIZE 0x20 + +#define IEEE80211_TTLM_DIRECTION_DOWN 0 +#define IEEE80211_TTLM_DIRECTION_UP 1 +#define IEEE80211_TTLM_DIRECTION_BOTH 2 + +/** + * struct ieee80211_ttlm_elem - TID-To-Link Mapping element + * + * Defined in section 9.4.2.314 in P802.11be_D4 + * + * @control: the first part of control field + * @optional: the second part of control field + */ +struct ieee80211_ttlm_elem { + u8 control; + u8 optional[]; +} __packed; + struct ieee80211_mgmt { __le16 frame_control; __le16 duration; @@ -1489,7 +1603,7 @@ struct ieee80211_tdls_data { /* * Peer-to-Peer IE attribute related definitions. */ -/** +/* * enum ieee80211_p2p_attr_id - identifies type of peer-to-peer attribute. */ enum ieee80211_p2p_attr_id { @@ -1539,11 +1653,17 @@ struct ieee80211_p2p_noa_attr { #define IEEE80211_P2P_OPPPS_CTWINDOW_MASK 0x7F /** - * struct ieee80211_bar - HT Block Ack Request + * struct ieee80211_bar - Block Ack Request frame format + * @frame_control: Frame Control + * @duration: Duration + * @ra: RA + * @ta: TA + * @control: BAR Control + * @start_seq_num: Starting Sequence Number (see Figure 9-37) * - * This structure refers to "HT BlockAckReq" as - * described in 802.11n draft section 7.2.1.7.1 - */ + * This structure represents the "BlockAckReq frame format" + * as described in IEEE Std 802.11-2020 section 9.3.1.7. +*/ struct ieee80211_bar { __le16 frame_control; __le16 duration; @@ -1563,13 +1683,17 @@ struct ieee80211_bar { #define IEEE80211_HT_MCS_MASK_LEN 10 /** - * struct ieee80211_mcs_info - MCS information + * struct ieee80211_mcs_info - Supported MCS Set field * @rx_mask: RX mask * @rx_highest: highest supported RX rate. If set represents * the highest supported RX data rate in units of 1 Mbps. * If this field is 0 this value should not be used to * consider the highest RX data rate supported. * @tx_params: TX parameters + * @reserved: Reserved bits + * + * This structure represents the "Supported MCS Set field" as + * described in IEEE Std 802.11-2020 section 9.4.2.55.4. */ struct ieee80211_mcs_info { u8 rx_mask[IEEE80211_HT_MCS_MASK_LEN]; @@ -1588,6 +1712,8 @@ struct ieee80211_mcs_info { #define IEEE80211_HT_MCS_TX_MAX_STREAMS 4 #define IEEE80211_HT_MCS_TX_UNEQUAL_MODULATION 0x10 +#define IEEE80211_HT_MCS_CHAINS(mcs) ((mcs) == 32 ? 1 : (1 + ((mcs) >> 3))) + /* * 802.11n D5.0 20.3.5 / 20.6 says: * - indices 0 to 7 and 32 are single spatial stream @@ -1600,10 +1726,16 @@ struct ieee80211_mcs_info { (IEEE80211_HT_MCS_UNEQUAL_MODULATION_START / 8) /** - * struct ieee80211_ht_cap - HT capabilities + * struct ieee80211_ht_cap - HT capabilities element + * @cap_info: HT Capability Information + * @ampdu_params_info: A-MPDU Parameters + * @mcs: Supported MCS Set + * @extended_ht_cap_info: HT Extended Capabilities + * @tx_BF_cap_info: Transmit Beamforming Capabilities + * @antenna_selection_info: ASEL Capability * - * This structure is the "HT capabilities element" as - * described in 802.11n D5.0 7.3.2.57 + * This structure represents the payload of the "HT Capabilities + * element" as described in IEEE Std 802.11-2020 section 9.4.2.55. */ struct ieee80211_ht_cap { __le16 cap_info; @@ -1691,9 +1823,14 @@ enum ieee80211_min_mpdu_spacing { /** * struct ieee80211_ht_operation - HT operation IE + * @primary_chan: Primary Channel + * @ht_param: HT Operation Information parameters + * @operation_mode: HT Operation Information operation mode + * @stbc_param: HT Operation Information STBC params + * @basic_set: Basic HT-MCS Set * - * This structure is the "HT operation element" as - * described in 802.11n-2009 7.3.2.57 + * This structure represents the payload of the "HT Operation + * element" as described in IEEE Std 802.11-2020 section 9.4.2.56. */ struct ieee80211_ht_operation { u8 primary_chan; @@ -1862,9 +1999,12 @@ struct ieee80211_vht_operation { /** * struct ieee80211_he_cap_elem - HE capabilities element + * @mac_cap_info: HE MAC Capabilities Information + * @phy_cap_info: HE PHY Capabilities Information * - * This structure is the "HE capabilities element" fixed fields as - * described in P802.11ax_D4.0 section 9.4.2.242.2 and 9.4.2.242.3 + * This structure represents the fixed fields of the payload of the + * "HE capabilities element" as described in IEEE Std 802.11ax-2021 + * sections 9.4.2.248.2 and 9.4.2.248.3. */ struct ieee80211_he_cap_elem { u8 mac_cap_info[6]; @@ -1923,35 +2063,45 @@ struct ieee80211_he_mcs_nss_supp { } __packed; /** - * struct ieee80211_he_operation - HE capabilities element + * struct ieee80211_he_operation - HE Operation element + * @he_oper_params: HE Operation Parameters + BSS Color Information + * @he_mcs_nss_set: Basic HE-MCS And NSS Set + * @optional: Optional fields VHT Operation Information, Max Co-Hosted + * BSSID Indicator, and 6 GHz Operation Information * - * This structure is the "HE operation element" fields as - * described in P802.11ax_D4.0 section 9.4.2.243 + * This structure represents the payload of the "HE Operation + * element" as described in IEEE Std 802.11ax-2021 section 9.4.2.249. */ struct ieee80211_he_operation { __le32 he_oper_params; __le16 he_mcs_nss_set; - /* Optional 0,1,3,4,5,7 or 8 bytes: depends on @he_oper_params */ u8 optional[]; } __packed; /** - * struct ieee80211_he_spr - HE spatial reuse element + * struct ieee80211_he_spr - Spatial Reuse Parameter Set element + * @he_sr_control: SR Control + * @optional: Optional fields Non-SRG OBSS PD Max Offset, SRG OBSS PD + * Min Offset, SRG OBSS PD Max Offset, SRG BSS Color + * Bitmap, and SRG Partial BSSID Bitmap * - * This structure is the "HE spatial reuse element" element as - * described in P802.11ax_D4.0 section 9.4.2.241 + * This structure represents the payload of the "Spatial Reuse + * Parameter Set element" as described in IEEE Std 802.11ax-2021 + * section 9.4.2.252. */ struct ieee80211_he_spr { u8 he_sr_control; - /* Optional 0 to 19 bytes: depends on @he_sr_control */ u8 optional[]; } __packed; /** * struct ieee80211_he_mu_edca_param_ac_rec - MU AC Parameter Record field + * @aifsn: ACI/AIFSN + * @ecw_min_max: ECWmin/ECWmax + * @mu_edca_timer: MU EDCA Timer * - * This structure is the "MU AC Parameter Record" fields as - * described in P802.11ax_D4.0 section 9.4.2.245 + * This structure represents the "MU AC Parameter Record" as described + * in IEEE Std 802.11ax-2021 section 9.4.2.251, Figure 9-788p. */ struct ieee80211_he_mu_edca_param_ac_rec { u8 aifsn; @@ -1961,9 +2111,14 @@ struct ieee80211_he_mu_edca_param_ac_rec { /** * struct ieee80211_mu_edca_param_set - MU EDCA Parameter Set element + * @mu_qos_info: QoS Info + * @ac_be: MU AC_BE Parameter Record + * @ac_bk: MU AC_BK Parameter Record + * @ac_vi: MU AC_VI Parameter Record + * @ac_vo: MU AC_VO Parameter Record * - * This structure is the "MU EDCA Parameter Set element" fields as - * described in P802.11ax_D4.0 section 9.4.2.245 + * This structure represents the payload of the "MU EDCA Parameter Set + * element" as described in IEEE Std 802.11ax-2021 section 9.4.2.251. */ struct ieee80211_mu_edca_param_set { u8 mu_qos_info; @@ -2177,9 +2332,9 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, * enum ieee80211_ap_reg_power - regulatory power for a Access Point * * @IEEE80211_REG_UNSET_AP: Access Point has no regulatory power mode - * @IEEE80211_REG_LPI: Indoor Access Point - * @IEEE80211_REG_SP: Standard power Access Point - * @IEEE80211_REG_VLP: Very low power Access Point + * @IEEE80211_REG_LPI_AP: Indoor Access Point + * @IEEE80211_REG_SP_AP: Standard power Access Point + * @IEEE80211_REG_VLP_AP: Very low power Access Point * @IEEE80211_REG_AP_POWER_AFTER_LAST: internal * @IEEE80211_REG_AP_POWER_MAX: maximum value */ @@ -2567,7 +2722,7 @@ static inline bool ieee80211_he_capa_size_ok(const u8 *data, u8 len) #define IEEE80211_6GHZ_CTRL_REG_SP_AP 1 /** - * ieee80211_he_6ghz_oper - HE 6 GHz operation Information field + * struct ieee80211_he_6ghz_oper - HE 6 GHz operation Information field * @primary: primary channel * @control: control flags * @ccfs0: channel center frequency segment 0 @@ -2614,9 +2769,13 @@ enum ieee80211_tx_power_intrpt_type { }; /** - * struct ieee80211_tx_pwr_env + * struct ieee80211_tx_pwr_env - Transmit Power Envelope + * @tx_power_info: Transmit Power Information field + * @tx_power: Maximum Transmit Power field * - * This structure represents the "Transmit Power Envelope element" + * This structure represents the payload of the "Transmit Power + * Envelope element" as described in IEEE Std 802.11ax-2021 section + * 9.4.2.161 */ struct ieee80211_tx_pwr_env { u8 tx_power_info; @@ -2671,12 +2830,14 @@ ieee80211_he_oper_size(const u8 *he_oper_ie) static inline const struct ieee80211_he_6ghz_oper * ieee80211_he_6ghz_oper(const struct ieee80211_he_operation *he_oper) { - const u8 *ret = (const void *)&he_oper->optional; + const u8 *ret; u32 he_oper_params; if (!he_oper) return NULL; + ret = (const void *)&he_oper->optional; + he_oper_params = le32_to_cpu(he_oper->he_oper_params); if (!(he_oper_params & IEEE80211_HE_OPERATION_6GHZ_OP_INFO)) @@ -3013,6 +3174,28 @@ ieee80211_eht_oper_size_ok(const u8 *data, u8 len) return len >= needed; } +#define IEEE80211_BW_IND_DIS_SUBCH_PRESENT BIT(1) + +struct ieee80211_bandwidth_indication { + u8 params; + struct ieee80211_eht_operation_info info; +} __packed; + +static inline bool +ieee80211_bandwidth_indication_size_ok(const u8 *data, u8 len) +{ + const struct ieee80211_bandwidth_indication *bwi = (const void *)data; + + if (len < sizeof(*bwi)) + return false; + + if (bwi->params & IEEE80211_BW_IND_DIS_SUBCH_PRESENT && + len < sizeof(*bwi) + 2) + return false; + + return true; +} + #define LISTEN_INT_USF GENMASK(15, 14) #define LISTEN_INT_UI GENMASK(13, 0) @@ -3470,6 +3653,8 @@ enum ieee80211_eid_ext { WLAN_EID_EXT_EHT_OPERATION = 106, WLAN_EID_EXT_EHT_MULTI_LINK = 107, WLAN_EID_EXT_EHT_CAPABILITY = 108, + WLAN_EID_EXT_TID_TO_LINK_MAPPING = 109, + WLAN_EID_EXT_BANDWIDTH_INDICATION = 135, }; /* Action category code */ @@ -4237,6 +4422,36 @@ static inline bool ieee80211_is_public_action(struct ieee80211_hdr *hdr, } /** + * ieee80211_is_protected_dual_of_public_action - check if skb contains a + * protected dual of public action management frame + * @skb: the skb containing the frame, length will be checked + * + * Return: true if the skb contains a protected dual of public action + * management frame, false otherwise. + */ +static inline bool +ieee80211_is_protected_dual_of_public_action(struct sk_buff *skb) +{ + u8 action; + + if (!ieee80211_is_public_action((void *)skb->data, skb->len) || + skb->len < IEEE80211_MIN_ACTION_SIZE + 1) + return false; + + action = *(u8 *)(skb->data + IEEE80211_MIN_ACTION_SIZE); + + return action != WLAN_PUB_ACTION_20_40_BSS_COEX && + action != WLAN_PUB_ACTION_DSE_REG_LOC_ANN && + action != WLAN_PUB_ACTION_MSMT_PILOT && + action != WLAN_PUB_ACTION_TDLS_DISCOVER_RES && + action != WLAN_PUB_ACTION_LOC_TRACK_NOTI && + action != WLAN_PUB_ACTION_FTM_REQUEST && + action != WLAN_PUB_ACTION_FTM_RESPONSE && + action != WLAN_PUB_ACTION_FILS_DISCOVERY && + action != WLAN_PUB_ACTION_VENDOR_SPECIFIC; +} + +/** * _ieee80211_is_group_privacy_action - check if frame is a group addressed * privacy action frame * @hdr: the frame @@ -4307,12 +4522,11 @@ static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim, /** * ieee80211_get_tdls_action - get tdls packet action (or -1, if not tdls packet) * @skb: the skb containing the frame, length will not be checked - * @hdr_size: the size of the ieee80211_hdr that starts at skb->data * * This function assumes the frame is a data frame, and that the network header * is in the correct place. */ -static inline int ieee80211_get_tdls_action(struct sk_buff *skb, u32 hdr_size) +static inline int ieee80211_get_tdls_action(struct sk_buff *skb) { if (!skb_is_nonlinear(skb) && skb->len > (skb_network_offset(skb) + 2)) { @@ -4478,7 +4692,7 @@ static inline bool for_each_element_completed(const struct element *element, return (const u8 *)element == (const u8 *)data + datalen; } -/** +/* * RSNX Capabilities: * bits 0-3: Field length (n-1) */ @@ -4721,7 +4935,7 @@ ieee80211_mle_get_bss_param_ch_cnt(const struct ieee80211_multi_link_elem *mle) } /** - * ieee80211_mle_get_eml_sync_delay - returns the medium sync delay + * ieee80211_mle_get_eml_med_sync_delay - returns the medium sync delay * @data: pointer to the multi link EHT IE * * The element is assumed to be of the correct type (BASIC) and big enough, @@ -5006,6 +5220,39 @@ static inline bool ieee80211_mle_reconf_sta_prof_size_ok(const u8 *data, fixed + prof->sta_info_len - 1 <= len; } +static inline bool ieee80211_tid_to_link_map_size_ok(const u8 *data, size_t len) +{ + const struct ieee80211_ttlm_elem *t2l = (const void *)data; + u8 control, fixed = sizeof(*t2l), elem_len = 0; + + if (len < fixed) + return false; + + control = t2l->control; + + if (control & IEEE80211_TTLM_CONTROL_SWITCH_TIME_PRESENT) + elem_len += 2; + if (control & IEEE80211_TTLM_CONTROL_EXPECTED_DUR_PRESENT) + elem_len += 3; + + if (!(control & IEEE80211_TTLM_CONTROL_DEF_LINK_MAP)) { + u8 bm_size; + + elem_len += 1; + if (len < fixed + elem_len) + return false; + + if (control & IEEE80211_TTLM_CONTROL_LINK_MAP_SIZE) + bm_size = 1; + else + bm_size = 2; + + elem_len += hweight8(t2l->optional[0]) * bm_size; + } + + return len >= fixed + elem_len; +} + #define for_each_mle_subelement(_elem, _data, _len) \ if (ieee80211_mle_size_ok(_data, _len)) \ for_each_element(_elem, \ diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 1ed52441972f..10a1e81434cb 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -53,6 +53,10 @@ static inline bool dev_is_mac_header_xmit(const struct net_device *dev) case ARPHRD_NONE: case ARPHRD_RAWIP: case ARPHRD_PIMREG: + /* PPP adds its l2 header automatically in ppp_start_xmit(). + * This makes it look like an l3 device to __bpf_redirect() and tcf_mirred_init(). + */ + case ARPHRD_PPP: return false; default: return true; diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 8de6b6e67829..cdc684e04a2f 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -162,8 +162,8 @@ struct team_option { bool per_port; unsigned int array_size; /* != 0 means the option is array */ enum team_option_type type; - int (*init)(struct team *team, struct team_option_inst_info *info); - int (*getter)(struct team *team, struct team_gsetter_ctx *ctx); + void (*init)(struct team *team, struct team_option_inst_info *info); + void (*getter)(struct team *team, struct team_gsetter_ctx *ctx); int (*setter)(struct team *team, struct team_gsetter_ctx *ctx); }; @@ -189,6 +189,8 @@ struct team { struct net_device *dev; /* associated netdevice */ struct team_pcpu_stats __percpu *pcpu_stats; + const struct header_ops *header_ops_cache; + struct mutex lock; /* used for overall locking, e.g. port lists write */ /* diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 6ba71957851e..3028af87716e 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -408,7 +408,7 @@ static inline int __vlan_insert_tag(struct sk_buff *skb, * @mac_len: MAC header length including outer vlan headers * * Inserts the VLAN tag into @skb as part of the payload at offset mac_len - * Returns a VLAN tagged skb. If a new skb is created, @skb is freed. + * Returns a VLAN tagged skb. This might change skb->head. * * Following the skb_unshare() example, in case of error, the calling function * doesn't have to worry about freeing the original skb. @@ -437,7 +437,7 @@ static inline struct sk_buff *vlan_insert_inner_tag(struct sk_buff *skb, * @vlan_tci: VLAN TCI to insert * * Inserts the VLAN tag into @skb as part of the payload - * Returns a VLAN tagged skb. If a new skb is created, @skb is freed. + * Returns a VLAN tagged skb. This might change skb->head. * * Following the skb_unshare() example, in case of error, the calling function * doesn't have to worry about freeing the original skb. @@ -457,7 +457,7 @@ static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, * @vlan_tci: VLAN TCI to insert * * Inserts the VLAN tag into @skb as part of the payload - * Returns a VLAN tagged skb. If a new skb is created, @skb is freed. + * Returns a VLAN tagged skb. This might change skb->head. * * Following the skb_unshare() example, in case of error, the calling function * doesn't have to worry about freeing the original skb. diff --git a/include/linux/igmp.h b/include/linux/igmp.h index ebf4349a53af..5171231f70a8 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -39,7 +39,7 @@ struct ip_sf_socklist { unsigned int sl_max; unsigned int sl_count; struct rcu_head rcu; - __be32 sl_addr[]; + __be32 sl_addr[] __counted_by(sl_max); }; #define IP_SFBLOCK 10 /* allocate this many at once */ diff --git a/include/linux/iio/common/inv_sensors_timestamp.h b/include/linux/iio/common/inv_sensors_timestamp.h new file mode 100644 index 000000000000..a47d304d1ba7 --- /dev/null +++ b/include/linux/iio/common/inv_sensors_timestamp.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2020 Invensense, Inc. + */ + +#ifndef INV_SENSORS_TIMESTAMP_H_ +#define INV_SENSORS_TIMESTAMP_H_ + +/** + * struct inv_sensors_timestamp_chip - chip internal properties + * @clock_period: internal clock period in ns + * @jitter: acceptable jitter in per-mille + * @init_period: chip initial period at reset in ns + */ +struct inv_sensors_timestamp_chip { + uint32_t clock_period; + uint32_t jitter; + uint32_t init_period; +}; + +/** + * struct inv_sensors_timestamp_interval - timestamps interval + * @lo: interval lower bound + * @up: interval upper bound + */ +struct inv_sensors_timestamp_interval { + int64_t lo; + int64_t up; +}; + +/** + * struct inv_sensors_timestamp_acc - accumulator for computing an estimation + * @val: current estimation of the value, the mean of all values + * @idx: current index of the next free place in values table + * @values: table of all measured values, use for computing the mean + */ +struct inv_sensors_timestamp_acc { + uint32_t val; + size_t idx; + uint32_t values[32]; +}; + +/** + * struct inv_sensors_timestamp - timestamp management states + * @chip: chip internal characteristics + * @min_period: minimal acceptable clock period + * @max_period: maximal acceptable clock period + * @it: interrupts interval timestamps + * @timestamp: store last timestamp for computing next data timestamp + * @mult: current internal period multiplier + * @new_mult: new set internal period multiplier (not yet effective) + * @period: measured current period of the sensor + * @chip_period: accumulator for computing internal chip period + */ +struct inv_sensors_timestamp { + struct inv_sensors_timestamp_chip chip; + uint32_t min_period; + uint32_t max_period; + struct inv_sensors_timestamp_interval it; + int64_t timestamp; + uint32_t mult; + uint32_t new_mult; + uint32_t period; + struct inv_sensors_timestamp_acc chip_period; +}; + +void inv_sensors_timestamp_init(struct inv_sensors_timestamp *ts, + const struct inv_sensors_timestamp_chip *chip); + +int inv_sensors_timestamp_update_odr(struct inv_sensors_timestamp *ts, + uint32_t period, bool fifo); + +void inv_sensors_timestamp_interrupt(struct inv_sensors_timestamp *ts, + uint32_t fifo_period, size_t fifo_nb, + size_t sensor_nb, int64_t timestamp); + +static inline int64_t inv_sensors_timestamp_pop(struct inv_sensors_timestamp *ts) +{ + ts->timestamp += ts->period; + return ts->timestamp; +} + +void inv_sensors_timestamp_apply_odr(struct inv_sensors_timestamp *ts, + uint32_t fifo_period, size_t fifo_nb, + unsigned int fifo_no); + +static inline void inv_sensors_timestamp_reset(struct inv_sensors_timestamp *ts) +{ + const struct inv_sensors_timestamp_interval interval_init = {0LL, 0LL}; + + ts->it = interval_init; + ts->timestamp = 0; +} + +#endif diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h index 6802596b017c..e9910b41d48e 100644 --- a/include/linux/iio/consumer.h +++ b/include/linux/iio/consumer.h @@ -201,8 +201,9 @@ struct iio_dev * @chan: The channel being queried. * @val: Value read back. * - * Note raw reads from iio channels are in adc counts and hence - * scale will need to be applied if standard units required. + * Note, if standard units are required, raw reads from iio channels + * need the offset (default 0) and scale (default 1) to be applied + * as (raw + offset) * scale. */ int iio_read_channel_raw(struct iio_channel *chan, int *val); @@ -212,8 +213,9 @@ int iio_read_channel_raw(struct iio_channel *chan, * @chan: The channel being queried. * @val: Value read back. * - * Note raw reads from iio channels are in adc counts and hence - * scale will need to be applied if standard units required. + * Note, if standard units are required, raw reads from iio channels + * need the offset (default 0) and scale (default 1) to be applied + * as (raw + offset) * scale. * * In opposit to the normal iio_read_channel_raw this function * returns the average of multiple reads. @@ -281,8 +283,9 @@ int iio_read_channel_attribute(struct iio_channel *chan, int *val, * @chan: The channel being queried. * @val: Value being written. * - * Note raw writes to iio channels are in dac counts and hence - * scale will need to be applied if standard units required. + * Note that for raw writes to iio channels, if the value provided is + * in standard units, the affect of the scale and offset must be removed + * as (value / scale) - offset. */ int iio_write_channel_raw(struct iio_channel *chan, int val); @@ -292,12 +295,25 @@ int iio_write_channel_raw(struct iio_channel *chan, int val); * @chan: The channel being queried. * @val: Value read back. * - * Note raw reads from iio channels are in adc counts and hence - * scale will need to be applied if standard units are required. + * Note, if standard units are required, raw reads from iio channels + * need the offset (default 0) and scale (default 1) to be applied + * as (raw + offset) * scale. */ int iio_read_max_channel_raw(struct iio_channel *chan, int *val); /** + * iio_read_min_channel_raw() - read minimum available raw value from a given + * channel, i.e. the minimum possible value. + * @chan: The channel being queried. + * @val: Value read back. + * + * Note, if standard units are required, raw reads from iio channels + * need the offset (default 0) and scale (default 1) to be applied + * as (raw + offset) * scale. + */ +int iio_read_min_channel_raw(struct iio_channel *chan, int *val); + +/** * iio_read_avail_channel_raw() - read available raw values from a given channel * @chan: The channel being queried. * @vals: Available values read back. @@ -308,8 +324,9 @@ int iio_read_max_channel_raw(struct iio_channel *chan, int *val); * For ranges, three vals are always returned; min, step and max. * For lists, all the possible values are enumerated. * - * Note raw available values from iio channels are in adc counts and - * hence scale will need to be applied if standard units are required. + * Note, if standard units are required, raw available values from iio + * channels need the offset (default 0) and scale (default 1) to be applied + * as (raw + offset) * scale. */ int iio_read_avail_channel_raw(struct iio_channel *chan, const int **vals, int *length); diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 202e55b0a28b..d0ce3b71106a 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -427,6 +427,8 @@ struct iio_trigger; /* forward declaration */ * @write_event_config: set if the event is enabled. * @read_event_value: read a configuration value associated with the event. * @write_event_value: write a configuration value for the event. + * @read_event_label: function to request label name for a specified label, + * for better event identification. * @validate_trigger: function to validate the trigger when the * current trigger gets changed. * @update_scan_mode: function to configure device and scan buffer when @@ -511,6 +513,12 @@ struct iio_info { enum iio_event_direction dir, enum iio_event_info info, int val, int val2); + int (*read_event_label)(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + enum iio_event_type type, + enum iio_event_direction dir, + char *label); + int (*validate_trigger)(struct iio_dev *indio_dev, struct iio_trigger *trig); int (*update_scan_mode)(struct iio_dev *indio_dev, @@ -556,7 +564,9 @@ struct iio_buffer_setup_ops { * and owner * @buffer: [DRIVER] any buffer present * @scan_bytes: [INTERN] num bytes captured to be fed to buffer demux - * @available_scan_masks: [DRIVER] optional array of allowed bitmasks + * @available_scan_masks: [DRIVER] optional array of allowed bitmasks. Sort the + * array in order of preference, the most preferred + * masks first. * @masklength: [INTERN] the length of the mask established from * channels * @active_scan_mask: [INTERN] union of all scan masks requested by buffers diff --git a/include/linux/iio/sw_device.h b/include/linux/iio/sw_device.h index eff1e6b2595c..0f7fe7b522e3 100644 --- a/include/linux/iio/sw_device.h +++ b/include/linux/iio/sw_device.h @@ -51,9 +51,6 @@ void iio_unregister_sw_device_type(struct iio_sw_device_type *dt); struct iio_sw_device *iio_sw_device_create(const char *, const char *); void iio_sw_device_destroy(struct iio_sw_device *); -int iio_sw_device_type_configfs_register(struct iio_sw_device_type *dt); -void iio_sw_device_type_configfs_unregister(struct iio_sw_device_type *dt); - static inline void iio_swd_group_init_type_name(struct iio_sw_device *d, const char *name, diff --git a/include/linux/iio/sw_trigger.h b/include/linux/iio/sw_trigger.h index 47de2443e984..bc77f88df303 100644 --- a/include/linux/iio/sw_trigger.h +++ b/include/linux/iio/sw_trigger.h @@ -51,9 +51,6 @@ void iio_unregister_sw_trigger_type(struct iio_sw_trigger_type *tt); struct iio_sw_trigger *iio_sw_trigger_create(const char *, const char *); void iio_sw_trigger_destroy(struct iio_sw_trigger *); -int iio_sw_trigger_type_configfs_register(struct iio_sw_trigger_type *tt); -void iio_sw_trigger_type_configfs_unregister(struct iio_sw_trigger_type *tt); - static inline void iio_swt_group_init_type_name(struct iio_sw_trigger *t, const char *name, diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h index 82faa98c719a..117bde7d6ad7 100644 --- a/include/linux/iio/types.h +++ b/include/linux/iio/types.h @@ -19,6 +19,8 @@ enum iio_event_info { IIO_EV_INFO_TIMEOUT, IIO_EV_INFO_RESET_TIMEOUT, IIO_EV_INFO_TAP2_MIN_DELAY, + IIO_EV_INFO_RUNNING_PERIOD, + IIO_EV_INFO_RUNNING_COUNT, }; #define IIO_VAL_INT 1 diff --git a/include/linux/init.h b/include/linux/init.h index 266c3e1640d4..01b52c9c7526 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -89,9 +89,6 @@ __latent_entropy #define __meminitdata __section(".meminit.data") #define __meminitconst __section(".meminit.rodata") -#define __memexit __section(".memexit.text") __exitused __cold notrace -#define __memexitdata __section(".memexit.data") -#define __memexitconst __section(".memexit.rodata") /* For assembly routines */ #define __HEAD .section ".head.text","ax" diff --git a/include/linux/instruction_pointer.h b/include/linux/instruction_pointer.h index cda1f706eaeb..aa0b3ffea935 100644 --- a/include/linux/instruction_pointer.h +++ b/include/linux/instruction_pointer.h @@ -2,7 +2,12 @@ #ifndef _LINUX_INSTRUCTION_POINTER_H #define _LINUX_INSTRUCTION_POINTER_H +#include <asm/linkage.h> + #define _RET_IP_ (unsigned long)__builtin_return_address(0) + +#ifndef _THIS_IP_ #define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) +#endif #endif /* _LINUX_INSTRUCTION_POINTER_H */ diff --git a/include/linux/int_log.h b/include/linux/int_log.h new file mode 100644 index 000000000000..0a6f58c38b61 --- /dev/null +++ b/include/linux/int_log.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +/* + * Provides fixed-point logarithm operations. + * + * Copyright (C) 2006 Christoph Pfister (christophpfister@gmail.com) + */ + +#ifndef __LINUX_INT_LOG_H +#define __LINUX_INT_LOG_H + +#include <linux/types.h> + +/** + * intlog2 - computes log2 of a value; the result is shifted left by 24 bits + * + * @value: The value (must be != 0) + * + * to use rational values you can use the following method: + * + * intlog2(value) = intlog2(value * 2^x) - x * 2^24 + * + * Some usecase examples: + * + * intlog2(8) will give 3 << 24 = 3 * 2^24 + * + * intlog2(9) will give 3 << 24 + ... = 3.16... * 2^24 + * + * intlog2(1.5) = intlog2(3) - 2^24 = 0.584... * 2^24 + * + * + * return: log2(value) * 2^24 + */ +extern unsigned int intlog2(u32 value); + +/** + * intlog10 - computes log10 of a value; the result is shifted left by 24 bits + * + * @value: The value (must be != 0) + * + * to use rational values you can use the following method: + * + * intlog10(value) = intlog10(value * 10^x) - x * 2^24 + * + * An usecase example: + * + * intlog10(1000) will give 3 << 24 = 3 * 2^24 + * + * due to the implementation intlog10(1000) might be not exactly 3 * 2^24 + * + * look at intlog2 for similar examples + * + * return: log10(value) * 2^24 + */ +extern unsigned int intlog10(u32 value); + +#endif diff --git a/include/linux/intel_tpmi.h b/include/linux/intel_tpmi.h index f505788c05da..ee07393445f9 100644 --- a/include/linux/intel_tpmi.h +++ b/include/linux/intel_tpmi.h @@ -6,6 +6,12 @@ #ifndef _INTEL_TPMI_H_ #define _INTEL_TPMI_H_ +#include <linux/bitfield.h> + +#define TPMI_VERSION_INVALID 0xff +#define TPMI_MINOR_VERSION(val) FIELD_GET(GENMASK(4, 0), val) +#define TPMI_MAJOR_VERSION(val) FIELD_GET(GENMASK(7, 5), val) + /** * struct intel_tpmi_plat_info - Platform information for a TPMI device instance * @package_id: CPU Package id @@ -27,4 +33,6 @@ struct intel_tpmi_plat_info *tpmi_get_platform_data(struct auxiliary_device *aux struct resource *tpmi_get_resource_at_index(struct auxiliary_device *auxdev, int index); int tpmi_get_resource_count(struct auxiliary_device *auxdev); +int tpmi_get_feature_status(struct auxiliary_device *auxdev, int feature_id, int *locked, + int *disabled); #endif diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h index e6d8aca6886d..7ba183f221f1 100644 --- a/include/linux/interconnect-provider.h +++ b/include/linux/interconnect-provider.h @@ -33,7 +33,7 @@ struct icc_node_data { */ struct icc_onecell_data { unsigned int num_nodes; - struct icc_node *nodes[]; + struct icc_node *nodes[] __counted_by(num_nodes); }; struct icc_node *of_icc_xlate_onecell(struct of_phandle_args *spec, diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a92bce40b04b..76121c2bb4f8 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -566,11 +566,15 @@ enum * * _ RCU: * 1) rcutree_migrate_callbacks() migrates the queue. - * 2) rcu_report_dead() reports the final quiescent states. + * 2) rcutree_report_cpu_dead() reports the final quiescent states. * * _ IRQ_POLL: irq_poll_cpu_dead() migrates the queue + * + * _ (HR)TIMER_SOFTIRQ: (hr)timers_dead_cpu() migrates the queue */ -#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ) | BIT(IRQ_POLL_SOFTIRQ)) +#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(TIMER_SOFTIRQ) | BIT(IRQ_POLL_SOFTIRQ) |\ + BIT(HRTIMER_SOFTIRQ) | BIT(RCU_SOFTIRQ)) + /* map softirq index to softirq name. update 'softirq_to_name' in * kernel/softirq.c when adding a new softirq. diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 1b7a44b35616..25142a0e2fc2 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -166,6 +166,10 @@ struct io_pgtable_ops { struct iommu_iotlb_gather *gather); phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops, unsigned long iova); + int (*read_and_clear_dirty)(struct io_pgtable_ops *ops, + unsigned long iova, size_t size, + unsigned long flags, + struct iommu_dirty_bitmap *dirty); }; /** diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index bb9c666bd584..aefb73eeeebf 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -20,8 +20,16 @@ enum io_uring_cmd_flags { IO_URING_F_SQE128 = (1 << 8), IO_URING_F_CQE32 = (1 << 9), IO_URING_F_IOPOLL = (1 << 10), + + /* set when uring wants to cancel a previously issued command */ + IO_URING_F_CANCEL = (1 << 11), + IO_URING_F_COMPAT = (1 << 12), }; +/* only top 8 bits of sqe->uring_cmd_flags for kernel internal use */ +#define IORING_URING_CMD_CANCELABLE (1U << 30) +#define IORING_URING_CMD_POLLED (1U << 31) + struct io_uring_cmd { struct file *file; const struct io_uring_sqe *sqe; @@ -81,6 +89,10 @@ static inline void io_uring_free(struct task_struct *tsk) if (tsk->io_uring) __io_uring_free(tsk); } +int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags); +void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, + unsigned int issue_flags); +struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd); #else static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, struct iov_iter *iter, void *ioucmd) @@ -116,6 +128,19 @@ static inline const char *io_uring_get_opcode(u8 opcode) { return ""; } +static inline int io_uring_cmd_sock(struct io_uring_cmd *cmd, + unsigned int issue_flags) +{ + return -EOPNOTSUPP; +} +static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, + unsigned int issue_flags) +{ +} +static inline struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd) +{ + return NULL; +} #endif #endif diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index f04ce513fadb..239a4f68801b 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -69,8 +69,8 @@ struct io_uring_task { }; struct io_uring { - u32 head ____cacheline_aligned_in_smp; - u32 tail ____cacheline_aligned_in_smp; + u32 head; + u32 tail; }; /* @@ -176,7 +176,6 @@ struct io_submit_state { unsigned short submit_nr; unsigned int cqes_count; struct blk_plug plug; - struct io_uring_cqe cqes[16]; }; struct io_ev_fd { @@ -205,25 +204,17 @@ struct io_ring_ctx { unsigned int has_evfd: 1; /* all CQEs should be posted only by the submitter task */ unsigned int task_complete: 1; + unsigned int lockless_cq: 1; unsigned int syscall_iopoll: 1; unsigned int poll_activated: 1; unsigned int drain_disabled: 1; unsigned int compat: 1; - enum task_work_notify_mode notify_method; + struct task_struct *submitter_task; + struct io_rings *rings; + struct percpu_ref refs; - /* - * If IORING_SETUP_NO_MMAP is used, then the below holds - * the gup'ed pages for the two rings, and the sqes. - */ - unsigned short n_ring_pages; - unsigned short n_sqe_pages; - struct page **ring_pages; - struct page **sqe_pages; - - struct io_rings *rings; - struct task_struct *submitter_task; - struct percpu_ref refs; + enum task_work_notify_mode notify_method; } ____cacheline_aligned_in_smp; /* submission data */ @@ -261,31 +252,26 @@ struct io_ring_ctx { struct io_buffer_list *io_bl; struct xarray io_bl_xa; - struct list_head io_buffers_cache; struct io_hash_table cancel_table_locked; - struct list_head cq_overflow_list; struct io_alloc_cache apoll_cache; struct io_alloc_cache netmsg_cache; - } ____cacheline_aligned_in_smp; - /* IRQ completion list, under ->completion_lock */ - struct io_wq_work_list locked_free_list; - unsigned int locked_free_nr; - - const struct cred *sq_creds; /* cred used for __io_sq_thread() */ - struct io_sq_data *sq_data; /* if using sq thread polling */ - - struct wait_queue_head sqo_sq_wait; - struct list_head sqd_list; - - unsigned long check_cq; - - unsigned int file_alloc_start; - unsigned int file_alloc_end; + /* + * ->iopoll_list is protected by the ctx->uring_lock for + * io_uring instances that don't use IORING_SETUP_SQPOLL. + * For SQPOLL, only the single threaded io_sq_thread() will + * manipulate the list, hence no extra locking is needed there. + */ + struct io_wq_work_list iopoll_list; + bool poll_multi_queue; - struct xarray personalities; - u32 pers_next; + /* + * Any cancelable uring_cmd is added to this list in + * ->uring_cmd() by io_uring_cmd_insert_cancelable() + */ + struct hlist_head cancelable_uring_cmd; + } ____cacheline_aligned_in_smp; struct { /* @@ -298,39 +284,65 @@ struct io_ring_ctx { unsigned cached_cq_tail; unsigned cq_entries; struct io_ev_fd __rcu *io_ev_fd; - struct wait_queue_head cq_wait; unsigned cq_extra; } ____cacheline_aligned_in_smp; + /* + * task_work and async notification delivery cacheline. Expected to + * regularly bounce b/w CPUs. + */ struct { - spinlock_t completion_lock; - - bool poll_multi_queue; - atomic_t cq_wait_nr; - - /* - * ->iopoll_list is protected by the ctx->uring_lock for - * io_uring instances that don't use IORING_SETUP_SQPOLL. - * For SQPOLL, only the single threaded io_sq_thread() will - * manipulate the list, hence no extra locking is needed there. - */ - struct io_wq_work_list iopoll_list; - struct io_hash_table cancel_table; - struct llist_head work_llist; - - struct list_head io_buffers_comp; + unsigned long check_cq; + atomic_t cq_wait_nr; + atomic_t cq_timeouts; + struct wait_queue_head cq_wait; } ____cacheline_aligned_in_smp; /* timeouts */ struct { spinlock_t timeout_lock; - atomic_t cq_timeouts; struct list_head timeout_list; struct list_head ltimeout_list; unsigned cq_last_tm_flush; } ____cacheline_aligned_in_smp; + struct io_uring_cqe completion_cqes[16]; + + spinlock_t completion_lock; + + /* IRQ completion list, under ->completion_lock */ + struct io_wq_work_list locked_free_list; + unsigned int locked_free_nr; + + struct list_head io_buffers_comp; + struct list_head cq_overflow_list; + struct io_hash_table cancel_table; + + struct hlist_head waitid_list; + +#ifdef CONFIG_FUTEX + struct hlist_head futex_list; + struct io_alloc_cache futex_cache; +#endif + + const struct cred *sq_creds; /* cred used for __io_sq_thread() */ + struct io_sq_data *sq_data; /* if using sq thread polling */ + + struct wait_queue_head sqo_sq_wait; + struct list_head sqd_list; + + unsigned int file_alloc_start; + unsigned int file_alloc_end; + + struct xarray personalities; + u32 pers_next; + + struct list_head io_buffers_cache; + + /* deferred free list, protected by ->uring_lock */ + struct hlist_head io_buf_list; + /* Keep this last, we don't need it for the fast path */ struct wait_queue_head poll_wq; struct io_restriction restrictions; @@ -346,8 +358,6 @@ struct io_ring_ctx { struct wait_queue_head rsrc_quiesce_wq; unsigned rsrc_quiesce; - struct list_head io_buffers_pages; - #if defined(CONFIG_UNIX) struct socket *ring_sock; #endif @@ -374,6 +384,15 @@ struct io_ring_ctx { unsigned sq_thread_idle; /* protected by ->completion_lock */ unsigned evfd_last_cq_tail; + + /* + * If IORING_SETUP_NO_MMAP is used, then the below holds + * the gup'ed pages for the two rings, and the sqes. + */ + unsigned short n_ring_pages; + unsigned short n_sqe_pages; + struct page **ring_pages; + struct page **sqe_pages; }; struct io_tw_state { @@ -409,13 +428,13 @@ enum { REQ_F_SINGLE_POLL_BIT, REQ_F_DOUBLE_POLL_BIT, REQ_F_PARTIAL_IO_BIT, - REQ_F_CQE32_INIT_BIT, REQ_F_APOLL_MULTISHOT_BIT, REQ_F_CLEAR_POLLIN_BIT, REQ_F_HASH_LOCKED_BIT, /* keep async read/write and isreg together and in order */ REQ_F_SUPPORT_NOWAIT_BIT, REQ_F_ISREG_BIT, + REQ_F_POLL_NO_LAZY_BIT, /* not a real bit, just to check we're not overflowing the space */ __REQ_F_LAST_BIT, @@ -479,12 +498,12 @@ enum { REQ_F_PARTIAL_IO = BIT(REQ_F_PARTIAL_IO_BIT), /* fast poll multishot mode */ REQ_F_APOLL_MULTISHOT = BIT(REQ_F_APOLL_MULTISHOT_BIT), - /* ->extra1 and ->extra2 are initialised */ - REQ_F_CQE32_INIT = BIT(REQ_F_CQE32_INIT_BIT), /* recvmsg special flag, clear EPOLLIN */ REQ_F_CLEAR_POLLIN = BIT(REQ_F_CLEAR_POLLIN_BIT), /* hashed into ->cancel_hash_locked, protected by ->uring_lock */ REQ_F_HASH_LOCKED = BIT(REQ_F_HASH_LOCKED_BIT), + /* don't use lazy poll wake for this request */ + REQ_F_POLL_NO_LAZY = BIT(REQ_F_POLL_NO_LAZY_BIT), }; typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts); @@ -579,13 +598,7 @@ struct io_kiocb { struct io_task_work io_task_work; unsigned nr_tw; /* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */ - union { - struct hlist_node hash_node; - struct { - u64 extra1; - u64 extra2; - }; - }; + struct hlist_node hash_node; /* internal polling, see IORING_FEAT_FAST_POLL */ struct async_poll *apoll; /* opcode allocated if it needs to store data for async defer */ @@ -595,6 +608,11 @@ struct io_kiocb { /* custom credentials, valid IFF REQ_F_CREDS is set */ const struct cred *creds; struct io_wq_work work; + + struct { + u64 extra1; + u64 extra2; + } big_cqe; }; struct io_overflow_cqe { diff --git a/include/linux/iomap.h b/include/linux/iomap.h index e2b836c2e119..96dd0acbba44 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -58,7 +58,11 @@ struct vm_fault; #define IOMAP_F_DIRTY (1U << 1) #define IOMAP_F_SHARED (1U << 2) #define IOMAP_F_MERGED (1U << 3) +#ifdef CONFIG_BUFFER_HEAD #define IOMAP_F_BUFFER_HEAD (1U << 4) +#else +#define IOMAP_F_BUFFER_HEAD 0 +#endif /* CONFIG_BUFFER_HEAD */ #define IOMAP_F_XATTR (1U << 5) /* @@ -261,9 +265,10 @@ int iomap_file_buffered_write_punch_delalloc(struct inode *inode, int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops); void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); -struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos); +struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len); bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags); void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len); +bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio); int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, const struct iomap_ops *ops); int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, diff --git a/include/linux/iommu.h b/include/linux/iommu.h index d31642596675..6291aa7b079b 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -13,6 +13,7 @@ #include <linux/errno.h> #include <linux/err.h> #include <linux/of.h> +#include <linux/iova_bitmap.h> #include <uapi/linux/iommu.h> #define IOMMU_READ (1 << 0) @@ -37,6 +38,7 @@ struct bus_type; struct device; struct iommu_domain; struct iommu_domain_ops; +struct iommu_dirty_ops; struct notifier_block; struct iommu_sva; struct iommu_fault_event; @@ -64,6 +66,10 @@ struct iommu_domain_geometry { #define __IOMMU_DOMAIN_DMA_FQ (1U << 3) /* DMA-API uses flush queue */ #define __IOMMU_DOMAIN_SVA (1U << 4) /* Shared process address space */ +#define __IOMMU_DOMAIN_PLATFORM (1U << 5) + +#define __IOMMU_DOMAIN_NESTED (1U << 6) /* User-managed address space nested + on a stage-2 translation */ #define IOMMU_DOMAIN_ALLOC_FLAGS ~__IOMMU_DOMAIN_DMA_FQ /* @@ -81,6 +87,8 @@ struct iommu_domain_geometry { * invalidation. * IOMMU_DOMAIN_SVA - DMA addresses are shared process addresses * represented by mm_struct's. + * IOMMU_DOMAIN_PLATFORM - Legacy domain for drivers that do their own + * dma_api stuff. Do not use in new drivers. */ #define IOMMU_DOMAIN_BLOCKED (0U) #define IOMMU_DOMAIN_IDENTITY (__IOMMU_DOMAIN_PT) @@ -91,10 +99,14 @@ struct iommu_domain_geometry { __IOMMU_DOMAIN_DMA_API | \ __IOMMU_DOMAIN_DMA_FQ) #define IOMMU_DOMAIN_SVA (__IOMMU_DOMAIN_SVA) +#define IOMMU_DOMAIN_PLATFORM (__IOMMU_DOMAIN_PLATFORM) +#define IOMMU_DOMAIN_NESTED (__IOMMU_DOMAIN_NESTED) struct iommu_domain { unsigned type; const struct iommu_domain_ops *ops; + const struct iommu_dirty_ops *dirty_ops; + unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ struct iommu_domain_geometry geometry; struct iommu_dma_cookie *iova_cookie; @@ -133,6 +145,7 @@ enum iommu_cap { * usefully support the non-strict DMA flush queue. */ IOMMU_CAP_DEFERRED_FLUSH, + IOMMU_CAP_DIRTY_TRACKING, /* IOMMU supports dirty tracking */ }; /* These are the possible reserved region types */ @@ -196,6 +209,8 @@ enum iommu_dev_features { IOMMU_DEV_FEAT_IOPF, }; +#define IOMMU_NO_PASID (0U) /* Reserved for DMA w/o PASID */ +#define IOMMU_FIRST_GLOBAL_PASID (1U) /*starting range for allocation */ #define IOMMU_PASID_INVALID (-1U) typedef unsigned int ioasid_t; @@ -226,16 +241,115 @@ struct iommu_iotlb_gather { }; /** + * struct iommu_dirty_bitmap - Dirty IOVA bitmap state + * @bitmap: IOVA bitmap + * @gather: Range information for a pending IOTLB flush + */ +struct iommu_dirty_bitmap { + struct iova_bitmap *bitmap; + struct iommu_iotlb_gather *gather; +}; + +/* Read but do not clear any dirty bits */ +#define IOMMU_DIRTY_NO_CLEAR (1 << 0) + +/** + * struct iommu_dirty_ops - domain specific dirty tracking operations + * @set_dirty_tracking: Enable or Disable dirty tracking on the iommu domain + * @read_and_clear_dirty: Walk IOMMU page tables for dirtied PTEs marshalled + * into a bitmap, with a bit represented as a page. + * Reads the dirty PTE bits and clears it from IO + * pagetables. + */ +struct iommu_dirty_ops { + int (*set_dirty_tracking)(struct iommu_domain *domain, bool enabled); + int (*read_and_clear_dirty)(struct iommu_domain *domain, + unsigned long iova, size_t size, + unsigned long flags, + struct iommu_dirty_bitmap *dirty); +}; + +/** + * struct iommu_user_data - iommu driver specific user space data info + * @type: The data type of the user buffer + * @uptr: Pointer to the user buffer for copy_from_user() + * @len: The length of the user buffer in bytes + * + * A user space data is an uAPI that is defined in include/uapi/linux/iommufd.h + * @type, @uptr and @len should be just copied from an iommufd core uAPI struct. + */ +struct iommu_user_data { + unsigned int type; + void __user *uptr; + size_t len; +}; + +/** + * __iommu_copy_struct_from_user - Copy iommu driver specific user space data + * @dst_data: Pointer to an iommu driver specific user data that is defined in + * include/uapi/linux/iommufd.h + * @src_data: Pointer to a struct iommu_user_data for user space data info + * @data_type: The data type of the @dst_data. Must match with @src_data.type + * @data_len: Length of current user data structure, i.e. sizeof(struct _dst) + * @min_len: Initial length of user data structure for backward compatibility. + * This should be offsetofend using the last member in the user data + * struct that was initially added to include/uapi/linux/iommufd.h + */ +static inline int __iommu_copy_struct_from_user( + void *dst_data, const struct iommu_user_data *src_data, + unsigned int data_type, size_t data_len, size_t min_len) +{ + if (src_data->type != data_type) + return -EINVAL; + if (WARN_ON(!dst_data || !src_data)) + return -EINVAL; + if (src_data->len < min_len || data_len < src_data->len) + return -EINVAL; + return copy_struct_from_user(dst_data, data_len, src_data->uptr, + src_data->len); +} + +/** + * iommu_copy_struct_from_user - Copy iommu driver specific user space data + * @kdst: Pointer to an iommu driver specific user data that is defined in + * include/uapi/linux/iommufd.h + * @user_data: Pointer to a struct iommu_user_data for user space data info + * @data_type: The data type of the @kdst. Must match with @user_data->type + * @min_last: The last memember of the data structure @kdst points in the + * initial version. + * Return 0 for success, otherwise -error. + */ +#define iommu_copy_struct_from_user(kdst, user_data, data_type, min_last) \ + __iommu_copy_struct_from_user(kdst, user_data, data_type, \ + sizeof(*kdst), \ + offsetofend(typeof(*kdst), min_last)) + +/** * struct iommu_ops - iommu ops and capabilities * @capable: check capability - * @domain_alloc: allocate iommu domain + * @hw_info: report iommu hardware information. The data buffer returned by this + * op is allocated in the iommu driver and freed by the caller after + * use. The information type is one of enum iommu_hw_info_type defined + * in include/uapi/linux/iommufd.h. + * @domain_alloc: allocate and return an iommu domain if success. Otherwise + * NULL is returned. The domain is not fully initialized until + * the caller iommu_domain_alloc() returns. + * @domain_alloc_user: Allocate an iommu domain corresponding to the input + * parameters as defined in include/uapi/linux/iommufd.h. + * Unlike @domain_alloc, it is called only by IOMMUFD and + * must fully initialize the new domain before return. + * Upon success, if the @user_data is valid and the @parent + * points to a kernel-managed domain, the new domain must be + * IOMMU_DOMAIN_NESTED type; otherwise, the @parent must be + * NULL while the @user_data can be optionally provided, the + * new domain must support __IOMMU_DOMAIN_PAGING. + * Upon failure, ERR_PTR must be returned. + * @domain_alloc_paging: Allocate an iommu_domain that can be used for + * UNMANAGED, DMA, and DMA_FQ domain types. * @probe_device: Add device to iommu driver handling * @release_device: Remove device from iommu driver handling * @probe_finalize: Do final setup work after the device is added to an IOMMU * group and attached to the groups domain - * @set_platform_dma_ops: Returning control back to the platform DMA ops. This op - * is to support old IOMMU drivers, new drivers should use - * default domains, and the common IOMMU DMA ops. * @device_group: find iommu group for a particular device * @get_resv_regions: Request list of reserved regions for a device * @of_xlate: add OF master IDs to iommu grouping @@ -254,17 +368,28 @@ struct iommu_iotlb_gather { * will be blocked by the hardware. * @pgsize_bitmap: bitmap of all possible supported page sizes * @owner: Driver module providing these ops + * @identity_domain: An always available, always attachable identity + * translation. + * @blocked_domain: An always available, always attachable blocking + * translation. + * @default_domain: If not NULL this will always be set as the default domain. + * This should be an IDENTITY/BLOCKED/PLATFORM domain. + * Do not use in new drivers. */ struct iommu_ops { bool (*capable)(struct device *dev, enum iommu_cap); + void *(*hw_info)(struct device *dev, u32 *length, u32 *type); /* Domain allocation and freeing by the iommu driver */ struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type); + struct iommu_domain *(*domain_alloc_user)( + struct device *dev, u32 flags, struct iommu_domain *parent, + const struct iommu_user_data *user_data); + struct iommu_domain *(*domain_alloc_paging)(struct device *dev); struct iommu_device *(*probe_device)(struct device *dev); void (*release_device)(struct device *dev); void (*probe_finalize)(struct device *dev); - void (*set_platform_dma_ops)(struct device *dev); struct iommu_group *(*device_group)(struct device *dev); /* Request/Free a list of reserved regions for a device */ @@ -287,6 +412,9 @@ struct iommu_ops { const struct iommu_domain_ops *default_domain_ops; unsigned long pgsize_bitmap; struct module *owner; + struct iommu_domain *identity_domain; + struct iommu_domain *blocked_domain; + struct iommu_domain *default_domain; }; /** @@ -305,10 +433,8 @@ struct iommu_ops { * * ENODEV - device specific errors, not able to be attached * * <others> - treated as ENODEV by the caller. Use is discouraged * @set_dev_pasid: set an iommu domain to a pasid of device - * @map: map a physically contiguous memory region to an iommu domain * @map_pages: map a physically contiguous set of pages of the same size to * an iommu domain. - * @unmap: unmap a physically contiguous memory region from an iommu domain * @unmap_pages: unmap a number of pages of the same size from an iommu domain * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain * @iotlb_sync_map: Sync mappings created recently using @map to the hardware @@ -327,20 +453,16 @@ struct iommu_domain_ops { int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); - int (*map)(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp); int (*map_pages)(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped); - size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, - size_t size, struct iommu_iotlb_gather *iotlb_gather); size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova, size_t pgsize, size_t pgcount, struct iommu_iotlb_gather *iotlb_gather); void (*flush_iotlb_all)(struct iommu_domain *domain); - void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova, - size_t size); + int (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova, + size_t size); void (*iotlb_sync)(struct iommu_domain *domain, struct iommu_iotlb_gather *iotlb_gather); @@ -361,6 +483,7 @@ struct iommu_domain_ops { * @list: Used by the iommu-core to keep a list of registered iommus * @ops: iommu-ops for talking to this iommu * @dev: struct device for sysfs handling + * @singleton_group: Used internally for drivers that have only one group * @max_pasids: number of supported PASIDs */ struct iommu_device { @@ -368,6 +491,7 @@ struct iommu_device { const struct iommu_ops *ops; struct fwnode_handle *fwnode; struct device *dev; + struct iommu_group *singleton_group; u32 max_pasids; }; @@ -409,6 +533,9 @@ struct iommu_fault_param { * @priv: IOMMU Driver private data * @max_pasids: number of PASIDs this device can consume * @attach_deferred: the dma domain attachment is deferred + * @pci_32bit_workaround: Limit DMA allocations to 32-bit IOVAs + * @require_direct: device requires IOMMU_RESV_DIRECT regions + * @shadow_on_flush: IOTLB flushes are used to sync shadow tables * * TODO: migrate other per device data pointers under iommu_dev_data, e.g. * struct iommu_group *iommu_group; @@ -422,6 +549,9 @@ struct dev_iommu { void *priv; u32 max_pasids; u32 attach_deferred:1; + u32 pci_32bit_workaround:1; + u32 require_direct:1; + u32 shadow_on_flush:1; }; int iommu_device_register(struct iommu_device *iommu, @@ -450,17 +580,6 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) }; } -static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) -{ - /* - * Assume that valid ops must be installed if iommu_probe_device() - * has succeeded. The device ops are essentially for internal use - * within the IOMMU subsystem itself, so we should be able to trust - * ourselves not to misuse the helper. - */ - return dev->iommu->iommu_dev->ops; -} - extern int bus_iommu_probe(const struct bus_type *bus); extern bool iommu_present(const struct bus_type *bus); extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap); @@ -632,12 +751,35 @@ static inline bool iommu_iotlb_gather_queued(struct iommu_iotlb_gather *gather) return gather && gather->queued; } +static inline void iommu_dirty_bitmap_init(struct iommu_dirty_bitmap *dirty, + struct iova_bitmap *bitmap, + struct iommu_iotlb_gather *gather) +{ + if (gather) + iommu_iotlb_gather_init(gather); + + dirty->bitmap = bitmap; + dirty->gather = gather; +} + +static inline void iommu_dirty_bitmap_record(struct iommu_dirty_bitmap *dirty, + unsigned long iova, + unsigned long length) +{ + if (dirty->bitmap) + iova_bitmap_set(dirty->bitmap, iova, length); + + if (dirty->gather) + iommu_iotlb_gather_add_range(dirty->gather, iova, length); +} + /* PCI device grouping function */ extern struct iommu_group *pci_device_group(struct device *dev); /* Generic device grouping function */ extern struct iommu_group *generic_device_group(struct device *dev); /* FSL-MC device grouping function */ struct iommu_group *fsl_mc_device_group(struct device *dev); +extern struct iommu_group *generic_single_device_group(struct device *dev); /** * struct iommu_fwspec - per-device IOMMU instance data @@ -703,6 +845,7 @@ static inline void dev_iommu_priv_set(struct device *dev, void *priv) dev->iommu->priv = priv; } +extern struct mutex iommu_probe_device_lock; int iommu_probe_device(struct device *dev); int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f); @@ -727,6 +870,8 @@ void iommu_detach_device_pasid(struct iommu_domain *domain, struct iommu_domain * iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid, unsigned int type); +ioasid_t iommu_alloc_global_pasid(struct device *dev); +void iommu_free_global_pasid(ioasid_t pasid); #else /* CONFIG_IOMMU_API */ struct iommu_ops {}; @@ -735,6 +880,8 @@ struct iommu_fwspec {}; struct iommu_device {}; struct iommu_fault_param {}; struct iommu_iotlb_gather {}; +struct iommu_dirty_bitmap {}; +struct iommu_dirty_ops {}; static inline bool iommu_present(const struct bus_type *bus) { @@ -967,6 +1114,18 @@ static inline bool iommu_iotlb_gather_queued(struct iommu_iotlb_gather *gather) return false; } +static inline void iommu_dirty_bitmap_init(struct iommu_dirty_bitmap *dirty, + struct iova_bitmap *bitmap, + struct iommu_iotlb_gather *gather) +{ +} + +static inline void iommu_dirty_bitmap_record(struct iommu_dirty_bitmap *dirty, + unsigned long iova, + unsigned long length) +{ +} + static inline void iommu_device_unregister(struct iommu_device *iommu) { } @@ -1088,6 +1247,13 @@ iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid, { return NULL; } + +static inline ioasid_t iommu_alloc_global_pasid(struct device *dev) +{ + return IOMMU_PASID_INVALID; +} + +static inline void iommu_free_global_pasid(ioasid_t pasid) {} #endif /* CONFIG_IOMMU_API */ /** @@ -1100,7 +1266,7 @@ iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid, * Creates a mapping at @iova for the buffer described by a scatterlist * stored in the given sg_table object in the provided IOMMU domain. */ -static inline size_t iommu_map_sgtable(struct iommu_domain *domain, +static inline ssize_t iommu_map_sgtable(struct iommu_domain *domain, unsigned long iova, struct sg_table *sgt, int prot) { return iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, prot, diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 1129a36a74c4..ffc3a949f837 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -16,14 +16,19 @@ struct page; struct iommufd_ctx; struct iommufd_access; struct file; +struct iommu_group; struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, struct device *dev, u32 *id); void iommufd_device_unbind(struct iommufd_device *idev); int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id); +int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id); void iommufd_device_detach(struct iommufd_device *idev); +struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev); +u32 iommufd_device_to_id(struct iommufd_device *idev); + struct iommufd_access_ops { u8 needs_pin_pages : 1; void (*unmap)(void *data, unsigned long iova, unsigned long length); @@ -44,12 +49,16 @@ iommufd_access_create(struct iommufd_ctx *ictx, const struct iommufd_access_ops *ops, void *data, u32 *id); void iommufd_access_destroy(struct iommufd_access *access); int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id); +int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id); +void iommufd_access_detach(struct iommufd_access *access); void iommufd_ctx_get(struct iommufd_ctx *ictx); #if IS_ENABLED(CONFIG_IOMMUFD) struct iommufd_ctx *iommufd_ctx_from_file(struct file *file); +struct iommufd_ctx *iommufd_ctx_from_fd(int fd); void iommufd_ctx_put(struct iommufd_ctx *ictx); +bool iommufd_ctx_has_group(struct iommufd_ctx *ictx, struct iommu_group *group); int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova, unsigned long length, struct page **out_pages, diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 25d768d48970..14f5cfabbbc8 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -229,7 +229,7 @@ static inline unsigned long resource_ext_type(const struct resource *res) return res->flags & IORESOURCE_EXT_TYPE_BITS; } /* True iff r1 completely contains r2 */ -static inline bool resource_contains(struct resource *r1, struct resource *r2) +static inline bool resource_contains(const struct resource *r1, const struct resource *r2) { if (resource_type(r1) != resource_type(r2)) return false; @@ -239,13 +239,13 @@ static inline bool resource_contains(struct resource *r1, struct resource *r2) } /* True if any part of r1 overlaps r2 */ -static inline bool resource_overlaps(struct resource *r1, struct resource *r2) +static inline bool resource_overlaps(const struct resource *r1, const struct resource *r2) { return r1->start <= r2->end && r1->end >= r2->start; } -static inline bool -resource_intersection(struct resource *r1, struct resource *r2, struct resource *r) +static inline bool resource_intersection(const struct resource *r1, const struct resource *r2, + struct resource *r) { if (!resource_overlaps(r1, r2)) return false; @@ -254,8 +254,8 @@ resource_intersection(struct resource *r1, struct resource *r2, struct resource return true; } -static inline bool -resource_union(struct resource *r1, struct resource *r2, struct resource *r) +static inline bool resource_union(const struct resource *r1, const struct resource *r2, + struct resource *r) { if (!resource_overlaps(r1, r2)) return false; diff --git a/include/linux/ioremap.h b/include/linux/ioremap.h new file mode 100644 index 000000000000..f0e99fc7dd8b --- /dev/null +++ b/include/linux/ioremap.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_IOREMAP_H +#define _LINUX_IOREMAP_H + +#include <linux/kasan.h> +#include <asm/pgtable.h> + +#if defined(CONFIG_HAS_IOMEM) || defined(CONFIG_GENERIC_IOREMAP) +/* + * Ioremap often, but not always uses the generic vmalloc area. E.g on + * Power ARCH, it could have different ioremap space. + */ +#ifndef IOREMAP_START +#define IOREMAP_START VMALLOC_START +#define IOREMAP_END VMALLOC_END +#endif +static inline bool is_ioremap_addr(const void *x) +{ + unsigned long addr = (unsigned long)kasan_reset_tag(x); + + return addr >= IOREMAP_START && addr < IOREMAP_END; +} +#else +static inline bool is_ioremap_addr(const void *x) +{ + return false; +} +#endif + +#endif /* _LINUX_IOREMAP_H */ diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h index cb71aa616bd3..1b06d074ade0 100644 --- a/include/linux/iosys-map.h +++ b/include/linux/iosys-map.h @@ -426,7 +426,7 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, * iosys_map_rd_field - Read a member from a struct in the iosys_map * * @map__: The iosys_map structure - * @struct_offset__: Offset from the beggining of the map, where the struct + * @struct_offset__: Offset from the beginning of the map, where the struct * is located * @struct_type__: The struct describing the layout of the mapping * @field__: Member of the struct to read @@ -494,7 +494,7 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, * iosys_map_wr_field - Write to a member of a struct in the iosys_map * * @map__: The iosys_map structure - * @struct_offset__: Offset from the beggining of the map, where the struct + * @struct_offset__: Offset from the beginning of the map, where the struct * is located * @struct_type__: The struct describing the layout of the mapping * @field__: Member of the struct to read diff --git a/include/linux/iov_iter.h b/include/linux/iov_iter.h new file mode 100644 index 000000000000..270454a6703d --- /dev/null +++ b/include/linux/iov_iter.h @@ -0,0 +1,274 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* I/O iterator iteration building functions. + * + * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#ifndef _LINUX_IOV_ITER_H +#define _LINUX_IOV_ITER_H + +#include <linux/uio.h> +#include <linux/bvec.h> + +typedef size_t (*iov_step_f)(void *iter_base, size_t progress, size_t len, + void *priv, void *priv2); +typedef size_t (*iov_ustep_f)(void __user *iter_base, size_t progress, size_t len, + void *priv, void *priv2); + +/* + * Handle ITER_UBUF. + */ +static __always_inline +size_t iterate_ubuf(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_ustep_f step) +{ + void __user *base = iter->ubuf; + size_t progress = 0, remain; + + remain = step(base + iter->iov_offset, 0, len, priv, priv2); + progress = len - remain; + iter->iov_offset += progress; + iter->count -= progress; + return progress; +} + +/* + * Handle ITER_IOVEC. + */ +static __always_inline +size_t iterate_iovec(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_ustep_f step) +{ + const struct iovec *p = iter->__iov; + size_t progress = 0, skip = iter->iov_offset; + + do { + size_t remain, consumed; + size_t part = min(len, p->iov_len - skip); + + if (likely(part)) { + remain = step(p->iov_base + skip, progress, part, priv, priv2); + consumed = part - remain; + progress += consumed; + skip += consumed; + len -= consumed; + if (skip < p->iov_len) + break; + } + p++; + skip = 0; + } while (len); + + iter->nr_segs -= p - iter->__iov; + iter->__iov = p; + iter->iov_offset = skip; + iter->count -= progress; + return progress; +} + +/* + * Handle ITER_KVEC. + */ +static __always_inline +size_t iterate_kvec(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_step_f step) +{ + const struct kvec *p = iter->kvec; + size_t progress = 0, skip = iter->iov_offset; + + do { + size_t remain, consumed; + size_t part = min(len, p->iov_len - skip); + + if (likely(part)) { + remain = step(p->iov_base + skip, progress, part, priv, priv2); + consumed = part - remain; + progress += consumed; + skip += consumed; + len -= consumed; + if (skip < p->iov_len) + break; + } + p++; + skip = 0; + } while (len); + + iter->nr_segs -= p - iter->kvec; + iter->kvec = p; + iter->iov_offset = skip; + iter->count -= progress; + return progress; +} + +/* + * Handle ITER_BVEC. + */ +static __always_inline +size_t iterate_bvec(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_step_f step) +{ + const struct bio_vec *p = iter->bvec; + size_t progress = 0, skip = iter->iov_offset; + + do { + size_t remain, consumed; + size_t offset = p->bv_offset + skip, part; + void *kaddr = kmap_local_page(p->bv_page + offset / PAGE_SIZE); + + part = min3(len, + (size_t)(p->bv_len - skip), + (size_t)(PAGE_SIZE - offset % PAGE_SIZE)); + remain = step(kaddr + offset % PAGE_SIZE, progress, part, priv, priv2); + kunmap_local(kaddr); + consumed = part - remain; + len -= consumed; + progress += consumed; + skip += consumed; + if (skip >= p->bv_len) { + skip = 0; + p++; + } + if (remain) + break; + } while (len); + + iter->nr_segs -= p - iter->bvec; + iter->bvec = p; + iter->iov_offset = skip; + iter->count -= progress; + return progress; +} + +/* + * Handle ITER_XARRAY. + */ +static __always_inline +size_t iterate_xarray(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_step_f step) +{ + struct folio *folio; + size_t progress = 0; + loff_t start = iter->xarray_start + iter->iov_offset; + pgoff_t index = start / PAGE_SIZE; + XA_STATE(xas, iter->xarray, index); + + rcu_read_lock(); + xas_for_each(&xas, folio, ULONG_MAX) { + size_t remain, consumed, offset, part, flen; + + if (xas_retry(&xas, folio)) + continue; + if (WARN_ON(xa_is_value(folio))) + break; + if (WARN_ON(folio_test_hugetlb(folio))) + break; + + offset = offset_in_folio(folio, start + progress); + flen = min(folio_size(folio) - offset, len); + + while (flen) { + void *base = kmap_local_folio(folio, offset); + + part = min_t(size_t, flen, + PAGE_SIZE - offset_in_page(offset)); + remain = step(base, progress, part, priv, priv2); + kunmap_local(base); + + consumed = part - remain; + progress += consumed; + len -= consumed; + + if (remain || len == 0) + goto out; + flen -= consumed; + offset += consumed; + } + } + +out: + rcu_read_unlock(); + iter->iov_offset += progress; + iter->count -= progress; + return progress; +} + +/* + * Handle ITER_DISCARD. + */ +static __always_inline +size_t iterate_discard(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_step_f step) +{ + size_t progress = len; + + iter->count -= progress; + return progress; +} + +/** + * iterate_and_advance2 - Iterate over an iterator + * @iter: The iterator to iterate over. + * @len: The amount to iterate over. + * @priv: Data for the step functions. + * @priv2: More data for the step functions. + * @ustep: Function for UBUF/IOVEC iterators; given __user addresses. + * @step: Function for other iterators; given kernel addresses. + * + * Iterate over the next part of an iterator, up to the specified length. The + * buffer is presented in segments, which for kernel iteration are broken up by + * physical pages and mapped, with the mapped address being presented. + * + * Two step functions, @step and @ustep, must be provided, one for handling + * mapped kernel addresses and the other is given user addresses which have the + * potential to fault since no pinning is performed. + * + * The step functions are passed the address and length of the segment, @priv, + * @priv2 and the amount of data so far iterated over (which can, for example, + * be added to @priv to point to the right part of a second buffer). The step + * functions should return the amount of the segment they didn't process (ie. 0 + * indicates complete processsing). + * + * This function returns the amount of data processed (ie. 0 means nothing was + * processed and the value of @len means processes to completion). + */ +static __always_inline +size_t iterate_and_advance2(struct iov_iter *iter, size_t len, void *priv, + void *priv2, iov_ustep_f ustep, iov_step_f step) +{ + if (unlikely(iter->count < len)) + len = iter->count; + if (unlikely(!len)) + return 0; + + if (likely(iter_is_ubuf(iter))) + return iterate_ubuf(iter, len, priv, priv2, ustep); + if (likely(iter_is_iovec(iter))) + return iterate_iovec(iter, len, priv, priv2, ustep); + if (iov_iter_is_bvec(iter)) + return iterate_bvec(iter, len, priv, priv2, step); + if (iov_iter_is_kvec(iter)) + return iterate_kvec(iter, len, priv, priv2, step); + if (iov_iter_is_xarray(iter)) + return iterate_xarray(iter, len, priv, priv2, step); + return iterate_discard(iter, len, priv, priv2, step); +} + +/** + * iterate_and_advance - Iterate over an iterator + * @iter: The iterator to iterate over. + * @len: The amount to iterate over. + * @priv: Data for the step functions. + * @ustep: Function for UBUF/IOVEC iterators; given __user addresses. + * @step: Function for other iterators; given kernel addresses. + * + * As iterate_and_advance2(), but priv2 is always NULL. + */ +static __always_inline +size_t iterate_and_advance(struct iov_iter *iter, size_t len, void *priv, + iov_ustep_f ustep, iov_step_f step) +{ + return iterate_and_advance2(iter, len, priv, NULL, ustep, step); +} + +#endif /* _LINUX_IOV_ITER_H */ diff --git a/include/linux/iova_bitmap.h b/include/linux/iova_bitmap.h index c006cf0a25f3..1c338f5e5b7a 100644 --- a/include/linux/iova_bitmap.h +++ b/include/linux/iova_bitmap.h @@ -7,6 +7,7 @@ #define _IOVA_BITMAP_H_ #include <linux/types.h> +#include <linux/errno.h> struct iova_bitmap; @@ -14,6 +15,7 @@ typedef int (*iova_bitmap_fn_t)(struct iova_bitmap *bitmap, unsigned long iova, size_t length, void *opaque); +#if IS_ENABLED(CONFIG_IOMMUFD_DRIVER) struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length, unsigned long page_size, u64 __user *data); @@ -22,5 +24,29 @@ int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque, iova_bitmap_fn_t fn); void iova_bitmap_set(struct iova_bitmap *bitmap, unsigned long iova, size_t length); +#else +static inline struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, + size_t length, + unsigned long page_size, + u64 __user *data) +{ + return NULL; +} + +static inline void iova_bitmap_free(struct iova_bitmap *bitmap) +{ +} + +static inline int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque, + iova_bitmap_fn_t fn) +{ + return -EOPNOTSUPP; +} + +static inline void iova_bitmap_set(struct iova_bitmap *bitmap, + unsigned long iova, size_t length) +{ +} +#endif #endif diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 839247a4f48e..5e605e384aac 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -33,6 +33,7 @@ struct ipv6_devconf { __s32 accept_ra_defrtr; __u32 ra_defrtr_metric; __s32 accept_ra_min_hop_limit; + __s32 accept_ra_min_lft; __s32 accept_ra_pinfo; __s32 ignore_routes_with_linkdown; #ifdef CONFIG_IPV6_ROUTER_PREF @@ -81,6 +82,7 @@ struct ipv6_devconf { __u32 ioam6_id_wide; __u8 ioam6_enabled; __u8 ndisc_evict_nocarrier; + __u8 ra_honor_pio_life; struct ctl_table_header *sysctl_header; }; @@ -146,6 +148,7 @@ struct inet6_skb_parm { #define IP6SKB_JUMBOGRAM 128 #define IP6SKB_SEG6 256 #define IP6SKB_FAKEJUMBO 512 +#define IP6SKB_MULTIPATH 1024 }; #if defined(CONFIG_NET_L3_MASTER_DEV) @@ -199,14 +202,7 @@ struct inet6_cork { u8 tclass; }; -/** - * struct ipv6_pinfo - ipv6 private area - * - * In the struct sock hierarchy (tcp6_sock, upd6_sock, etc) - * this _must_ be the last member, so that inet6_sk_generic - * is able to calculate its offset from the base struct sock - * by using the struct proto->slab_obj_size member. -acme - */ +/* struct ipv6_pinfo - ipv6 private area */ struct ipv6_pinfo { struct in6_addr saddr; struct in6_pktinfo sticky_pktinfo; @@ -218,28 +214,9 @@ struct ipv6_pinfo { __be32 flow_label; __u32 frag_size; - /* - * Packed in 16bits. - * Omit one shift by putting the signed field at MSB. - */ -#if defined(__BIG_ENDIAN_BITFIELD) - __s16 hop_limit:9; - __u16 __unused_1:7; -#else - __u16 __unused_1:7; - __s16 hop_limit:9; -#endif + s16 hop_limit; + u8 mcast_hops; -#if defined(__BIG_ENDIAN_BITFIELD) - /* Packed in 16bits. */ - __s16 mcast_hops:9; - __u16 __unused_2:6, - mc_loop:1; -#else - __u16 mc_loop:1, - __unused_2:6; - __s16 mcast_hops:9; -#endif int ucast_oif; int mcast_oif; @@ -267,21 +244,11 @@ struct ipv6_pinfo { } rxopt; /* sockopt flags */ - __u16 recverr:1, - sndflow:1, - repflow:1, - pmtudisc:3, - padding:1, /* 1 bit hole */ - srcprefs:3, /* 001: prefer temporary address + __u8 srcprefs; /* 001: prefer temporary address * 010: prefer public address * 100: prefer care-of address */ - dontfrag:1, - autoflowlabel:1, - autoflowlabel_set:1, - mc_all:1, - recverr_rfc4884:1, - rtalert_isolate:1; + __u8 pmtudisc; __u8 min_hopcount; __u8 tclass; __be32 rcv_flowinfo; @@ -298,6 +265,18 @@ struct ipv6_pinfo { struct inet6_cork cork; }; +/* We currently use available bits from inet_sk(sk)->inet_flags, + * this could change in the future. + */ +#define inet6_test_bit(nr, sk) \ + test_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags) +#define inet6_set_bit(nr, sk) \ + set_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags) +#define inet6_clear_bit(nr, sk) \ + clear_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags) +#define inet6_assign_bit(nr, sk, val) \ + assign_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags, val) + /* WARNING: don't change the layout of the members in {raw,udp,tcp}6_sock! */ struct raw6_sock { /* inet_sock has to be the first member of raw6_sock */ @@ -306,19 +285,19 @@ struct raw6_sock { __u32 offset; /* checksum offset */ struct icmp6_filter filter; __u32 ip6mr_table; - /* ipv6_pinfo has to be the last member of raw6_sock, see inet6_sk_generic */ + struct ipv6_pinfo inet6; }; struct udp6_sock { struct udp_sock udp; - /* ipv6_pinfo has to be the last member of udp6_sock, see inet6_sk_generic */ + struct ipv6_pinfo inet6; }; struct tcp6_sock { struct tcp_sock tcp; - /* ipv6_pinfo has to be the last member of tcp6_sock, see inet6_sk_generic */ + struct ipv6_pinfo inet6; }; diff --git a/include/linux/irq.h b/include/linux/irq.h index d8a6fdce9373..90081afa10ce 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -215,8 +215,6 @@ struct irq_data { * IRQD_SINGLE_TARGET - IRQ allows only a single affinity target * IRQD_DEFAULT_TRIGGER_SET - Expected trigger already been set * IRQD_CAN_RESERVE - Can use reservation mode - * IRQD_MSI_NOMASK_QUIRK - Non-maskable MSI quirk for affinity change - * required * IRQD_HANDLE_ENFORCE_IRQCTX - Enforce that handle_irq_*() is only invoked * from actual interrupt context. * IRQD_AFFINITY_ON_ACTIVATE - Affinity is set on activation. Don't call @@ -247,11 +245,10 @@ enum { IRQD_SINGLE_TARGET = BIT(24), IRQD_DEFAULT_TRIGGER_SET = BIT(25), IRQD_CAN_RESERVE = BIT(26), - IRQD_MSI_NOMASK_QUIRK = BIT(27), - IRQD_HANDLE_ENFORCE_IRQCTX = BIT(28), - IRQD_AFFINITY_ON_ACTIVATE = BIT(29), - IRQD_IRQ_ENABLED_ON_SUSPEND = BIT(30), - IRQD_RESEND_WHEN_IN_PROGRESS = BIT(31), + IRQD_HANDLE_ENFORCE_IRQCTX = BIT(27), + IRQD_AFFINITY_ON_ACTIVATE = BIT(28), + IRQD_IRQ_ENABLED_ON_SUSPEND = BIT(29), + IRQD_RESEND_WHEN_IN_PROGRESS = BIT(30), }; #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) @@ -426,21 +423,6 @@ static inline bool irqd_can_reserve(struct irq_data *d) return __irqd_to_state(d) & IRQD_CAN_RESERVE; } -static inline void irqd_set_msi_nomask_quirk(struct irq_data *d) -{ - __irqd_to_state(d) |= IRQD_MSI_NOMASK_QUIRK; -} - -static inline void irqd_clr_msi_nomask_quirk(struct irq_data *d) -{ - __irqd_to_state(d) &= ~IRQD_MSI_NOMASK_QUIRK; -} - -static inline bool irqd_msi_nomask_quirk(struct irq_data *d) -{ - return __irqd_to_state(d) & IRQD_MSI_NOMASK_QUIRK; -} - static inline void irqd_set_affinity_on_activate(struct irq_data *d) { __irqd_to_state(d) |= IRQD_AFFINITY_ON_ACTIVATE; diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 51c254b7fec2..ee0a82c60508 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -174,7 +174,7 @@ struct irq_domain { irq_hw_number_t hwirq_max; unsigned int revmap_size; struct radix_tree_root revmap_tree; - struct irq_data __rcu *revmap[]; + struct irq_data __rcu *revmap[] __counted_by(revmap_size); }; /* Irq domain flags */ diff --git a/include/linux/iversion.h b/include/linux/iversion.h index f174ff1b59ee..8f972eaca2ed 100644 --- a/include/linux/iversion.h +++ b/include/linux/iversion.h @@ -256,7 +256,7 @@ inode_peek_iversion(const struct inode *inode) * For filesystems without any sort of change attribute, the best we can * do is fake one up from the ctime: */ -static inline u64 time_to_chattr(struct timespec64 *t) +static inline u64 time_to_chattr(const struct timespec64 *t) { u64 chattr = t->tv_sec; diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 44c298aa58d4..beb30719ee16 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -631,11 +631,6 @@ struct transaction_s struct list_head t_inode_list; /* - * Protects info related to handles - */ - spinlock_t t_handle_lock; - - /* * Longest time some handle had to wait for running transaction */ unsigned long t_max_wait; @@ -891,7 +886,7 @@ struct journal_s * Journal head shrinker, reclaim buffer's journal head which * has been written back. */ - struct shrinker j_shrinker; + struct shrinker *j_shrinker; /** * @j_checkpoint_jh_count: @@ -1379,6 +1374,9 @@ JBD2_FEATURE_INCOMPAT_FUNCS(csum2, CSUM_V2) JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3) JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT) +/* Journal high priority write IO operation flags */ +#define JBD2_JOURNAL_REQ_FLAGS (REQ_META | REQ_SYNC | REQ_IDLE) + /* * Journal flag definitions */ diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 5e13f801c902..e0ae2a43e0eb 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -72,9 +72,15 @@ extern int register_refined_jiffies(long clock_tick_rate); #endif /* - * The 64-bit value is not atomic - you MUST NOT read it + * The 64-bit value is not atomic on 32-bit systems - you MUST NOT read it * without sampling the sequence number in jiffies_lock. * get_jiffies_64() will do this for you as appropriate. + * + * jiffies and jiffies_64 are at the same address for little-endian systems + * and for 64-bit big-endian systems. + * On 32-bit big-endian systems, jiffies is the lower 32 bits of jiffies_64 + * (i.e., at address @jiffies_64 + 4). + * See arch/ARCH/kernel/vmlinux.lds.S */ extern u64 __cacheline_aligned_in_smp jiffies_64; extern unsigned long volatile __cacheline_aligned_in_smp __jiffy_arch_data jiffies; @@ -82,6 +88,14 @@ extern unsigned long volatile __cacheline_aligned_in_smp __jiffy_arch_data jiffi #if (BITS_PER_LONG < 64) u64 get_jiffies_64(void); #else +/** + * get_jiffies_64 - read the 64-bit non-atomic jiffies_64 value + * + * When BITS_PER_LONG < 64, this uses sequence number sampling using + * jiffies_lock to protect the 64-bit read. + * + * Return: current 64-bit jiffies value + */ static inline u64 get_jiffies_64(void) { return (u64)jiffies; @@ -89,39 +103,76 @@ static inline u64 get_jiffies_64(void) #endif /* - * These inlines deal with timer wrapping correctly. You are - * strongly encouraged to use them + * These inlines deal with timer wrapping correctly. You are + * strongly encouraged to use them: * 1. Because people otherwise forget * 2. Because if the timer wrap changes in future you won't have to * alter your driver code. - * - * time_after(a,b) returns true if the time a is after time b. + */ + +/** + * time_after - returns true if the time a is after time b. + * @a: first comparable as unsigned long + * @b: second comparable as unsigned long * * Do this with "<0" and ">=0" to only test the sign of the result. A * good compiler would generate better code (and a really good compiler * wouldn't care). Gcc is currently neither. + * + * Return: %true is time a is after time b, otherwise %false. */ #define time_after(a,b) \ (typecheck(unsigned long, a) && \ typecheck(unsigned long, b) && \ ((long)((b) - (a)) < 0)) +/** + * time_before - returns true if the time a is before time b. + * @a: first comparable as unsigned long + * @b: second comparable as unsigned long + * + * Return: %true is time a is before time b, otherwise %false. + */ #define time_before(a,b) time_after(b,a) +/** + * time_after_eq - returns true if the time a is after or the same as time b. + * @a: first comparable as unsigned long + * @b: second comparable as unsigned long + * + * Return: %true is time a is after or the same as time b, otherwise %false. + */ #define time_after_eq(a,b) \ (typecheck(unsigned long, a) && \ typecheck(unsigned long, b) && \ ((long)((a) - (b)) >= 0)) +/** + * time_before_eq - returns true if the time a is before or the same as time b. + * @a: first comparable as unsigned long + * @b: second comparable as unsigned long + * + * Return: %true is time a is before or the same as time b, otherwise %false. + */ #define time_before_eq(a,b) time_after_eq(b,a) -/* - * Calculate whether a is in the range of [b, c]. +/** + * time_in_range - Calculate whether a is in the range of [b, c]. + * @a: time to test + * @b: beginning of the range + * @c: end of the range + * + * Return: %true is time a is in the range [b, c], otherwise %false. */ #define time_in_range(a,b,c) \ (time_after_eq(a,b) && \ time_before_eq(a,c)) -/* - * Calculate whether a is in the range of [b, c). +/** + * time_in_range_open - Calculate whether a is in the range of [b, c). + * @a: time to test + * @b: beginning of the range + * @c: end of the range + * + * Return: %true is time a is in the range [b, c), otherwise %false. */ #define time_in_range_open(a,b,c) \ (time_after_eq(a,b) && \ @@ -129,45 +180,138 @@ static inline u64 get_jiffies_64(void) /* Same as above, but does so with platform independent 64bit types. * These must be used when utilizing jiffies_64 (i.e. return value of - * get_jiffies_64() */ + * get_jiffies_64()). */ + +/** + * time_after64 - returns true if the time a is after time b. + * @a: first comparable as __u64 + * @b: second comparable as __u64 + * + * This must be used when utilizing jiffies_64 (i.e. return value of + * get_jiffies_64()). + * + * Return: %true is time a is after time b, otherwise %false. + */ #define time_after64(a,b) \ (typecheck(__u64, a) && \ typecheck(__u64, b) && \ ((__s64)((b) - (a)) < 0)) +/** + * time_before64 - returns true if the time a is before time b. + * @a: first comparable as __u64 + * @b: second comparable as __u64 + * + * This must be used when utilizing jiffies_64 (i.e. return value of + * get_jiffies_64()). + * + * Return: %true is time a is before time b, otherwise %false. + */ #define time_before64(a,b) time_after64(b,a) +/** + * time_after_eq64 - returns true if the time a is after or the same as time b. + * @a: first comparable as __u64 + * @b: second comparable as __u64 + * + * This must be used when utilizing jiffies_64 (i.e. return value of + * get_jiffies_64()). + * + * Return: %true is time a is after or the same as time b, otherwise %false. + */ #define time_after_eq64(a,b) \ (typecheck(__u64, a) && \ typecheck(__u64, b) && \ ((__s64)((a) - (b)) >= 0)) +/** + * time_before_eq64 - returns true if the time a is before or the same as time b. + * @a: first comparable as __u64 + * @b: second comparable as __u64 + * + * This must be used when utilizing jiffies_64 (i.e. return value of + * get_jiffies_64()). + * + * Return: %true is time a is before or the same as time b, otherwise %false. + */ #define time_before_eq64(a,b) time_after_eq64(b,a) +/** + * time_in_range64 - Calculate whether a is in the range of [b, c]. + * @a: time to test + * @b: beginning of the range + * @c: end of the range + * + * Return: %true is time a is in the range [b, c], otherwise %false. + */ #define time_in_range64(a, b, c) \ (time_after_eq64(a, b) && \ time_before_eq64(a, c)) /* - * These four macros compare jiffies and 'a' for convenience. + * These eight macros compare jiffies[_64] and 'a' for convenience. */ -/* time_is_before_jiffies(a) return true if a is before jiffies */ +/** + * time_is_before_jiffies - return true if a is before jiffies + * @a: time (unsigned long) to compare to jiffies + * + * Return: %true is time a is before jiffies, otherwise %false. + */ #define time_is_before_jiffies(a) time_after(jiffies, a) +/** + * time_is_before_jiffies64 - return true if a is before jiffies_64 + * @a: time (__u64) to compare to jiffies_64 + * + * Return: %true is time a is before jiffies_64, otherwise %false. + */ #define time_is_before_jiffies64(a) time_after64(get_jiffies_64(), a) -/* time_is_after_jiffies(a) return true if a is after jiffies */ +/** + * time_is_after_jiffies - return true if a is after jiffies + * @a: time (unsigned long) to compare to jiffies + * + * Return: %true is time a is after jiffies, otherwise %false. + */ #define time_is_after_jiffies(a) time_before(jiffies, a) +/** + * time_is_after_jiffies64 - return true if a is after jiffies_64 + * @a: time (__u64) to compare to jiffies_64 + * + * Return: %true is time a is after jiffies_64, otherwise %false. + */ #define time_is_after_jiffies64(a) time_before64(get_jiffies_64(), a) -/* time_is_before_eq_jiffies(a) return true if a is before or equal to jiffies*/ +/** + * time_is_before_eq_jiffies - return true if a is before or equal to jiffies + * @a: time (unsigned long) to compare to jiffies + * + * Return: %true is time a is before or the same as jiffies, otherwise %false. + */ #define time_is_before_eq_jiffies(a) time_after_eq(jiffies, a) +/** + * time_is_before_eq_jiffies64 - return true if a is before or equal to jiffies_64 + * @a: time (__u64) to compare to jiffies_64 + * + * Return: %true is time a is before or the same jiffies_64, otherwise %false. + */ #define time_is_before_eq_jiffies64(a) time_after_eq64(get_jiffies_64(), a) -/* time_is_after_eq_jiffies(a) return true if a is after or equal to jiffies*/ +/** + * time_is_after_eq_jiffies - return true if a is after or equal to jiffies + * @a: time (unsigned long) to compare to jiffies + * + * Return: %true is time a is after or the same as jiffies, otherwise %false. + */ #define time_is_after_eq_jiffies(a) time_before_eq(jiffies, a) +/** + * time_is_after_eq_jiffies64 - return true if a is after or equal to jiffies_64 + * @a: time (__u64) to compare to jiffies_64 + * + * Return: %true is time a is after or the same as jiffies_64, otherwise %false. + */ #define time_is_after_eq_jiffies64(a) time_before_eq64(get_jiffies_64(), a) /* - * Have the 32 bit jiffies value wrap 5 minutes after boot + * Have the 32-bit jiffies value wrap 5 minutes after boot * so jiffies wrap bugs show up earlier. */ #define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ)) @@ -278,7 +422,7 @@ extern unsigned long preset_lpj; #if BITS_PER_LONG < 64 # define MAX_SEC_IN_JIFFIES \ (long)((u64)((u64)MAX_JIFFY_OFFSET * TICK_NSEC) / NSEC_PER_SEC) -#else /* take care of overflow on 64 bits machines */ +#else /* take care of overflow on 64-bit machines */ # define MAX_SEC_IN_JIFFIES \ (SH_DIV((MAX_JIFFY_OFFSET >> SEC_JIFFIE_SC) * TICK_NSEC, NSEC_PER_SEC, 1) - 1) @@ -290,6 +434,12 @@ extern unsigned long preset_lpj; extern unsigned int jiffies_to_msecs(const unsigned long j); extern unsigned int jiffies_to_usecs(const unsigned long j); +/** + * jiffies_to_nsecs - Convert jiffies to nanoseconds + * @j: jiffies value + * + * Return: nanoseconds value + */ static inline u64 jiffies_to_nsecs(const unsigned long j) { return (u64)jiffies_to_usecs(j) * NSEC_PER_USEC; @@ -353,12 +503,14 @@ static inline unsigned long _msecs_to_jiffies(const unsigned int m) * * msecs_to_jiffies() checks for the passed in value being a constant * via __builtin_constant_p() allowing gcc to eliminate most of the - * code, __msecs_to_jiffies() is called if the value passed does not + * code. __msecs_to_jiffies() is called if the value passed does not * allow constant folding and the actual conversion must be done at * runtime. - * the HZ range specific helpers _msecs_to_jiffies() are called both + * The HZ range specific helpers _msecs_to_jiffies() are called both * directly here and from __msecs_to_jiffies() in the case where * constant folding is not possible. + * + * Return: jiffies value */ static __always_inline unsigned long msecs_to_jiffies(const unsigned int m) { @@ -400,12 +552,14 @@ static inline unsigned long _usecs_to_jiffies(const unsigned int u) * * usecs_to_jiffies() checks for the passed in value being a constant * via __builtin_constant_p() allowing gcc to eliminate most of the - * code, __usecs_to_jiffies() is called if the value passed does not + * code. __usecs_to_jiffies() is called if the value passed does not * allow constant folding and the actual conversion must be done at * runtime. - * the HZ range specific helpers _usecs_to_jiffies() are called both + * The HZ range specific helpers _usecs_to_jiffies() are called both * directly here and from __msecs_to_jiffies() in the case where * constant folding is not possible. + * + * Return: jiffies value */ static __always_inline unsigned long usecs_to_jiffies(const unsigned int u) { @@ -422,6 +576,7 @@ extern unsigned long timespec64_to_jiffies(const struct timespec64 *value); extern void jiffies_to_timespec64(const unsigned long jiffies, struct timespec64 *value); extern clock_t jiffies_to_clock_t(unsigned long x); + static inline clock_t jiffies_delta_to_clock_t(long delta) { return jiffies_to_clock_t(max(0L, delta)); diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 819b6bc8ac08..72cb693b075b 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -54,11 +54,13 @@ extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D]; int kasan_populate_early_shadow(const void *shadow_start, const void *shadow_end); +#ifndef kasan_mem_to_shadow static inline void *kasan_mem_to_shadow(const void *addr) { return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET; } +#endif int kasan_add_zero_shadow(void *start, unsigned long size); void kasan_remove_zero_shadow(void *start, unsigned long size); @@ -283,8 +285,10 @@ static inline bool kasan_check_byte(const void *address) #if defined(CONFIG_KASAN) && defined(CONFIG_KASAN_STACK) void kasan_unpoison_task_stack(struct task_struct *task); +asmlinkage void kasan_unpoison_task_stack_below(const void *watermark); #else static inline void kasan_unpoison_task_stack(struct task_struct *task) {} +static inline void kasan_unpoison_task_stack_below(const void *watermark) {} #endif #ifdef CONFIG_KASAN_GENERIC @@ -464,10 +468,10 @@ static inline void kasan_free_module_shadow(const struct vm_struct *vm) {} #endif /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ -#ifdef CONFIG_KASAN_INLINE +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) void kasan_non_canonical_hook(unsigned long addr); -#else /* CONFIG_KASAN_INLINE */ +#else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ static inline void kasan_non_canonical_hook(unsigned long addr) { } -#endif /* CONFIG_KASAN_INLINE */ +#endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ #endif /* LINUX_KASAN_H */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 0d91e0af0125..d9ad21058eed 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -13,6 +13,7 @@ #include <linux/stdarg.h> #include <linux/align.h> +#include <linux/array_size.h> #include <linux/limits.h> #include <linux/linkage.h> #include <linux/stddef.h> @@ -29,6 +30,7 @@ #include <linux/panic.h> #include <linux/printk.h> #include <linux/build_bug.h> +#include <linux/sprintf.h> #include <linux/static_call_types.h> #include <linux/instruction_pointer.h> #include <asm/byteorder.h> @@ -49,12 +51,6 @@ #define READ 0 #define WRITE 1 -/** - * ARRAY_SIZE - get the number of elements in array @arr - * @arr: array to be sized - */ -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) - #define PTR_IF(cond, ptr) ((cond) ? (ptr) : NULL) #define u64_to_user_ptr(x) ( \ @@ -203,35 +199,6 @@ static inline void might_fault(void) { } void do_exit(long error_code) __noreturn; -extern int num_to_str(char *buf, int size, - unsigned long long num, unsigned int width); - -/* lib/printf utilities */ - -extern __printf(2, 3) int sprintf(char *buf, const char * fmt, ...); -extern __printf(2, 0) int vsprintf(char *buf, const char *, va_list); -extern __printf(3, 4) -int snprintf(char *buf, size_t size, const char *fmt, ...); -extern __printf(3, 0) -int vsnprintf(char *buf, size_t size, const char *fmt, va_list args); -extern __printf(3, 4) -int scnprintf(char *buf, size_t size, const char *fmt, ...); -extern __printf(3, 0) -int vscnprintf(char *buf, size_t size, const char *fmt, va_list args); -extern __printf(2, 3) __malloc -char *kasprintf(gfp_t gfp, const char *fmt, ...); -extern __printf(2, 0) __malloc -char *kvasprintf(gfp_t gfp, const char *fmt, va_list args); -extern __printf(2, 0) -const char *kvasprintf_const(gfp_t gfp, const char *fmt, va_list args); - -extern __scanf(2, 3) -int sscanf(const char *, const char *, ...); -extern __scanf(2, 0) -int vsscanf(const char *, const char *, va_list); - -extern int no_hash_pointers_enable(char *str); - extern int get_option(char **str, int *pint); extern char *get_options(const char *str, int nints, int *ints); extern unsigned long long memparse(const char *ptr, char **retptr); @@ -457,13 +424,6 @@ ftrace_vprintk(const char *fmt, va_list ap) static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } #endif /* CONFIG_TRACING */ -/* This counts to 12. Any more, it will return 13th argument. */ -#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n -#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) - -#define __CONCAT(a, b) a ## b -#define CONCATENATE(a, b) __CONCAT(a, b) - /* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */ #ifdef CONFIG_FTRACE_MCOUNT_RECORD # define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 73f5c120def8..99aaa050ccb7 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -316,6 +316,7 @@ struct kernfs_ops { struct poll_table_struct *pt); int (*mmap)(struct kernfs_open_file *of, struct vm_area_struct *vma); + loff_t (*llseek)(struct kernfs_open_file *of, loff_t offset, int whence); }; /* @@ -550,6 +551,10 @@ static inline int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr) { return -ENOSYS; } +static inline __poll_t kernfs_generic_poll(struct kernfs_open_file *of, + struct poll_table_struct *pt) +{ return -ENOSYS; } + static inline void kernfs_notify(struct kernfs_node *kn) { } static inline int kernfs_xattr_get(struct kernfs_node *kn, const char *name, diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 22b5cd24f581..8227455192b7 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -22,10 +22,6 @@ #include <uapi/linux/kexec.h> #include <linux/verification.h> -/* Location of a reserved region to hold the crash kernel. - */ -extern struct resource crashk_res; -extern struct resource crashk_low_res; extern note_buf_t __percpu *crash_notes; #ifdef CONFIG_KEXEC_CORE @@ -33,6 +29,7 @@ extern note_buf_t __percpu *crash_notes; #include <linux/compat.h> #include <linux/ioport.h> #include <linux/module.h> +#include <linux/highmem.h> #include <asm/kexec.h> /* Verify architecture specific macros are defined */ @@ -230,21 +227,6 @@ static inline int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf) } #endif -/* Alignment required for elf header segment */ -#define ELF_CORE_HEADER_ALIGN 4096 - -struct crash_mem { - unsigned int max_nr_ranges; - unsigned int nr_ranges; - struct range ranges[]; -}; - -extern int crash_exclude_mem_range(struct crash_mem *mem, - unsigned long long mstart, - unsigned long long mend); -extern int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, - void **addr, unsigned long *sz); - #ifndef arch_kexec_apply_relocations_add /* * arch_kexec_apply_relocations_add - apply relocations of type RELA @@ -334,6 +316,10 @@ struct kimage { unsigned int preserve_context : 1; /* If set, we are using file mode kexec syscall */ unsigned int file_mode:1; +#ifdef CONFIG_CRASH_HOTPLUG + /* If set, allow changes to elfcorehdr of kexec_load'd image */ + unsigned int update_elfcorehdr:1; +#endif #ifdef ARCH_HAS_KIMAGE_ARCH struct kimage_arch arch; @@ -360,6 +346,12 @@ struct kimage { struct purgatory_info purgatory_info; #endif +#ifdef CONFIG_CRASH_HOTPLUG + int hp_action; + int elfcorehdr_index; + bool elfcorehdr_updated; +#endif + #ifdef CONFIG_IMA_KEXEC /* Virtual address of IMA measurement buffer for kexec syscall */ void *ima_buffer; @@ -404,9 +396,9 @@ bool kexec_load_permitted(int kexec_image_type); /* List of defined/legal kexec flags */ #ifndef CONFIG_KEXEC_JUMP -#define KEXEC_FLAGS KEXEC_ON_CRASH +#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_UPDATE_ELFCOREHDR) #else -#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT) +#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT | KEXEC_UPDATE_ELFCOREHDR) #endif /* List of defined/legal kexec file flags */ @@ -490,6 +482,24 @@ static inline int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, g static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { } #endif +#ifndef arch_crash_handle_hotplug_event +static inline void arch_crash_handle_hotplug_event(struct kimage *image) { } +#endif + +int crash_check_update_elfcorehdr(void); + +#ifndef crash_hotplug_cpu_support +static inline int crash_hotplug_cpu_support(void) { return 0; } +#endif + +#ifndef crash_hotplug_memory_support +static inline int crash_hotplug_memory_support(void) { return 0; } +#endif + +#ifndef crash_get_elfcorehdr_size +static inline unsigned int crash_get_elfcorehdr_size(void) { return 0; } +#endif + #else /* !CONFIG_KEXEC_CORE */ struct pt_regs; struct task_struct; diff --git a/include/linux/key-type.h b/include/linux/key-type.h index 7d985a1dfe4a..5caf3ce82373 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -73,6 +73,7 @@ struct key_type { unsigned int flags; #define KEY_TYPE_NET_DOMAIN 0x00000001 /* Keys of this type have a net namespace domain */ +#define KEY_TYPE_INSTANT_REAP 0x00000002 /* Keys of this type don't have a delay after expiring */ /* vet a description */ int (*vet_description)(const char *description); diff --git a/include/linux/key.h b/include/linux/key.h index 938d7ecfb495..943a432da3ae 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -515,6 +515,7 @@ extern void key_init(void); #define key_init() do { } while(0) #define key_free_user_ns(ns) do { } while(0) #define key_remove_domain(d) do { } while(0) +#define key_lookup(k) NULL #endif /* CONFIG_KEYS */ #endif /* __KERNEL__ */ diff --git a/include/linux/kfence.h b/include/linux/kfence.h index 726857a4b680..401af4757514 100644 --- a/include/linux/kfence.h +++ b/include/linux/kfence.h @@ -59,15 +59,16 @@ static __always_inline bool is_kfence_address(const void *addr) } /** - * kfence_alloc_pool() - allocate the KFENCE pool via memblock + * kfence_alloc_pool_and_metadata() - allocate the KFENCE pool and KFENCE + * metadata via memblock */ -void __init kfence_alloc_pool(void); +void __init kfence_alloc_pool_and_metadata(void); /** * kfence_init() - perform KFENCE initialization at boot time * - * Requires that kfence_alloc_pool() was called before. This sets up the - * allocation gate timer, and requires that workqueues are available. + * Requires that kfence_alloc_pool_and_metadata() was called before. This sets + * up the allocation gate timer, and requires that workqueues are available. */ void __init kfence_init(void); @@ -223,7 +224,7 @@ bool __kfence_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *sla #else /* CONFIG_KFENCE */ static inline bool is_kfence_address(const void *addr) { return false; } -static inline void kfence_alloc_pool(void) { } +static inline void kfence_alloc_pool_and_metadata(void) { } static inline void kfence_init(void) { } static inline void kfence_shutdown_cache(struct kmem_cache *s) { } static inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) { return NULL; } diff --git a/include/linux/kobject.h b/include/linux/kobject.h index c392c811d9ad..c30affcc43b4 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -69,14 +69,16 @@ struct kobject { const struct kobj_type *ktype; struct kernfs_node *sd; /* sysfs directory entry */ struct kref kref; -#ifdef CONFIG_DEBUG_KOBJECT_RELEASE - struct delayed_work release; -#endif + unsigned int state_initialized:1; unsigned int state_in_sysfs:1; unsigned int state_add_uevent_sent:1; unsigned int state_remove_uevent_sent:1; unsigned int uevent_suppress:1; + +#ifdef CONFIG_DEBUG_KOBJECT_RELEASE + struct delayed_work release; +#endif }; __printf(2, 3) int kobject_set_name(struct kobject *kobj, const char *name, ...); diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 85a64cb95d75..0ff44d6633e3 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -26,8 +26,7 @@ #include <linux/rcupdate.h> #include <linux/mutex.h> #include <linux/ftrace.h> -#include <linux/refcount.h> -#include <linux/freelist.h> +#include <linux/objpool.h> #include <linux/rethook.h> #include <asm/kprobes.h> @@ -140,8 +139,8 @@ static inline bool kprobe_ftrace(struct kprobe *p) * */ struct kretprobe_holder { - struct kretprobe *rp; - refcount_t ref; + struct kretprobe __rcu *rp; + struct objpool_head pool; }; struct kretprobe { @@ -154,7 +153,6 @@ struct kretprobe { #ifdef CONFIG_KRETPROBE_ON_RETHOOK struct rethook *rh; #else - struct freelist_head freelist; struct kretprobe_holder *rph; #endif }; @@ -165,10 +163,7 @@ struct kretprobe_instance { #ifdef CONFIG_KRETPROBE_ON_RETHOOK struct rethook_node node; #else - union { - struct freelist_node freelist; - struct rcu_head rcu; - }; + struct rcu_head rcu; struct llist_node llist; struct kretprobe_holder *rph; kprobe_opcode_t *ret_addr; @@ -202,10 +197,8 @@ extern int arch_trampoline_kprobe(struct kprobe *p); #ifdef CONFIG_KRETPROBE_ON_RETHOOK static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance *ri) { - RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(), - "Kretprobe is accessed from instance under preemptive context"); - - return (struct kretprobe *)READ_ONCE(ri->node.rethook->data); + /* rethook::data is non-changed field, so that you can access it freely. */ + return (struct kretprobe *)ri->node.rethook->data; } static nokprobe_inline unsigned long get_kretprobe_retaddr(struct kretprobe_instance *ri) { @@ -250,10 +243,7 @@ unsigned long kretprobe_trampoline_handler(struct pt_regs *regs, static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance *ri) { - RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(), - "Kretprobe is accessed from instance under preemptive context"); - - return READ_ONCE(ri->rph->rp); + return rcu_dereference_check(ri->rph->rp, rcu_read_lock_any_held()); } static nokprobe_inline unsigned long get_kretprobe_retaddr(struct kretprobe_instance *ri) @@ -450,6 +440,10 @@ int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type, int arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value, char *type, char *sym); + +int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); + #else /* !CONFIG_KPROBES: */ static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 899a314bc487..c2dd786a30e1 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -26,6 +26,22 @@ int ksm_disable(struct mm_struct *mm); int __ksm_enter(struct mm_struct *mm); void __ksm_exit(struct mm_struct *mm); +/* + * To identify zeropages that were mapped by KSM, we reuse the dirty bit + * in the PTE. If the PTE is dirty, the zeropage was mapped by KSM when + * deduplicating memory. + */ +#define is_ksm_zero_pte(pte) (is_zero_pfn(pte_pfn(pte)) && pte_dirty(pte)) + +extern unsigned long ksm_zero_pages; + +static inline void ksm_might_unmap_zero_page(struct mm_struct *mm, pte_t pte) +{ + if (is_ksm_zero_pte(pte)) { + ksm_zero_pages--; + mm->ksm_zero_pages--; + } +} static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) { @@ -95,6 +111,10 @@ static inline void ksm_exit(struct mm_struct *mm) { } +static inline void ksm_might_unmap_zero_page(struct mm_struct *mm, pte_t pte) +{ +} + #ifdef CONFIG_MEMORY_FAILURE static inline void collect_procs_ksm(struct page *page, struct list_head *to_kill, int force_early) diff --git a/include/linux/kstrtox.h b/include/linux/kstrtox.h index 529974e22ea7..7fcf29a4e0de 100644 --- a/include/linux/kstrtox.h +++ b/include/linux/kstrtox.h @@ -147,9 +147,4 @@ extern long simple_strtol(const char *,char **,unsigned int); extern unsigned long long simple_strtoull(const char *,char **,unsigned int); extern long long simple_strtoll(const char *,char **,unsigned int); -static inline int strtobool(const char *s, bool *res) -{ - return kstrtobool(s, res); -} - #endif /* _LINUX_KSTRTOX_H */ diff --git a/include/linux/kthread.h b/include/linux/kthread.h index f1f95a71a4bc..b11f53c1ba2e 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -86,9 +86,9 @@ void free_kthread_struct(struct task_struct *k); void kthread_bind(struct task_struct *k, unsigned int cpu); void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask); int kthread_stop(struct task_struct *k); +int kthread_stop_put(struct task_struct *k); bool kthread_should_stop(void); bool kthread_should_park(void); -bool __kthread_should_park(struct task_struct *k); bool kthread_should_stop_or_park(void); bool kthread_freezable_should_stop(bool *was_frozen); void *kthread_func(struct task_struct *k); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9d3ac7720da9..4944136efaa2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -190,8 +190,6 @@ bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req); bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req, struct kvm_vcpu *except); -bool kvm_make_cpus_request_mask(struct kvm *kvm, unsigned int req, - unsigned long *vcpu_bitmap); #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 @@ -256,11 +254,15 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif #ifdef KVM_ARCH_WANT_MMU_NOTIFIER +union kvm_mmu_notifier_arg { + pte_t pte; +}; + struct kvm_gfn_range { struct kvm_memory_slot *slot; gfn_t start; gfn_t end; - pte_t pte; + union kvm_mmu_notifier_arg arg; bool may_block; }; bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); @@ -662,7 +664,7 @@ struct kvm_irq_routing_table { * Array indexed by gsi. Each entry contains list of irq chips * the gsi is connected to. */ - struct hlist_head map[]; + struct hlist_head map[] __counted_by(nr_rt_entries); }; #endif @@ -865,6 +867,25 @@ static inline void kvm_vm_bugged(struct kvm *kvm) unlikely(__ret); \ }) +/* + * Note, "data corruption" refers to corruption of host kernel data structures, + * not guest data. Guest data corruption, suspected or confirmed, that is tied + * and contained to a single VM should *never* BUG() and potentially panic the + * host, i.e. use this variant of KVM_BUG() if and only if a KVM data structure + * is corrupted and that corruption can have a cascading effect to other parts + * of the hosts and/or to other VMs. + */ +#define KVM_BUG_ON_DATA_CORRUPTION(cond, kvm) \ +({ \ + bool __ret = !!(cond); \ + \ + if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) \ + BUG_ON(__ret); \ + else if (WARN_ON_ONCE(__ret && !(kvm)->vm_bugged)) \ + kvm_vm_bugged(kvm); \ + unlikely(__ret); \ +}) + static inline void kvm_vcpu_srcu_read_lock(struct kvm_vcpu *vcpu) { #ifdef CONFIG_PROVE_RCU @@ -1359,6 +1380,9 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target); void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool yield_to_kernel_mode); void kvm_flush_remote_tlbs(struct kvm *kvm); +void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages); +void kvm_flush_remote_tlbs_memslot(struct kvm *kvm, + const struct kvm_memory_slot *memslot); #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min); @@ -1387,10 +1411,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, unsigned long mask); void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot); -#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT -void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, - const struct kvm_memory_slot *memslot); -#else /* !CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */ +#ifndef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log); int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, int *is_dirty, struct kvm_memory_slot **memslot); @@ -1479,11 +1500,23 @@ static inline void kvm_arch_free_vm(struct kvm *kvm) } #endif -#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB -static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm) +#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS +static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm) { return -ENOTSUPP; } +#else +int kvm_arch_flush_remote_tlbs(struct kvm *kvm); +#endif + +#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE +static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, + gfn_t gfn, u64 nr_pages) +{ + return -EOPNOTSUPP; +} +#else +int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages); #endif #ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA @@ -2148,8 +2181,6 @@ struct kvm_device_ops { int (*mmap)(struct kvm_device *dev, struct vm_area_struct *vma); }; -void kvm_device_get(struct kvm_device *dev); -void kvm_device_put(struct kvm_device *dev); struct kvm_device *kvm_device_from_filp(struct file *filp); int kvm_register_device_ops(const struct kvm_device_ops *ops, u32 type); void kvm_unregister_device_ops(u32 type); diff --git a/include/linux/leds.h b/include/linux/leds.h index 7d428100b42b..aa16dc2a8230 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -100,6 +100,7 @@ struct led_classdev { const char *name; unsigned int brightness; unsigned int max_brightness; + unsigned int color; int flags; /* Lower 16 bits reflect status */ @@ -313,6 +314,8 @@ extern struct led_classdev *of_led_get(struct device_node *np, int index); extern void led_put(struct led_classdev *led_cdev); struct led_classdev *__must_check devm_of_led_get(struct device *dev, int index); +struct led_classdev *__must_check devm_of_led_get_optional(struct device *dev, + int index); /** * led_blink_set - set blinking with software fallback diff --git a/include/linux/libata.h b/include/linux/libata.h index 820f7a3a2749..1dbb14daccfa 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -192,6 +192,7 @@ enum { ATA_PFLAG_UNLOADING = (1 << 9), /* driver is being unloaded */ ATA_PFLAG_UNLOADED = (1 << 10), /* driver is unloaded */ + ATA_PFLAG_RESUMING = (1 << 16), /* port is being resumed */ ATA_PFLAG_SUSPENDED = (1 << 17), /* port is suspended (power) */ ATA_PFLAG_PM_PENDING = (1 << 18), /* PM operation pending */ ATA_PFLAG_INIT_GTM_VALID = (1 << 19), /* initial gtm data valid */ @@ -222,6 +223,10 @@ enum { ATA_HOST_PARALLEL_SCAN = (1 << 2), /* Ports on this host can be scanned in parallel */ ATA_HOST_IGNORE_ATA = (1 << 3), /* Ignore ATA devices on this host. */ + ATA_HOST_NO_PART = (1 << 4), /* Host does not support partial */ + ATA_HOST_NO_SSC = (1 << 5), /* Host does not support slumber */ + ATA_HOST_NO_DEVSLP = (1 << 6), /* Host does not support devslp */ + /* bits 24:31 of host->flags are reserved for LLD specific flags */ /* various lengths of time */ @@ -255,7 +260,7 @@ enum { * advised to wait only for the following duration before * doing SRST. */ - ATA_TMOUT_PMP_SRST_WAIT = 5000, + ATA_TMOUT_PMP_SRST_WAIT = 10000, /* When the LPM policy is set to ATA_LPM_MAX_POWER, there might * be a spurious PHY event, so ignore the first PHY event that @@ -314,9 +319,10 @@ enum { ATA_EH_ENABLE_LINK = (1 << 3), ATA_EH_PARK = (1 << 5), /* unload heads and stop I/O */ ATA_EH_GET_SUCCESS_SENSE = (1 << 6), /* Get sense data for successful cmd */ + ATA_EH_SET_ACTIVE = (1 << 7), /* Set a device to active power mode */ ATA_EH_PERDEV_MASK = ATA_EH_REVALIDATE | ATA_EH_PARK | - ATA_EH_GET_SUCCESS_SENSE, + ATA_EH_GET_SUCCESS_SENSE | ATA_EH_SET_ACTIVE, ATA_EH_ALL_ACTIONS = ATA_EH_REVALIDATE | ATA_EH_RESET | ATA_EH_ENABLE_LINK, @@ -344,7 +350,6 @@ enum { ATA_LINK_RESUME_TRIES = 5, /* how hard are we gonna try to probe/recover devices */ - ATA_PROBE_MAX_TRIES = 3, ATA_EH_DEV_TRIES = 3, ATA_EH_PMP_TRIES = 5, ATA_EH_PMP_LINK_TRIES = 3, @@ -354,7 +359,7 @@ enum { /* This should match the actual table size of * ata_eh_cmd_timeout_table in libata-eh.c. */ - ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 7, + ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 8, /* Horkage types. May be set by libata or controller on drives (some horkage may be drive/controller pair dependent */ @@ -651,7 +656,7 @@ struct ata_cpr { struct ata_cpr_log { u8 nr_cpr; - struct ata_cpr cpr[]; + struct ata_cpr cpr[] __counted_by(nr_cpr); }; struct ata_device { @@ -977,12 +982,6 @@ struct ata_port_operations { ssize_t size); /* - * Obsolete - */ - void (*phy_reset)(struct ata_port *ap); - void (*eng_timeout)(struct ata_port *ap); - - /* * ->inherits must be the last field and all the preceding * fields must be pointers. */ @@ -1116,7 +1115,7 @@ static inline void ata_sas_port_resume(struct ata_port *ap) extern int ata_ratelimit(void); extern void ata_msleep(struct ata_port *ap, unsigned int msecs); extern u32 ata_wait_register(struct ata_port *ap, void __iomem *reg, u32 mask, - u32 val, unsigned long interval, unsigned long timeout); + u32 val, unsigned int interval, unsigned int timeout); extern int atapi_cmd_type(u8 opcode); extern unsigned int ata_pack_xfermask(unsigned int pio_mask, unsigned int mwdma_mask, @@ -1151,6 +1150,7 @@ extern int ata_std_bios_param(struct scsi_device *sdev, struct block_device *bdev, sector_t capacity, int geom[]); extern void ata_scsi_unlock_native_capacity(struct scsi_device *sdev); +extern int ata_scsi_slave_alloc(struct scsi_device *sdev); extern int ata_scsi_slave_config(struct scsi_device *sdev); extern void ata_scsi_slave_destroy(struct scsi_device *sdev); extern int ata_scsi_change_queue_depth(struct scsi_device *sdev, @@ -1166,11 +1166,11 @@ extern void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port * * SATA specific code - drivers/ata/libata-sata.c */ #ifdef CONFIG_SATA_HOST -extern const unsigned long sata_deb_timing_normal[]; -extern const unsigned long sata_deb_timing_hotplug[]; -extern const unsigned long sata_deb_timing_long[]; +extern const unsigned int sata_deb_timing_normal[]; +extern const unsigned int sata_deb_timing_hotplug[]; +extern const unsigned int sata_deb_timing_long[]; -static inline const unsigned long * +static inline const unsigned int * sata_ehc_deb_timing(struct ata_eh_context *ehc) { if (ehc->i.flags & ATA_EHI_HOTPLUGGED) @@ -1185,14 +1185,14 @@ extern int sata_scr_write(struct ata_link *link, int reg, u32 val); extern int sata_scr_write_flush(struct ata_link *link, int reg, u32 val); extern int sata_set_spd(struct ata_link *link); extern int sata_link_hardreset(struct ata_link *link, - const unsigned long *timing, unsigned long deadline, + const unsigned int *timing, unsigned long deadline, bool *online, int (*check_ready)(struct ata_link *)); -extern int sata_link_resume(struct ata_link *link, const unsigned long *params, +extern int sata_link_resume(struct ata_link *link, const unsigned int *params, unsigned long deadline); extern int ata_eh_read_sense_success_ncq_log(struct ata_link *link); extern void ata_eh_analyze_ncq_error(struct ata_link *link); #else -static inline const unsigned long * +static inline const unsigned int * sata_ehc_deb_timing(struct ata_eh_context *ehc) { return NULL; @@ -1212,7 +1212,7 @@ static inline int sata_scr_write_flush(struct ata_link *link, int reg, u32 val) } static inline int sata_set_spd(struct ata_link *link) { return -EOPNOTSUPP; } static inline int sata_link_hardreset(struct ata_link *link, - const unsigned long *timing, + const unsigned int *timing, unsigned long deadline, bool *online, int (*check_ready)(struct ata_link *)) @@ -1222,7 +1222,7 @@ static inline int sata_link_hardreset(struct ata_link *link, return -EOPNOTSUPP; } static inline int sata_link_resume(struct ata_link *link, - const unsigned long *params, + const unsigned int *params, unsigned long deadline) { return -EOPNOTSUPP; @@ -1234,20 +1234,15 @@ static inline int ata_eh_read_sense_success_ncq_log(struct ata_link *link) static inline void ata_eh_analyze_ncq_error(struct ata_link *link) { } #endif extern int sata_link_debounce(struct ata_link *link, - const unsigned long *params, unsigned long deadline); + const unsigned int *params, unsigned long deadline); extern int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy, bool spm_wakeup); extern int ata_slave_link_init(struct ata_port *ap); -extern void ata_sas_port_destroy(struct ata_port *); extern struct ata_port *ata_sas_port_alloc(struct ata_host *, struct ata_port_info *, struct Scsi_Host *); -extern void ata_sas_async_probe(struct ata_port *ap); -extern int ata_sas_sync_probe(struct ata_port *ap); -extern int ata_sas_port_init(struct ata_port *); -extern int ata_sas_port_start(struct ata_port *ap); +extern void ata_port_probe(struct ata_port *ap); extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap); extern void ata_sas_tport_delete(struct ata_port *ap); -extern void ata_sas_port_stop(struct ata_port *ap); extern int ata_sas_slave_configure(struct scsi_device *, struct ata_port *); extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap); extern void ata_tf_to_fis(const struct ata_taskfile *tf, @@ -1404,6 +1399,7 @@ extern const struct attribute_group *ata_common_sdev_groups[]; .this_id = ATA_SHT_THIS_ID, \ .emulated = ATA_SHT_EMULATED, \ .proc_name = drv_name, \ + .slave_alloc = ata_scsi_slave_alloc, \ .slave_destroy = ata_scsi_slave_destroy, \ .bios_param = ata_std_bios_param, \ .unlock_native_capacity = ata_scsi_unlock_native_capacity,\ @@ -1565,6 +1561,11 @@ void ata_port_desc(struct ata_port *ap, const char *fmt, ...); extern void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset, const char *name); #endif +static inline void ata_port_desc_misc(struct ata_port *ap, int irq) +{ + ata_port_desc(ap, "irq %d", irq); + ata_port_desc(ap, "lpm-pol %d", ap->target_lpm_policy); +} static inline bool ata_tag_internal(unsigned int tag) { @@ -1785,7 +1786,7 @@ static inline struct ata_queued_cmd *ata_qc_from_tag(struct ata_port *ap, { struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); - if (unlikely(!qc) || !ap->ops->error_handler) + if (unlikely(!qc)) return qc; if ((qc->flags & (ATA_QCFLAG_ACTIVE | @@ -1876,7 +1877,7 @@ static inline int ata_check_ready(u8 status) } static inline unsigned long ata_deadline(unsigned long from_jiffies, - unsigned long timeout_msecs) + unsigned int timeout_msecs) { return from_jiffies + msecs_to_jiffies(timeout_msecs); } @@ -1885,23 +1886,21 @@ static inline unsigned long ata_deadline(unsigned long from_jiffies, change in future hardware and specs, secondly 0xFF means 'no DMA' but is > UDMA_0. Dyma ddreigiau */ -static inline int ata_using_mwdma(struct ata_device *adev) +static inline bool ata_using_mwdma(struct ata_device *adev) { - if (adev->dma_mode >= XFER_MW_DMA_0 && adev->dma_mode <= XFER_MW_DMA_4) - return 1; - return 0; + return adev->dma_mode >= XFER_MW_DMA_0 && + adev->dma_mode <= XFER_MW_DMA_4; } -static inline int ata_using_udma(struct ata_device *adev) +static inline bool ata_using_udma(struct ata_device *adev) { - if (adev->dma_mode >= XFER_UDMA_0 && adev->dma_mode <= XFER_UDMA_7) - return 1; - return 0; + return adev->dma_mode >= XFER_UDMA_0 && + adev->dma_mode <= XFER_UDMA_7; } -static inline int ata_dma_enabled(struct ata_device *adev) +static inline bool ata_dma_enabled(struct ata_device *adev) { - return (adev->dma_mode == 0xFF ? 0 : 1); + return adev->dma_mode != 0xFF; } /************************************************************************** diff --git a/include/linux/limits.h b/include/linux/limits.h index f6bcc9369010..38eb7f6f7e88 100644 --- a/include/linux/limits.h +++ b/include/linux/limits.h @@ -10,6 +10,8 @@ #define SSIZE_MAX ((ssize_t)(SIZE_MAX >> 1)) #define PHYS_ADDR_MAX (~(phys_addr_t)0) +#define RESOURCE_SIZE_MAX ((resource_size_t)~0) + #define U8_MAX ((u8)~0U) #define S8_MAX ((s8)(U8_MAX >> 1)) #define S8_MIN ((s8)(-S8_MAX - 1)) diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h index 15e0e0209da4..7303b4bc2ce0 100644 --- a/include/linux/linkmode.h +++ b/include/linux/linkmode.h @@ -43,15 +43,6 @@ static inline void linkmode_set_bit(int nr, volatile unsigned long *addr) __set_bit(nr, addr); } -static inline void linkmode_set_bit_array(const int *array, int array_size, - unsigned long *addr) -{ - int i; - - for (i = 0; i < array_size; i++) - linkmode_set_bit(array[i], addr); -} - static inline void linkmode_clear_bit(int nr, volatile unsigned long *addr) { __clear_bit(nr, addr); @@ -71,6 +62,15 @@ static inline int linkmode_test_bit(int nr, const volatile unsigned long *addr) return test_bit(nr, addr); } +static inline void linkmode_set_bit_array(const int *array, int array_size, + unsigned long *addr) +{ + int i; + + for (i = 0; i < array_size; i++) + linkmode_set_bit(array[i], addr); +} + static inline int linkmode_equal(const unsigned long *src1, const unsigned long *src2) { diff --git a/include/linux/list.h b/include/linux/list.h index f10344dbad4d..1837caedf723 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -38,11 +38,92 @@ static inline void INIT_LIST_HEAD(struct list_head *list) WRITE_ONCE(list->prev, list); } +#ifdef CONFIG_LIST_HARDENED + #ifdef CONFIG_DEBUG_LIST -extern bool __list_add_valid(struct list_head *new, - struct list_head *prev, - struct list_head *next); -extern bool __list_del_entry_valid(struct list_head *entry); +# define __list_valid_slowpath +#else +# define __list_valid_slowpath __cold __preserve_most +#endif + +/* + * Performs the full set of list corruption checks before __list_add(). + * On list corruption reports a warning, and returns false. + */ +extern bool __list_valid_slowpath __list_add_valid_or_report(struct list_head *new, + struct list_head *prev, + struct list_head *next); + +/* + * Performs list corruption checks before __list_add(). Returns false if a + * corruption is detected, true otherwise. + * + * With CONFIG_LIST_HARDENED only, performs minimal list integrity checking + * inline to catch non-faulting corruptions, and only if a corruption is + * detected calls the reporting function __list_add_valid_or_report(). + */ +static __always_inline bool __list_add_valid(struct list_head *new, + struct list_head *prev, + struct list_head *next) +{ + bool ret = true; + + if (!IS_ENABLED(CONFIG_DEBUG_LIST)) { + /* + * With the hardening version, elide checking if next and prev + * are NULL, since the immediate dereference of them below would + * result in a fault if NULL. + * + * With the reduced set of checks, we can afford to inline the + * checks, which also gives the compiler a chance to elide some + * of them completely if they can be proven at compile-time. If + * one of the pre-conditions does not hold, the slow-path will + * show a report which pre-condition failed. + */ + if (likely(next->prev == prev && prev->next == next && new != prev && new != next)) + return true; + ret = false; + } + + ret &= __list_add_valid_or_report(new, prev, next); + return ret; +} + +/* + * Performs the full set of list corruption checks before __list_del_entry(). + * On list corruption reports a warning, and returns false. + */ +extern bool __list_valid_slowpath __list_del_entry_valid_or_report(struct list_head *entry); + +/* + * Performs list corruption checks before __list_del_entry(). Returns false if a + * corruption is detected, true otherwise. + * + * With CONFIG_LIST_HARDENED only, performs minimal list integrity checking + * inline to catch non-faulting corruptions, and only if a corruption is + * detected calls the reporting function __list_del_entry_valid_or_report(). + */ +static __always_inline bool __list_del_entry_valid(struct list_head *entry) +{ + bool ret = true; + + if (!IS_ENABLED(CONFIG_DEBUG_LIST)) { + struct list_head *prev = entry->prev; + struct list_head *next = entry->next; + + /* + * With the hardening version, elide checking if next and prev + * are NULL, LIST_POISON1 or LIST_POISON2, since the immediate + * dereference of them below would result in a fault. + */ + if (likely(prev->next == entry && next->prev == entry)) + return true; + ret = false; + } + + ret &= __list_del_entry_valid_or_report(entry); + return ret; +} #else static inline bool __list_add_valid(struct list_head *new, struct list_head *prev, @@ -606,6 +687,14 @@ static inline void list_splice_tail_init(struct list_head *list, for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next) /** + * list_for_each_reverse - iterate backwards over a list + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + */ +#define list_for_each_reverse(pos, head) \ + for (pos = (head)->prev; pos != (head); pos = pos->prev) + +/** * list_for_each_rcu - Iterate over a list in an RCU-safe fashion * @pos: the &struct list_head to use as a loop cursor. * @head: the head for your list. diff --git a/include/linux/llist.h b/include/linux/llist.h index 85bda2d02d65..2c982ff7475a 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -74,6 +74,33 @@ static inline void init_llist_head(struct llist_head *list) } /** + * init_llist_node - initialize lock-less list node + * @node: the node to be initialised + * + * In cases where there is a need to test if a node is on + * a list or not, this initialises the node to clearly + * not be on any list. + */ +static inline void init_llist_node(struct llist_node *node) +{ + node->next = node; +} + +/** + * llist_on_list - test if a lock-list list node is on a list + * @node: the node to test + * + * When a node is on a list the ->next pointer will be NULL or + * some other node. It can never point to itself. We use that + * in init_llist_node() to record that a node is not on any list, + * and here to test whether it is on any list. + */ +static inline bool llist_on_list(const struct llist_node *node) +{ + return node->next != node; +} + +/** * llist_entry - get the struct of this entry * @ptr: the &struct llist_node pointer. * @type: the type of the struct this is embedded in. @@ -249,6 +276,25 @@ static inline struct llist_node *__llist_del_all(struct llist_head *head) extern struct llist_node *llist_del_first(struct llist_head *head); +/** + * llist_del_first_init - delete first entry from lock-list and mark is as being off-list + * @head: the head of lock-less list to delete from. + * + * This behave the same as llist_del_first() except that llist_init_node() is called + * on the returned node so that llist_on_list() will report false for the node. + */ +static inline struct llist_node *llist_del_first_init(struct llist_head *head) +{ + struct llist_node *n = llist_del_first(head); + + if (n) + init_llist_node(n); + return n; +} + +extern bool llist_del_first_this(struct llist_head *head, + struct llist_node *this); + struct llist_node *llist_reverse_order(struct llist_node *head); #endif /* LLIST_H */ diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index f42594a9efe0..9f565416d186 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -204,6 +204,8 @@ extern unsigned long nlmsvc_timeout; extern bool nsm_use_hostnames; extern u32 nsm_local_state; +extern struct timer_list nlmsvc_retry; + /* * Lockd client functions */ @@ -280,7 +282,7 @@ __be32 nlmsvc_testlock(struct svc_rqst *, struct nlm_file *, struct nlm_host *, struct nlm_lock *, struct nlm_lock *, struct nlm_cookie *); __be32 nlmsvc_cancel_blocked(struct net *net, struct nlm_file *, struct nlm_lock *); -unsigned long nlmsvc_retry_blocked(void); +void nlmsvc_retry_blocked(struct svc_rqst *rqstp); void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *, nlm_host_match_fn_t match); void nlmsvc_grant_reply(struct nlm_cookie *, __be32); diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 310f85903c91..dc2844b071c2 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -625,6 +625,12 @@ do { \ WARN_ON_ONCE(__lockdep_enabled && !this_cpu_read(hardirq_context)); \ } while (0) +#define lockdep_assert_no_hardirq() \ +do { \ + WARN_ON_ONCE(__lockdep_enabled && (this_cpu_read(hardirq_context) || \ + !this_cpu_read(hardirqs_enabled))); \ +} while (0) + #define lockdep_assert_preemption_enabled() \ do { \ WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \ @@ -659,6 +665,7 @@ do { \ # define lockdep_assert_irqs_enabled() do { } while (0) # define lockdep_assert_irqs_disabled() do { } while (0) # define lockdep_assert_in_irq() do { } while (0) +# define lockdep_assert_no_hardirq() do { } while (0) # define lockdep_assert_preemption_enabled() do { } while (0) # define lockdep_assert_preemption_disabled() do { } while (0) diff --git a/include/linux/logic_pio.h b/include/linux/logic_pio.h index 54945aa824b4..babf4e3c28ba 100644 --- a/include/linux/logic_pio.h +++ b/include/linux/logic_pio.h @@ -39,9 +39,6 @@ struct logic_pio_host_ops { #ifdef CONFIG_INDIRECT_PIO u8 logic_inb(unsigned long addr); -void logic_outb(u8 value, unsigned long addr); -void logic_outw(u16 value, unsigned long addr); -void logic_outl(u32 value, unsigned long addr); u16 logic_inw(unsigned long addr); u32 logic_inl(unsigned long addr); void logic_outb(u8 value, unsigned long addr); diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 7308a1a7599b..ff217a5ce552 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -32,28 +32,29 @@ LSM_HOOK(int, 0, binder_transaction, const struct cred *from, LSM_HOOK(int, 0, binder_transfer_binder, const struct cred *from, const struct cred *to) LSM_HOOK(int, 0, binder_transfer_file, const struct cred *from, - const struct cred *to, struct file *file) + const struct cred *to, const struct file *file) LSM_HOOK(int, 0, ptrace_access_check, struct task_struct *child, unsigned int mode) LSM_HOOK(int, 0, ptrace_traceme, struct task_struct *parent) -LSM_HOOK(int, 0, capget, struct task_struct *target, kernel_cap_t *effective, +LSM_HOOK(int, 0, capget, const struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) LSM_HOOK(int, 0, capset, struct cred *new, const struct cred *old, const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted) LSM_HOOK(int, 0, capable, const struct cred *cred, struct user_namespace *ns, int cap, unsigned int opts) -LSM_HOOK(int, 0, quotactl, int cmds, int type, int id, struct super_block *sb) +LSM_HOOK(int, 0, quotactl, int cmds, int type, int id, const struct super_block *sb) LSM_HOOK(int, 0, quota_on, struct dentry *dentry) LSM_HOOK(int, 0, syslog, int type) LSM_HOOK(int, 0, settime, const struct timespec64 *ts, const struct timezone *tz) -LSM_HOOK(int, 0, vm_enough_memory, struct mm_struct *mm, long pages) +LSM_HOOK(int, 1, vm_enough_memory, struct mm_struct *mm, long pages) LSM_HOOK(int, 0, bprm_creds_for_exec, struct linux_binprm *bprm) -LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, struct file *file) +LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, const struct file *file) LSM_HOOK(int, 0, bprm_check_security, struct linux_binprm *bprm) -LSM_HOOK(void, LSM_RET_VOID, bprm_committing_creds, struct linux_binprm *bprm) -LSM_HOOK(void, LSM_RET_VOID, bprm_committed_creds, struct linux_binprm *bprm) +LSM_HOOK(void, LSM_RET_VOID, bprm_committing_creds, const struct linux_binprm *bprm) +LSM_HOOK(void, LSM_RET_VOID, bprm_committed_creds, const struct linux_binprm *bprm) +LSM_HOOK(int, 0, fs_context_submount, struct fs_context *fc, struct super_block *reference) LSM_HOOK(int, 0, fs_context_dup, struct fs_context *fc, struct fs_context *src_sc) LSM_HOOK(int, -ENOPARAM, fs_context_parse_param, struct fs_context *fc, @@ -65,7 +66,7 @@ LSM_HOOK(void, LSM_RET_VOID, sb_free_mnt_opts, void *mnt_opts) LSM_HOOK(int, 0, sb_eat_lsm_opts, char *orig, void **mnt_opts) LSM_HOOK(int, 0, sb_mnt_opts_compat, struct super_block *sb, void *mnt_opts) LSM_HOOK(int, 0, sb_remount, struct super_block *sb, void *mnt_opts) -LSM_HOOK(int, 0, sb_kern_mount, struct super_block *sb) +LSM_HOOK(int, 0, sb_kern_mount, const struct super_block *sb) LSM_HOOK(int, 0, sb_show_options, struct seq_file *m, struct super_block *sb) LSM_HOOK(int, 0, sb_statfs, struct dentry *dentry) LSM_HOOK(int, 0, sb_mount, const char *dev_name, const struct path *path, @@ -111,9 +112,9 @@ LSM_HOOK(int, 0, path_notify, const struct path *path, u64 mask, unsigned int obj_type) LSM_HOOK(int, 0, inode_alloc_security, struct inode *inode) LSM_HOOK(void, LSM_RET_VOID, inode_free_security, struct inode *inode) -LSM_HOOK(int, 0, inode_init_security, struct inode *inode, - struct inode *dir, const struct qstr *qstr, const char **name, - void **value, size_t *len) +LSM_HOOK(int, -EOPNOTSUPP, inode_init_security, struct inode *inode, + struct inode *dir, const struct qstr *qstr, struct xattr *xattrs, + int *xattr_count) LSM_HOOK(int, 0, inode_init_security_anon, struct inode *inode, const struct qstr *name, const struct inode *context_inode) LSM_HOOK(int, 0, inode_create, struct inode *dir, struct dentry *dentry, @@ -272,7 +273,7 @@ LSM_HOOK(void, LSM_RET_VOID, release_secctx, char *secdata, u32 seclen) LSM_HOOK(void, LSM_RET_VOID, inode_invalidate_secctx, struct inode *inode) LSM_HOOK(int, 0, inode_notifysecctx, struct inode *inode, void *ctx, u32 ctxlen) LSM_HOOK(int, 0, inode_setsecctx, struct dentry *dentry, void *ctx, u32 ctxlen) -LSM_HOOK(int, 0, inode_getsecctx, struct inode *inode, void **ctx, +LSM_HOOK(int, -EOPNOTSUPP, inode_getsecctx, struct inode *inode, void **ctx, u32 *ctxlen) #if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) @@ -316,7 +317,7 @@ LSM_HOOK(int, 0, sk_alloc_security, struct sock *sk, int family, gfp_t priority) LSM_HOOK(void, LSM_RET_VOID, sk_free_security, struct sock *sk) LSM_HOOK(void, LSM_RET_VOID, sk_clone_security, const struct sock *sk, struct sock *newsk) -LSM_HOOK(void, LSM_RET_VOID, sk_getsecid, struct sock *sk, u32 *secid) +LSM_HOOK(void, LSM_RET_VOID, sk_getsecid, const struct sock *sk, u32 *secid) LSM_HOOK(void, LSM_RET_VOID, sock_graft, struct sock *sk, struct socket *parent) LSM_HOOK(int, 0, inet_conn_request, const struct sock *sk, struct sk_buff *skb, struct request_sock *req) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index ab2b2fafa4a4..dcb5e5b5eb13 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -28,6 +28,7 @@ #include <linux/security.h> #include <linux/init.h> #include <linux/rculist.h> +#include <linux/xattr.h> union security_list_options { #define LSM_HOOK(RET, DEFAULT, NAME, ...) RET (*NAME)(__VA_ARGS__); @@ -63,8 +64,27 @@ struct lsm_blob_sizes { int lbs_ipc; int lbs_msg_msg; int lbs_task; + int lbs_xattr_count; /* number of xattr slots in new_xattrs array */ }; +/** + * lsm_get_xattr_slot - Return the next available slot and increment the index + * @xattrs: array storing LSM-provided xattrs + * @xattr_count: number of already stored xattrs (updated) + * + * Retrieve the first available slot in the @xattrs array to fill with an xattr, + * and increment @xattr_count. + * + * Return: The slot to fill in @xattrs if non-NULL, NULL otherwise. + */ +static inline struct xattr *lsm_get_xattr_slot(struct xattr *xattrs, + int *xattr_count) +{ + if (unlikely(!xattrs)) + return NULL; + return &xattrs[(*xattr_count)++]; +} + /* * LSM_RET_VOID is used as the default value in LSM_HOOK definitions for void * LSM hooks (in include/linux/lsm_hook_defs.h). diff --git a/include/linux/lwq.h b/include/linux/lwq.h new file mode 100644 index 000000000000..d081d5cf8e33 --- /dev/null +++ b/include/linux/lwq.h @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef LWQ_H +#define LWQ_H +/* + * Light-weight single-linked queue built from llist + * + * Entries can be enqueued from any context with no locking. + * Entries can be dequeued from process context with integrated locking. + * + * This is particularly suitable when work items are queued in + * BH or IRQ context, and where work items are handled one at a time + * by dedicated threads. + */ +#include <linux/container_of.h> +#include <linux/spinlock.h> +#include <linux/llist.h> + +struct lwq_node { + struct llist_node node; +}; + +struct lwq { + spinlock_t lock; + struct llist_node *ready; /* entries to be dequeued */ + struct llist_head new; /* entries being enqueued */ +}; + +/** + * lwq_init - initialise a lwq + * @q: the lwq object + */ +static inline void lwq_init(struct lwq *q) +{ + spin_lock_init(&q->lock); + q->ready = NULL; + init_llist_head(&q->new); +} + +/** + * lwq_empty - test if lwq contains any entry + * @q: the lwq object + * + * This empty test contains an acquire barrier so that if a wakeup + * is sent when lwq_dequeue returns true, it is safe to go to sleep after + * a test on lwq_empty(). + */ +static inline bool lwq_empty(struct lwq *q) +{ + /* acquire ensures ordering wrt lwq_enqueue() */ + return smp_load_acquire(&q->ready) == NULL && llist_empty(&q->new); +} + +struct llist_node *__lwq_dequeue(struct lwq *q); +/** + * lwq_dequeue - dequeue first (oldest) entry from lwq + * @q: the queue to dequeue from + * @type: the type of object to return + * @member: them member in returned object which is an lwq_node. + * + * Remove a single object from the lwq and return it. This will take + * a spinlock and so must always be called in the same context, typcially + * process contet. + */ +#define lwq_dequeue(q, type, member) \ + ({ struct llist_node *_n = __lwq_dequeue(q); \ + _n ? container_of(_n, type, member.node) : NULL; }) + +struct llist_node *lwq_dequeue_all(struct lwq *q); + +/** + * lwq_for_each_safe - iterate over detached queue allowing deletion + * @_n: iterator variable + * @_t1: temporary struct llist_node ** + * @_t2: temporary struct llist_node * + * @_l: address of llist_node pointer from lwq_dequeue_all() + * @_member: member in _n where lwq_node is found. + * + * Iterate over members in a dequeued list. If the iterator variable + * is set to NULL, the iterator removes that entry from the queue. + */ +#define lwq_for_each_safe(_n, _t1, _t2, _l, _member) \ + for (_t1 = (_l); \ + *(_t1) ? (_n = container_of(*(_t1), typeof(*(_n)), _member.node),\ + _t2 = ((*_t1)->next), \ + true) \ + : false; \ + (_n) ? (_t1 = &(_n)->_member.node.next, 0) \ + : ((*(_t1) = (_t2)), 0)) + +/** + * lwq_enqueue - add a new item to the end of the queue + * @n - the lwq_node embedded in the item to be added + * @q - the lwq to append to. + * + * No locking is needed to append to the queue so this can + * be called from any context. + * Return %true is the list may have previously been empty. + */ +static inline bool lwq_enqueue(struct lwq_node *n, struct lwq *q) +{ + /* acquire enqures ordering wrt lwq_dequeue */ + return llist_add(&n->node, &q->new) && + smp_load_acquire(&q->ready) == NULL; +} + +/** + * lwq_enqueue_batch - add a list of new items to the end of the queue + * @n - the lwq_node embedded in the first item to be added + * @q - the lwq to append to. + * + * No locking is needed to append to the queue so this can + * be called from any context. + * Return %true is the list may have previously been empty. + */ +static inline bool lwq_enqueue_batch(struct llist_node *n, struct lwq *q) +{ + struct llist_node *e = n; + + /* acquire enqures ordering wrt lwq_dequeue */ + return llist_add_batch(llist_reverse_order(n), e, &q->new) && + smp_load_acquire(&q->ready) == NULL; +} +#endif /* LWQ_H */ diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index 295548cca8b3..d01e850b570f 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -29,14 +29,12 @@ #define MAPLE_NODE_SLOTS 31 /* 256 bytes including ->parent */ #define MAPLE_RANGE64_SLOTS 16 /* 256 bytes */ #define MAPLE_ARANGE64_SLOTS 10 /* 240 bytes */ -#define MAPLE_ARANGE64_META_MAX 15 /* Out of range for metadata */ #define MAPLE_ALLOC_SLOTS (MAPLE_NODE_SLOTS - 1) #else /* 32bit sizes */ #define MAPLE_NODE_SLOTS 63 /* 256 bytes including ->parent */ #define MAPLE_RANGE64_SLOTS 32 /* 256 bytes */ #define MAPLE_ARANGE64_SLOTS 21 /* 240 bytes */ -#define MAPLE_ARANGE64_META_MAX 31 /* Out of range for metadata */ #define MAPLE_ALLOC_SLOTS (MAPLE_NODE_SLOTS - 2) #endif /* defined(CONFIG_64BIT) || defined(BUILD_VDSO32_64) */ @@ -184,13 +182,23 @@ enum maple_type { #ifdef CONFIG_LOCKDEP typedef struct lockdep_map *lockdep_map_p; -#define mt_lock_is_held(mt) lock_is_held(mt->ma_external_lock) +#define mt_lock_is_held(mt) \ + (!(mt)->ma_external_lock || lock_is_held((mt)->ma_external_lock)) + +#define mt_write_lock_is_held(mt) \ + (!(mt)->ma_external_lock || \ + lock_is_held_type((mt)->ma_external_lock, 0)) + #define mt_set_external_lock(mt, lock) \ (mt)->ma_external_lock = &(lock)->dep_map + +#define mt_on_stack(mt) (mt).ma_external_lock = NULL #else typedef struct { /* nothing */ } lockdep_map_p; -#define mt_lock_is_held(mt) 1 +#define mt_lock_is_held(mt) 1 +#define mt_write_lock_is_held(mt) 1 #define mt_set_external_lock(mt, lock) do { } while (0) +#define mt_on_stack(mt) do { } while (0) #endif /* @@ -212,8 +220,8 @@ struct maple_tree { spinlock_t ma_lock; lockdep_map_p ma_external_lock; }; - void __rcu *ma_root; unsigned int ma_flags; + void __rcu *ma_root; }; /** @@ -420,6 +428,8 @@ struct ma_wr_state { #define MAS_ROOT ((struct maple_enode *)5UL) #define MAS_NONE ((struct maple_enode *)9UL) #define MAS_PAUSE ((struct maple_enode *)17UL) +#define MAS_OVERFLOW ((struct maple_enode *)33UL) +#define MAS_UNDERFLOW ((struct maple_enode *)65UL) #define MA_ERROR(err) \ ((struct maple_enode *)(((unsigned long)err << 2) | 2UL)) @@ -458,7 +468,7 @@ void *mas_find(struct ma_state *mas, unsigned long max); void *mas_find_range(struct ma_state *mas, unsigned long max); void *mas_find_rev(struct ma_state *mas, unsigned long min); void *mas_find_range_rev(struct ma_state *mas, unsigned long max); -int mas_preallocate(struct ma_state *mas, gfp_t gfp); +int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp); bool mas_is_err(struct ma_state *mas); bool mas_nomem(struct ma_state *mas, gfp_t gfp); @@ -503,6 +513,15 @@ static inline bool mas_is_paused(const struct ma_state *mas) return mas->node == MAS_PAUSE; } +/* Check if the mas is pointing to a node or not */ +static inline bool mas_is_active(struct ma_state *mas) +{ + if ((unsigned long)mas->node >= MAPLE_RESERVED_RANGE) + return true; + + return false; +} + /** * mas_reset() - Reset a Maple Tree operation state. * @mas: Maple Tree operation state. @@ -531,6 +550,22 @@ static inline void mas_reset(struct ma_state *mas) */ #define mas_for_each(__mas, __entry, __max) \ while (((__entry) = mas_find((__mas), (__max))) != NULL) +/** + * __mas_set_range() - Set up Maple Tree operation state to a sub-range of the + * current location. + * @mas: Maple Tree operation state. + * @start: New start of range in the Maple Tree. + * @last: New end of range in the Maple Tree. + * + * set the internal maple state values to a sub-range. + * Please use mas_set_range() if you do not know where you are in the tree. + */ +static inline void __mas_set_range(struct ma_state *mas, unsigned long start, + unsigned long last) +{ + mas->index = start; + mas->last = last; +} /** * mas_set_range() - Set up Maple Tree operation state for a different index. @@ -545,9 +580,8 @@ static inline void mas_reset(struct ma_state *mas) static inline void mas_set_range(struct ma_state *mas, unsigned long start, unsigned long last) { - mas->index = start; - mas->last = last; - mas->node = MAS_START; + __mas_set_range(mas, start, last); + mas->node = MAS_START; } /** @@ -662,10 +696,11 @@ void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max); * mt_for_each - Iterate over each entry starting at index until max. * @__tree: The Maple Tree * @__entry: The current entry - * @__index: The index to update to track the location in the tree + * @__index: The index to start the search from. Subsequently used as iterator. * @__max: The maximum limit for @index * - * Note: Will not return the zero entry. + * This iterator skips all entries, which resolve to a NULL pointer, + * e.g. entries which has been reserved with XA_ZERO_ENTRY. */ #define mt_for_each(__tree, __entry, __index, __max) \ for (__entry = mt_find(__tree, &(__index), __max); \ diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index 0f06c2287b52..9b54c4f0677f 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -25,6 +25,7 @@ #define MARVELL_PHY_ID_88X3310 0x002b09a0 #define MARVELL_PHY_ID_88E2110 0x002b09b0 #define MARVELL_PHY_ID_88X2222 0x01410f10 +#define MARVELL_PHY_ID_88Q2110 0x002b0980 /* Marvel 88E1111 in Finisar SFP module with modified PHY ID */ #define MARVELL_PHY_ID_88E1111_FINISAR 0x01ff0cc0 diff --git a/include/linux/math.h b/include/linux/math.h index 2d388650c556..dd4152711de7 100644 --- a/include/linux/math.h +++ b/include/linux/math.h @@ -156,6 +156,25 @@ __STRUCT_FRACT(u32) ({ signed type __x = (x); __x < 0 ? -__x : __x; }), other) /** + * abs_diff - return absolute value of the difference between the arguments + * @a: the first argument + * @b: the second argument + * + * @a and @b have to be of the same type. With this restriction we compare + * signed to signed and unsigned to unsigned. The result is the subtraction + * the smaller of the two from the bigger, hence result is always a positive + * value. + * + * Return: an absolute value of the difference between the @a and @b. + */ +#define abs_diff(a, b) ({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + (void)(&__a == &__b); \ + __a > __b ? (__a - __b) : (__b - __a); \ +}) + +/** * reciprocal_scale - "scale" a value into range [0, ep_ro) * @val: value * @ep_ro: right open interval endpoint diff --git a/include/linux/mcb.h b/include/linux/mcb.h index 1e5893138afe..0b971b24a804 100644 --- a/include/linux/mcb.h +++ b/include/linux/mcb.h @@ -63,7 +63,6 @@ static inline struct mcb_bus *to_mcb_bus(struct device *dev) struct mcb_device { struct device dev; struct mcb_bus *bus; - bool is_added; struct mcb_driver *driver; u16 id; int inst; diff --git a/include/linux/mdio.h b/include/linux/mdio.h index c1b7008826e5..007fd9c3e4b6 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -420,7 +420,7 @@ static inline u32 linkmode_adv_to_mii_t1_adv_m_t(unsigned long *advertising) * A function that translates value of following registers to the linkmode: * IEEE 802.3-2018 45.2.3.10 "EEE control and capability 1" register (3.20) * IEEE 802.3-2018 45.2.7.13 "EEE advertisement 1" register (7.60) - * IEEE 802.3-2018 45.2.7.14 "EEE "link partner ability 1 register (7.61) + * IEEE 802.3-2018 45.2.7.14 "EEE link partner ability 1" register (7.61) */ static inline void mii_eee_cap1_mod_linkmode_t(unsigned long *adv, u32 val) { @@ -537,6 +537,8 @@ static inline void mii_c73_mod_linkmode(unsigned long *adv, u16 *lpa) int __mdiobus_read(struct mii_bus *bus, int addr, u32 regnum); int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); +int __mdiobus_modify(struct mii_bus *bus, int addr, u32 regnum, u16 mask, + u16 set); int __mdiobus_modify_changed(struct mii_bus *bus, int addr, u32 regnum, u16 mask, u16 set); @@ -564,6 +566,30 @@ int mdiobus_c45_modify(struct mii_bus *bus, int addr, int devad, u32 regnum, int mdiobus_c45_modify_changed(struct mii_bus *bus, int addr, int devad, u32 regnum, u16 mask, u16 set); +static inline int __mdiodev_read(struct mdio_device *mdiodev, u32 regnum) +{ + return __mdiobus_read(mdiodev->bus, mdiodev->addr, regnum); +} + +static inline int __mdiodev_write(struct mdio_device *mdiodev, u32 regnum, + u16 val) +{ + return __mdiobus_write(mdiodev->bus, mdiodev->addr, regnum, val); +} + +static inline int __mdiodev_modify(struct mdio_device *mdiodev, u32 regnum, + u16 mask, u16 set) +{ + return __mdiobus_modify(mdiodev->bus, mdiodev->addr, regnum, mask, set); +} + +static inline int __mdiodev_modify_changed(struct mdio_device *mdiodev, + u32 regnum, u16 mask, u16 set) +{ + return __mdiobus_modify_changed(mdiodev->bus, mdiodev->addr, regnum, + mask, set); +} + static inline int mdiodev_read(struct mdio_device *mdiodev, u32 regnum) { return mdiobus_read(mdiodev->bus, mdiodev->addr, regnum); diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index fd6e0620658d..b38a56a13f39 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -31,11 +31,11 @@ typedef void (*mei_cldev_cb_t)(struct mei_cl_device *cldev); * @rx_work: async work to execute Rx event callback * @rx_cb: Drivers register this callback to get asynchronous ME * Rx buffer pending notifications. - * @notif_work: async work to execute FW notif event callback + * @notif_work: async work to execute FW notify event callback * @notif_cb: Drivers register this callback to get asynchronous ME * FW notification pending notifications. * - * @do_match: wheather device can be matched with a driver + * @do_match: whether the device can be matched with a driver * @is_added: device is already scanned * @priv_data: client private data */ @@ -94,15 +94,23 @@ void mei_cldev_driver_unregister(struct mei_cl_driver *cldrv); ssize_t mei_cldev_send(struct mei_cl_device *cldev, const u8 *buf, size_t length); +ssize_t mei_cldev_send_timeout(struct mei_cl_device *cldev, const u8 *buf, + size_t length, unsigned long timeout); ssize_t mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length); ssize_t mei_cldev_recv_nonblock(struct mei_cl_device *cldev, u8 *buf, size_t length); +ssize_t mei_cldev_recv_timeout(struct mei_cl_device *cldev, u8 *buf, size_t length, + unsigned long timeout); ssize_t mei_cldev_send_vtag(struct mei_cl_device *cldev, const u8 *buf, size_t length, u8 vtag); +ssize_t mei_cldev_send_vtag_timeout(struct mei_cl_device *cldev, const u8 *buf, + size_t length, u8 vtag, unsigned long timeout); ssize_t mei_cldev_recv_vtag(struct mei_cl_device *cldev, u8 *buf, size_t length, u8 *vtag); ssize_t mei_cldev_recv_nonblock_vtag(struct mei_cl_device *cldev, u8 *buf, size_t length, u8 *vtag); +ssize_t mei_cldev_recv_vtag_timeout(struct mei_cl_device *cldev, u8 *buf, size_t length, + u8 *vtag, unsigned long timeout); int mei_cldev_register_rx_cb(struct mei_cl_device *cldev, mei_cldev_cb_t rx_cb); int mei_cldev_register_notif_cb(struct mei_cl_device *cldev, diff --git a/include/linux/memblock.h b/include/linux/memblock.h index f71ff9f0ec81..ae3bde302f70 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -40,6 +40,8 @@ extern unsigned long long max_possible_pfn; * via a driver, and never indicated in the firmware-provided memory map as * system RAM. This corresponds to IORESOURCE_SYSRAM_DRIVER_MANAGED in the * kernel resource tree. + * @MEMBLOCK_RSRV_NOINIT: memory region for which struct pages are + * not initialized (only for reserved regions). */ enum memblock_flags { MEMBLOCK_NONE = 0x0, /* No special request */ @@ -47,6 +49,7 @@ enum memblock_flags { MEMBLOCK_MIRROR = 0x2, /* mirrored region */ MEMBLOCK_NOMAP = 0x4, /* don't add to kernel direct mapping */ MEMBLOCK_DRIVER_MANAGED = 0x8, /* always detected via a driver */ + MEMBLOCK_RSRV_NOINIT = 0x10, /* don't initialize struct pages */ }; /** @@ -125,6 +128,7 @@ int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size); int memblock_mark_mirror(phys_addr_t base, phys_addr_t size); int memblock_mark_nomap(phys_addr_t base, phys_addr_t size); int memblock_clear_nomap(phys_addr_t base, phys_addr_t size); +int memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t size); void memblock_free_all(void); void memblock_free(void *ptr, size_t size); @@ -259,6 +263,11 @@ static inline bool memblock_is_nomap(struct memblock_region *m) return m->flags & MEMBLOCK_NOMAP; } +static inline bool memblock_is_reserved_noinit(struct memblock_region *m) +{ + return m->flags & MEMBLOCK_RSRV_NOINIT; +} + static inline bool memblock_is_driver_managed(struct memblock_region *m) { return m->flags & MEMBLOCK_DRIVER_MANAGED; @@ -581,9 +590,7 @@ extern void *alloc_large_system_hash(const char *tablename, unsigned long high_limit); #define HASH_EARLY 0x00000001 /* Allocating during early boot? */ -#define HASH_SMALL 0x00000002 /* sub-page allocation allowed, min - * shift passed via *_hash_shift */ -#define HASH_ZERO 0x00000004 /* Zero allocated hash table */ +#define HASH_ZERO 0x00000002 /* Zero allocated hash table */ /* Only NUMA needs hash distribution. 64bit NUMA architectures have * sufficient vmalloc space. @@ -596,13 +603,11 @@ extern int hashdist; /* Distribute hashes across NUMA nodes? */ #endif #ifdef CONFIG_MEMTEST -extern phys_addr_t early_memtest_bad_size; /* Size of faulty ram found by memtest */ -extern bool early_memtest_done; /* Was early memtest done? */ -extern void early_memtest(phys_addr_t start, phys_addr_t end); +void early_memtest(phys_addr_t start, phys_addr_t end); +void memtest_report_meminfo(struct seq_file *m); #else -static inline void early_memtest(phys_addr_t start, phys_addr_t end) -{ -} +static inline void early_memtest(phys_addr_t start, phys_addr_t end) { } +static inline void memtest_report_meminfo(struct seq_file *m) { } #endif diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5818af8eca5a..7bdcf3020d7a 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -21,6 +21,7 @@ #include <linux/vmstat.h> #include <linux/writeback.h> #include <linux/page-flags.h> +#include <linux/shrinker.h> struct mem_cgroup; struct obj_cgroup; @@ -61,7 +62,6 @@ struct mem_cgroup_reclaim_cookie { #ifdef CONFIG_MEMCG #define MEM_CGROUP_ID_SHIFT 16 -#define MEM_CGROUP_ID_MAX USHRT_MAX struct mem_cgroup_id { int id; @@ -89,17 +89,6 @@ struct mem_cgroup_reclaim_iter { unsigned int generation; }; -/* - * Bitmap and deferred work of shrinker::id corresponding to memcg-aware - * shrinkers, which have elements charged to this memcg. - */ -struct shrinker_info { - struct rcu_head rcu; - atomic_long_t *nr_deferred; - unsigned long *map; - int map_nr_max; -}; - struct lruvec_stats_percpu { /* Local (CPU and cgroup) state */ long state[NR_VM_NODE_STAT_ITEMS]; @@ -112,6 +101,9 @@ struct lruvec_stats { /* Aggregated (CPU and subtree) state */ long state[NR_VM_NODE_STAT_ITEMS]; + /* Non-hierarchical (CPU aggregated) state */ + long state_local[NR_VM_NODE_STAT_ITEMS]; + /* Pending child counts during tree propagation */ long state_pending[NR_VM_NODE_STAT_ITEMS]; }; @@ -151,7 +143,7 @@ struct mem_cgroup_threshold_ary { /* Size of entries[] */ unsigned int size; /* Array of thresholds */ - struct mem_cgroup_threshold entries[]; + struct mem_cgroup_threshold entries[] __counted_by(size); }; struct mem_cgroup_thresholds { @@ -284,6 +276,11 @@ struct mem_cgroup { atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS]; atomic_long_t memory_events_local[MEMCG_NR_MEMORY_EVENTS]; + /* + * Hint of reclaim pressure for socket memroy management. Note + * that this indicator should NOT be used in legacy cgroup mode + * where socket memory is accounted/charged separately. + */ unsigned long socket_pressure; /* Legacy tcp memory accounting */ @@ -292,7 +289,13 @@ struct mem_cgroup { #ifdef CONFIG_MEMCG_KMEM int kmemcg_id; - struct obj_cgroup __rcu *objcg; + /* + * memcg->objcg is wiped out as a part of the objcg repaprenting + * process. memcg->orig_objcg preserves a pointer (and a reference) + * to the original objcg until the end of live of memcg. + */ + struct obj_cgroup __rcu *objcg; + struct obj_cgroup *orig_objcg; /* list of inherited objcgs, protected by objcg_lock */ struct list_head objcg_list; #endif @@ -583,7 +586,7 @@ static inline void mem_cgroup_protection(struct mem_cgroup *root, /* * There is no reclaim protection applied to a targeted reclaim. * We are special casing this specific case here because - * mem_cgroup_protected calculation is not robust enough to keep + * mem_cgroup_calculate_protection is not robust enough to keep * the protection invariant for calculated effective values for * parallel reclaimers with different reclaim target. This is * especially a problem for tail memcgs (as they have pages on LRU) @@ -655,6 +658,8 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target, page_counter_read(&memcg->memory); } +void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg); + int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp); /** @@ -679,6 +684,9 @@ static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, return __mem_cgroup_charge(folio, mm, gfp); } +int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, gfp_t gfp, + long nr_pages); + int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry); void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry); @@ -706,6 +714,10 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list) __mem_cgroup_uncharge_list(page_list); } +void mem_cgroup_cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages); + +void mem_cgroup_replace_folio(struct folio *old, struct folio *new); + void mem_cgroup_migrate(struct folio *old, struct folio *new); /** @@ -762,6 +774,8 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm); +struct mem_cgroup *get_mem_cgroup_from_current(void); + struct lruvec *folio_lruvec_lock(struct folio *folio); struct lruvec *folio_lruvec_lock_irq(struct folio *folio); struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio, @@ -861,8 +875,7 @@ static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec) * parent_mem_cgroup - find the accounting parent of a memcg * @memcg: memcg whose parent to find * - * Returns the parent memcg, or NULL if this is the root or the memory - * controller is in legacy no-hierarchy mode. + * Returns the parent memcg, or NULL if this is the root. */ static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) { @@ -914,7 +927,7 @@ unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec, return READ_ONCE(mz->lru_zone_size[zone_idx][lru]); } -void mem_cgroup_handle_over_high(void); +void mem_cgroup_handle_over_high(gfp_t gfp_mask); unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg); @@ -1020,14 +1033,12 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, { struct mem_cgroup_per_node *pn; long x = 0; - int cpu; if (mem_cgroup_disabled()) return node_page_state(lruvec_pgdat(lruvec), idx); pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); - for_each_possible_cpu(cpu) - x += per_cpu(pn->lruvec_stats_percpu->state[idx], cpu); + x = READ_ONCE(pn->lruvec_stats.state_local[idx]); #ifdef CONFIG_SMP if (x < 0) x = 0; @@ -1076,15 +1087,6 @@ static inline void count_memcg_events(struct mem_cgroup *memcg, local_irq_restore(flags); } -static inline void count_memcg_page_event(struct page *page, - enum vm_event_item idx) -{ - struct mem_cgroup *memcg = page_memcg(page); - - if (memcg) - count_memcg_events(memcg, idx, 1); -} - static inline void count_memcg_folio_events(struct folio *folio, enum vm_event_item idx, unsigned long nr) { @@ -1158,7 +1160,6 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, #else /* CONFIG_MEMCG */ #define MEM_CGROUP_ID_SHIFT 0 -#define MEM_CGROUP_ID_MAX 0 static inline struct mem_cgroup *folio_memcg(struct folio *folio) { @@ -1246,12 +1247,23 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target, return false; } +static inline void mem_cgroup_commit_charge(struct folio *folio, + struct mem_cgroup *memcg) +{ +} + static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp) { return 0; } +static inline int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, + gfp_t gfp, long nr_pages) +{ + return 0; +} + static inline int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry) { @@ -1270,6 +1282,16 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list) { } +static inline void mem_cgroup_cancel_charge(struct mem_cgroup *memcg, + unsigned int nr_pages) +{ +} + +static inline void mem_cgroup_replace_folio(struct folio *old, + struct folio *new) +{ +} + static inline void mem_cgroup_migrate(struct folio *old, struct folio *new) { } @@ -1307,6 +1329,11 @@ static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm) return NULL; } +static inline struct mem_cgroup *get_mem_cgroup_from_current(void) +{ + return NULL; +} + static inline struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css) { @@ -1455,7 +1482,7 @@ static inline void mem_cgroup_unlock_pages(void) rcu_read_unlock(); } -static inline void mem_cgroup_handle_over_high(void) +static inline void mem_cgroup_handle_over_high(gfp_t gfp_mask) { } @@ -1562,11 +1589,6 @@ static inline void __count_memcg_events(struct mem_cgroup *memcg, { } -static inline void count_memcg_page_event(struct page *page, - int idx) -{ -} - static inline void count_memcg_folio_events(struct folio *folio, enum vm_event_item idx, unsigned long nr) { @@ -1727,8 +1749,8 @@ void mem_cgroup_sk_alloc(struct sock *sk); void mem_cgroup_sk_free(struct sock *sk); static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { - if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && memcg->tcpmem_pressure) - return true; + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) + return !!memcg->tcpmem_pressure; do { if (time_before(jiffies, READ_ONCE(memcg->socket_pressure))) return true; @@ -1760,8 +1782,26 @@ bool mem_cgroup_kmem_disabled(void); int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order); void __memcg_kmem_uncharge_page(struct page *page, int order); -struct obj_cgroup *get_obj_cgroup_from_current(void); -struct obj_cgroup *get_obj_cgroup_from_page(struct page *page); +/* + * The returned objcg pointer is safe to use without additional + * protection within a scope. The scope is defined either by + * the current task (similar to the "current" global variable) + * or by set_active_memcg() pair. + * Please, use obj_cgroup_get() to get a reference if the pointer + * needs to be used outside of the local scope. + */ +struct obj_cgroup *current_obj_cgroup(void); +struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio); + +static inline struct obj_cgroup *get_obj_cgroup_from_current(void) +{ + struct obj_cgroup *objcg = current_obj_cgroup(); + + if (objcg) + obj_cgroup_get(objcg); + + return objcg; +} int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size); void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size); @@ -1845,7 +1885,7 @@ static inline void __memcg_kmem_uncharge_page(struct page *page, int order) { } -static inline struct obj_cgroup *get_obj_cgroup_from_page(struct page *page) +static inline struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio) { return NULL; } diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h index fc9647b1b4f9..1e39d27bee41 100644 --- a/include/linux/memory-tiers.h +++ b/include/linux/memory-tiers.h @@ -6,6 +6,7 @@ #include <linux/nodemask.h> #include <linux/kref.h> #include <linux/mmzone.h> +#include <linux/notifier.h> /* * Each tier cover a abstrace distance chunk size of 128 */ @@ -22,7 +23,9 @@ struct memory_tier; struct memory_dev_type { /* list of memory types that are part of same tier as this type */ - struct list_head tier_sibiling; + struct list_head tier_sibling; + /* list of memory types that are managed by one driver */ + struct list_head list; /* abstract distance for this specific memory type */ int adistance; /* Nodes of same abstract distance */ @@ -30,12 +33,21 @@ struct memory_dev_type { struct kref kref; }; +struct node_hmem_attrs; + #ifdef CONFIG_NUMA extern bool numa_demotion_enabled; +extern struct memory_dev_type *default_dram_type; struct memory_dev_type *alloc_memory_type(int adistance); -void destroy_memory_type(struct memory_dev_type *memtype); +void put_memory_type(struct memory_dev_type *memtype); void init_node_memory_type(int node, struct memory_dev_type *default_type); void clear_node_memory_type(int node, struct memory_dev_type *memtype); +int register_mt_adistance_algorithm(struct notifier_block *nb); +int unregister_mt_adistance_algorithm(struct notifier_block *nb); +int mt_calc_adistance(int node, int *adist); +int mt_set_default_dram_perf(int nid, struct node_hmem_attrs *perf, + const char *source); +int mt_perf_to_adistance(struct node_hmem_attrs *perf, int *adist); #ifdef CONFIG_MIGRATION int next_demotion_node(int node); void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets); @@ -60,6 +72,7 @@ static inline bool node_is_toptier(int node) #else #define numa_demotion_enabled false +#define default_dram_type NULL /* * CONFIG_NUMA implementation returns non NULL error. */ @@ -68,7 +81,7 @@ static inline struct memory_dev_type *alloc_memory_type(int adistance) return NULL; } -static inline void destroy_memory_type(struct memory_dev_type *memtype) +static inline void put_memory_type(struct memory_dev_type *memtype) { } @@ -97,5 +110,31 @@ static inline bool node_is_toptier(int node) { return true; } + +static inline int register_mt_adistance_algorithm(struct notifier_block *nb) +{ + return 0; +} + +static inline int unregister_mt_adistance_algorithm(struct notifier_block *nb) +{ + return 0; +} + +static inline int mt_calc_adistance(int node, int *adist) +{ + return NOTIFY_DONE; +} + +static inline int mt_set_default_dram_perf(int nid, struct node_hmem_attrs *perf, + const char *source) +{ + return -EIO; +} + +static inline int mt_perf_to_adistance(struct node_hmem_attrs *perf, int *adist) +{ + return -EIO; +} #endif /* CONFIG_NUMA */ #endif /* _LINUX_MEMORY_TIERS_H */ diff --git a/include/linux/memory.h b/include/linux/memory.h index 31343566c221..f53cfdaaaa41 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -77,11 +77,7 @@ struct memory_block { */ struct zone *zone; struct device dev; - /* - * Number of vmemmap pages. These pages - * lay at the beginning of the memory block. - */ - unsigned long nr_vmemmap_pages; + struct vmem_altmap *altmap; struct memory_group *group; /* group (if any) for this block */ struct list_head group_next; /* next block inside memory group */ #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG) @@ -147,7 +143,7 @@ static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri) extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); int create_memory_block_devices(unsigned long start, unsigned long size, - unsigned long vmemmap_pages, + struct vmem_altmap *altmap, struct memory_group *group); void remove_memory_block_devices(unsigned long start, unsigned long size); extern void memory_dev_init(void); diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 013c69753c91..7d2076583494 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -97,6 +97,8 @@ typedef int __bitwise mhp_t; * To do so, we will use the beginning of the hot-added range to build * the page tables for the memmap array that describes the entire range. * Only selected architectures support it with SPARSE_VMEMMAP. + * This is only a hint, the core kernel can decide to not do this based on + * different alignment checks. */ #define MHP_MEMMAP_ON_MEMORY ((__force mhp_t)BIT(1)) /* @@ -354,7 +356,6 @@ extern struct zone *zone_for_pfn_range(int online_type, int nid, extern int arch_create_linear_mapping(int nid, u64 start, u64 size, struct mhp_params *params); void arch_remove_linear_mapping(u64 start, u64 size); -extern bool mhp_supports_memmap_on_memory(unsigned long size); #endif /* CONFIG_MEMORY_HOTPLUG */ #endif /* __LINUX_MEMORY_HOTPLUG_H */ diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index d232de7cdc56..931b118336f4 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -17,6 +17,8 @@ struct mm_struct; +#define NO_INTERLEAVE_INDEX (-1UL) /* use task il_prev for interleaving */ + #ifdef CONFIG_NUMA /* @@ -89,8 +91,6 @@ static inline struct mempolicy *mpol_dup(struct mempolicy *pol) return pol; } -#define vma_policy(vma) ((vma)->vm_policy) - static inline void mpol_get(struct mempolicy *pol) { if (pol) @@ -107,35 +107,30 @@ static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b) /* * Tree of shared policies for a shared memory region. - * Maintain the policies in a pseudo mm that contains vmas. The vmas - * carry the policy. As a special twist the pseudo mm is indexed in pages, not - * bytes, so that we can work with shared memory segments bigger than - * unsigned long. */ - -struct sp_node { - struct rb_node nd; - unsigned long start, end; - struct mempolicy *policy; -}; - struct shared_policy { struct rb_root root; rwlock_t lock; }; +struct sp_node { + struct rb_node nd; + pgoff_t start, end; + struct mempolicy *policy; +}; int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst); void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol); -int mpol_set_shared_policy(struct shared_policy *info, - struct vm_area_struct *vma, - struct mempolicy *new); -void mpol_free_shared_policy(struct shared_policy *p); +int mpol_set_shared_policy(struct shared_policy *sp, + struct vm_area_struct *vma, struct mempolicy *mpol); +void mpol_free_shared_policy(struct shared_policy *sp); struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, - unsigned long idx); + pgoff_t idx); struct mempolicy *get_task_policy(struct task_struct *p); struct mempolicy *__get_vma_policy(struct vm_area_struct *vma, - unsigned long addr); + unsigned long addr, pgoff_t *ilx); +struct mempolicy *get_vma_policy(struct vm_area_struct *vma, + unsigned long addr, int order, pgoff_t *ilx); bool vma_policy_mof(struct vm_area_struct *vma); extern void numa_default_policy(void); @@ -149,8 +144,6 @@ extern int huge_node(struct vm_area_struct *vma, extern bool init_nodemask_of_mempolicy(nodemask_t *mask); extern bool mempolicy_in_oom_domain(struct task_struct *tsk, const nodemask_t *mask); -extern nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy); - extern unsigned int mempolicy_slab_node(void); extern enum zone_type policy_zone; @@ -174,7 +167,7 @@ extern void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol); /* Check if a vma is migratable */ extern bool vma_migratable(struct vm_area_struct *vma); -extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long); +int mpol_misplaced(struct folio *, struct vm_area_struct *, unsigned long); extern void mpol_put_task_policy(struct task_struct *); static inline bool mpol_is_preferred_many(struct mempolicy *pol) @@ -188,12 +181,17 @@ extern bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone); struct mempolicy {}; +static inline struct mempolicy *get_task_policy(struct task_struct *p) +{ + return NULL; +} + static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b) { return true; } -static inline void mpol_put(struct mempolicy *p) +static inline void mpol_put(struct mempolicy *pol) { } @@ -212,17 +210,22 @@ static inline void mpol_shared_policy_init(struct shared_policy *sp, { } -static inline void mpol_free_shared_policy(struct shared_policy *p) +static inline void mpol_free_shared_policy(struct shared_policy *sp) { } static inline struct mempolicy * -mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx) +mpol_shared_policy_lookup(struct shared_policy *sp, pgoff_t idx) { return NULL; } -#define vma_policy(vma) NULL +static inline struct mempolicy *get_vma_policy(struct vm_area_struct *vma, + unsigned long addr, int order, pgoff_t *ilx) +{ + *ilx = 0; + return NULL; +} static inline int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) @@ -278,7 +281,8 @@ static inline int mpol_parse_str(char *str, struct mempolicy **mpol) } #endif -static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma, +static inline int mpol_misplaced(struct folio *folio, + struct vm_area_struct *vma, unsigned long address) { return -1; /* no node preference */ diff --git a/include/linux/mfd/88pm860x.h b/include/linux/mfd/88pm860x.h index 473545a2c425..6fa21791fc85 100644 --- a/include/linux/mfd/88pm860x.h +++ b/include/linux/mfd/88pm860x.h @@ -472,13 +472,7 @@ extern int pm860x_bulk_read(struct i2c_client *, int, int, unsigned char *); extern int pm860x_bulk_write(struct i2c_client *, int, int, unsigned char *); extern int pm860x_set_bits(struct i2c_client *, int, unsigned char, unsigned char); -extern int pm860x_page_reg_read(struct i2c_client *, int); extern int pm860x_page_reg_write(struct i2c_client *, int, unsigned char); extern int pm860x_page_bulk_read(struct i2c_client *, int, int, unsigned char *); -extern int pm860x_page_bulk_write(struct i2c_client *, int, int, - unsigned char *); -extern int pm860x_page_set_bits(struct i2c_client *, int, unsigned char, - unsigned char); - #endif /* __LINUX_MFD_88PM860X_H */ diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h index 302a330c5c84..76d326ea8eba 100644 --- a/include/linux/mfd/abx500/ab8500.h +++ b/include/linux/mfd/abx500/ab8500.h @@ -382,10 +382,6 @@ struct ab8500_platform_data { struct ab8500_sysctrl_platform_data *sysctrl; }; -extern int ab8500_init(struct ab8500 *ab8500, - enum ab8500_version version); -extern int ab8500_exit(struct ab8500 *ab8500); - extern int ab8500_suspend(struct ab8500 *ab8500); static inline int is_ab8500(struct ab8500 *ab) @@ -503,13 +499,7 @@ static inline int is_ab9540_2p0_or_earlier(struct ab8500 *ab) void ab8500_override_turn_on_stat(u8 mask, u8 set); -#ifdef CONFIG_AB8500_DEBUG -extern int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size); -void ab8500_dump_all_banks(struct device *dev); -void ab8500_debug_register_interrupt(int line); -#else static inline void ab8500_dump_all_banks(struct device *dev) {} static inline void ab8500_debug_register_interrupt(int line) {} -#endif #endif /* MFD_AB8500_H */ diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index 47e7a3a61ce6..e8bcad641d8c 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -92,7 +92,7 @@ struct mfd_cell { * (above) when matching OF nodes with devices that have identical * compatible strings */ - const u64 of_reg; + u64 of_reg; /* Set to 'true' to use 'of_reg' (above) - allows for of_reg=0 */ bool use_of_reg; diff --git a/include/linux/mfd/cs42l43-regs.h b/include/linux/mfd/cs42l43-regs.h new file mode 100644 index 000000000000..c39a49269cb7 --- /dev/null +++ b/include/linux/mfd/cs42l43-regs.h @@ -0,0 +1,1184 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * cs42l43 register definitions + * + * Copyright (c) 2022-2023 Cirrus Logic, Inc. and + * Cirrus Logic International Semiconductor Ltd. + */ + +#ifndef CS42L43_CORE_REGS_H +#define CS42L43_CORE_REGS_H + +/* Registers */ +#define CS42L43_GEN_INT_STAT_1 0x000000C0 +#define CS42L43_GEN_INT_MASK_1 0x000000C1 +#define CS42L43_DEVID 0x00003000 +#define CS42L43_REVID 0x00003004 +#define CS42L43_RELID 0x0000300C +#define CS42L43_SFT_RESET 0x00003020 +#define CS42L43_DRV_CTRL1 0x00006004 +#define CS42L43_DRV_CTRL3 0x0000600C +#define CS42L43_DRV_CTRL4 0x00006010 +#define CS42L43_DRV_CTRL_5 0x00006014 +#define CS42L43_GPIO_CTRL1 0x00006034 +#define CS42L43_GPIO_CTRL2 0x00006038 +#define CS42L43_GPIO_STS 0x0000603C +#define CS42L43_GPIO_FN_SEL 0x00006040 +#define CS42L43_MCLK_SRC_SEL 0x00007004 +#define CS42L43_CCM_BLK_CLK_CONTROL 0x00007010 +#define CS42L43_SAMPLE_RATE1 0x00007014 +#define CS42L43_SAMPLE_RATE2 0x00007018 +#define CS42L43_SAMPLE_RATE3 0x0000701C +#define CS42L43_SAMPLE_RATE4 0x00007020 +#define CS42L43_PLL_CONTROL 0x00007034 +#define CS42L43_FS_SELECT1 0x00007038 +#define CS42L43_FS_SELECT2 0x0000703C +#define CS42L43_FS_SELECT3 0x00007040 +#define CS42L43_FS_SELECT4 0x00007044 +#define CS42L43_PDM_CONTROL 0x0000704C +#define CS42L43_ASP_CLK_CONFIG1 0x00007058 +#define CS42L43_ASP_CLK_CONFIG2 0x0000705C +#define CS42L43_OSC_DIV_SEL 0x00007068 +#define CS42L43_ADC_B_CTRL1 0x00008000 +#define CS42L43_ADC_B_CTRL2 0x00008004 +#define CS42L43_DECIM_HPF_WNF_CTRL1 0x0000803C +#define CS42L43_DECIM_HPF_WNF_CTRL2 0x00008040 +#define CS42L43_DECIM_HPF_WNF_CTRL3 0x00008044 +#define CS42L43_DECIM_HPF_WNF_CTRL4 0x00008048 +#define CS42L43_DMIC_PDM_CTRL 0x0000804C +#define CS42L43_DECIM_VOL_CTRL_CH1_CH2 0x00008050 +#define CS42L43_DECIM_VOL_CTRL_CH3_CH4 0x00008054 +#define CS42L43_DECIM_VOL_CTRL_UPDATE 0x00008058 +#define CS42L43_INTP_VOLUME_CTRL1 0x00009008 +#define CS42L43_INTP_VOLUME_CTRL2 0x0000900C +#define CS42L43_AMP1_2_VOL_RAMP 0x00009010 +#define CS42L43_ASP_CTRL 0x0000A000 +#define CS42L43_ASP_FSYNC_CTRL1 0x0000A004 +#define CS42L43_ASP_FSYNC_CTRL2 0x0000A008 +#define CS42L43_ASP_FSYNC_CTRL3 0x0000A00C +#define CS42L43_ASP_FSYNC_CTRL4 0x0000A010 +#define CS42L43_ASP_DATA_CTRL 0x0000A018 +#define CS42L43_ASP_RX_EN 0x0000A020 +#define CS42L43_ASP_TX_EN 0x0000A024 +#define CS42L43_ASP_RX_CH1_CTRL 0x0000A028 +#define CS42L43_ASP_RX_CH2_CTRL 0x0000A02C +#define CS42L43_ASP_RX_CH3_CTRL 0x0000A030 +#define CS42L43_ASP_RX_CH4_CTRL 0x0000A034 +#define CS42L43_ASP_RX_CH5_CTRL 0x0000A038 +#define CS42L43_ASP_RX_CH6_CTRL 0x0000A03C +#define CS42L43_ASP_TX_CH1_CTRL 0x0000A068 +#define CS42L43_ASP_TX_CH2_CTRL 0x0000A06C +#define CS42L43_ASP_TX_CH3_CTRL 0x0000A070 +#define CS42L43_ASP_TX_CH4_CTRL 0x0000A074 +#define CS42L43_ASP_TX_CH5_CTRL 0x0000A078 +#define CS42L43_ASP_TX_CH6_CTRL 0x0000A07C +#define CS42L43_OTP_REVISION_ID 0x0000B02C +#define CS42L43_ASPTX1_INPUT 0x0000C200 +#define CS42L43_ASPTX2_INPUT 0x0000C210 +#define CS42L43_ASPTX3_INPUT 0x0000C220 +#define CS42L43_ASPTX4_INPUT 0x0000C230 +#define CS42L43_ASPTX5_INPUT 0x0000C240 +#define CS42L43_ASPTX6_INPUT 0x0000C250 +#define CS42L43_SWIRE_DP1_CH1_INPUT 0x0000C280 +#define CS42L43_SWIRE_DP1_CH2_INPUT 0x0000C290 +#define CS42L43_SWIRE_DP1_CH3_INPUT 0x0000C2A0 +#define CS42L43_SWIRE_DP1_CH4_INPUT 0x0000C2B0 +#define CS42L43_SWIRE_DP2_CH1_INPUT 0x0000C2C0 +#define CS42L43_SWIRE_DP2_CH2_INPUT 0x0000C2D0 +#define CS42L43_SWIRE_DP3_CH1_INPUT 0x0000C2E0 +#define CS42L43_SWIRE_DP3_CH2_INPUT 0x0000C2F0 +#define CS42L43_SWIRE_DP4_CH1_INPUT 0x0000C300 +#define CS42L43_SWIRE_DP4_CH2_INPUT 0x0000C310 +#define CS42L43_ASRC_INT1_INPUT1 0x0000C400 +#define CS42L43_ASRC_INT2_INPUT1 0x0000C410 +#define CS42L43_ASRC_INT3_INPUT1 0x0000C420 +#define CS42L43_ASRC_INT4_INPUT1 0x0000C430 +#define CS42L43_ASRC_DEC1_INPUT1 0x0000C440 +#define CS42L43_ASRC_DEC2_INPUT1 0x0000C450 +#define CS42L43_ASRC_DEC3_INPUT1 0x0000C460 +#define CS42L43_ASRC_DEC4_INPUT1 0x0000C470 +#define CS42L43_ISRC1INT1_INPUT1 0x0000C500 +#define CS42L43_ISRC1INT2_INPUT1 0x0000C510 +#define CS42L43_ISRC1DEC1_INPUT1 0x0000C520 +#define CS42L43_ISRC1DEC2_INPUT1 0x0000C530 +#define CS42L43_ISRC2INT1_INPUT1 0x0000C540 +#define CS42L43_ISRC2INT2_INPUT1 0x0000C550 +#define CS42L43_ISRC2DEC1_INPUT1 0x0000C560 +#define CS42L43_ISRC2DEC2_INPUT1 0x0000C570 +#define CS42L43_EQ1MIX_INPUT1 0x0000C580 +#define CS42L43_EQ1MIX_INPUT2 0x0000C584 +#define CS42L43_EQ1MIX_INPUT3 0x0000C588 +#define CS42L43_EQ1MIX_INPUT4 0x0000C58C +#define CS42L43_EQ2MIX_INPUT1 0x0000C590 +#define CS42L43_EQ2MIX_INPUT2 0x0000C594 +#define CS42L43_EQ2MIX_INPUT3 0x0000C598 +#define CS42L43_EQ2MIX_INPUT4 0x0000C59C +#define CS42L43_SPDIF1_INPUT1 0x0000C600 +#define CS42L43_SPDIF2_INPUT1 0x0000C610 +#define CS42L43_AMP1MIX_INPUT1 0x0000C620 +#define CS42L43_AMP1MIX_INPUT2 0x0000C624 +#define CS42L43_AMP1MIX_INPUT3 0x0000C628 +#define CS42L43_AMP1MIX_INPUT4 0x0000C62C +#define CS42L43_AMP2MIX_INPUT1 0x0000C630 +#define CS42L43_AMP2MIX_INPUT2 0x0000C634 +#define CS42L43_AMP2MIX_INPUT3 0x0000C638 +#define CS42L43_AMP2MIX_INPUT4 0x0000C63C +#define CS42L43_AMP3MIX_INPUT1 0x0000C640 +#define CS42L43_AMP3MIX_INPUT2 0x0000C644 +#define CS42L43_AMP3MIX_INPUT3 0x0000C648 +#define CS42L43_AMP3MIX_INPUT4 0x0000C64C +#define CS42L43_AMP4MIX_INPUT1 0x0000C650 +#define CS42L43_AMP4MIX_INPUT2 0x0000C654 +#define CS42L43_AMP4MIX_INPUT3 0x0000C658 +#define CS42L43_AMP4MIX_INPUT4 0x0000C65C +#define CS42L43_ASRC_INT_ENABLES 0x0000E000 +#define CS42L43_ASRC_DEC_ENABLES 0x0000E004 +#define CS42L43_PDNCNTL 0x00010000 +#define CS42L43_RINGSENSE_DEB_CTRL 0x0001001C +#define CS42L43_TIPSENSE_DEB_CTRL 0x00010020 +#define CS42L43_TIP_RING_SENSE_INTERRUPT_STATUS 0x00010028 +#define CS42L43_HS2 0x00010040 +#define CS42L43_HS_STAT 0x00010048 +#define CS42L43_MCU_SW_INTERRUPT 0x00010094 +#define CS42L43_STEREO_MIC_CTRL 0x000100A4 +#define CS42L43_STEREO_MIC_CLAMP_CTRL 0x000100C4 +#define CS42L43_BLOCK_EN2 0x00010104 +#define CS42L43_BLOCK_EN3 0x00010108 +#define CS42L43_BLOCK_EN4 0x0001010C +#define CS42L43_BLOCK_EN5 0x00010110 +#define CS42L43_BLOCK_EN6 0x00010114 +#define CS42L43_BLOCK_EN7 0x00010118 +#define CS42L43_BLOCK_EN8 0x0001011C +#define CS42L43_BLOCK_EN9 0x00010120 +#define CS42L43_BLOCK_EN10 0x00010124 +#define CS42L43_BLOCK_EN11 0x00010128 +#define CS42L43_TONE_CH1_CTRL 0x00010134 +#define CS42L43_TONE_CH2_CTRL 0x00010138 +#define CS42L43_MIC_DETECT_CONTROL_1 0x00011074 +#define CS42L43_DETECT_STATUS_1 0x0001107C +#define CS42L43_HS_BIAS_SENSE_AND_CLAMP_AUTOCONTROL 0x00011090 +#define CS42L43_MIC_DETECT_CONTROL_ANDROID 0x000110B0 +#define CS42L43_ISRC1_CTRL 0x00012004 +#define CS42L43_ISRC2_CTRL 0x00013004 +#define CS42L43_CTRL_REG 0x00014000 +#define CS42L43_FDIV_FRAC 0x00014004 +#define CS42L43_CAL_RATIO 0x00014008 +#define CS42L43_SPI_CLK_CONFIG1 0x00016004 +#define CS42L43_SPI_CONFIG1 0x00016010 +#define CS42L43_SPI_CONFIG2 0x00016014 +#define CS42L43_SPI_CONFIG3 0x00016018 +#define CS42L43_SPI_CONFIG4 0x00016024 +#define CS42L43_SPI_STATUS1 0x00016100 +#define CS42L43_SPI_STATUS2 0x00016104 +#define CS42L43_TRAN_CONFIG1 0x00016200 +#define CS42L43_TRAN_CONFIG2 0x00016204 +#define CS42L43_TRAN_CONFIG3 0x00016208 +#define CS42L43_TRAN_CONFIG4 0x0001620C +#define CS42L43_TRAN_CONFIG5 0x00016220 +#define CS42L43_TRAN_CONFIG6 0x00016224 +#define CS42L43_TRAN_CONFIG7 0x00016228 +#define CS42L43_TRAN_CONFIG8 0x0001622C +#define CS42L43_TRAN_STATUS1 0x00016300 +#define CS42L43_TRAN_STATUS2 0x00016304 +#define CS42L43_TRAN_STATUS3 0x00016308 +#define CS42L43_TX_DATA 0x00016400 +#define CS42L43_RX_DATA 0x00016600 +#define CS42L43_DACCNFG1 0x00017000 +#define CS42L43_DACCNFG2 0x00017004 +#define CS42L43_HPPATHVOL 0x0001700C +#define CS42L43_PGAVOL 0x00017014 +#define CS42L43_LOADDETRESULTS 0x00017018 +#define CS42L43_LOADDETENA 0x00017024 +#define CS42L43_CTRL 0x00017028 +#define CS42L43_COEFF_DATA_IN0 0x00018000 +#define CS42L43_COEFF_RD_WR0 0x00018008 +#define CS42L43_INIT_DONE0 0x00018010 +#define CS42L43_START_EQZ0 0x00018014 +#define CS42L43_MUTE_EQ_IN0 0x0001801C +#define CS42L43_DECIM_INT 0x0001B000 +#define CS42L43_EQ_INT 0x0001B004 +#define CS42L43_ASP_INT 0x0001B008 +#define CS42L43_PLL_INT 0x0001B00C +#define CS42L43_SOFT_INT 0x0001B010 +#define CS42L43_SWIRE_INT 0x0001B014 +#define CS42L43_MSM_INT 0x0001B018 +#define CS42L43_ACC_DET_INT 0x0001B01C +#define CS42L43_I2C_TGT_INT 0x0001B020 +#define CS42L43_SPI_MSTR_INT 0x0001B024 +#define CS42L43_SW_TO_SPI_BRIDGE_INT 0x0001B028 +#define CS42L43_OTP_INT 0x0001B02C +#define CS42L43_CLASS_D_AMP_INT 0x0001B030 +#define CS42L43_GPIO_INT 0x0001B034 +#define CS42L43_ASRC_INT 0x0001B038 +#define CS42L43_HPOUT_INT 0x0001B03C +#define CS42L43_DECIM_MASK 0x0001B0A0 +#define CS42L43_EQ_MIX_MASK 0x0001B0A4 +#define CS42L43_ASP_MASK 0x0001B0A8 +#define CS42L43_PLL_MASK 0x0001B0AC +#define CS42L43_SOFT_MASK 0x0001B0B0 +#define CS42L43_SWIRE_MASK 0x0001B0B4 +#define CS42L43_MSM_MASK 0x0001B0B8 +#define CS42L43_ACC_DET_MASK 0x0001B0BC +#define CS42L43_I2C_TGT_MASK 0x0001B0C0 +#define CS42L43_SPI_MSTR_MASK 0x0001B0C4 +#define CS42L43_SW_TO_SPI_BRIDGE_MASK 0x0001B0C8 +#define CS42L43_OTP_MASK 0x0001B0CC +#define CS42L43_CLASS_D_AMP_MASK 0x0001B0D0 +#define CS42L43_GPIO_INT_MASK 0x0001B0D4 +#define CS42L43_ASRC_MASK 0x0001B0D8 +#define CS42L43_HPOUT_MASK 0x0001B0DC +#define CS42L43_DECIM_INT_SHADOW 0x0001B300 +#define CS42L43_EQ_MIX_INT_SHADOW 0x0001B304 +#define CS42L43_ASP_INT_SHADOW 0x0001B308 +#define CS42L43_PLL_INT_SHADOW 0x0001B30C +#define CS42L43_SOFT_INT_SHADOW 0x0001B310 +#define CS42L43_SWIRE_INT_SHADOW 0x0001B314 +#define CS42L43_MSM_INT_SHADOW 0x0001B318 +#define CS42L43_ACC_DET_INT_SHADOW 0x0001B31C +#define CS42L43_I2C_TGT_INT_SHADOW 0x0001B320 +#define CS42L43_SPI_MSTR_INT_SHADOW 0x0001B324 +#define CS42L43_SW_TO_SPI_BRIDGE_SHADOW 0x0001B328 +#define CS42L43_OTP_INT_SHADOW 0x0001B32C +#define CS42L43_CLASS_D_AMP_INT_SHADOW 0x0001B330 +#define CS42L43_GPIO_SHADOW 0x0001B334 +#define CS42L43_ASRC_SHADOW 0x0001B338 +#define CS42L43_HP_OUT_SHADOW 0x0001B33C +#define CS42L43_BOOT_CONTROL 0x00101000 +#define CS42L43_BLOCK_EN 0x00101008 +#define CS42L43_SHUTTER_CONTROL 0x0010100C +#define CS42L43_MCU_SW_REV 0x00114000 +#define CS42L43_PATCH_START_ADDR 0x00114004 +#define CS42L43_NEED_CONFIGS 0x0011400C +#define CS42L43_BOOT_STATUS 0x0011401C +#define CS42L43_FW_SH_BOOT_CFG_NEED_CONFIGS 0x0011F8F8 +#define CS42L43_FW_MISSION_CTRL_NEED_CONFIGS 0x0011FE00 +#define CS42L43_FW_MISSION_CTRL_HAVE_CONFIGS 0x0011FE04 +#define CS42L43_FW_MISSION_CTRL_MM_CTRL_SELECTION 0x0011FE0C +#define CS42L43_FW_MISSION_CTRL_MM_MCU_CFG_REG 0x0011FE10 +#define CS42L43_MCU_RAM_MAX 0x0011FFFF + +/* CS42L43_DEVID */ +#define CS42L43_DEVID_VAL 0x00042A43 + +/* CS42L43_GEN_INT_STAT_1 */ +#define CS42L43_INT_STAT_GEN1_MASK 0x00000001 +#define CS42L43_INT_STAT_GEN1_SHIFT 0 + +/* CS42L43_SFT_RESET */ +#define CS42L43_SFT_RESET_MASK 0xFF000000 +#define CS42L43_SFT_RESET_SHIFT 24 + +#define CS42L43_SFT_RESET_VAL 0x5A000000 + +/* CS42L43_DRV_CTRL1 */ +#define CS42L43_ASP_DOUT_DRV_MASK 0x00038000 +#define CS42L43_ASP_DOUT_DRV_SHIFT 15 +#define CS42L43_ASP_FSYNC_DRV_MASK 0x00000E00 +#define CS42L43_ASP_FSYNC_DRV_SHIFT 9 +#define CS42L43_ASP_BCLK_DRV_MASK 0x000001C0 +#define CS42L43_ASP_BCLK_DRV_SHIFT 6 + +/* CS42L43_DRV_CTRL3 */ +#define CS42L43_I2C_ADDR_DRV_MASK 0x30000000 +#define CS42L43_I2C_ADDR_DRV_SHIFT 28 +#define CS42L43_I2C_SDA_DRV_MASK 0x0C000000 +#define CS42L43_I2C_SDA_DRV_SHIFT 26 +#define CS42L43_PDMOUT2_CLK_DRV_MASK 0x00E00000 +#define CS42L43_PDMOUT2_CLK_DRV_SHIFT 21 +#define CS42L43_PDMOUT2_DATA_DRV_MASK 0x001C0000 +#define CS42L43_PDMOUT2_DATA_DRV_SHIFT 18 +#define CS42L43_PDMOUT1_CLK_DRV_MASK 0x00038000 +#define CS42L43_PDMOUT1_CLK_DRV_SHIFT 15 +#define CS42L43_PDMOUT1_DATA_DRV_MASK 0x00007000 +#define CS42L43_PDMOUT1_DATA_DRV_SHIFT 12 +#define CS42L43_SPI_MISO_DRV_MASK 0x00000038 +#define CS42L43_SPI_MISO_DRV_SHIFT 3 + +/* CS42L43_DRV_CTRL4 */ +#define CS42L43_GPIO3_DRV_MASK 0x00000E00 +#define CS42L43_GPIO3_DRV_SHIFT 9 +#define CS42L43_GPIO2_DRV_MASK 0x000001C0 +#define CS42L43_GPIO2_DRV_SHIFT 6 +#define CS42L43_GPIO1_DRV_MASK 0x00000038 +#define CS42L43_GPIO1_DRV_SHIFT 3 + +/* CS42L43_DRV_CTRL_5 */ +#define CS42L43_I2C_SCL_DRV_MASK 0x18000000 +#define CS42L43_I2C_SCL_DRV_SHIFT 27 +#define CS42L43_SPI_SCK_DRV_MASK 0x07000000 +#define CS42L43_SPI_SCK_DRV_SHIFT 24 +#define CS42L43_SPI_MOSI_DRV_MASK 0x00E00000 +#define CS42L43_SPI_MOSI_DRV_SHIFT 21 +#define CS42L43_SPI_SSB_DRV_MASK 0x001C0000 +#define CS42L43_SPI_SSB_DRV_SHIFT 18 +#define CS42L43_ASP_DIN_DRV_MASK 0x000001C0 +#define CS42L43_ASP_DIN_DRV_SHIFT 6 + +/* CS42L43_GPIO_CTRL1 */ +#define CS42L43_GPIO3_POL_MASK 0x00040000 +#define CS42L43_GPIO3_POL_SHIFT 18 +#define CS42L43_GPIO2_POL_MASK 0x00020000 +#define CS42L43_GPIO2_POL_SHIFT 17 +#define CS42L43_GPIO1_POL_MASK 0x00010000 +#define CS42L43_GPIO1_POL_SHIFT 16 +#define CS42L43_GPIO3_LVL_MASK 0x00000400 +#define CS42L43_GPIO3_LVL_SHIFT 10 +#define CS42L43_GPIO2_LVL_MASK 0x00000200 +#define CS42L43_GPIO2_LVL_SHIFT 9 +#define CS42L43_GPIO1_LVL_MASK 0x00000100 +#define CS42L43_GPIO1_LVL_SHIFT 8 +#define CS42L43_GPIO3_DIR_MASK 0x00000004 +#define CS42L43_GPIO3_DIR_SHIFT 2 +#define CS42L43_GPIO2_DIR_MASK 0x00000002 +#define CS42L43_GPIO2_DIR_SHIFT 1 +#define CS42L43_GPIO1_DIR_MASK 0x00000001 +#define CS42L43_GPIO1_DIR_SHIFT 0 + +/* CS42L43_GPIO_CTRL2 */ +#define CS42L43_GPIO3_DEGLITCH_BYP_MASK 0x00000004 +#define CS42L43_GPIO3_DEGLITCH_BYP_SHIFT 2 +#define CS42L43_GPIO2_DEGLITCH_BYP_MASK 0x00000002 +#define CS42L43_GPIO2_DEGLITCH_BYP_SHIFT 1 +#define CS42L43_GPIO1_DEGLITCH_BYP_MASK 0x00000001 +#define CS42L43_GPIO1_DEGLITCH_BYP_SHIFT 0 + +/* CS42L43_GPIO_STS */ +#define CS42L43_GPIO3_STS_MASK 0x00000004 +#define CS42L43_GPIO3_STS_SHIFT 2 +#define CS42L43_GPIO2_STS_MASK 0x00000002 +#define CS42L43_GPIO2_STS_SHIFT 1 +#define CS42L43_GPIO1_STS_MASK 0x00000001 +#define CS42L43_GPIO1_STS_SHIFT 0 + +/* CS42L43_GPIO_FN_SEL */ +#define CS42L43_GPIO3_FN_SEL_MASK 0x00000004 +#define CS42L43_GPIO3_FN_SEL_SHIFT 2 +#define CS42L43_GPIO1_FN_SEL_MASK 0x00000001 +#define CS42L43_GPIO1_FN_SEL_SHIFT 0 + +/* CS42L43_MCLK_SRC_SEL */ +#define CS42L43_OSC_PLL_MCLK_SEL_MASK 0x00000001 +#define CS42L43_OSC_PLL_MCLK_SEL_SHIFT 0 + +/* CS42L43_SAMPLE_RATE1..CS42L43_SAMPLE_RATE4 */ +#define CS42L43_SAMPLE_RATE_MASK 0x0000001F +#define CS42L43_SAMPLE_RATE_SHIFT 0 + +/* CS42L43_PLL_CONTROL */ +#define CS42L43_PLL_REFCLK_EN_MASK 0x00000008 +#define CS42L43_PLL_REFCLK_EN_SHIFT 3 +#define CS42L43_PLL_REFCLK_DIV_MASK 0x00000006 +#define CS42L43_PLL_REFCLK_DIV_SHIFT 1 +#define CS42L43_PLL_REFCLK_SRC_MASK 0x00000001 +#define CS42L43_PLL_REFCLK_SRC_SHIFT 0 + +/* CS42L43_FS_SELECT1 */ +#define CS42L43_ASP_RATE_MASK 0x00000003 +#define CS42L43_ASP_RATE_SHIFT 0 + +/* CS42L43_FS_SELECT2 */ +#define CS42L43_ASRC_DEC_OUT_RATE_MASK 0x000000C0 +#define CS42L43_ASRC_DEC_OUT_RATE_SHIFT 6 +#define CS42L43_ASRC_INT_OUT_RATE_MASK 0x00000030 +#define CS42L43_ASRC_INT_OUT_RATE_SHIFT 4 +#define CS42L43_ASRC_DEC_IN_RATE_MASK 0x0000000C +#define CS42L43_ASRC_DEC_IN_RATE_SHIFT 2 +#define CS42L43_ASRC_INT_IN_RATE_MASK 0x00000003 +#define CS42L43_ASRC_INT_IN_RATE_SHIFT 0 + +/* CS42L43_FS_SELECT3 */ +#define CS42L43_HPOUT_RATE_MASK 0x0000C000 +#define CS42L43_HPOUT_RATE_SHIFT 14 +#define CS42L43_EQZ_RATE_MASK 0x00003000 +#define CS42L43_EQZ_RATE_SHIFT 12 +#define CS42L43_DIAGGEN_RATE_MASK 0x00000C00 +#define CS42L43_DIAGGEN_RATE_SHIFT 10 +#define CS42L43_DECIM_CH4_RATE_MASK 0x00000300 +#define CS42L43_DECIM_CH4_RATE_SHIFT 8 +#define CS42L43_DECIM_CH3_RATE_MASK 0x000000C0 +#define CS42L43_DECIM_CH3_RATE_SHIFT 6 +#define CS42L43_DECIM_CH2_RATE_MASK 0x00000030 +#define CS42L43_DECIM_CH2_RATE_SHIFT 4 +#define CS42L43_DECIM_CH1_RATE_MASK 0x0000000C +#define CS42L43_DECIM_CH1_RATE_SHIFT 2 +#define CS42L43_AMP1_2_RATE_MASK 0x00000003 +#define CS42L43_AMP1_2_RATE_SHIFT 0 + +/* CS42L43_FS_SELECT4 */ +#define CS42L43_SW_DP7_RATE_MASK 0x00C00000 +#define CS42L43_SW_DP7_RATE_SHIFT 22 +#define CS42L43_SW_DP6_RATE_MASK 0x00300000 +#define CS42L43_SW_DP6_RATE_SHIFT 20 +#define CS42L43_SPDIF_RATE_MASK 0x000C0000 +#define CS42L43_SPDIF_RATE_SHIFT 18 +#define CS42L43_SW_DP5_RATE_MASK 0x00030000 +#define CS42L43_SW_DP5_RATE_SHIFT 16 +#define CS42L43_SW_DP4_RATE_MASK 0x0000C000 +#define CS42L43_SW_DP4_RATE_SHIFT 14 +#define CS42L43_SW_DP3_RATE_MASK 0x00003000 +#define CS42L43_SW_DP3_RATE_SHIFT 12 +#define CS42L43_SW_DP2_RATE_MASK 0x00000C00 +#define CS42L43_SW_DP2_RATE_SHIFT 10 +#define CS42L43_SW_DP1_RATE_MASK 0x00000300 +#define CS42L43_SW_DP1_RATE_SHIFT 8 +#define CS42L43_ISRC2_LOW_RATE_MASK 0x000000C0 +#define CS42L43_ISRC2_LOW_RATE_SHIFT 6 +#define CS42L43_ISRC2_HIGH_RATE_MASK 0x00000030 +#define CS42L43_ISRC2_HIGH_RATE_SHIFT 4 +#define CS42L43_ISRC1_LOW_RATE_MASK 0x0000000C +#define CS42L43_ISRC1_LOW_RATE_SHIFT 2 +#define CS42L43_ISRC1_HIGH_RATE_MASK 0x00000003 +#define CS42L43_ISRC1_HIGH_RATE_SHIFT 0 + +/* CS42L43_PDM_CONTROL */ +#define CS42L43_PDM2_CLK_DIV_MASK 0x0000000C +#define CS42L43_PDM2_CLK_DIV_SHIFT 2 +#define CS42L43_PDM1_CLK_DIV_MASK 0x00000003 +#define CS42L43_PDM1_CLK_DIV_SHIFT 0 + +/* CS42L43_ASP_CLK_CONFIG1 */ +#define CS42L43_ASP_BCLK_N_MASK 0x03FF0000 +#define CS42L43_ASP_BCLK_N_SHIFT 16 +#define CS42L43_ASP_BCLK_M_MASK 0x000003FF +#define CS42L43_ASP_BCLK_M_SHIFT 0 + +/* CS42L43_ASP_CLK_CONFIG2 */ +#define CS42L43_ASP_MASTER_MODE_MASK 0x00000002 +#define CS42L43_ASP_MASTER_MODE_SHIFT 1 +#define CS42L43_ASP_BCLK_INV_MASK 0x00000001 +#define CS42L43_ASP_BCLK_INV_SHIFT 0 + +/* CS42L43_OSC_DIV_SEL */ +#define CS42L43_OSC_DIV2_EN_MASK 0x00000001 +#define CS42L43_OSC_DIV2_EN_SHIFT 0 + +/* CS42L43_ADC_B_CTRL1..CS42L43_ADC_B_CTRL1 */ +#define CS42L43_PGA_WIDESWING_MODE_EN_MASK 0x00000080 +#define CS42L43_PGA_WIDESWING_MODE_EN_SHIFT 7 +#define CS42L43_ADC_AIN_SEL_MASK 0x00000010 +#define CS42L43_ADC_AIN_SEL_SHIFT 4 +#define CS42L43_ADC_PGA_GAIN_MASK 0x0000000F +#define CS42L43_ADC_PGA_GAIN_SHIFT 0 + +/* CS42L43_DECIM_HPF_WNF_CTRL1..CS42L43_DECIM_HPF_WNF_CTRL4 */ +#define CS42L43_DECIM_WNF_CF_MASK 0x00000070 +#define CS42L43_DECIM_WNF_CF_SHIFT 4 +#define CS42L43_DECIM_WNF_EN_MASK 0x00000008 +#define CS42L43_DECIM_WNF_EN_SHIFT 3 +#define CS42L43_DECIM_HPF_CF_MASK 0x00000006 +#define CS42L43_DECIM_HPF_CF_SHIFT 1 +#define CS42L43_DECIM_HPF_EN_MASK 0x00000001 +#define CS42L43_DECIM_HPF_EN_SHIFT 0 + +/* CS42L43_DMIC_PDM_CTRL */ +#define CS42L43_PDM2R_INV_MASK 0x00000020 +#define CS42L43_PDM2R_INV_SHIFT 5 +#define CS42L43_PDM2L_INV_MASK 0x00000010 +#define CS42L43_PDM2L_INV_SHIFT 4 +#define CS42L43_PDM1R_INV_MASK 0x00000008 +#define CS42L43_PDM1R_INV_SHIFT 3 +#define CS42L43_PDM1L_INV_MASK 0x00000004 +#define CS42L43_PDM1L_INV_SHIFT 2 + +/* CS42L43_DECIM_VOL_CTRL_CH1_CH2 */ +#define CS42L43_DECIM2_MUTE_MASK 0x80000000 +#define CS42L43_DECIM2_MUTE_SHIFT 31 +#define CS42L43_DECIM2_VOL_MASK 0x3FC00000 +#define CS42L43_DECIM2_VOL_SHIFT 22 +#define CS42L43_DECIM2_VD_RAMP_MASK 0x00380000 +#define CS42L43_DECIM2_VD_RAMP_SHIFT 19 +#define CS42L43_DECIM2_VI_RAMP_MASK 0x00070000 +#define CS42L43_DECIM2_VI_RAMP_SHIFT 16 +#define CS42L43_DECIM1_MUTE_MASK 0x00008000 +#define CS42L43_DECIM1_MUTE_SHIFT 15 +#define CS42L43_DECIM1_VOL_MASK 0x00003FC0 +#define CS42L43_DECIM1_VOL_SHIFT 6 +#define CS42L43_DECIM1_VD_RAMP_MASK 0x00000038 +#define CS42L43_DECIM1_VD_RAMP_SHIFT 3 +#define CS42L43_DECIM1_VI_RAMP_MASK 0x00000007 +#define CS42L43_DECIM1_VI_RAMP_SHIFT 0 + +/* CS42L43_DECIM_VOL_CTRL_CH3_CH4 */ +#define CS42L43_DECIM4_MUTE_MASK 0x80000000 +#define CS42L43_DECIM4_MUTE_SHIFT 31 +#define CS42L43_DECIM4_VOL_MASK 0x3FC00000 +#define CS42L43_DECIM4_VOL_SHIFT 22 +#define CS42L43_DECIM4_VD_RAMP_MASK 0x00380000 +#define CS42L43_DECIM4_VD_RAMP_SHIFT 19 +#define CS42L43_DECIM4_VI_RAMP_MASK 0x00070000 +#define CS42L43_DECIM4_VI_RAMP_SHIFT 16 +#define CS42L43_DECIM3_MUTE_MASK 0x00008000 +#define CS42L43_DECIM3_MUTE_SHIFT 15 +#define CS42L43_DECIM3_VOL_MASK 0x00003FC0 +#define CS42L43_DECIM3_VOL_SHIFT 6 +#define CS42L43_DECIM3_VD_RAMP_MASK 0x00000038 +#define CS42L43_DECIM3_VD_RAMP_SHIFT 3 +#define CS42L43_DECIM3_VI_RAMP_MASK 0x00000007 +#define CS42L43_DECIM3_VI_RAMP_SHIFT 0 + +/* CS42L43_DECIM_VOL_CTRL_UPDATE */ +#define CS42L43_DECIM4_VOL_UPDATE_MASK 0x00000008 +#define CS42L43_DECIM4_VOL_UPDATE_SHIFT 3 +#define CS42L43_DECIM3_VOL_UPDATE_MASK 0x00000004 +#define CS42L43_DECIM3_VOL_UPDATE_SHIFT 2 +#define CS42L43_DECIM2_VOL_UPDATE_MASK 0x00000002 +#define CS42L43_DECIM2_VOL_UPDATE_SHIFT 1 +#define CS42L43_DECIM1_VOL_UPDATE_MASK 0x00000001 +#define CS42L43_DECIM1_VOL_UPDATE_SHIFT 0 + +/* CS42L43_INTP_VOLUME_CTRL1..CS42L43_INTP_VOLUME_CTRL2 */ +#define CS42L43_AMP1_2_VU_MASK 0x00000200 +#define CS42L43_AMP1_2_VU_SHIFT 9 +#define CS42L43_AMP_MUTE_MASK 0x00000100 +#define CS42L43_AMP_MUTE_SHIFT 8 +#define CS42L43_AMP_VOL_MASK 0x000000FF +#define CS42L43_AMP_VOL_SHIFT 0 + +/* CS42L43_AMP1_2_VOL_RAMP */ +#define CS42L43_AMP1_2_VD_RAMP_MASK 0x00000070 +#define CS42L43_AMP1_2_VD_RAMP_SHIFT 4 +#define CS42L43_AMP1_2_VI_RAMP_MASK 0x00000007 +#define CS42L43_AMP1_2_VI_RAMP_SHIFT 0 + +/* CS42L43_ASP_CTRL */ +#define CS42L43_ASP_FSYNC_MODE_MASK 0x00000004 +#define CS42L43_ASP_FSYNC_MODE_SHIFT 2 +#define CS42L43_ASP_BCLK_EN_MASK 0x00000002 +#define CS42L43_ASP_BCLK_EN_SHIFT 1 +#define CS42L43_ASP_FSYNC_EN_MASK 0x00000001 +#define CS42L43_ASP_FSYNC_EN_SHIFT 0 + +/* CS42L43_ASP_FSYNC_CTRL1 */ +#define CS42L43_ASP_FSYNC_M_MASK 0x0007FFFF +#define CS42L43_ASP_FSYNC_M_SHIFT 0 + +/* CS42L43_ASP_FSYNC_CTRL3 */ +#define CS42L43_ASP_FSYNC_IN_INV_MASK 0x00000002 +#define CS42L43_ASP_FSYNC_IN_INV_SHIFT 1 +#define CS42L43_ASP_FSYNC_OUT_INV_MASK 0x00000001 +#define CS42L43_ASP_FSYNC_OUT_INV_SHIFT 0 + +/* CS42L43_ASP_FSYNC_CTRL4 */ +#define CS42L43_ASP_NUM_BCLKS_PER_FSYNC_MASK 0x00001FFE +#define CS42L43_ASP_NUM_BCLKS_PER_FSYNC_SHIFT 1 + +/* CS42L43_ASP_DATA_CTRL */ +#define CS42L43_ASP_FSYNC_FRAME_START_PHASE_MASK 0x00000008 +#define CS42L43_ASP_FSYNC_FRAME_START_PHASE_SHIFT 3 +#define CS42L43_ASP_FSYNC_FRAME_START_DLY_MASK 0x00000007 +#define CS42L43_ASP_FSYNC_FRAME_START_DLY_SHIFT 0 + +/* CS42L43_ASP_RX_EN */ +#define CS42L43_ASP_RX_CH6_EN_MASK 0x00000020 +#define CS42L43_ASP_RX_CH6_EN_SHIFT 5 +#define CS42L43_ASP_RX_CH5_EN_MASK 0x00000010 +#define CS42L43_ASP_RX_CH5_EN_SHIFT 4 +#define CS42L43_ASP_RX_CH4_EN_MASK 0x00000008 +#define CS42L43_ASP_RX_CH4_EN_SHIFT 3 +#define CS42L43_ASP_RX_CH3_EN_MASK 0x00000004 +#define CS42L43_ASP_RX_CH3_EN_SHIFT 2 +#define CS42L43_ASP_RX_CH2_EN_MASK 0x00000002 +#define CS42L43_ASP_RX_CH2_EN_SHIFT 1 +#define CS42L43_ASP_RX_CH1_EN_MASK 0x00000001 +#define CS42L43_ASP_RX_CH1_EN_SHIFT 0 + +/* CS42L43_ASP_TX_EN */ +#define CS42L43_ASP_TX_CH6_EN_MASK 0x00000020 +#define CS42L43_ASP_TX_CH6_EN_SHIFT 5 +#define CS42L43_ASP_TX_CH5_EN_MASK 0x00000010 +#define CS42L43_ASP_TX_CH5_EN_SHIFT 4 +#define CS42L43_ASP_TX_CH4_EN_MASK 0x00000008 +#define CS42L43_ASP_TX_CH4_EN_SHIFT 3 +#define CS42L43_ASP_TX_CH3_EN_MASK 0x00000004 +#define CS42L43_ASP_TX_CH3_EN_SHIFT 2 +#define CS42L43_ASP_TX_CH2_EN_MASK 0x00000002 +#define CS42L43_ASP_TX_CH2_EN_SHIFT 1 +#define CS42L43_ASP_TX_CH1_EN_MASK 0x00000001 +#define CS42L43_ASP_TX_CH1_EN_SHIFT 0 + +/* CS42L43_ASP_RX_CH1_CTRL..CS42L43_ASP_TX_CH6_CTRL */ +#define CS42L43_ASP_CH_WIDTH_MASK 0x001F0000 +#define CS42L43_ASP_CH_WIDTH_SHIFT 16 +#define CS42L43_ASP_CH_SLOT_MASK 0x00001FFE +#define CS42L43_ASP_CH_SLOT_SHIFT 1 +#define CS42L43_ASP_CH_SLOT_PHASE_MASK 0x00000001 +#define CS42L43_ASP_CH_SLOT_PHASE_SHIFT 0 + +/* CS42L43_ASPTX1_INPUT..CS42L43_AMP4MIX_INPUT4 */ +#define CS42L43_MIXER_VOL_MASK 0x00FE0000 +#define CS42L43_MIXER_VOL_SHIFT 17 +#define CS42L43_MIXER_SRC_MASK 0x000001FF +#define CS42L43_MIXER_SRC_SHIFT 0 + +/* CS42L43_ASRC_INT_ENABLES */ +#define CS42L43_ASRC_INT4_EN_MASK 0x00000008 +#define CS42L43_ASRC_INT4_EN_SHIFT 3 +#define CS42L43_ASRC_INT3_EN_MASK 0x00000004 +#define CS42L43_ASRC_INT3_EN_SHIFT 2 +#define CS42L43_ASRC_INT2_EN_MASK 0x00000002 +#define CS42L43_ASRC_INT2_EN_SHIFT 1 +#define CS42L43_ASRC_INT1_EN_MASK 0x00000001 +#define CS42L43_ASRC_INT1_EN_SHIFT 0 + +/* CS42L43_ASRC_DEC_ENABLES */ +#define CS42L43_ASRC_DEC4_EN_MASK 0x00000008 +#define CS42L43_ASRC_DEC4_EN_SHIFT 3 +#define CS42L43_ASRC_DEC3_EN_MASK 0x00000004 +#define CS42L43_ASRC_DEC3_EN_SHIFT 2 +#define CS42L43_ASRC_DEC2_EN_MASK 0x00000002 +#define CS42L43_ASRC_DEC2_EN_SHIFT 1 +#define CS42L43_ASRC_DEC1_EN_MASK 0x00000001 +#define CS42L43_ASRC_DEC1_EN_SHIFT 0 + +/* CS42L43_PDNCNTL */ +#define CS42L43_RING_SENSE_EN_MASK 0x00000002 +#define CS42L43_RING_SENSE_EN_SHIFT 1 + +/* CS42L43_RINGSENSE_DEB_CTRL */ +#define CS42L43_RINGSENSE_INV_MASK 0x00000080 +#define CS42L43_RINGSENSE_INV_SHIFT 7 +#define CS42L43_RINGSENSE_PULLUP_PDNB_MASK 0x00000040 +#define CS42L43_RINGSENSE_PULLUP_PDNB_SHIFT 6 +#define CS42L43_RINGSENSE_FALLING_DB_TIME_MASK 0x00000038 +#define CS42L43_RINGSENSE_FALLING_DB_TIME_SHIFT 3 +#define CS42L43_RINGSENSE_RISING_DB_TIME_MASK 0x00000007 +#define CS42L43_RINGSENSE_RISING_DB_TIME_SHIFT 0 + +/* CS42L43_TIPSENSE_DEB_CTRL */ +#define CS42L43_TIPSENSE_INV_MASK 0x00000080 +#define CS42L43_TIPSENSE_INV_SHIFT 7 +#define CS42L43_TIPSENSE_FALLING_DB_TIME_MASK 0x00000038 +#define CS42L43_TIPSENSE_FALLING_DB_TIME_SHIFT 3 +#define CS42L43_TIPSENSE_RISING_DB_TIME_MASK 0x00000007 +#define CS42L43_TIPSENSE_RISING_DB_TIME_SHIFT 0 + +/* CS42L43_TIP_RING_SENSE_INTERRUPT_STATUS */ +#define CS42L43_TIPSENSE_UNPLUG_DB_STS_MASK 0x00000008 +#define CS42L43_TIPSENSE_UNPLUG_DB_STS_SHIFT 3 +#define CS42L43_TIPSENSE_PLUG_DB_STS_MASK 0x00000004 +#define CS42L43_TIPSENSE_PLUG_DB_STS_SHIFT 2 +#define CS42L43_RINGSENSE_UNPLUG_DB_STS_MASK 0x00000002 +#define CS42L43_RINGSENSE_UNPLUG_DB_STS_SHIFT 1 +#define CS42L43_RINGSENSE_PLUG_DB_STS_MASK 0x00000001 +#define CS42L43_RINGSENSE_PLUG_DB_STS_SHIFT 0 + +/* CS42L43_HS2 */ +#define CS42L43_HS_CLAMP_DISABLE_MASK 0x10000000 +#define CS42L43_HS_CLAMP_DISABLE_SHIFT 28 +#define CS42L43_HSBIAS_RAMP_MASK 0x0C000000 +#define CS42L43_HSBIAS_RAMP_SHIFT 26 +#define CS42L43_HSDET_MODE_MASK 0x00018000 +#define CS42L43_HSDET_MODE_SHIFT 15 +#define CS42L43_HSDET_MANUAL_MODE_MASK 0x00006000 +#define CS42L43_HSDET_MANUAL_MODE_SHIFT 13 +#define CS42L43_AUTO_HSDET_TIME_MASK 0x00000700 +#define CS42L43_AUTO_HSDET_TIME_SHIFT 8 +#define CS42L43_AMP3_4_GNDREF_HS3_SEL_MASK 0x00000080 +#define CS42L43_AMP3_4_GNDREF_HS3_SEL_SHIFT 7 +#define CS42L43_AMP3_4_GNDREF_HS4_SEL_MASK 0x00000040 +#define CS42L43_AMP3_4_GNDREF_HS4_SEL_SHIFT 6 +#define CS42L43_HSBIAS_GNDREF_HS3_SEL_MASK 0x00000020 +#define CS42L43_HSBIAS_GNDREF_HS3_SEL_SHIFT 5 +#define CS42L43_HSBIAS_GNDREF_HS4_SEL_MASK 0x00000010 +#define CS42L43_HSBIAS_GNDREF_HS4_SEL_SHIFT 4 +#define CS42L43_HSBIAS_OUT_HS3_SEL_MASK 0x00000008 +#define CS42L43_HSBIAS_OUT_HS3_SEL_SHIFT 3 +#define CS42L43_HSBIAS_OUT_HS4_SEL_MASK 0x00000004 +#define CS42L43_HSBIAS_OUT_HS4_SEL_SHIFT 2 +#define CS42L43_HSGND_HS3_SEL_MASK 0x00000002 +#define CS42L43_HSGND_HS3_SEL_SHIFT 1 +#define CS42L43_HSGND_HS4_SEL_MASK 0x00000001 +#define CS42L43_HSGND_HS4_SEL_SHIFT 0 + +/* CS42L43_HS_STAT */ +#define CS42L43_HSDET_TYPE_STS_MASK 0x00000007 +#define CS42L43_HSDET_TYPE_STS_SHIFT 0 + +/* CS42L43_MCU_SW_INTERRUPT */ +#define CS42L43_CONTROL_IND_MASK 0x00000004 +#define CS42L43_CONTROL_IND_SHIFT 2 +#define CS42L43_CONFIGS_IND_MASK 0x00000002 +#define CS42L43_CONFIGS_IND_SHIFT 1 +#define CS42L43_PATCH_IND_MASK 0x00000001 +#define CS42L43_PATCH_IND_SHIFT 0 + +/* CS42L43_STEREO_MIC_CTRL */ +#define CS42L43_HS2_BIAS_SENSE_EN_MASK 0x00000020 +#define CS42L43_HS2_BIAS_SENSE_EN_SHIFT 5 +#define CS42L43_HS1_BIAS_SENSE_EN_MASK 0x00000010 +#define CS42L43_HS1_BIAS_SENSE_EN_SHIFT 4 +#define CS42L43_HS2_BIAS_EN_MASK 0x00000008 +#define CS42L43_HS2_BIAS_EN_SHIFT 3 +#define CS42L43_HS1_BIAS_EN_MASK 0x00000004 +#define CS42L43_HS1_BIAS_EN_SHIFT 2 +#define CS42L43_JACK_STEREO_CONFIG_MASK 0x00000003 +#define CS42L43_JACK_STEREO_CONFIG_SHIFT 0 + +/* CS42L43_STEREO_MIC_CLAMP_CTRL */ +#define CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_VAL_MASK 0x00000002 +#define CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_VAL_SHIFT 1 +#define CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_MASK 0x00000001 +#define CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_SHIFT 0 + +/* CS42L43_BLOCK_EN2 */ +#define CS42L43_SPI_MSTR_EN_MASK 0x00000001 +#define CS42L43_SPI_MSTR_EN_SHIFT 0 + +/* CS42L43_BLOCK_EN3 */ +#define CS42L43_PDM2_DIN_R_EN_MASK 0x00000020 +#define CS42L43_PDM2_DIN_R_EN_SHIFT 5 +#define CS42L43_PDM2_DIN_L_EN_MASK 0x00000010 +#define CS42L43_PDM2_DIN_L_EN_SHIFT 4 +#define CS42L43_PDM1_DIN_R_EN_MASK 0x00000008 +#define CS42L43_PDM1_DIN_R_EN_SHIFT 3 +#define CS42L43_PDM1_DIN_L_EN_MASK 0x00000004 +#define CS42L43_PDM1_DIN_L_EN_SHIFT 2 +#define CS42L43_ADC2_EN_MASK 0x00000002 +#define CS42L43_ADC2_EN_SHIFT 1 +#define CS42L43_ADC1_EN_MASK 0x00000001 +#define CS42L43_ADC1_EN_SHIFT 0 + +/* CS42L43_BLOCK_EN4 */ +#define CS42L43_ASRC_DEC_BANK_EN_MASK 0x00000002 +#define CS42L43_ASRC_DEC_BANK_EN_SHIFT 1 +#define CS42L43_ASRC_INT_BANK_EN_MASK 0x00000001 +#define CS42L43_ASRC_INT_BANK_EN_SHIFT 0 + +/* CS42L43_BLOCK_EN5 */ +#define CS42L43_ISRC2_BANK_EN_MASK 0x00000002 +#define CS42L43_ISRC2_BANK_EN_SHIFT 1 +#define CS42L43_ISRC1_BANK_EN_MASK 0x00000001 +#define CS42L43_ISRC1_BANK_EN_SHIFT 0 + +/* CS42L43_BLOCK_EN6 */ +#define CS42L43_MIXER_EN_MASK 0x00000001 +#define CS42L43_MIXER_EN_SHIFT 0 + +/* CS42L43_BLOCK_EN7 */ +#define CS42L43_EQ_EN_MASK 0x00000001 +#define CS42L43_EQ_EN_SHIFT 0 + +/* CS42L43_BLOCK_EN8 */ +#define CS42L43_HP_EN_MASK 0x00000001 +#define CS42L43_HP_EN_SHIFT 0 + +/* CS42L43_BLOCK_EN9 */ +#define CS42L43_TONE_EN_MASK 0x00000001 +#define CS42L43_TONE_EN_SHIFT 0 + +/* CS42L43_BLOCK_EN10 */ +#define CS42L43_AMP2_EN_MASK 0x00000002 +#define CS42L43_AMP2_EN_SHIFT 1 +#define CS42L43_AMP1_EN_MASK 0x00000001 +#define CS42L43_AMP1_EN_SHIFT 0 + +/* CS42L43_BLOCK_EN11 */ +#define CS42L43_SPDIF_EN_MASK 0x00000001 +#define CS42L43_SPDIF_EN_SHIFT 0 + +/* CS42L43_TONE_CH1_CTRL..CS42L43_TONE_CH2_CTRL */ +#define CS42L43_TONE_FREQ_MASK 0x00000070 +#define CS42L43_TONE_FREQ_SHIFT 4 +#define CS42L43_TONE_SEL_MASK 0x0000000F +#define CS42L43_TONE_SEL_SHIFT 0 + +/* CS42L43_MIC_DETECT_CONTROL_1 */ +#define CS42L43_BUTTON_DETECT_MODE_MASK 0x00000018 +#define CS42L43_BUTTON_DETECT_MODE_SHIFT 3 +#define CS42L43_HSBIAS_MODE_MASK 0x00000006 +#define CS42L43_HSBIAS_MODE_SHIFT 1 +#define CS42L43_MIC_LVL_DET_DISABLE_MASK 0x00000001 +#define CS42L43_MIC_LVL_DET_DISABLE_SHIFT 0 + +/* CS42L43_DETECT_STATUS_1 */ +#define CS42L43_HSDET_DC_STS_MASK 0x01FF0000 +#define CS42L43_HSDET_DC_STS_SHIFT 16 +#define CS42L43_JACKDET_STS_MASK 0x00000080 +#define CS42L43_JACKDET_STS_SHIFT 7 +#define CS42L43_HSBIAS_CLAMP_STS_MASK 0x00000040 +#define CS42L43_HSBIAS_CLAMP_STS_SHIFT 6 + +/* CS42L43_HS_BIAS_SENSE_AND_CLAMP_AUTOCONTROL */ +#define CS42L43_JACKDET_MODE_MASK 0xC0000000 +#define CS42L43_JACKDET_MODE_SHIFT 30 +#define CS42L43_JACKDET_INV_MASK 0x20000000 +#define CS42L43_JACKDET_INV_SHIFT 29 +#define CS42L43_JACKDET_DB_TIME_MASK 0x03000000 +#define CS42L43_JACKDET_DB_TIME_SHIFT 24 +#define CS42L43_S0_AUTO_ADCMUTE_DISABLE_MASK 0x00800000 +#define CS42L43_S0_AUTO_ADCMUTE_DISABLE_SHIFT 23 +#define CS42L43_HSBIAS_SENSE_EN_MASK 0x00000080 +#define CS42L43_HSBIAS_SENSE_EN_SHIFT 7 +#define CS42L43_AUTO_HSBIAS_CLAMP_EN_MASK 0x00000040 +#define CS42L43_AUTO_HSBIAS_CLAMP_EN_SHIFT 6 +#define CS42L43_JACKDET_SENSE_EN_MASK 0x00000020 +#define CS42L43_JACKDET_SENSE_EN_SHIFT 5 +#define CS42L43_HSBIAS_SENSE_TRIP_MASK 0x00000007 +#define CS42L43_HSBIAS_SENSE_TRIP_SHIFT 0 + +/* CS42L43_MIC_DETECT_CONTROL_ANDROID */ +#define CS42L43_HSDET_LVL_COMBWIDTH_MASK 0xC0000000 +#define CS42L43_HSDET_LVL_COMBWIDTH_SHIFT 30 +#define CS42L43_HSDET_LVL2_THRESH_MASK 0x01FF0000 +#define CS42L43_HSDET_LVL2_THRESH_SHIFT 16 +#define CS42L43_HSDET_LVL1_THRESH_MASK 0x000001FF +#define CS42L43_HSDET_LVL1_THRESH_SHIFT 0 + +/* CS42L43_ISRC1_CTRL..CS42L43_ISRC2_CTRL */ +#define CS42L43_ISRC_INT2_EN_MASK 0x00000200 +#define CS42L43_ISRC_INT2_EN_SHIFT 9 +#define CS42L43_ISRC_INT1_EN_MASK 0x00000100 +#define CS42L43_ISRC_INT1_EN_SHIFT 8 +#define CS42L43_ISRC_DEC2_EN_MASK 0x00000002 +#define CS42L43_ISRC_DEC2_EN_SHIFT 1 +#define CS42L43_ISRC_DEC1_EN_MASK 0x00000001 +#define CS42L43_ISRC_DEC1_EN_SHIFT 0 + +/* CS42L43_CTRL_REG */ +#define CS42L43_PLL_MODE_BYPASS_500_MASK 0x00000004 +#define CS42L43_PLL_MODE_BYPASS_500_SHIFT 2 +#define CS42L43_PLL_MODE_BYPASS_1029_MASK 0x00000002 +#define CS42L43_PLL_MODE_BYPASS_1029_SHIFT 1 +#define CS42L43_PLL_EN_MASK 0x00000001 +#define CS42L43_PLL_EN_SHIFT 0 + +/* CS42L43_FDIV_FRAC */ +#define CS42L43_PLL_DIV_INT_MASK 0xFF000000 +#define CS42L43_PLL_DIV_INT_SHIFT 24 +#define CS42L43_PLL_DIV_FRAC_BYTE2_MASK 0x00FF0000 +#define CS42L43_PLL_DIV_FRAC_BYTE2_SHIFT 16 +#define CS42L43_PLL_DIV_FRAC_BYTE1_MASK 0x0000FF00 +#define CS42L43_PLL_DIV_FRAC_BYTE1_SHIFT 8 +#define CS42L43_PLL_DIV_FRAC_BYTE0_MASK 0x000000FF +#define CS42L43_PLL_DIV_FRAC_BYTE0_SHIFT 0 + +/* CS42L43_CAL_RATIO */ +#define CS42L43_PLL_CAL_RATIO_MASK 0x000000FF +#define CS42L43_PLL_CAL_RATIO_SHIFT 0 + +/* CS42L43_SPI_CLK_CONFIG1 */ +#define CS42L43_SCLK_DIV_MASK 0x0000000F +#define CS42L43_SCLK_DIV_SHIFT 0 + +/* CS42L43_SPI_CONFIG1 */ +#define CS42L43_SPI_SS_IDLE_DUR_MASK 0x0F000000 +#define CS42L43_SPI_SS_IDLE_DUR_SHIFT 24 +#define CS42L43_SPI_SS_DELAY_DUR_MASK 0x000F0000 +#define CS42L43_SPI_SS_DELAY_DUR_SHIFT 16 +#define CS42L43_SPI_THREE_WIRE_MASK 0x00000100 +#define CS42L43_SPI_THREE_WIRE_SHIFT 8 +#define CS42L43_SPI_DPHA_MASK 0x00000040 +#define CS42L43_SPI_DPHA_SHIFT 6 +#define CS42L43_SPI_CPHA_MASK 0x00000020 +#define CS42L43_SPI_CPHA_SHIFT 5 +#define CS42L43_SPI_CPOL_MASK 0x00000010 +#define CS42L43_SPI_CPOL_SHIFT 4 +#define CS42L43_SPI_SS_SEL_MASK 0x00000007 +#define CS42L43_SPI_SS_SEL_SHIFT 0 + +/* CS42L43_SPI_CONFIG2 */ +#define CS42L43_SPI_SS_FRC_MASK 0x00000001 +#define CS42L43_SPI_SS_FRC_SHIFT 0 + +/* CS42L43_SPI_CONFIG3 */ +#define CS42L43_SPI_WDT_ENA_MASK 0x00000001 +#define CS42L43_SPI_WDT_ENA_SHIFT 0 + +/* CS42L43_SPI_CONFIG4 */ +#define CS42L43_SPI_STALL_ENA_MASK 0x00010000 +#define CS42L43_SPI_STALL_ENA_SHIFT 16 + +/* CS42L43_SPI_STATUS1 */ +#define CS42L43_SPI_ABORT_STS_MASK 0x00000002 +#define CS42L43_SPI_ABORT_STS_SHIFT 1 +#define CS42L43_SPI_DONE_STS_MASK 0x00000001 +#define CS42L43_SPI_DONE_STS_SHIFT 0 + +/* CS42L43_SPI_STATUS2 */ +#define CS42L43_SPI_RX_DONE_STS_MASK 0x00000010 +#define CS42L43_SPI_RX_DONE_STS_SHIFT 4 +#define CS42L43_SPI_TX_DONE_STS_MASK 0x00000001 +#define CS42L43_SPI_TX_DONE_STS_SHIFT 0 + +/* CS42L43_TRAN_CONFIG1 */ +#define CS42L43_SPI_START_MASK 0x00000001 +#define CS42L43_SPI_START_SHIFT 0 + +/* CS42L43_TRAN_CONFIG2 */ +#define CS42L43_SPI_ABORT_MASK 0x00000001 +#define CS42L43_SPI_ABORT_SHIFT 0 + +/* CS42L43_TRAN_CONFIG3 */ +#define CS42L43_SPI_WORD_SIZE_MASK 0x00070000 +#define CS42L43_SPI_WORD_SIZE_SHIFT 16 +#define CS42L43_SPI_CMD_MASK 0x00000003 +#define CS42L43_SPI_CMD_SHIFT 0 + +/* CS42L43_TRAN_CONFIG4 */ +#define CS42L43_SPI_TX_LENGTH_MASK 0x0000FFFF +#define CS42L43_SPI_TX_LENGTH_SHIFT 0 + +/* CS42L43_TRAN_CONFIG5 */ +#define CS42L43_SPI_RX_LENGTH_MASK 0x0000FFFF +#define CS42L43_SPI_RX_LENGTH_SHIFT 0 + +/* CS42L43_TRAN_CONFIG6 */ +#define CS42L43_SPI_TX_BLOCK_LENGTH_MASK 0x0000000F +#define CS42L43_SPI_TX_BLOCK_LENGTH_SHIFT 0 + +/* CS42L43_TRAN_CONFIG7 */ +#define CS42L43_SPI_RX_BLOCK_LENGTH_MASK 0x0000000F +#define CS42L43_SPI_RX_BLOCK_LENGTH_SHIFT 0 + +/* CS42L43_TRAN_CONFIG8 */ +#define CS42L43_SPI_RX_DONE_MASK 0x00000010 +#define CS42L43_SPI_RX_DONE_SHIFT 4 +#define CS42L43_SPI_TX_DONE_MASK 0x00000001 +#define CS42L43_SPI_TX_DONE_SHIFT 0 + +/* CS42L43_TRAN_STATUS1 */ +#define CS42L43_SPI_BUSY_STS_MASK 0x00000100 +#define CS42L43_SPI_BUSY_STS_SHIFT 8 +#define CS42L43_SPI_RX_REQUEST_MASK 0x00000010 +#define CS42L43_SPI_RX_REQUEST_SHIFT 4 +#define CS42L43_SPI_TX_REQUEST_MASK 0x00000001 +#define CS42L43_SPI_TX_REQUEST_SHIFT 0 + +/* CS42L43_TRAN_STATUS2 */ +#define CS42L43_SPI_TX_BYTE_COUNT_MASK 0x0000FFFF +#define CS42L43_SPI_TX_BYTE_COUNT_SHIFT 0 + +/* CS42L43_TRAN_STATUS3 */ +#define CS42L43_SPI_RX_BYTE_COUNT_MASK 0x0000FFFF +#define CS42L43_SPI_RX_BYTE_COUNT_SHIFT 0 + +/* CS42L43_TX_DATA */ +#define CS42L43_SPI_TX_DATA_MASK 0xFFFFFFFF +#define CS42L43_SPI_TX_DATA_SHIFT 0 + +/* CS42L43_RX_DATA */ +#define CS42L43_SPI_RX_DATA_MASK 0xFFFFFFFF +#define CS42L43_SPI_RX_DATA_SHIFT 0 + +/* CS42L43_DACCNFG1 */ +#define CS42L43_HP_MSTR_VOL_CTRL_EN_MASK 0x00000008 +#define CS42L43_HP_MSTR_VOL_CTRL_EN_SHIFT 3 +#define CS42L43_AMP4_INV_MASK 0x00000002 +#define CS42L43_AMP4_INV_SHIFT 1 +#define CS42L43_AMP3_INV_MASK 0x00000001 +#define CS42L43_AMP3_INV_SHIFT 0 + +/* CS42L43_DACCNFG2 */ +#define CS42L43_HP_AUTO_CLAMP_DISABLE_MASK 0x00000002 +#define CS42L43_HP_AUTO_CLAMP_DISABLE_SHIFT 1 +#define CS42L43_HP_HPF_EN_MASK 0x00000001 +#define CS42L43_HP_HPF_EN_SHIFT 0 + +/* CS42L43_HPPATHVOL */ +#define CS42L43_AMP4_PATH_VOL_MASK 0x01FF0000 +#define CS42L43_AMP4_PATH_VOL_SHIFT 16 +#define CS42L43_AMP3_PATH_VOL_MASK 0x000001FF +#define CS42L43_AMP3_PATH_VOL_SHIFT 0 + +/* CS42L43_PGAVOL */ +#define CS42L43_HP_PATH_VOL_RAMP_MASK 0x0003C000 +#define CS42L43_HP_PATH_VOL_RAMP_SHIFT 14 +#define CS42L43_HP_PATH_VOL_ZC_MASK 0x00002000 +#define CS42L43_HP_PATH_VOL_ZC_SHIFT 13 +#define CS42L43_HP_PATH_VOL_SFT_MASK 0x00001000 +#define CS42L43_HP_PATH_VOL_SFT_SHIFT 12 +#define CS42L43_HP_DIG_VOL_RAMP_MASK 0x00000F00 +#define CS42L43_HP_DIG_VOL_RAMP_SHIFT 8 +#define CS42L43_HP_ANA_VOL_RAMP_MASK 0x0000000F +#define CS42L43_HP_ANA_VOL_RAMP_SHIFT 0 + +/* CS42L43_LOADDETRESULTS */ +#define CS42L43_AMP3_RES_DET_MASK 0x00000003 +#define CS42L43_AMP3_RES_DET_SHIFT 0 + +/* CS42L43_LOADDETENA */ +#define CS42L43_HPLOAD_DET_EN_MASK 0x00000001 +#define CS42L43_HPLOAD_DET_EN_SHIFT 0 + +/* CS42L43_CTRL */ +#define CS42L43_ADPTPWR_MODE_MASK 0x00000007 +#define CS42L43_ADPTPWR_MODE_SHIFT 0 + +/* CS42L43_COEFF_RD_WR0 */ +#define CS42L43_WRITE_MODE_MASK 0x00000002 +#define CS42L43_WRITE_MODE_SHIFT 1 + +/* CS42L43_INIT_DONE0 */ +#define CS42L43_INITIALIZE_DONE_MASK 0x00000001 +#define CS42L43_INITIALIZE_DONE_SHIFT 0 + +/* CS42L43_START_EQZ0 */ +#define CS42L43_START_FILTER_MASK 0x00000001 +#define CS42L43_START_FILTER_SHIFT 0 + +/* CS42L43_MUTE_EQ_IN0 */ +#define CS42L43_MUTE_EQ_CH2_MASK 0x00000002 +#define CS42L43_MUTE_EQ_CH2_SHIFT 1 +#define CS42L43_MUTE_EQ_CH1_MASK 0x00000001 +#define CS42L43_MUTE_EQ_CH1_SHIFT 0 + +/* CS42L43_PLL_INT */ +#define CS42L43_PLL_LOST_LOCK_INT_MASK 0x00000002 +#define CS42L43_PLL_LOST_LOCK_INT_SHIFT 1 +#define CS42L43_PLL_READY_INT_MASK 0x00000001 +#define CS42L43_PLL_READY_INT_SHIFT 0 + +/* CS42L43_SOFT_INT */ +#define CS42L43_CONTROL_APPLIED_INT_MASK 0x00000010 +#define CS42L43_CONTROL_APPLIED_INT_SHIFT 4 +#define CS42L43_CONTROL_WARN_INT_MASK 0x00000008 +#define CS42L43_CONTROL_WARN_INT_SHIFT 3 +#define CS42L43_PATCH_WARN_INT_MASK 0x00000002 +#define CS42L43_PATCH_WARN_INT_SHIFT 1 +#define CS42L43_PATCH_APPLIED_INT_MASK 0x00000001 +#define CS42L43_PATCH_APPLIED_INT_SHIFT 0 + +/* CS42L43_MSM_INT */ +#define CS42L43_HP_STARTUP_DONE_INT_MASK 0x00000800 +#define CS42L43_HP_STARTUP_DONE_INT_SHIFT 11 +#define CS42L43_HP_SHUTDOWN_DONE_INT_MASK 0x00000400 +#define CS42L43_HP_SHUTDOWN_DONE_INT_SHIFT 10 +#define CS42L43_HSDET_DONE_INT_MASK 0x00000200 +#define CS42L43_HSDET_DONE_INT_SHIFT 9 +#define CS42L43_TIPSENSE_UNPLUG_DB_INT_MASK 0x00000080 +#define CS42L43_TIPSENSE_UNPLUG_DB_INT_SHIFT 7 +#define CS42L43_TIPSENSE_PLUG_DB_INT_MASK 0x00000040 +#define CS42L43_TIPSENSE_PLUG_DB_INT_SHIFT 6 +#define CS42L43_RINGSENSE_UNPLUG_DB_INT_MASK 0x00000020 +#define CS42L43_RINGSENSE_UNPLUG_DB_INT_SHIFT 5 +#define CS42L43_RINGSENSE_PLUG_DB_INT_MASK 0x00000010 +#define CS42L43_RINGSENSE_PLUG_DB_INT_SHIFT 4 +#define CS42L43_TIPSENSE_UNPLUG_PDET_INT_MASK 0x00000008 +#define CS42L43_TIPSENSE_UNPLUG_PDET_INT_SHIFT 3 +#define CS42L43_TIPSENSE_PLUG_PDET_INT_MASK 0x00000004 +#define CS42L43_TIPSENSE_PLUG_PDET_INT_SHIFT 2 +#define CS42L43_RINGSENSE_UNPLUG_PDET_INT_MASK 0x00000002 +#define CS42L43_RINGSENSE_UNPLUG_PDET_INT_SHIFT 1 +#define CS42L43_RINGSENSE_PLUG_PDET_INT_MASK 0x00000001 +#define CS42L43_RINGSENSE_PLUG_PDET_INT_SHIFT 0 + +/* CS42L43_ACC_DET_INT */ +#define CS42L43_HS2_BIAS_SENSE_INT_MASK 0x00000800 +#define CS42L43_HS2_BIAS_SENSE_INT_SHIFT 11 +#define CS42L43_HS1_BIAS_SENSE_INT_MASK 0x00000400 +#define CS42L43_HS1_BIAS_SENSE_INT_SHIFT 10 +#define CS42L43_DC_DETECT1_FALSE_INT_MASK 0x00000080 +#define CS42L43_DC_DETECT1_FALSE_INT_SHIFT 7 +#define CS42L43_DC_DETECT1_TRUE_INT_MASK 0x00000040 +#define CS42L43_DC_DETECT1_TRUE_INT_SHIFT 6 +#define CS42L43_HSBIAS_CLAMPED_INT_MASK 0x00000008 +#define CS42L43_HSBIAS_CLAMPED_INT_SHIFT 3 +#define CS42L43_HS3_4_BIAS_SENSE_INT_MASK 0x00000001 +#define CS42L43_HS3_4_BIAS_SENSE_INT_SHIFT 0 + +/* CS42L43_SPI_MSTR_INT */ +#define CS42L43_IRQ_SPI_STALLING_INT_MASK 0x00000004 +#define CS42L43_IRQ_SPI_STALLING_INT_SHIFT 2 +#define CS42L43_IRQ_SPI_STS_INT_MASK 0x00000002 +#define CS42L43_IRQ_SPI_STS_INT_SHIFT 1 +#define CS42L43_IRQ_SPI_BLOCK_INT_MASK 0x00000001 +#define CS42L43_IRQ_SPI_BLOCK_INT_SHIFT 0 + +/* CS42L43_SW_TO_SPI_BRIDGE_INT */ +#define CS42L43_SW2SPI_BUF_OVF_UDF_INT_MASK 0x00000001 +#define CS42L43_SW2SPI_BUF_OVF_UDF_INT_SHIFT 0 + +/* CS42L43_CLASS_D_AMP_INT */ +#define CS42L43_AMP2_CLK_STOP_FAULT_INT_MASK 0x00002000 +#define CS42L43_AMP2_CLK_STOP_FAULT_INT_SHIFT 13 +#define CS42L43_AMP1_CLK_STOP_FAULT_INT_MASK 0x00001000 +#define CS42L43_AMP1_CLK_STOP_FAULT_INT_SHIFT 12 +#define CS42L43_AMP2_VDDSPK_FAULT_INT_MASK 0x00000800 +#define CS42L43_AMP2_VDDSPK_FAULT_INT_SHIFT 11 +#define CS42L43_AMP1_VDDSPK_FAULT_INT_MASK 0x00000400 +#define CS42L43_AMP1_VDDSPK_FAULT_INT_SHIFT 10 +#define CS42L43_AMP2_SHUTDOWN_DONE_INT_MASK 0x00000200 +#define CS42L43_AMP2_SHUTDOWN_DONE_INT_SHIFT 9 +#define CS42L43_AMP1_SHUTDOWN_DONE_INT_MASK 0x00000100 +#define CS42L43_AMP1_SHUTDOWN_DONE_INT_SHIFT 8 +#define CS42L43_AMP2_STARTUP_DONE_INT_MASK 0x00000080 +#define CS42L43_AMP2_STARTUP_DONE_INT_SHIFT 7 +#define CS42L43_AMP1_STARTUP_DONE_INT_MASK 0x00000040 +#define CS42L43_AMP1_STARTUP_DONE_INT_SHIFT 6 +#define CS42L43_AMP2_THERM_SHDN_INT_MASK 0x00000020 +#define CS42L43_AMP2_THERM_SHDN_INT_SHIFT 5 +#define CS42L43_AMP1_THERM_SHDN_INT_MASK 0x00000010 +#define CS42L43_AMP1_THERM_SHDN_INT_SHIFT 4 +#define CS42L43_AMP2_THERM_WARN_INT_MASK 0x00000008 +#define CS42L43_AMP2_THERM_WARN_INT_SHIFT 3 +#define CS42L43_AMP1_THERM_WARN_INT_MASK 0x00000004 +#define CS42L43_AMP1_THERM_WARN_INT_SHIFT 2 +#define CS42L43_AMP2_SCDET_INT_MASK 0x00000002 +#define CS42L43_AMP2_SCDET_INT_SHIFT 1 +#define CS42L43_AMP1_SCDET_INT_MASK 0x00000001 +#define CS42L43_AMP1_SCDET_INT_SHIFT 0 + +/* CS42L43_GPIO_INT */ +#define CS42L43_GPIO3_FALL_INT_MASK 0x00000020 +#define CS42L43_GPIO3_FALL_INT_SHIFT 5 +#define CS42L43_GPIO3_RISE_INT_MASK 0x00000010 +#define CS42L43_GPIO3_RISE_INT_SHIFT 4 +#define CS42L43_GPIO2_FALL_INT_MASK 0x00000008 +#define CS42L43_GPIO2_FALL_INT_SHIFT 3 +#define CS42L43_GPIO2_RISE_INT_MASK 0x00000004 +#define CS42L43_GPIO2_RISE_INT_SHIFT 2 +#define CS42L43_GPIO1_FALL_INT_MASK 0x00000002 +#define CS42L43_GPIO1_FALL_INT_SHIFT 1 +#define CS42L43_GPIO1_RISE_INT_MASK 0x00000001 +#define CS42L43_GPIO1_RISE_INT_SHIFT 0 + +/* CS42L43_HPOUT_INT */ +#define CS42L43_HP_ILIMIT_INT_MASK 0x00000002 +#define CS42L43_HP_ILIMIT_INT_SHIFT 1 +#define CS42L43_HP_LOADDET_DONE_INT_MASK 0x00000001 +#define CS42L43_HP_LOADDET_DONE_INT_SHIFT 0 + +/* CS42L43_BOOT_CONTROL */ +#define CS42L43_LOCK_HW_STS_MASK 0x00000002 +#define CS42L43_LOCK_HW_STS_SHIFT 1 + +/* CS42L43_BLOCK_EN */ +#define CS42L43_MCU_EN_MASK 0x00000001 +#define CS42L43_MCU_EN_SHIFT 0 + +/* CS42L43_SHUTTER_CONTROL */ +#define CS42L43_STATUS_SPK_SHUTTER_MUTE_MASK 0x00008000 +#define CS42L43_STATUS_SPK_SHUTTER_MUTE_SHIFT 15 +#define CS42L43_SPK_SHUTTER_CFG_MASK 0x00000F00 +#define CS42L43_SPK_SHUTTER_CFG_SHIFT 8 +#define CS42L43_STATUS_MIC_SHUTTER_MUTE_MASK 0x00000080 +#define CS42L43_STATUS_MIC_SHUTTER_MUTE_SHIFT 7 +#define CS42L43_MIC_SHUTTER_CFG_MASK 0x0000000F +#define CS42L43_MIC_SHUTTER_CFG_SHIFT 0 + +/* CS42L43_MCU_SW_REV */ +#define CS42L43_BIOS_SUBMINOR_REV_MASK 0xFF000000 +#define CS42L43_BIOS_SUBMINOR_REV_SHIFT 24 +#define CS42L43_BIOS_MINOR_REV_MASK 0x00F00000 +#define CS42L43_BIOS_MINOR_REV_SHIFT 20 +#define CS42L43_BIOS_MAJOR_REV_MASK 0x000F0000 +#define CS42L43_BIOS_MAJOR_REV_SHIFT 16 +#define CS42L43_FW_SUBMINOR_REV_MASK 0x0000FF00 +#define CS42L43_FW_SUBMINOR_REV_SHIFT 8 +#define CS42L43_FW_MINOR_REV_MASK 0x000000F0 +#define CS42L43_FW_MINOR_REV_SHIFT 4 +#define CS42L43_FW_MAJOR_REV_MASK 0x0000000F +#define CS42L43_FW_MAJOR_REV_SHIFT 0 + +/* CS42L43_NEED_CONFIGS */ +#define CS42L43_FW_PATCH_NEED_CFG_MASK 0x80000000 +#define CS42L43_FW_PATCH_NEED_CFG_SHIFT 31 + +/* CS42L43_FW_MISSION_CTRL_MM_CTRL_SELECTION */ +#define CS42L43_FW_MM_CTRL_MCU_SEL_MASK 0x00000001 +#define CS42L43_FW_MM_CTRL_MCU_SEL_SHIFT 0 + +/* CS42L43_FW_MISSION_CTRL_MM_MCU_CFG_REG */ +#define CS42L43_FW_MISSION_CTRL_MM_MCU_CFG_DISABLE_VAL 0xF05AA50F + +#endif /* CS42L43_CORE_REGS_H */ diff --git a/include/linux/mfd/cs42l43.h b/include/linux/mfd/cs42l43.h new file mode 100644 index 000000000000..cf8263aab41b --- /dev/null +++ b/include/linux/mfd/cs42l43.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * CS42L43 core driver external data + * + * Copyright (C) 2022-2023 Cirrus Logic, Inc. and + * Cirrus Logic International Semiconductor Ltd. + */ + +#include <linux/completion.h> +#include <linux/device.h> +#include <linux/gpio/consumer.h> +#include <linux/mutex.h> +#include <linux/regmap.h> +#include <linux/regulator/consumer.h> +#include <linux/soundwire/sdw.h> +#include <linux/workqueue.h> + +#ifndef CS42L43_CORE_EXT_H +#define CS42L43_CORE_EXT_H + +#define CS42L43_N_SUPPLIES 3 + +enum cs42l43_irq_numbers { + CS42L43_PLL_LOST_LOCK, + CS42L43_PLL_READY, + + CS42L43_HP_STARTUP_DONE, + CS42L43_HP_SHUTDOWN_DONE, + CS42L43_HSDET_DONE, + CS42L43_TIPSENSE_UNPLUG_DB, + CS42L43_TIPSENSE_PLUG_DB, + CS42L43_RINGSENSE_UNPLUG_DB, + CS42L43_RINGSENSE_PLUG_DB, + CS42L43_TIPSENSE_UNPLUG_PDET, + CS42L43_TIPSENSE_PLUG_PDET, + CS42L43_RINGSENSE_UNPLUG_PDET, + CS42L43_RINGSENSE_PLUG_PDET, + + CS42L43_HS2_BIAS_SENSE, + CS42L43_HS1_BIAS_SENSE, + CS42L43_DC_DETECT1_FALSE, + CS42L43_DC_DETECT1_TRUE, + CS42L43_HSBIAS_CLAMPED, + CS42L43_HS3_4_BIAS_SENSE, + + CS42L43_AMP2_CLK_STOP_FAULT, + CS42L43_AMP1_CLK_STOP_FAULT, + CS42L43_AMP2_VDDSPK_FAULT, + CS42L43_AMP1_VDDSPK_FAULT, + CS42L43_AMP2_SHUTDOWN_DONE, + CS42L43_AMP1_SHUTDOWN_DONE, + CS42L43_AMP2_STARTUP_DONE, + CS42L43_AMP1_STARTUP_DONE, + CS42L43_AMP2_THERM_SHDN, + CS42L43_AMP1_THERM_SHDN, + CS42L43_AMP2_THERM_WARN, + CS42L43_AMP1_THERM_WARN, + CS42L43_AMP2_SCDET, + CS42L43_AMP1_SCDET, + + CS42L43_GPIO3_FALL, + CS42L43_GPIO3_RISE, + CS42L43_GPIO2_FALL, + CS42L43_GPIO2_RISE, + CS42L43_GPIO1_FALL, + CS42L43_GPIO1_RISE, + + CS42L43_HP_ILIMIT, + CS42L43_HP_LOADDET_DONE, +}; + +struct cs42l43 { + struct device *dev; + struct regmap *regmap; + struct sdw_slave *sdw; + + struct regulator *vdd_p; + struct regulator *vdd_d; + struct regulator_bulk_data core_supplies[CS42L43_N_SUPPLIES]; + + struct gpio_desc *reset; + + int irq; + struct regmap_irq_chip irq_chip; + struct regmap_irq_chip_data *irq_data; + + struct work_struct boot_work; + struct completion device_attach; + struct completion device_detach; + struct completion firmware_download; + int firmware_error; + + unsigned int sdw_freq; + /* Lock to gate control of the PLL and its sources. */ + struct mutex pll_lock; + + bool sdw_pll_active; + bool attached; + bool hw_lock; +}; + +#endif /* CS42L43_CORE_EXT_H */ diff --git a/include/linux/mfd/dbx500-prcmu.h b/include/linux/mfd/dbx500-prcmu.h index e7a7e70fdb38..dd0fc891b228 100644 --- a/include/linux/mfd/dbx500-prcmu.h +++ b/include/linux/mfd/dbx500-prcmu.h @@ -556,16 +556,6 @@ static inline void prcmu_clear(unsigned int reg, u32 bits) #define PRCMU_QOS_ARM_OPP 3 #define PRCMU_QOS_DEFAULT_VALUE -1 -static inline unsigned long prcmu_qos_get_cpufreq_opp_delay(void) -{ - return 0; -} - -static inline int prcmu_qos_requirement(int prcmu_qos_class) -{ - return 0; -} - static inline int prcmu_qos_add_requirement(int prcmu_qos_class, char *name, s32 value) { @@ -582,15 +572,4 @@ static inline void prcmu_qos_remove_requirement(int prcmu_qos_class, char *name) { } -static inline int prcmu_qos_add_notifier(int prcmu_qos_class, - struct notifier_block *notifier) -{ - return 0; -} -static inline int prcmu_qos_remove_notifier(int prcmu_qos_class, - struct notifier_block *notifier) -{ - return 0; -} - #endif /* __MACH_PRCMU_H */ diff --git a/include/linux/mfd/hi655x-pmic.h b/include/linux/mfd/hi655x-pmic.h index 6a012784dd1b..194556851ccf 100644 --- a/include/linux/mfd/hi655x-pmic.h +++ b/include/linux/mfd/hi655x-pmic.h @@ -52,7 +52,6 @@ #define OTMP_D1R_INT_MASK BIT(OTMP_D1R_INT) struct hi655x_pmic { - struct resource *res; struct device *dev; struct regmap *regmap; struct gpio_desc *gpio; diff --git a/include/linux/mfd/lpc_ich.h b/include/linux/mfd/lpc_ich.h index ea4a4b1b246a..1fbda1f8967d 100644 --- a/include/linux/mfd/lpc_ich.h +++ b/include/linux/mfd/lpc_ich.h @@ -15,7 +15,7 @@ #define ICH_RES_GPE0 1 /* GPIO compatibility */ -enum { +enum lpc_gpio_versions { ICH_I3100_GPIO, ICH_V5_GPIO, ICH_V6_GPIO, @@ -26,11 +26,14 @@ enum { AVOTON_GPIO, }; +struct lpc_ich_gpio_info; + struct lpc_ich_info { char name[32]; unsigned int iTCO_version; - unsigned int gpio_version; + enum lpc_gpio_versions gpio_version; enum intel_spi_type spi_type; + const struct lpc_ich_gpio_info *gpio_info; u8 use_gpio; }; diff --git a/include/linux/mfd/max77686-private.h b/include/linux/mfd/max77686-private.h index 3acceeedbaba..ea635d12a741 100644 --- a/include/linux/mfd/max77686-private.h +++ b/include/linux/mfd/max77686-private.h @@ -441,8 +441,4 @@ enum max77686_types { TYPE_MAX77802, }; -extern int max77686_irq_init(struct max77686_dev *max77686); -extern void max77686_irq_exit(struct max77686_dev *max77686); -extern int max77686_irq_resume(struct max77686_dev *max77686); - #endif /* __LINUX_MFD_MAX77686_PRIV_H */ diff --git a/include/linux/mfd/mt6358/registers.h b/include/linux/mfd/mt6358/registers.h index 3d33517f178c..d83e87298ac4 100644 --- a/include/linux/mfd/mt6358/registers.h +++ b/include/linux/mfd/mt6358/registers.h @@ -262,6 +262,12 @@ #define MT6358_LDO_VBIF28_CON3 0x1db0 #define MT6358_VCAMA1_ANA_CON0 0x1e08 #define MT6358_VCAMA2_ANA_CON0 0x1e0c +#define MT6358_VFE28_ANA_CON0 0x1e10 +#define MT6358_VCN28_ANA_CON0 0x1e14 +#define MT6358_VBIF28_ANA_CON0 0x1e18 +#define MT6358_VAUD28_ANA_CON0 0x1e1c +#define MT6358_VAUX18_ANA_CON0 0x1e20 +#define MT6358_VXO22_ANA_CON0 0x1e24 #define MT6358_VCN33_ANA_CON0 0x1e28 #define MT6358_VSIM1_ANA_CON0 0x1e2c #define MT6358_VSIM2_ANA_CON0 0x1e30 @@ -288,4 +294,21 @@ #define MT6358_AUD_TOP_INT_CON0 0x2228 #define MT6358_AUD_TOP_INT_STATUS0 0x2234 +/* + * MT6366 has no VCAM*, but has other regulators in its place. The names + * keep the MT6358 prefix for ease of use in the regulator driver. + */ +#define MT6358_LDO_VSRAM_CON5 0x1bf8 +#define MT6358_LDO_VM18_CON0 MT6358_LDO_VCAMA1_CON0 +#define MT6358_LDO_VM18_CON1 MT6358_LDO_VCAMA1_CON1 +#define MT6358_LDO_VM18_CON2 MT6358_LDO_VCAMA1_CON2 +#define MT6358_LDO_VMDDR_CON0 MT6358_LDO_VCAMA2_CON0 +#define MT6358_LDO_VMDDR_CON1 MT6358_LDO_VCAMA2_CON1 +#define MT6358_LDO_VMDDR_CON2 MT6358_LDO_VCAMA2_CON2 +#define MT6358_LDO_VSRAM_CORE_CON0 MT6358_LDO_VCAMD_CON0 +#define MT6358_LDO_VSRAM_CORE_DBG0 0x1cb6 +#define MT6358_LDO_VSRAM_CORE_DBG1 0x1cb8 +#define MT6358_VM18_ANA_CON0 MT6358_VCAMA1_ANA_CON0 +#define MT6358_VMDDR_ANA_CON0 MT6358_VCAMD_ANA_CON0 + #endif /* __MFD_MT6358_REGISTERS_H__ */ diff --git a/include/linux/mfd/rz-mtu3.h b/include/linux/mfd/rz-mtu3.h index c5173bc06270..8421d49500bf 100644 --- a/include/linux/mfd/rz-mtu3.h +++ b/include/linux/mfd/rz-mtu3.h @@ -151,7 +151,6 @@ struct rz_mtu3 { void *priv_data; }; -#if IS_ENABLED(CONFIG_RZ_MTU3) static inline bool rz_mtu3_request_channel(struct rz_mtu3_channel *ch) { mutex_lock(&ch->lock); @@ -188,70 +187,5 @@ void rz_mtu3_32bit_ch_write(struct rz_mtu3_channel *ch, u16 off, u32 val); void rz_mtu3_shared_reg_write(struct rz_mtu3_channel *ch, u16 off, u16 val); void rz_mtu3_shared_reg_update_bit(struct rz_mtu3_channel *ch, u16 off, u16 pos, u8 val); -#else -static inline bool rz_mtu3_request_channel(struct rz_mtu3_channel *ch) -{ - return false; -} - -static inline void rz_mtu3_release_channel(struct rz_mtu3_channel *ch) -{ -} - -static inline bool rz_mtu3_is_enabled(struct rz_mtu3_channel *ch) -{ - return false; -} - -static inline void rz_mtu3_disable(struct rz_mtu3_channel *ch) -{ -} - -static inline int rz_mtu3_enable(struct rz_mtu3_channel *ch) -{ - return 0; -} - -static inline u8 rz_mtu3_8bit_ch_read(struct rz_mtu3_channel *ch, u16 off) -{ - return 0; -} - -static inline u16 rz_mtu3_16bit_ch_read(struct rz_mtu3_channel *ch, u16 off) -{ - return 0; -} - -static inline u32 rz_mtu3_32bit_ch_read(struct rz_mtu3_channel *ch, u16 off) -{ - return 0; -} - -static inline u16 rz_mtu3_shared_reg_read(struct rz_mtu3_channel *ch, u16 off) -{ - return 0; -} - -static inline void rz_mtu3_8bit_ch_write(struct rz_mtu3_channel *ch, u16 off, u8 val) -{ -} - -static inline void rz_mtu3_16bit_ch_write(struct rz_mtu3_channel *ch, u16 off, u16 val) -{ -} - -static inline void rz_mtu3_32bit_ch_write(struct rz_mtu3_channel *ch, u16 off, u32 val) -{ -} - -static inline void rz_mtu3_shared_reg_write(struct rz_mtu3_channel *ch, u16 off, u16 val) -{ -} - -static inline void rz_mtu3_shared_reg_update_bit(struct rz_mtu3_channel *ch, - u16 off, u16 pos, u8 val) -{ -} -#endif #endif /* __MFD_RZ_MTU3_H__ */ diff --git a/include/linux/mfd/stm32-timers.h b/include/linux/mfd/stm32-timers.h index 1b94325febb3..ca35af30745f 100644 --- a/include/linux/mfd/stm32-timers.h +++ b/include/linux/mfd/stm32-timers.h @@ -102,6 +102,15 @@ enum stm32_timers_dmas { STM32_TIMERS_MAX_DMAS, }; +/* STM32 Timer may have either a unique global interrupt or 4 interrupt lines */ +enum stm32_timers_irqs { + STM32_TIMERS_IRQ_GLOBAL_BRK, /* global or brk IRQ */ + STM32_TIMERS_IRQ_UP, + STM32_TIMERS_IRQ_TRG_COM, + STM32_TIMERS_IRQ_CC, + STM32_TIMERS_MAX_IRQS, +}; + /** * struct stm32_timers_dma - STM32 timer DMA handling. * @completion: end of DMA transfer completion @@ -123,6 +132,8 @@ struct stm32_timers { struct regmap *regmap; u32 max_arr; struct stm32_timers_dma dma; /* Only to be used by the parent */ + unsigned int nr_irqs; + int irq[STM32_TIMERS_MAX_IRQS]; }; #if IS_REACHABLE(CONFIG_MFD_STM32_TIMERS) diff --git a/include/linux/mfd/tps65086.h b/include/linux/mfd/tps65086.h index 16f87cccc003..9185b5cd8371 100644 --- a/include/linux/mfd/tps65086.h +++ b/include/linux/mfd/tps65086.h @@ -13,8 +13,9 @@ #include <linux/regmap.h> /* List of registers for TPS65086 */ -#define TPS65086_DEVICEID 0x01 -#define TPS65086_IRQ 0x02 +#define TPS65086_DEVICEID1 0x00 +#define TPS65086_DEVICEID2 0x01 +#define TPS65086_IRQ 0x02 #define TPS65086_IRQ_MASK 0x03 #define TPS65086_PMICSTAT 0x04 #define TPS65086_SHUTDNSRC 0x05 @@ -75,10 +76,16 @@ #define TPS65086_IRQ_SHUTDN_MASK BIT(3) #define TPS65086_IRQ_FAULT_MASK BIT(7) -/* DEVICEID Register field definitions */ -#define TPS65086_DEVICEID_PART_MASK GENMASK(3, 0) -#define TPS65086_DEVICEID_OTP_MASK GENMASK(5, 4) -#define TPS65086_DEVICEID_REV_MASK GENMASK(7, 6) +/* DEVICEID1 Register field definitions */ +#define TPS6508640_ID 0x00 +#define TPS65086401_ID 0x01 +#define TPS6508641_ID 0x10 +#define TPS65086470_ID 0x70 + +/* DEVICEID2 Register field definitions */ +#define TPS65086_DEVICEID2_PART_MASK GENMASK(3, 0) +#define TPS65086_DEVICEID2_OTP_MASK GENMASK(5, 4) +#define TPS65086_DEVICEID2_REV_MASK GENMASK(7, 6) /* VID Masks */ #define BUCK_VID_MASK GENMASK(7, 1) @@ -92,6 +99,8 @@ enum tps65086_irqs { TPS65086_IRQ_FAULT, }; +struct tps65086_regulator_config; + /** * struct tps65086 - state holder for the tps65086 driver * @@ -100,6 +109,8 @@ enum tps65086_irqs { struct tps65086 { struct device *dev; struct regmap *regmap; + unsigned int chip_id; + const struct tps65086_regulator_config *reg_config; /* IRQ Data */ int irq; diff --git a/include/linux/mhi.h b/include/linux/mhi.h index f6de4b6ecfc7..039943ec4d4e 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -299,6 +299,10 @@ struct mhi_controller_config { * @iova_start: IOMMU starting address for data (required) * @iova_stop: IOMMU stop address for data (required) * @fw_image: Firmware image name for normal booting (optional) + * @fw_data: Firmware image data content for normal booting, used only + * if fw_image is NULL and fbc_download is true (optional) + * @fw_sz: Firmware image data size for normal booting, used only if fw_image + * is NULL and fbc_download is true (optional) * @edl_image: Firmware image name for emergency download mode (optional) * @rddm_size: RAM dump size that host should allocate for debugging purpose * @sbl_size: SBL image size downloaded through BHIe (optional) @@ -384,6 +388,8 @@ struct mhi_controller { dma_addr_t iova_start; dma_addr_t iova_stop; const char *fw_image; + const u8 *fw_data; + size_t fw_sz; const char *edl_image; size_t rddm_size; size_t sbl_size; diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 8bef1ab62bba..591bf5b5e8dc 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -41,9 +41,10 @@ #define PHY_ID_KSZ9477 0x00221631 /* struct phy_device dev_flags definitions */ -#define MICREL_PHY_50MHZ_CLK 0x00000001 -#define MICREL_PHY_FXEN 0x00000002 -#define MICREL_KSZ8_P1_ERRATA 0x00000003 +#define MICREL_PHY_50MHZ_CLK BIT(0) +#define MICREL_PHY_FXEN BIT(1) +#define MICREL_KSZ8_P1_ERRATA BIT(2) +#define MICREL_NO_EEE BIT(3) #define MICREL_KSZ9021_EXTREG_CTRL 0xB #define MICREL_KSZ9021_EXTREG_DATA_WRITE 0xC @@ -63,6 +64,10 @@ #define KSZ886X_BMCR_DISABLE_TRANSMIT BIT(1) #define KSZ886X_BMCR_DISABLE_LED BIT(0) +/* PHY Special Control/Status Register (Reg 31) */ #define KSZ886X_CTRL_MDIX_STAT BIT(4) +#define KSZ886X_CTRL_FORCE_LINK BIT(3) +#define KSZ886X_CTRL_PWRSAVE BIT(2) +#define KSZ886X_CTRL_REMOTE_LOOPBACK BIT(1) #endif /* _MICREL_PHY_H */ diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 711dd9412561..2ce13e8a309b 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -142,10 +142,10 @@ const struct movable_operations *page_movable_ops(struct page *page) } #ifdef CONFIG_NUMA_BALANCING -int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, +int migrate_misplaced_folio(struct folio *folio, struct vm_area_struct *vma, int node); #else -static inline int migrate_misplaced_page(struct page *page, +static inline int migrate_misplaced_folio(struct folio *folio, struct vm_area_struct *vma, int node) { return -EAGAIN; /* can't migrate now */ diff --git a/include/linux/minmax.h b/include/linux/minmax.h index 396df1121bff..2ec559284a9f 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -2,59 +2,77 @@ #ifndef _LINUX_MINMAX_H #define _LINUX_MINMAX_H +#include <linux/build_bug.h> +#include <linux/compiler.h> #include <linux/const.h> +#include <linux/types.h> /* * min()/max()/clamp() macros must accomplish three things: * - * - avoid multiple evaluations of the arguments (so side-effects like + * - Avoid multiple evaluations of the arguments (so side-effects like * "x++" happen only once) when non-constant. - * - perform strict type-checking (to generate warnings instead of - * nasty runtime surprises). See the "unnecessary" pointer comparison - * in __typecheck(). - * - retain result as a constant expressions when called with only + * - Retain result as a constant expressions when called with only * constant expressions (to avoid tripping VLA warnings in stack * allocation usage). + * - Perform signed v unsigned type-checking (to generate compile + * errors instead of nasty runtime surprises). + * - Unsigned char/short are always promoted to signed int and can be + * compared against signed or unsigned arguments. + * - Unsigned arguments can be compared against non-negative signed constants. + * - Comparison of a signed argument against an unsigned constant fails + * even if the constant is below __INT_MAX__ and could be cast to int. */ #define __typecheck(x, y) \ (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) -#define __no_side_effects(x, y) \ - (__is_constexpr(x) && __is_constexpr(y)) +/* is_signed_type() isn't a constexpr for pointer types */ +#define __is_signed(x) \ + __builtin_choose_expr(__is_constexpr(is_signed_type(typeof(x))), \ + is_signed_type(typeof(x)), 0) -#define __safe_cmp(x, y) \ - (__typecheck(x, y) && __no_side_effects(x, y)) +/* True for a non-negative signed int constant */ +#define __is_noneg_int(x) \ + (__builtin_choose_expr(__is_constexpr(x) && __is_signed(x), x, -1) >= 0) -#define __cmp(x, y, op) ((x) op (y) ? (x) : (y)) +#define __types_ok(x, y) \ + (__is_signed(x) == __is_signed(y) || \ + __is_signed((x) + 0) == __is_signed((y) + 0) || \ + __is_noneg_int(x) || __is_noneg_int(y)) -#define __cmp_once(x, y, unique_x, unique_y, op) ({ \ - typeof(x) unique_x = (x); \ - typeof(y) unique_y = (y); \ - __cmp(unique_x, unique_y, op); }) +#define __cmp_op_min < +#define __cmp_op_max > -#define __careful_cmp(x, y, op) \ - __builtin_choose_expr(__safe_cmp(x, y), \ - __cmp(x, y, op), \ - __cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op)) +#define __cmp(op, x, y) ((x) __cmp_op_##op (y) ? (x) : (y)) + +#define __cmp_once(op, x, y, unique_x, unique_y) ({ \ + typeof(x) unique_x = (x); \ + typeof(y) unique_y = (y); \ + static_assert(__types_ok(x, y), \ + #op "(" #x ", " #y ") signedness error, fix types or consider u" #op "() before " #op "_t()"); \ + __cmp(op, unique_x, unique_y); }) + +#define __careful_cmp(op, x, y) \ + __builtin_choose_expr(__is_constexpr((x) - (y)), \ + __cmp(op, x, y), \ + __cmp_once(op, x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y))) #define __clamp(val, lo, hi) \ ((val) >= (hi) ? (hi) : ((val) <= (lo) ? (lo) : (val))) -#define __clamp_once(val, lo, hi, unique_val, unique_lo, unique_hi) ({ \ - typeof(val) unique_val = (val); \ - typeof(lo) unique_lo = (lo); \ - typeof(hi) unique_hi = (hi); \ - __clamp(unique_val, unique_lo, unique_hi); }) - -#define __clamp_input_check(lo, hi) \ - (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ - __is_constexpr((lo) > (hi)), (lo) > (hi), false))) +#define __clamp_once(val, lo, hi, unique_val, unique_lo, unique_hi) ({ \ + typeof(val) unique_val = (val); \ + typeof(lo) unique_lo = (lo); \ + typeof(hi) unique_hi = (hi); \ + static_assert(__builtin_choose_expr(__is_constexpr((lo) > (hi)), \ + (lo) <= (hi), true), \ + "clamp() low limit " #lo " greater than high limit " #hi); \ + static_assert(__types_ok(val, lo), "clamp() 'lo' signedness error"); \ + static_assert(__types_ok(val, hi), "clamp() 'hi' signedness error"); \ + __clamp(unique_val, unique_lo, unique_hi); }) #define __careful_clamp(val, lo, hi) ({ \ - __clamp_input_check(lo, hi) + \ - __builtin_choose_expr(__typecheck(val, lo) && __typecheck(val, hi) && \ - __typecheck(hi, lo) && __is_constexpr(val) && \ - __is_constexpr(lo) && __is_constexpr(hi), \ + __builtin_choose_expr(__is_constexpr((val) - (lo) + (hi)), \ __clamp(val, lo, hi), \ __clamp_once(val, lo, hi, __UNIQUE_ID(__val), \ __UNIQUE_ID(__lo), __UNIQUE_ID(__hi))); }) @@ -64,14 +82,31 @@ * @x: first value * @y: second value */ -#define min(x, y) __careful_cmp(x, y, <) +#define min(x, y) __careful_cmp(min, x, y) /** * max - return maximum of two values of the same or compatible types * @x: first value * @y: second value */ -#define max(x, y) __careful_cmp(x, y, >) +#define max(x, y) __careful_cmp(max, x, y) + +/** + * umin - return minimum of two non-negative values + * Signed types are zero extended to match a larger unsigned type. + * @x: first value + * @y: second value + */ +#define umin(x, y) \ + __careful_cmp(min, (x) + 0u + 0ul + 0ull, (y) + 0u + 0ul + 0ull) + +/** + * umax - return maximum of two non-negative values + * @x: first value + * @y: second value + */ +#define umax(x, y) \ + __careful_cmp(max, (x) + 0u + 0ul + 0ull, (y) + 0u + 0ul + 0ull) /** * min3 - return minimum of three values @@ -123,7 +158,7 @@ * @x: first value * @y: second value */ -#define min_t(type, x, y) __careful_cmp((type)(x), (type)(y), <) +#define min_t(type, x, y) __careful_cmp(min, (type)(x), (type)(y)) /** * max_t - return maximum of two values, using the specified type @@ -131,7 +166,50 @@ * @x: first value * @y: second value */ -#define max_t(type, x, y) __careful_cmp((type)(x), (type)(y), >) +#define max_t(type, x, y) __careful_cmp(max, (type)(x), (type)(y)) + +/* + * Do not check the array parameter using __must_be_array(). + * In the following legit use-case where the "array" passed is a simple pointer, + * __must_be_array() will return a failure. + * --- 8< --- + * int *buff + * ... + * min = min_array(buff, nb_items); + * --- 8< --- + * + * The first typeof(&(array)[0]) is needed in order to support arrays of both + * 'int *buff' and 'int buff[N]' types. + * + * The array can be an array of const items. + * typeof() keeps the const qualifier. Use __unqual_scalar_typeof() in order + * to discard the const qualifier for the __element variable. + */ +#define __minmax_array(op, array, len) ({ \ + typeof(&(array)[0]) __array = (array); \ + typeof(len) __len = (len); \ + __unqual_scalar_typeof(__array[0]) __element = __array[--__len];\ + while (__len--) \ + __element = op(__element, __array[__len]); \ + __element; }) + +/** + * min_array - return minimum of values present in an array + * @array: array + * @len: array length + * + * Note that @len must not be zero (empty array). + */ +#define min_array(array, len) __minmax_array(min, array, len) + +/** + * max_array - return maximum of values present in an array + * @array: array + * @len: array length + * + * Note that @len must not be zero (empty array). + */ +#define max_array(array, len) __minmax_array(max, array, len) /** * clamp_t - return a value clamped to a given range using a given type @@ -158,6 +236,32 @@ */ #define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi) +static inline bool in_range64(u64 val, u64 start, u64 len) +{ + return (val - start) < len; +} + +static inline bool in_range32(u32 val, u32 start, u32 len) +{ + return (val - start) < len; +} + +/** + * in_range - Determine if a value lies within a range. + * @val: Value to test. + * @start: First value in range. + * @len: Number of values in range. + * + * This is more efficient than "if (start <= val && val < (start + len))". + * It also gives a different answer if @start + @len overflows the size of + * the type by a sufficient amount to encompass @val. Decide for yourself + * which behaviour you want, or prove that start + len never overflow. + * Do not blindly replace one form with the other. + */ +#define in_range(val, start, len) \ + ((sizeof(start) | sizeof(len) | sizeof(val)) <= sizeof(u32) ? \ + in_range32(val, start, len) : in_range64(val, start, len)) + /** * swap - swap values of @a and @b * @a: first value diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h index c238207d1615..e799b1f8d05b 100644 --- a/include/linux/misc_cgroup.h +++ b/include/linux/misc_cgroup.h @@ -31,17 +31,18 @@ struct misc_cg; * struct misc_res: Per cgroup per misc type resource * @max: Maximum limit on the resource. * @usage: Current usage of the resource. - * @failed: True if charged failed for the resource in a cgroup. + * @events: Number of times, the resource limit exceeded. */ struct misc_res { - unsigned long max; - atomic_long_t usage; - atomic_long_t events; + u64 max; + atomic64_t usage; + atomic64_t events; }; /** * struct misc_cg - Miscellaneous controller's cgroup structure. * @css: cgroup subsys state object. + * @events_file: Handle for the misc resources events file. * @res: Array of misc resources usage in the cgroup. */ struct misc_cg { @@ -53,12 +54,10 @@ struct misc_cg { struct misc_res res[MISC_CG_RES_TYPES]; }; -unsigned long misc_cg_res_total_usage(enum misc_res_type type); -int misc_cg_set_capacity(enum misc_res_type type, unsigned long capacity); -int misc_cg_try_charge(enum misc_res_type type, struct misc_cg *cg, - unsigned long amount); -void misc_cg_uncharge(enum misc_res_type type, struct misc_cg *cg, - unsigned long amount); +u64 misc_cg_res_total_usage(enum misc_res_type type); +int misc_cg_set_capacity(enum misc_res_type type, u64 capacity); +int misc_cg_try_charge(enum misc_res_type type, struct misc_cg *cg, u64 amount); +void misc_cg_uncharge(enum misc_res_type type, struct misc_cg *cg, u64 amount); /** * css_misc() - Get misc cgroup from the css. @@ -99,27 +98,26 @@ static inline void put_misc_cg(struct misc_cg *cg) #else /* !CONFIG_CGROUP_MISC */ -static inline unsigned long misc_cg_res_total_usage(enum misc_res_type type) +static inline u64 misc_cg_res_total_usage(enum misc_res_type type) { return 0; } -static inline int misc_cg_set_capacity(enum misc_res_type type, - unsigned long capacity) +static inline int misc_cg_set_capacity(enum misc_res_type type, u64 capacity) { return 0; } static inline int misc_cg_try_charge(enum misc_res_type type, struct misc_cg *cg, - unsigned long amount) + u64 amount) { return 0; } static inline void misc_cg_uncharge(enum misc_res_type type, struct misc_cg *cg, - unsigned long amount) + u64 amount) { } diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 6646634a0b9d..27f42f713c89 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -33,6 +33,7 @@ #ifndef MLX4_DEVICE_H #define MLX4_DEVICE_H +#include <linux/auxiliary_bus.h> #include <linux/if_ether.h> #include <linux/pci.h> #include <linux/completion.h> @@ -889,6 +890,12 @@ struct mlx4_dev { u8 uar_page_shift; }; +struct mlx4_adev { + struct auxiliary_device adev; + struct mlx4_dev *mdev; + int idx; +}; + struct mlx4_clock_params { u64 offset; u8 bar; @@ -1087,6 +1094,19 @@ static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset) (offset & (PAGE_SIZE - 1)); } +static inline int mlx4_is_bonded(struct mlx4_dev *dev) +{ + return !!(dev->flags & MLX4_FLAG_BONDED); +} + +static inline int mlx4_is_mf_bonded(struct mlx4_dev *dev) +{ + return (mlx4_is_bonded(dev) && mlx4_is_mfunc(dev)); +} + +int mlx4_queue_bond_work(struct mlx4_dev *dev, int is_bonded, u8 v2p_p1, + u8 v2p_p2); + int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn); void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn); int mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn); diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h index 1834c8fad12e..69825223081f 100644 --- a/include/linux/mlx4/driver.h +++ b/include/linux/mlx4/driver.h @@ -34,8 +34,12 @@ #define MLX4_DRIVER_H #include <net/devlink.h> +#include <linux/auxiliary_bus.h> +#include <linux/notifier.h> #include <linux/mlx4/device.h> +#define MLX4_ADEV_NAME "mlx4_core" + struct mlx4_dev; #define MLX4_MAC_MASK 0xffffffffffffULL @@ -54,41 +58,19 @@ enum { MLX4_INTFF_BONDING = 1 << 0 }; -struct mlx4_interface { - void * (*add) (struct mlx4_dev *dev); - void (*remove)(struct mlx4_dev *dev, void *context); - void (*event) (struct mlx4_dev *dev, void *context, - enum mlx4_dev_event event, unsigned long param); - void * (*get_dev)(struct mlx4_dev *dev, void *context, u8 port); - void (*activate)(struct mlx4_dev *dev, void *context); - struct list_head list; +struct mlx4_adrv { + struct auxiliary_driver adrv; enum mlx4_protocol protocol; int flags; }; -int mlx4_register_interface(struct mlx4_interface *intf); -void mlx4_unregister_interface(struct mlx4_interface *intf); - -int mlx4_bond(struct mlx4_dev *dev); -int mlx4_unbond(struct mlx4_dev *dev); -static inline int mlx4_is_bonded(struct mlx4_dev *dev) -{ - return !!(dev->flags & MLX4_FLAG_BONDED); -} - -static inline int mlx4_is_mf_bonded(struct mlx4_dev *dev) -{ - return (mlx4_is_bonded(dev) && mlx4_is_mfunc(dev)); -} - -struct mlx4_port_map { - u8 port1; - u8 port2; -}; - -int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p); +int mlx4_register_auxiliary_driver(struct mlx4_adrv *madrv); +void mlx4_unregister_auxiliary_driver(struct mlx4_adrv *madrv); -void *mlx4_get_protocol_dev(struct mlx4_dev *dev, enum mlx4_protocol proto, int port); +int mlx4_register_event_notifier(struct mlx4_dev *dev, + struct notifier_block *nb); +int mlx4_unregister_event_notifier(struct mlx4_dev *dev, + struct notifier_block *nb); struct devlink_port *mlx4_get_devlink_port(struct mlx4_dev *dev, int port); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 80cc12a9a531..820bca965fb6 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -364,6 +364,11 @@ enum mlx5_event { enum mlx5_driver_event { MLX5_DRIVER_EVENT_TYPE_TRAP = 0, MLX5_DRIVER_EVENT_UPLINK_NETDEV, + MLX5_DRIVER_EVENT_MACSEC_SA_ADDED, + MLX5_DRIVER_EVENT_MACSEC_SA_DELETED, + MLX5_DRIVER_EVENT_SF_PEER_DEVLINK, + MLX5_DRIVER_EVENT_AFFILIATION_DONE, + MLX5_DRIVER_EVENT_AFFILIATION_REMOVED, }; enum { @@ -1208,9 +1213,7 @@ enum mlx5_cap_type { MLX5_CAP_FLOW_TABLE, MLX5_CAP_ESWITCH_FLOW_TABLE, MLX5_CAP_ESWITCH, - MLX5_CAP_RESERVED, - MLX5_CAP_VECTOR_CALC, - MLX5_CAP_QOS, + MLX5_CAP_QOS = 0xc, MLX5_CAP_DEBUG, MLX5_CAP_RESERVED_14, MLX5_CAP_DEV_MEM, @@ -1220,7 +1223,6 @@ enum mlx5_cap_type { MLX5_CAP_DEV_EVENT = 0x14, MLX5_CAP_IPSEC, MLX5_CAP_CRYPTO = 0x1a, - MLX5_CAP_DEV_SHAMPO = 0x1d, MLX5_CAP_MACSEC = 0x1f, MLX5_CAP_GENERAL_2 = 0x20, MLX5_CAP_PORT_SELECTION = 0x25, @@ -1239,7 +1241,6 @@ enum mlx5_pcam_feature_groups { enum mlx5_mcam_reg_groups { MLX5_MCAM_REGS_FIRST_128 = 0x0, - MLX5_MCAM_REGS_0x9080_0x90FF = 0x1, MLX5_MCAM_REGS_0x9100_0x917F = 0x2, MLX5_MCAM_REGS_NUM = 0x3, }; @@ -1279,10 +1280,6 @@ enum mlx5_qcam_feature_groups { MLX5_GET(per_protocol_networking_offload_caps,\ mdev->caps.hca[MLX5_CAP_ETHERNET_OFFLOADS]->cur, cap) -#define MLX5_CAP_ETH_MAX(mdev, cap) \ - MLX5_GET(per_protocol_networking_offload_caps,\ - mdev->caps.hca[MLX5_CAP_ETHERNET_OFFLOADS]->max, cap) - #define MLX5_CAP_IPOIB_ENHANCED(mdev, cap) \ MLX5_GET(per_protocol_networking_offload_caps,\ mdev->caps.hca[MLX5_CAP_IPOIB_ENHANCED_OFFLOADS]->cur, cap) @@ -1305,77 +1302,40 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP64_FLOWTABLE(mdev, cap) \ MLX5_GET64(flow_table_nic_cap, (mdev)->caps.hca[MLX5_CAP_FLOW_TABLE]->cur, cap) -#define MLX5_CAP_FLOWTABLE_MAX(mdev, cap) \ - MLX5_GET(flow_table_nic_cap, mdev->caps.hca[MLX5_CAP_FLOW_TABLE]->max, cap) - #define MLX5_CAP_FLOWTABLE_NIC_RX(mdev, cap) \ MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.cap) -#define MLX5_CAP_FLOWTABLE_NIC_RX_MAX(mdev, cap) \ - MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive.cap) - #define MLX5_CAP_FLOWTABLE_NIC_TX(mdev, cap) \ MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_transmit.cap) -#define MLX5_CAP_FLOWTABLE_NIC_TX_MAX(mdev, cap) \ - MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_transmit.cap) - #define MLX5_CAP_FLOWTABLE_SNIFFER_RX(mdev, cap) \ MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive_sniffer.cap) -#define MLX5_CAP_FLOWTABLE_SNIFFER_RX_MAX(mdev, cap) \ - MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive_sniffer.cap) - #define MLX5_CAP_FLOWTABLE_SNIFFER_TX(mdev, cap) \ MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_transmit_sniffer.cap) -#define MLX5_CAP_FLOWTABLE_SNIFFER_TX_MAX(mdev, cap) \ - MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_transmit_sniffer.cap) - #define MLX5_CAP_FLOWTABLE_RDMA_RX(mdev, cap) \ MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive_rdma.cap) -#define MLX5_CAP_FLOWTABLE_RDMA_RX_MAX(mdev, cap) \ - MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive_rdma.cap) - #define MLX5_CAP_FLOWTABLE_RDMA_TX(mdev, cap) \ MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_transmit_rdma.cap) -#define MLX5_CAP_FLOWTABLE_RDMA_TX_MAX(mdev, cap) \ - MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_transmit_rdma.cap) - #define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \ MLX5_GET(flow_table_eswitch_cap, \ mdev->caps.hca[MLX5_CAP_ESWITCH_FLOW_TABLE]->cur, cap) -#define MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, cap) \ - MLX5_GET(flow_table_eswitch_cap, \ - mdev->caps.hca[MLX5_CAP_ESWITCH_FLOW_TABLE]->max, cap) - #define MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_nic_esw_fdb.cap) -#define MLX5_CAP_ESW_FLOWTABLE_FDB_MAX(mdev, cap) \ - MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_nic_esw_fdb.cap) - #define MLX5_CAP_ESW_EGRESS_ACL(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_esw_acl_egress.cap) -#define MLX5_CAP_ESW_EGRESS_ACL_MAX(mdev, cap) \ - MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_esw_acl_egress.cap) - #define MLX5_CAP_ESW_INGRESS_ACL(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_esw_acl_ingress.cap) -#define MLX5_CAP_ESW_INGRESS_ACL_MAX(mdev, cap) \ - MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_esw_acl_ingress.cap) - #define MLX5_CAP_ESW_FT_FIELD_SUPPORT_2(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE(mdev, ft_field_support_2_esw_fdb.cap) -#define MLX5_CAP_ESW_FT_FIELD_SUPPORT_2_MAX(mdev, cap) \ - MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, ft_field_support_2_esw_fdb.cap) - #define MLX5_CAP_ESW(mdev, cap) \ MLX5_GET(e_switch_cap, \ mdev->caps.hca[MLX5_CAP_ESWITCH]->cur, cap) @@ -1384,10 +1344,6 @@ enum mlx5_qcam_feature_groups { MLX5_GET64(flow_table_eswitch_cap, \ (mdev)->caps.hca[MLX5_CAP_ESWITCH_FLOW_TABLE]->cur, cap) -#define MLX5_CAP_ESW_MAX(mdev, cap) \ - MLX5_GET(e_switch_cap, \ - mdev->caps.hca[MLX5_CAP_ESWITCH]->max, cap) - #define MLX5_CAP_PORT_SELECTION(mdev, cap) \ MLX5_GET(port_selection_cap, \ mdev->caps.hca[MLX5_CAP_PORT_SELECTION]->cur, cap) @@ -1400,26 +1356,15 @@ enum mlx5_qcam_feature_groups { MLX5_GET(adv_virtualization_cap, \ mdev->caps.hca[MLX5_CAP_ADV_VIRTUALIZATION]->cur, cap) -#define MLX5_CAP_ADV_VIRTUALIZATION_MAX(mdev, cap) \ - MLX5_GET(adv_virtualization_cap, \ - mdev->caps.hca[MLX5_CAP_ADV_VIRTUALIZATION]->max, cap) - #define MLX5_CAP_FLOWTABLE_PORT_SELECTION(mdev, cap) \ MLX5_CAP_PORT_SELECTION(mdev, flow_table_properties_port_selection.cap) -#define MLX5_CAP_FLOWTABLE_PORT_SELECTION_MAX(mdev, cap) \ - MLX5_CAP_PORT_SELECTION_MAX(mdev, flow_table_properties_port_selection.cap) - #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->cur, cap) #define MLX5_CAP_ODP_MAX(mdev, cap)\ MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->max, cap) -#define MLX5_CAP_VECTOR_CALC(mdev, cap) \ - MLX5_GET(vector_calc_cap, \ - mdev->caps.hca[MLX5_CAP_VECTOR_CALC]->cur, cap) - #define MLX5_CAP_QOS(mdev, cap)\ MLX5_GET(qos_cap, mdev->caps.hca[MLX5_CAP_QOS]->cur, cap) @@ -1436,10 +1381,6 @@ enum mlx5_qcam_feature_groups { MLX5_GET(mcam_reg, (mdev)->caps.mcam[MLX5_MCAM_REGS_FIRST_128], \ mng_access_reg_cap_mask.access_regs.reg) -#define MLX5_CAP_MCAM_REG1(mdev, reg) \ - MLX5_GET(mcam_reg, (mdev)->caps.mcam[MLX5_MCAM_REGS_0x9080_0x90FF], \ - mng_access_reg_cap_mask.access_regs1.reg) - #define MLX5_CAP_MCAM_REG2(mdev, reg) \ MLX5_GET(mcam_reg, (mdev)->caps.mcam[MLX5_MCAM_REGS_0x9100_0x917F], \ mng_access_reg_cap_mask.access_regs2.reg) @@ -1485,9 +1426,6 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_CRYPTO(mdev, cap)\ MLX5_GET(crypto_cap, (mdev)->caps.hca[MLX5_CAP_CRYPTO]->cur, cap) -#define MLX5_CAP_DEV_SHAMPO(mdev, cap)\ - MLX5_GET(shampo_cap, mdev->caps.hca_cur[MLX5_CAP_DEV_SHAMPO], cap) - #define MLX5_CAP_MACSEC(mdev, cap)\ MLX5_GET(macsec_cap, (mdev)->caps.hca[MLX5_CAP_MACSEC]->cur, cap) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 25d0528f9219..d2b8d4a74a30 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -134,6 +134,7 @@ enum { MLX5_REG_PCAM = 0x507f, MLX5_REG_NODE_DESC = 0x6001, MLX5_REG_HOST_ENDIANNESS = 0x7004, + MLX5_REG_MTCAP = 0x9009, MLX5_REG_MTMP = 0x900A, MLX5_REG_MCIA = 0x9014, MLX5_REG_MFRL = 0x9028, @@ -154,6 +155,8 @@ enum { MLX5_REG_MCC = 0x9062, MLX5_REG_MCDA = 0x9063, MLX5_REG_MCAM = 0x907f, + MLX5_REG_MSECQ = 0x9155, + MLX5_REG_MSEES = 0x9156, MLX5_REG_MIRC = 0x9162, MLX5_REG_SBCAM = 0xB01F, MLX5_REG_RESOURCE_DUMP = 0xC000, @@ -287,18 +290,23 @@ struct mlx5_cmd_stats { struct mlx5_cmd { struct mlx5_nb nb; + /* members which needs to be queried or reinitialized each reload */ + struct { + u16 cmdif_rev; + u8 log_sz; + u8 log_stride; + int max_reg_cmds; + unsigned long bitmask; + struct semaphore sem; + struct semaphore pages_sem; + struct semaphore throttle_sem; + } vars; enum mlx5_cmdif_state state; void *cmd_alloc_buf; dma_addr_t alloc_dma; int alloc_size; void *cmd_buf; dma_addr_t dma; - u16 cmdif_rev; - u8 log_sz; - u8 log_stride; - int max_reg_cmds; - int events; - u32 __iomem *vector; /* protect command queue allocations */ @@ -308,12 +316,8 @@ struct mlx5_cmd { */ spinlock_t token_lock; u8 token; - unsigned long bitmask; char wq_name[MLX5_CMD_WQ_MAX_NAME]; struct workqueue_struct *wq; - struct semaphore sem; - struct semaphore pages_sem; - struct semaphore throttle_sem; int mode; u16 allowed_opcode; struct mlx5_cmd_work_ent *ent_arr[MLX5_MAX_COMMANDS]; @@ -321,7 +325,7 @@ struct mlx5_cmd { struct mlx5_cmd_debug dbg; struct cmd_msg_cache cache[MLX5_NUM_COMMAND_CACHES]; int checksum_disabled; - struct mlx5_cmd_stats stats[MLX5_CMD_OP_MAX]; + struct xarray stats; }; struct mlx5_cmd_mailbox { @@ -501,7 +505,7 @@ struct mlx5_events; struct mlx5_mpfs; struct mlx5_eswitch; struct mlx5_lag; -struct mlx5_devcom; +struct mlx5_devcom_dev; struct mlx5_fw_reset; struct mlx5_eq_table; struct mlx5_irq_table; @@ -611,6 +615,7 @@ struct mlx5_priv { int adev_idx; int sw_vhca_id; struct mlx5_events *events; + struct mlx5_vhca_events *vhca_events; struct mlx5_flow_steering *steering; struct mlx5_mpfs *mpfs; @@ -618,7 +623,8 @@ struct mlx5_priv { struct mlx5_core_sriov sriov; struct mlx5_lag *lag; u32 flags; - struct mlx5_devcom *devcom; + struct mlx5_devcom_dev *devc; + struct mlx5_devcom_comp_dev *hca_devcom_comp; struct mlx5_fw_reset *fw_reset; struct mlx5_core_roce roce; struct mlx5_fc_stats fc_stats; @@ -725,7 +731,6 @@ struct mlx5_fw_tracer; struct mlx5_vxlan; struct mlx5_geneve; struct mlx5_hv_vhca; -struct mlx5_thermal; #define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) (MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity)) #define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)) @@ -804,7 +809,15 @@ struct mlx5_core_dev { struct mlx5_rsc_dump *rsc_dump; u32 vsc_addr; struct mlx5_hv_vhca *hv_vhca; - struct mlx5_thermal *thermal; + struct mlx5_hwmon *hwmon; + u64 num_block_tc; + u64 num_block_ipsec; +#ifdef CONFIG_MLX5_MACSEC + struct mlx5_macsec_fs *macsec_fs; + /* MACsec notifier chain to sync MACsec core and IB database */ + struct blocking_notifier_head macsec_nh; +#endif + u64 num_ipsec_offloads; }; struct mlx5_db { @@ -1018,7 +1031,8 @@ bool mlx5_cmd_is_down(struct mlx5_core_dev *dev); void mlx5_core_uplink_netdev_set(struct mlx5_core_dev *mdev, struct net_device *netdev); void mlx5_core_uplink_netdev_event_replay(struct mlx5_core_dev *mdev); -int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type); +void mlx5_core_mp_event_replay(struct mlx5_core_dev *dev, u32 event, void *data); + void mlx5_health_cleanup(struct mlx5_core_dev *dev); int mlx5_health_init(struct mlx5_core_dev *dev); void mlx5_start_health_poll(struct mlx5_core_dev *dev); @@ -1029,10 +1043,6 @@ void mlx5_trigger_health_work(struct mlx5_core_dev *dev); int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size, struct mlx5_frag_buf *buf, int node); void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf); -struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev, - gfp_t flags, int npages); -void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev, - struct mlx5_cmd_mailbox *head); int mlx5_core_create_mkey(struct mlx5_core_dev *dev, u32 *mkey, u32 *in, int inlen); int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, u32 mkey); @@ -1046,8 +1056,6 @@ void mlx5_pagealloc_start(struct mlx5_core_dev *dev); void mlx5_pagealloc_stop(struct mlx5_core_dev *dev); void mlx5_pages_debugfs_init(struct mlx5_core_dev *dev); void mlx5_pages_debugfs_cleanup(struct mlx5_core_dev *dev); -void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, - s32 npages, bool ec_function); int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot); int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev); void mlx5_register_debugfs(void); @@ -1055,7 +1063,7 @@ void mlx5_unregister_debugfs(void); void mlx5_fill_page_frag_array_perm(struct mlx5_frag_buf *buf, __be64 *pas, u8 perm); void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas); -int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn); +int mlx5_comp_eqn_get(struct mlx5_core_dev *dev, u16 vecidx, int *eqn); int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); @@ -1087,8 +1095,6 @@ int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn, int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num); __be32 mlx5_core_get_terminate_scatter_list_mkey(struct mlx5_core_dev *dev); void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common); -int mlx5_query_odp_caps(struct mlx5_core_dev *dev, - struct mlx5_odp_caps *odp_caps); int mlx5_init_rl_table(struct mlx5_core_dev *dev); void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev); @@ -1105,9 +1111,8 @@ int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg, bool map_wc, bool fast_path); void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg); -unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev); -struct cpumask * -mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector); +unsigned int mlx5_comp_vectors_max(struct mlx5_core_dev *dev); +int mlx5_comp_vector_get_cpu(struct mlx5_core_dev *dev, int vector); unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev); int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index, u8 roce_version, u8 roce_l3_type, const u8 *gid, @@ -1191,12 +1196,6 @@ int mlx5_sriov_blocking_notifier_register(struct mlx5_core_dev *mdev, void mlx5_sriov_blocking_notifier_unregister(struct mlx5_core_dev *mdev, int vf_id, struct notifier_block *nb); -#ifdef CONFIG_MLX5_CORE_IPOIB -struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, - struct ib_device *ibdev, - const char *name, - void (*setup)(struct net_device *)); -#endif /* CONFIG_MLX5_CORE_IPOIB */ int mlx5_rdma_rn_get_params(struct mlx5_core_dev *mdev, struct ib_device *device, struct rdma_netdev_alloc_params *params); @@ -1320,6 +1319,52 @@ static inline bool mlx5_get_roce_state(struct mlx5_core_dev *dev) return mlx5_is_roce_on(dev); } +#ifdef CONFIG_MLX5_MACSEC +static inline bool mlx5e_is_macsec_device(const struct mlx5_core_dev *mdev) +{ + if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_MACSEC_OFFLOAD)) + return false; + + if (!MLX5_CAP_GEN(mdev, log_max_dek)) + return false; + + if (!MLX5_CAP_MACSEC(mdev, log_max_macsec_offload)) + return false; + + if (!MLX5_CAP_FLOWTABLE_NIC_RX(mdev, macsec_decrypt) || + !MLX5_CAP_FLOWTABLE_NIC_RX(mdev, reformat_remove_macsec)) + return false; + + if (!MLX5_CAP_FLOWTABLE_NIC_TX(mdev, macsec_encrypt) || + !MLX5_CAP_FLOWTABLE_NIC_TX(mdev, reformat_add_macsec)) + return false; + + if (!MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_128_encrypt) && + !MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_256_encrypt)) + return false; + + if (!MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_128_decrypt) && + !MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_256_decrypt)) + return false; + + return true; +} + +#define NIC_RDMA_BOTH_DIRS_CAPS (MLX5_FT_NIC_RX_2_NIC_RX_RDMA | MLX5_FT_NIC_TX_RDMA_2_NIC_TX) + +static inline bool mlx5_is_macsec_roce_supported(struct mlx5_core_dev *mdev) +{ + if (((MLX5_CAP_GEN_2(mdev, flow_table_type_2_type) & + NIC_RDMA_BOTH_DIRS_CAPS) != NIC_RDMA_BOTH_DIRS_CAPS) || + !MLX5_CAP_FLOWTABLE_RDMA_TX(mdev, max_modify_header_actions) || + !mlx5e_is_macsec_device(mdev) || !mdev->macsec_fs) + return false; + + return true; +} +#endif + enum { MLX5_OCTWORD = 16, }; diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index e2701ed0200e..950d2431a53c 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -144,6 +144,9 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw, GENMASK(31 - ESW_TUN_ID_BITS - ESW_RESERVED_BITS, \ ESW_TUN_OPTS_OFFSET + 1) +/* reuse tun_opts for the mapped ipsec obj id when tun_id is 0 (invalid) */ +#define ESW_IPSEC_RX_MAPPED_ID_MASK GENMASK(ESW_TUN_OPTS_BITS - 1, 0) + u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev); u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev); struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw); diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 2cb404c7ea13..6f7725238abc 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -67,6 +67,7 @@ enum { MLX5_FLOW_TABLE_TERMINATION = BIT(2), MLX5_FLOW_TABLE_UNMANAGED = BIT(3), MLX5_FLOW_TABLE_OTHER_VPORT = BIT(4), + MLX5_FLOW_TABLE_UPLINK_VPORT = BIT(5), }; #define LEFTOVERS_RULE_NUM 2 @@ -105,15 +106,19 @@ enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS, MLX5_FLOW_NAMESPACE_RDMA_RX_IPSEC, MLX5_FLOW_NAMESPACE_RDMA_TX_IPSEC, + MLX5_FLOW_NAMESPACE_RDMA_RX_MACSEC, + MLX5_FLOW_NAMESPACE_RDMA_TX_MACSEC, }; enum { FDB_BYPASS_PATH, + FDB_CRYPTO_INGRESS, FDB_TC_OFFLOAD, FDB_FT_OFFLOAD, FDB_TC_MISS, FDB_BR_OFFLOAD, FDB_SLOW_PATH, + FDB_CRYPTO_EGRESS, FDB_PER_VPORT, }; diff --git a/include/linux/mlx5/macsec.h b/include/linux/mlx5/macsec.h new file mode 100644 index 000000000000..f7ff4c2a95d0 --- /dev/null +++ b/include/linux/mlx5/macsec.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */ + +#ifndef MLX5_MACSEC_H +#define MLX5_MACSEC_H + +#ifdef CONFIG_MLX5_MACSEC +struct mlx5_macsec_event_data { + struct mlx5_macsec_fs *macsec_fs; + void *macdev; + u32 fs_id; + bool is_tx; +}; + +int mlx5_macsec_add_roce_rule(void *macdev, const struct sockaddr *addr, u16 gid_idx, + struct list_head *tx_rules_list, struct list_head *rx_rules_list, + struct mlx5_macsec_fs *macsec_fs); + +void mlx5_macsec_del_roce_rule(u16 gid_idx, struct mlx5_macsec_fs *macsec_fs, + struct list_head *tx_rules_list, struct list_head *rx_rules_list); + +void mlx5_macsec_add_roce_sa_rules(u32 fs_id, const struct sockaddr *addr, u16 gid_idx, + struct list_head *tx_rules_list, + struct list_head *rx_rules_list, + struct mlx5_macsec_fs *macsec_fs, bool is_tx); + +void mlx5_macsec_del_roce_sa_rules(u32 fs_id, struct mlx5_macsec_fs *macsec_fs, + struct list_head *tx_rules_list, + struct list_head *rx_rules_list, bool is_tx); + +#endif +#endif /* MLX5_MACSEC_H */ diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 33344a71c3e3..3f7b664d625b 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -65,9 +65,11 @@ enum { enum { MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE = 0x0, + MLX5_SET_HCA_CAP_OP_MOD_ETHERNET_OFFLOADS = 0x1, MLX5_SET_HCA_CAP_OP_MOD_ODP = 0x2, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3, MLX5_SET_HCA_CAP_OP_MOD_ROCE = 0x4, + MLX5_SET_HCA_CAP_OP_MOD_IPSEC = 0x15, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2 = 0x20, MLX5_SET_HCA_CAP_OP_MOD_PORT_SELECTION = 0x25, }; @@ -310,6 +312,7 @@ enum { MLX5_CMD_OP_QUERY_VHCA_STATE = 0xb0d, MLX5_CMD_OP_MODIFY_VHCA_STATE = 0xb0e, MLX5_CMD_OP_SYNC_CRYPTO = 0xb12, + MLX5_CMD_OP_ALLOW_OTHER_VHCA_ACCESS = 0xb16, MLX5_CMD_OP_MAX }; @@ -464,10 +467,10 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 reformat_add_esp_trasport[0x1]; u8 reformat_l2_to_l3_esp_tunnel[0x1]; - u8 reserved_at_42[0x1]; + u8 reformat_add_esp_transport_over_udp[0x1]; u8 reformat_del_esp_trasport[0x1]; u8 reformat_l3_esp_tunnel_to_l2[0x1]; - u8 reserved_at_45[0x1]; + u8 reformat_del_esp_transport_over_udp[0x1]; u8 execute_aso[0x1]; u8 reserved_at_47[0x19]; @@ -618,7 +621,7 @@ struct mlx5_ifc_fte_match_set_misc_bits { u8 reserved_at_140[0x8]; u8 bth_dst_qp[0x18]; - u8 reserved_at_160[0x20]; + u8 inner_esp_spi[0x20]; u8 outer_esp_spi[0x20]; u8 reserved_at_1a0[0x60]; }; @@ -1229,7 +1232,13 @@ struct mlx5_ifc_virtio_emulation_cap_bits { u8 max_emulated_devices[0x8]; u8 max_num_virtio_queues[0x18]; - u8 reserved_at_a0[0x60]; + u8 reserved_at_a0[0x20]; + + u8 reserved_at_c0[0x13]; + u8 desc_group_mkey_supported[0x1]; + u8 reserved_at_d4[0xc]; + + u8 reserved_at_e0[0x20]; u8 umem_1_buffer_param_a[0x20]; @@ -1314,33 +1323,6 @@ struct mlx5_ifc_odp_cap_bits { u8 reserved_at_120[0x6E0]; }; -struct mlx5_ifc_calc_op { - u8 reserved_at_0[0x10]; - u8 reserved_at_10[0x9]; - u8 op_swap_endianness[0x1]; - u8 op_min[0x1]; - u8 op_xor[0x1]; - u8 op_or[0x1]; - u8 op_and[0x1]; - u8 op_max[0x1]; - u8 op_add[0x1]; -}; - -struct mlx5_ifc_vector_calc_cap_bits { - u8 calc_matrix[0x1]; - u8 reserved_at_1[0x1f]; - u8 reserved_at_20[0x8]; - u8 max_vec_count[0x8]; - u8 reserved_at_30[0xd]; - u8 max_chunk_size[0x3]; - struct mlx5_ifc_calc_op calc0; - struct mlx5_ifc_calc_op calc1; - struct mlx5_ifc_calc_op calc2; - struct mlx5_ifc_calc_op calc3; - - u8 reserved_at_c0[0x720]; -}; - struct mlx5_ifc_tls_cap_bits { u8 tls_1_2_aes_gcm_128[0x1]; u8 tls_1_3_aes_gcm_128[0x1]; @@ -1959,6 +1941,14 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 match_definer_format_supported[0x40]; }; +enum { + MLX5_CROSS_VHCA_OBJ_TO_OBJ_SUPPORTED_LOCAL_FLOW_TABLE_TO_REMOTE_FLOW_TABLE_MISS = 0x80000, +}; + +enum { + MLX5_ALLOWED_OBJ_FOR_OTHER_VHCA_ACCESS_FLOW_TABLE = 0x200, +}; + struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_0[0x80]; @@ -1973,9 +1963,15 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_c0[0x8]; u8 migration_multi_load[0x1]; u8 migration_tracking_state[0x1]; - u8 reserved_at_ca[0x16]; + u8 reserved_at_ca[0x6]; + u8 migration_in_chunks[0x1]; + u8 reserved_at_d1[0xf]; + + u8 cross_vhca_object_to_object_supported[0x20]; + + u8 allowed_object_for_other_vhca_access[0x40]; - u8 reserved_at_e0[0xc0]; + u8 reserved_at_140[0x60]; u8 flow_table_type_2_type[0x8]; u8 reserved_at_1a8[0x3]; @@ -3435,20 +3431,6 @@ struct mlx5_ifc_roce_addr_layout_bits { u8 reserved_at_e0[0x20]; }; -struct mlx5_ifc_shampo_cap_bits { - u8 reserved_at_0[0x3]; - u8 shampo_log_max_reservation_size[0x5]; - u8 reserved_at_8[0x3]; - u8 shampo_log_min_reservation_size[0x5]; - u8 shampo_min_mss_size[0x10]; - - u8 reserved_at_20[0x3]; - u8 shampo_max_log_headers_entry_size[0x5]; - u8 reserved_at_28[0x18]; - - u8 reserved_at_40[0x7c0]; -}; - struct mlx5_ifc_crypto_cap_bits { u8 reserved_at_0[0x3]; u8 synchronize_dek[0x1]; @@ -3484,16 +3466,15 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap; struct mlx5_ifc_e_switch_cap_bits e_switch_cap; struct mlx5_ifc_port_selection_cap_bits port_selection_cap; - struct mlx5_ifc_vector_calc_cap_bits vector_calc_cap; struct mlx5_ifc_qos_cap_bits qos_cap; struct mlx5_ifc_debug_cap_bits debug_cap; struct mlx5_ifc_fpga_cap_bits fpga_cap; struct mlx5_ifc_tls_cap_bits tls_cap; struct mlx5_ifc_device_mem_cap_bits device_mem_cap; struct mlx5_ifc_virtio_emulation_cap_bits virtio_emulation_cap; - struct mlx5_ifc_shampo_cap_bits shampo_cap; struct mlx5_ifc_macsec_cap_bits macsec_cap; struct mlx5_ifc_crypto_cap_bits crypto_cap; + struct mlx5_ifc_ipsec_cap_bits ipsec_cap; u8 reserved_at_0[0x8000]; }; @@ -6409,6 +6390,28 @@ struct mlx5_ifc_general_obj_out_cmd_hdr_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_allow_other_vhca_access_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + u8 reserved_at_40[0x50]; + u8 object_type_to_be_accessed[0x10]; + u8 object_id_to_be_accessed[0x20]; + u8 reserved_at_c0[0x40]; + union { + u8 access_key_raw[0x100]; + u8 access_key[8][0x20]; + }; +}; + +struct mlx5_ifc_allow_other_vhca_access_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + u8 syndrome[0x20]; + u8 reserved_at_40[0x40]; +}; + struct mlx5_ifc_modify_header_arg_bits { u8 reserved_at_0[0x80]; @@ -6431,6 +6434,24 @@ struct mlx5_ifc_create_match_definer_out_bits { struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; }; +struct mlx5_ifc_alias_context_bits { + u8 vhca_id_to_be_accessed[0x10]; + u8 reserved_at_10[0xd]; + u8 status[0x3]; + u8 object_id_to_be_accessed[0x20]; + u8 reserved_at_40[0x40]; + union { + u8 access_key_raw[0x100]; + u8 access_key[8][0x20]; + }; + u8 metadata[0x80]; +}; + +struct mlx5_ifc_create_alias_obj_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; + struct mlx5_ifc_alias_context_bits alias_ctx; +}; + enum { MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_OUTER_HEADERS = 0x0, MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS = 0x1, @@ -6665,9 +6686,12 @@ enum mlx5_reformat_ctx_type { MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x4, MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4 = 0x5, MLX5_REFORMAT_TYPE_L2_TO_L3_ESP_TUNNEL = 0x6, + MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_UDPV4 = 0x7, MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT = 0x8, MLX5_REFORMAT_TYPE_L3_ESP_TUNNEL_TO_L2 = 0x9, + MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT_OVER_UDP = 0xa, MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6 = 0xb, + MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_UDPV6 = 0xc, MLX5_REFORMAT_TYPE_INSERT_HDR = 0xf, MLX5_REFORMAT_TYPE_REMOVE_HDR = 0x10, MLX5_REFORMAT_TYPE_ADD_MACSEC = 0x11, @@ -10193,7 +10217,9 @@ struct mlx5_ifc_mcam_access_reg_bits { u8 mrtc[0x1]; u8 regs_44_to_32[0xd]; - u8 regs_31_to_0[0x20]; + u8 regs_31_to_10[0x16]; + u8 mtmp[0x1]; + u8 regs_8_to_0[0x9]; }; struct mlx5_ifc_mcam_access_reg_bits1 { @@ -10211,7 +10237,9 @@ struct mlx5_ifc_mcam_access_reg_bits2 { u8 mirc[0x1]; u8 regs_97_to_96[0x2]; - u8 regs_95_to_64[0x20]; + u8 regs_95_to_87[0x09]; + u8 synce_registers[0x2]; + u8 regs_84_to_64[0x15]; u8 regs_63_to_32[0x20]; @@ -10607,6 +10635,7 @@ enum { MLX5_INITIAL_SEG_HEALTH_SYNDROME_EQ_INV = 0xe, MLX5_INITIAL_SEG_HEALTH_SYNDROME_FFSER_ERR = 0xf, MLX5_INITIAL_SEG_HEALTH_SYNDROME_HIGH_TEMP_ERR = 0x10, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_PCI_POISONED_ERR = 0x12, }; struct mlx5_ifc_initial_seg_bits { @@ -10853,8 +10882,9 @@ enum { MLX5_MFRL_REG_RESET_STATE_IDLE = 0, MLX5_MFRL_REG_RESET_STATE_IN_NEGOTIATION = 1, MLX5_MFRL_REG_RESET_STATE_RESET_IN_PROGRESS = 2, - MLX5_MFRL_REG_RESET_STATE_TIMEOUT = 3, + MLX5_MFRL_REG_RESET_STATE_NEG_TIMEOUT = 3, MLX5_MFRL_REG_RESET_STATE_NACK = 4, + MLX5_MFRL_REG_RESET_STATE_UNLOAD_TIMEOUT = 5, }; enum { @@ -10946,6 +10976,15 @@ struct mlx5_ifc_mrtc_reg_bits { u8 time_l[0x20]; }; +struct mlx5_ifc_mtcap_reg_bits { + u8 reserved_at_0[0x19]; + u8 sensor_count[0x7]; + + u8 reserved_at_20[0x20]; + + u8 sensor_map[0x40]; +}; + struct mlx5_ifc_mtmp_reg_bits { u8 reserved_at_0[0x14]; u8 sensor_index[0xc]; @@ -11033,6 +11072,7 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_mfrl_reg_bits mfrl_reg; struct mlx5_ifc_mtutc_reg_bits mtutc_reg; struct mlx5_ifc_mrtc_reg_bits mrtc_reg; + struct mlx5_ifc_mtcap_reg_bits mtcap_reg; struct mlx5_ifc_mtmp_reg_bits mtmp_reg; u8 reserved_at_0[0x60e0]; }; @@ -11941,6 +11981,7 @@ enum { MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO = 0x24, MLX5_GENERAL_OBJECT_TYPES_MACSEC = 0x27, MLX5_GENERAL_OBJECT_TYPES_INT_KEK = 0x47, + MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS = 0xff15, }; enum { @@ -11960,6 +12001,13 @@ enum { MLX5_IPSEC_ASO_INC_SN = 0x2, }; +enum { + MLX5_IPSEC_ASO_REPLAY_WIN_32BIT = 0x0, + MLX5_IPSEC_ASO_REPLAY_WIN_64BIT = 0x1, + MLX5_IPSEC_ASO_REPLAY_WIN_128BIT = 0x2, + MLX5_IPSEC_ASO_REPLAY_WIN_256BIT = 0x3, +}; + struct mlx5_ifc_ipsec_aso_bits { u8 valid[0x1]; u8 reserved_at_201[0x1]; @@ -12416,7 +12464,8 @@ struct mlx5_ifc_query_vhca_migration_state_in_bits { u8 op_mod[0x10]; u8 incremental[0x1]; - u8 reserved_at_41[0xf]; + u8 chunk[0x1]; + u8 reserved_at_42[0xe]; u8 vhca_id[0x10]; u8 reserved_at_60[0x20]; @@ -12432,7 +12481,11 @@ struct mlx5_ifc_query_vhca_migration_state_out_bits { u8 required_umem_size[0x20]; - u8 reserved_at_a0[0x160]; + u8 reserved_at_a0[0x20]; + + u8 remaining_total_size[0x40]; + + u8 reserved_at_100[0x100]; }; struct mlx5_ifc_save_vhca_state_in_bits { @@ -12464,7 +12517,7 @@ struct mlx5_ifc_save_vhca_state_out_bits { u8 actual_image_size[0x20]; - u8 reserved_at_60[0x20]; + u8 next_required_umem_size[0x20]; }; struct mlx5_ifc_load_vhca_state_in_bits { @@ -12573,4 +12626,59 @@ struct mlx5_ifc_modify_page_track_obj_in_bits { struct mlx5_ifc_page_track_bits obj_context; }; +struct mlx5_ifc_msecq_reg_bits { + u8 reserved_at_0[0x20]; + + u8 reserved_at_20[0x12]; + u8 network_option[0x2]; + u8 local_ssm_code[0x4]; + u8 local_enhanced_ssm_code[0x8]; + + u8 local_clock_identity[0x40]; + + u8 reserved_at_80[0x180]; +}; + +enum { + MLX5_MSEES_FIELD_SELECT_ENABLE = BIT(0), + MLX5_MSEES_FIELD_SELECT_ADMIN_STATUS = BIT(1), + MLX5_MSEES_FIELD_SELECT_ADMIN_FREQ_MEASURE = BIT(2), +}; + +enum mlx5_msees_admin_status { + MLX5_MSEES_ADMIN_STATUS_FREE_RUNNING = 0x0, + MLX5_MSEES_ADMIN_STATUS_TRACK = 0x1, +}; + +enum mlx5_msees_oper_status { + MLX5_MSEES_OPER_STATUS_FREE_RUNNING = 0x0, + MLX5_MSEES_OPER_STATUS_SELF_TRACK = 0x1, + MLX5_MSEES_OPER_STATUS_OTHER_TRACK = 0x2, + MLX5_MSEES_OPER_STATUS_HOLDOVER = 0x3, + MLX5_MSEES_OPER_STATUS_FAIL_HOLDOVER = 0x4, + MLX5_MSEES_OPER_STATUS_FAIL_FREE_RUNNING = 0x5, +}; + +struct mlx5_ifc_msees_reg_bits { + u8 reserved_at_0[0x8]; + u8 local_port[0x8]; + u8 pnat[0x2]; + u8 lp_msb[0x2]; + u8 reserved_at_14[0xc]; + + u8 field_select[0x20]; + + u8 admin_status[0x4]; + u8 oper_status[0x4]; + u8 ho_acq[0x1]; + u8 reserved_at_49[0xc]; + u8 admin_freq_measure[0x1]; + u8 oper_freq_measure[0x1]; + u8 failure_reason[0x9]; + + u8 frequency_diff[0x20]; + + u8 reserved_at_80[0x180]; +}; + #endif /* MLX5_IFC_H */ diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h index 9becdc3fa503..b86d51a855f6 100644 --- a/include/linux/mlx5/mlx5_ifc_vdpa.h +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h @@ -74,7 +74,11 @@ struct mlx5_ifc_virtio_q_bits { u8 reserved_at_320[0x8]; u8 pd[0x18]; - u8 reserved_at_340[0xc0]; + u8 reserved_at_340[0x20]; + + u8 desc_group_mkey[0x20]; + + u8 reserved_at_380[0x80]; }; struct mlx5_ifc_virtio_net_q_object_bits { @@ -141,6 +145,7 @@ enum { MLX5_VIRTQ_MODIFY_MASK_STATE = (u64)1 << 0, MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_PARAMS = (u64)1 << 3, MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_DUMP_ENABLE = (u64)1 << 4, + MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY = (u64)1 << 14, }; enum { diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 98b2e1e149f9..26092c78a985 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -115,8 +115,9 @@ enum mlx5e_ext_link_mode { MLX5E_100GAUI_1_100GBASE_CR_KR = 11, MLX5E_200GAUI_4_200GBASE_CR4_KR4 = 12, MLX5E_200GAUI_2_200GBASE_CR2_KR2 = 13, - MLX5E_400GAUI_8 = 15, + MLX5E_400GAUI_8_400GBASE_CR8 = 15, MLX5E_400GAUI_4_400GBASE_CR4_KR4 = 16, + MLX5E_800GAUI_8_800GBASE_CR8_KR8 = 19, MLX5E_EXT_LINK_MODES_NUMBER, }; diff --git a/include/linux/mm.h b/include/linux/mm.h index 34f9dba17c1a..da5219b48d52 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -319,11 +319,13 @@ extern unsigned int kobjsize(const void *objp); #define VM_HIGH_ARCH_BIT_2 34 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_3 35 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_4 36 /* bit only usable on 64-bit architectures */ +#define VM_HIGH_ARCH_BIT_5 37 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_0 BIT(VM_HIGH_ARCH_BIT_0) #define VM_HIGH_ARCH_1 BIT(VM_HIGH_ARCH_BIT_1) #define VM_HIGH_ARCH_2 BIT(VM_HIGH_ARCH_BIT_2) #define VM_HIGH_ARCH_3 BIT(VM_HIGH_ARCH_BIT_3) #define VM_HIGH_ARCH_4 BIT(VM_HIGH_ARCH_BIT_4) +#define VM_HIGH_ARCH_5 BIT(VM_HIGH_ARCH_BIT_5) #endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */ #ifdef CONFIG_ARCH_HAS_PKEYS @@ -339,14 +341,27 @@ extern unsigned int kobjsize(const void *objp); #endif #endif /* CONFIG_ARCH_HAS_PKEYS */ +#ifdef CONFIG_X86_USER_SHADOW_STACK +/* + * VM_SHADOW_STACK should not be set with VM_SHARED because of lack of + * support core mm. + * + * These VMAs will get a single end guard page. This helps userspace protect + * itself from attacks. A single page is enough for current shadow stack archs + * (x86). See the comments near alloc_shstk() in arch/x86/kernel/shstk.c + * for more details on the guard size. + */ +# define VM_SHADOW_STACK VM_HIGH_ARCH_5 +#else +# define VM_SHADOW_STACK VM_NONE +#endif + #if defined(CONFIG_X86) # define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */ #elif defined(CONFIG_PPC) # define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */ #elif defined(CONFIG_PARISC) # define VM_GROWSUP VM_ARCH_1 -#elif defined(CONFIG_IA64) -# define VM_GROWSUP VM_ARCH_1 #elif defined(CONFIG_SPARC64) # define VM_SPARC_ADI VM_ARCH_1 /* Uses ADI tag for access control */ # define VM_ARCH_CLEAR VM_SPARC_ADI @@ -370,7 +385,7 @@ extern unsigned int kobjsize(const void *objp); #endif #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR -# define VM_UFFD_MINOR_BIT 37 +# define VM_UFFD_MINOR_BIT 38 # define VM_UFFD_MINOR BIT(VM_UFFD_MINOR_BIT) /* UFFD minor faults */ #else /* !CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ # define VM_UFFD_MINOR VM_NONE @@ -397,6 +412,8 @@ extern unsigned int kobjsize(const void *objp); #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS #endif +#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK) + #ifdef CONFIG_STACK_GROWSUP #define VM_STACK VM_GROWSUP #define VM_STACK_EARLY VM_GROWSDOWN @@ -532,13 +549,6 @@ struct vm_fault { */ }; -/* page entry size for vm->huge_fault() */ -enum page_entry_size { - PE_SIZE_PTE = 0, - PE_SIZE_PMD, - PE_SIZE_PUD, -}; - /* * These are the virtual MM functions - opening of an area, closing and * unmapping it (needed to keep files on disk up-to-date etc), pointer @@ -562,8 +572,7 @@ struct vm_operations_struct { int (*mprotect)(struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long newflags); vm_fault_t (*fault)(struct vm_fault *vmf); - vm_fault_t (*huge_fault)(struct vm_fault *vmf, - enum page_entry_size pe_size); + vm_fault_t (*huge_fault)(struct vm_fault *vmf, unsigned int order); vm_fault_t (*map_pages)(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff); unsigned long (*pagesize)(struct vm_area_struct * area); @@ -608,7 +617,7 @@ struct vm_operations_struct { * policy. */ struct mempolicy *(*get_policy)(struct vm_area_struct *vma, - unsigned long addr); + unsigned long addr, pgoff_t *ilx); #endif /* * Called by vm_normal_page() for special PTEs to find the @@ -679,6 +688,7 @@ static inline void vma_end_read(struct vm_area_struct *vma) rcu_read_unlock(); } +/* WARNING! Can only be used if mmap_lock is expected to be write-locked */ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq) { mmap_assert_write_locked(vma->vm_mm); @@ -691,6 +701,11 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq) return (vma->vm_lock_seq == *mm_lock_seq); } +/* + * Begin writing to a VMA. + * Exclude concurrent readers under the per-VMA lock until the currently + * write-locked mmap_lock is dropped or downgraded. + */ static inline void vma_start_write(struct vm_area_struct *vma) { int mm_lock_seq; @@ -709,26 +724,17 @@ static inline void vma_start_write(struct vm_area_struct *vma) up_write(&vma->vm_lock->lock); } -static inline bool vma_try_start_write(struct vm_area_struct *vma) +static inline void vma_assert_write_locked(struct vm_area_struct *vma) { int mm_lock_seq; - if (__is_vma_write_locked(vma, &mm_lock_seq)) - return true; - - if (!down_write_trylock(&vma->vm_lock->lock)) - return false; - - WRITE_ONCE(vma->vm_lock_seq, mm_lock_seq); - up_write(&vma->vm_lock->lock); - return true; + VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma); } -static inline void vma_assert_write_locked(struct vm_area_struct *vma) +static inline void vma_assert_locked(struct vm_area_struct *vma) { - int mm_lock_seq; - - VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma); + if (!rwsem_is_locked(&vma->vm_lock->lock)) + vma_assert_write_locked(vma); } static inline void vma_mark_detached(struct vm_area_struct *vma, bool detached) @@ -739,6 +745,22 @@ static inline void vma_mark_detached(struct vm_area_struct *vma, bool detached) vma->detached = detached; } +static inline void release_fault_lock(struct vm_fault *vmf) +{ + if (vmf->flags & FAULT_FLAG_VMA_LOCK) + vma_end_read(vmf->vma); + else + mmap_read_unlock(vmf->vma->vm_mm); +} + +static inline void assert_fault_locked(struct vm_fault *vmf) +{ + if (vmf->flags & FAULT_FLAG_VMA_LOCK) + vma_assert_locked(vmf->vma); + else + mmap_assert_locked(vmf->vma->vm_mm); +} + struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, unsigned long address); @@ -748,25 +770,40 @@ static inline bool vma_start_read(struct vm_area_struct *vma) { return false; } static inline void vma_end_read(struct vm_area_struct *vma) {} static inline void vma_start_write(struct vm_area_struct *vma) {} -static inline bool vma_try_start_write(struct vm_area_struct *vma) - { return true; } -static inline void vma_assert_write_locked(struct vm_area_struct *vma) {} +static inline void vma_assert_write_locked(struct vm_area_struct *vma) + { mmap_assert_write_locked(vma->vm_mm); } static inline void vma_mark_detached(struct vm_area_struct *vma, bool detached) {} +static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, + unsigned long address) +{ + return NULL; +} + +static inline void release_fault_lock(struct vm_fault *vmf) +{ + mmap_read_unlock(vmf->vma->vm_mm); +} + +static inline void assert_fault_locked(struct vm_fault *vmf) +{ + mmap_assert_locked(vmf->vma->vm_mm); +} + #endif /* CONFIG_PER_VMA_LOCK */ +extern const struct vm_operations_struct vma_dummy_vm_ops; + /* * WARNING: vma_init does not initialize vma->vm_lock. * Use vm_area_alloc()/vm_area_free() if vma needs locking. */ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) { - static const struct vm_operations_struct dummy_vm_ops = {}; - memset(vma, 0, sizeof(*vma)); vma->vm_mm = mm; - vma->vm_ops = &dummy_vm_ops; + vma->vm_ops = &vma_dummy_vm_ops; INIT_LIST_HEAD(&vma->anon_vma_chain); vma_mark_detached(vma, false); vma_numab_state_init(vma); @@ -779,18 +816,22 @@ static inline void vm_flags_init(struct vm_area_struct *vma, ACCESS_PRIVATE(vma, __vm_flags) = flags; } -/* Use when VMA is part of the VMA tree and modifications need coordination */ +/* + * Use when VMA is part of the VMA tree and modifications need coordination + * Note: vm_flags_reset and vm_flags_reset_once do not lock the vma and + * it should be locked explicitly beforehand. + */ static inline void vm_flags_reset(struct vm_area_struct *vma, vm_flags_t flags) { - vma_start_write(vma); + vma_assert_write_locked(vma); vm_flags_init(vma, flags); } static inline void vm_flags_reset_once(struct vm_area_struct *vma, vm_flags_t flags) { - vma_start_write(vma); + vma_assert_write_locked(vma); WRITE_ONCE(ACCESS_PRIVATE(vma, __vm_flags), flags); } @@ -839,6 +880,31 @@ static inline bool vma_is_anonymous(struct vm_area_struct *vma) return !vma->vm_ops; } +/* + * Indicate if the VMA is a heap for the given task; for + * /proc/PID/maps that is the heap of the main task. + */ +static inline bool vma_is_initial_heap(const struct vm_area_struct *vma) +{ + return vma->vm_start < vma->vm_mm->brk && + vma->vm_end > vma->vm_mm->start_brk; +} + +/* + * Indicate if the VMA is a stack for the given task; for + * /proc/PID/maps that is the stack of the main task. + */ +static inline bool vma_is_initial_stack(const struct vm_area_struct *vma) +{ + /* + * We make no effort to guess what a given thread considers to be + * its "stack". It's not even well-defined for programs written + * languages like Go. + */ + return vma->vm_start <= vma->vm_mm->start_stack && + vma->vm_end >= vma->vm_mm->start_stack; +} + static inline bool vma_is_temporary_stack(struct vm_area_struct *vma) { int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP); @@ -869,6 +935,17 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma) return vma->vm_flags & VM_ACCESS_FLAGS; } +static inline bool is_shared_maywrite(vm_flags_t vm_flags) +{ + return (vm_flags & (VM_SHARED | VM_MAYWRITE)) == + (VM_SHARED | VM_MAYWRITE); +} + +static inline bool vma_is_shared_maywrite(struct vm_area_struct *vma) +{ + return is_shared_maywrite(vma->vm_flags); +} + static inline struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max) { @@ -976,7 +1053,7 @@ struct inode; * compound_order() can be called without holding a reference, which means * that niceties like page_folio() don't work. These callers should be * prepared to handle wild return values. For example, PG_head may be - * set before _folio_order is initialised, or this may be a tail page. + * set before the order is initialised, or this may be a tail page. * See compaction.c for some good examples. */ static inline unsigned int compound_order(struct page *page) @@ -985,7 +1062,7 @@ static inline unsigned int compound_order(struct page *page) if (!test_bit(PG_head, &folio->flags)) return 0; - return folio->_folio_order; + return folio->_flags_1 & 0xff; } /** @@ -1001,7 +1078,7 @@ static inline unsigned int folio_order(struct folio *folio) { if (!folio_test_large(folio)) return 0; - return folio->_folio_order; + return folio->_flags_1 & 0xff; } #include <linux/huge_mm.h> @@ -1072,11 +1149,6 @@ unsigned long vmalloc_to_pfn(const void *addr); * On nommu, vmalloc/vfree wrap through kmalloc/kfree directly, so there * is no special casing required. */ - -#ifndef is_ioremap_addr -#define is_ioremap_addr(x) is_vmalloc_addr(x) -#endif - #ifdef CONFIG_MMU extern bool is_vmalloc_addr(const void *x); extern int is_vmalloc_or_module_addr(const void *x); @@ -1220,33 +1292,6 @@ void folio_copy(struct folio *dst, struct folio *src); unsigned long nr_free_buffer_pages(void); -/* - * Compound pages have a destructor function. Provide a - * prototype for that function and accessor functions. - * These are _only_ valid on the head of a compound page. - */ -typedef void compound_page_dtor(struct page *); - -/* Keep the enum in sync with compound_page_dtors array in mm/page_alloc.c */ -enum compound_dtor_id { - NULL_COMPOUND_DTOR, - COMPOUND_PAGE_DTOR, -#ifdef CONFIG_HUGETLB_PAGE - HUGETLB_PAGE_DTOR, -#endif -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - TRANSHUGE_PAGE_DTOR, -#endif - NR_COMPOUND_DTORS, -}; - -static inline void folio_set_compound_dtor(struct folio *folio, - enum compound_dtor_id compound_dtor) -{ - VM_BUG_ON_FOLIO(compound_dtor >= NR_COMPOUND_DTORS, folio); - folio->_folio_dtor = compound_dtor; -} - void destroy_large_folio(struct folio *folio); /* Returns the number of bytes in this potentially compound page. */ @@ -1282,8 +1327,6 @@ static inline unsigned long thp_size(struct page *page) return PAGE_SIZE << thp_order(page); } -void free_compound_page(struct page *page); - #ifdef CONFIG_MMU /* * Do pte_mkwrite, but only if the vma says VM_WRITE. We do this when @@ -1294,15 +1337,15 @@ void free_compound_page(struct page *page); static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) { if (likely(vma->vm_flags & VM_WRITE)) - pte = pte_mkwrite(pte); + pte = pte_mkwrite(pte, vma); return pte; } vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page); -void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr); +void set_pte_range(struct vm_fault *vmf, struct folio *folio, + struct page *page, unsigned int nr, unsigned long addr); vm_fault_t finish_fault(struct vm_fault *vmf); -vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf); #endif /* @@ -1651,26 +1694,26 @@ static inline bool __cpupid_match_pid(pid_t task_pid, int cpupid) #define cpupid_match_pid(task, cpupid) __cpupid_match_pid(task->pid, cpupid) #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS -static inline int page_cpupid_xchg_last(struct page *page, int cpupid) +static inline int folio_xchg_last_cpupid(struct folio *folio, int cpupid) { - return xchg(&page->_last_cpupid, cpupid & LAST_CPUPID_MASK); + return xchg(&folio->_last_cpupid, cpupid & LAST_CPUPID_MASK); } -static inline int page_cpupid_last(struct page *page) +static inline int folio_last_cpupid(struct folio *folio) { - return page->_last_cpupid; + return folio->_last_cpupid; } static inline void page_cpupid_reset_last(struct page *page) { page->_last_cpupid = -1 & LAST_CPUPID_MASK; } #else -static inline int page_cpupid_last(struct page *page) +static inline int folio_last_cpupid(struct folio *folio) { - return (page->flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK; + return (folio->flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK; } -extern int page_cpupid_xchg_last(struct page *page, int cpupid); +int folio_xchg_last_cpupid(struct folio *folio, int cpupid); static inline void page_cpupid_reset_last(struct page *page) { @@ -1678,11 +1721,12 @@ static inline void page_cpupid_reset_last(struct page *page) } #endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */ -static inline int xchg_page_access_time(struct page *page, int time) +static inline int folio_xchg_access_time(struct folio *folio, int time) { int last_time; - last_time = page_cpupid_xchg_last(page, time >> PAGE_ACCESS_TIME_BUCKETS); + last_time = folio_xchg_last_cpupid(folio, + time >> PAGE_ACCESS_TIME_BUCKETS); return last_time << PAGE_ACCESS_TIME_BUCKETS; } @@ -1691,24 +1735,24 @@ static inline void vma_set_access_pid_bit(struct vm_area_struct *vma) unsigned int pid_bit; pid_bit = hash_32(current->pid, ilog2(BITS_PER_LONG)); - if (vma->numab_state && !test_bit(pid_bit, &vma->numab_state->access_pids[1])) { - __set_bit(pid_bit, &vma->numab_state->access_pids[1]); + if (vma->numab_state && !test_bit(pid_bit, &vma->numab_state->pids_active[1])) { + __set_bit(pid_bit, &vma->numab_state->pids_active[1]); } } #else /* !CONFIG_NUMA_BALANCING */ -static inline int page_cpupid_xchg_last(struct page *page, int cpupid) +static inline int folio_xchg_last_cpupid(struct folio *folio, int cpupid) { - return page_to_nid(page); /* XXX */ + return folio_nid(folio); /* XXX */ } -static inline int xchg_page_access_time(struct page *page, int time) +static inline int folio_xchg_access_time(struct folio *folio, int time) { return 0; } -static inline int page_cpupid_last(struct page *page) +static inline int folio_last_cpupid(struct folio *folio) { - return page_to_nid(page); /* XXX */ + return folio_nid(folio); /* XXX */ } static inline int cpupid_to_nid(int cpupid) @@ -2006,7 +2050,7 @@ static inline long folio_nr_pages(struct folio *folio) #ifdef CONFIG_64BIT return folio->_folio_nr_pages; #else - return 1L << folio->_folio_order; + return 1L << (folio->_flags_1 & 0xff); #endif } @@ -2024,7 +2068,7 @@ static inline unsigned long compound_nr(struct page *page) #ifdef CONFIG_64BIT return folio->_folio_nr_pages; #else - return 1L << folio->_folio_order; + return 1L << (folio->_flags_1 & 0xff); #endif } @@ -2170,7 +2214,6 @@ static inline void *folio_address(const struct folio *folio) return page_address(&folio->page); } -extern void *page_rmapping(struct page *page); extern pgoff_t __page_file_index(struct page *page); /* @@ -2238,18 +2281,6 @@ extern void pagefault_out_of_memory(void); #define offset_in_folio(folio, p) ((unsigned long)(p) & (folio_size(folio) - 1)) /* - * Flags passed to show_mem() and show_free_areas() to suppress output in - * various contexts. - */ -#define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */ - -extern void __show_free_areas(unsigned int flags, nodemask_t *nodemask, int max_zone_idx); -static void __maybe_unused show_free_areas(unsigned int flags, nodemask_t *nodemask) -{ - __show_free_areas(flags, nodemask, MAX_NR_ZONES - 1); -} - -/* * Parameter block passed down to zap_pte_range in exceptional cases. */ struct zap_details { @@ -2305,6 +2336,8 @@ struct folio *vm_normal_folio(struct vm_area_struct *vma, unsigned long addr, pte_t pte); struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte); +struct folio *vm_normal_folio_pmd(struct vm_area_struct *vma, + unsigned long addr, pmd_t pmd); struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t pmd); @@ -2317,9 +2350,9 @@ static inline void zap_vma_pages(struct vm_area_struct *vma) zap_page_range_single(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL); } -void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt, +void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas, struct vm_area_struct *start_vma, unsigned long start, - unsigned long end, bool mm_wr_locked); + unsigned long end, unsigned long tree_end, bool mm_wr_locked); struct mmu_notifier_range; @@ -2391,8 +2424,6 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, unsigned int gup_flags); extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags); -extern int __access_remote_vm(struct mm_struct *mm, unsigned long addr, - void *buf, int len, unsigned int gup_flags); long get_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, @@ -2403,6 +2434,9 @@ long pin_user_pages_remote(struct mm_struct *mm, unsigned int gup_flags, struct page **pages, int *locked); +/* + * Retrieves a single page alongside its VMA. Does not support FOLL_NOWAIT. + */ static inline struct page *get_user_page_vma_remote(struct mm_struct *mm, unsigned long addr, int gup_flags, @@ -2410,12 +2444,15 @@ static inline struct page *get_user_page_vma_remote(struct mm_struct *mm, { struct page *page; struct vm_area_struct *vma; - int got = get_user_pages_remote(mm, addr, 1, gup_flags, &page, NULL); + int got; + + if (WARN_ON_ONCE(unlikely(gup_flags & FOLL_NOWAIT))) + return ERR_PTR(-EINVAL); + + got = get_user_pages_remote(mm, addr, 1, gup_flags, &page, NULL); if (got < 0) return ERR_PTR(got); - if (got == 0) - return NULL; vma = vma_lookup(mm, addr); if (WARN_ON_ONCE(!vma)) { @@ -2458,7 +2495,7 @@ int get_cmdline(struct task_struct *task, char *buffer, int buflen); extern unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, unsigned long new_addr, unsigned long len, - bool need_rmap_locks); + bool need_rmap_locks, bool for_stack); /* * Flags used by change_protection(). For now we make it a bitmap so @@ -2606,14 +2643,6 @@ static inline void setmax_mm_hiwater_rss(unsigned long *maxrss, *maxrss = hiwater_rss; } -#if defined(SPLIT_RSS_COUNTING) -void sync_mm_rss(struct mm_struct *mm); -#else -static inline void sync_mm_rss(struct mm_struct *mm) -{ -} -#endif - #ifndef CONFIG_ARCH_HAS_PTE_SPECIAL static inline int pte_special(pte_t pte) { @@ -2766,42 +2795,93 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a } #endif /* CONFIG_MMU */ +static inline struct ptdesc *virt_to_ptdesc(const void *x) +{ + return page_ptdesc(virt_to_page(x)); +} + +static inline void *ptdesc_to_virt(const struct ptdesc *pt) +{ + return page_to_virt(ptdesc_page(pt)); +} + +static inline void *ptdesc_address(const struct ptdesc *pt) +{ + return folio_address(ptdesc_folio(pt)); +} + +static inline bool pagetable_is_reserved(struct ptdesc *pt) +{ + return folio_test_reserved(ptdesc_folio(pt)); +} + +/** + * pagetable_alloc - Allocate pagetables + * @gfp: GFP flags + * @order: desired pagetable order + * + * pagetable_alloc allocates memory for page tables as well as a page table + * descriptor to describe that memory. + * + * Return: The ptdesc describing the allocated page tables. + */ +static inline struct ptdesc *pagetable_alloc(gfp_t gfp, unsigned int order) +{ + struct page *page = alloc_pages(gfp | __GFP_COMP, order); + + return page_ptdesc(page); +} + +/** + * pagetable_free - Free pagetables + * @pt: The page table descriptor + * + * pagetable_free frees the memory of all page tables described by a page + * table descriptor and the memory for the descriptor itself. + */ +static inline void pagetable_free(struct ptdesc *pt) +{ + struct page *page = ptdesc_page(pt); + + __free_pages(page, compound_order(page)); +} + #if USE_SPLIT_PTE_PTLOCKS #if ALLOC_SPLIT_PTLOCKS void __init ptlock_cache_init(void); -extern bool ptlock_alloc(struct page *page); -extern void ptlock_free(struct page *page); +bool ptlock_alloc(struct ptdesc *ptdesc); +void ptlock_free(struct ptdesc *ptdesc); -static inline spinlock_t *ptlock_ptr(struct page *page) +static inline spinlock_t *ptlock_ptr(struct ptdesc *ptdesc) { - return page->ptl; + return ptdesc->ptl; } #else /* ALLOC_SPLIT_PTLOCKS */ static inline void ptlock_cache_init(void) { } -static inline bool ptlock_alloc(struct page *page) +static inline bool ptlock_alloc(struct ptdesc *ptdesc) { return true; } -static inline void ptlock_free(struct page *page) +static inline void ptlock_free(struct ptdesc *ptdesc) { } -static inline spinlock_t *ptlock_ptr(struct page *page) +static inline spinlock_t *ptlock_ptr(struct ptdesc *ptdesc) { - return &page->ptl; + return &ptdesc->ptl; } #endif /* ALLOC_SPLIT_PTLOCKS */ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) { - return ptlock_ptr(pmd_page(*pmd)); + return ptlock_ptr(page_ptdesc(pmd_page(*pmd))); } -static inline bool ptlock_init(struct page *page) +static inline bool ptlock_init(struct ptdesc *ptdesc) { /* * prep_new_page() initialize page->private (and therefore page->ptl) @@ -2810,10 +2890,10 @@ static inline bool ptlock_init(struct page *page) * It can happen if arch try to use slab for page table allocation: * slab code uses page->slab_cache, which share storage with page->ptl. */ - VM_BUG_ON_PAGE(*(unsigned long *)&page->ptl, page); - if (!ptlock_alloc(page)) + VM_BUG_ON_PAGE(*(unsigned long *)&ptdesc->ptl, ptdesc_page(ptdesc)); + if (!ptlock_alloc(ptdesc)) return false; - spin_lock_init(ptlock_ptr(page)); + spin_lock_init(ptlock_ptr(ptdesc)); return true; } @@ -2826,24 +2906,28 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) return &mm->page_table_lock; } static inline void ptlock_cache_init(void) {} -static inline bool ptlock_init(struct page *page) { return true; } -static inline void ptlock_free(struct page *page) {} +static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; } +static inline void ptlock_free(struct ptdesc *ptdesc) {} #endif /* USE_SPLIT_PTE_PTLOCKS */ -static inline bool pgtable_pte_page_ctor(struct page *page) +static inline bool pagetable_pte_ctor(struct ptdesc *ptdesc) { - if (!ptlock_init(page)) + struct folio *folio = ptdesc_folio(ptdesc); + + if (!ptlock_init(ptdesc)) return false; - __SetPageTable(page); - inc_lruvec_page_state(page, NR_PAGETABLE); + __folio_set_pgtable(folio); + lruvec_stat_add_folio(folio, NR_PAGETABLE); return true; } -static inline void pgtable_pte_page_dtor(struct page *page) +static inline void pagetable_pte_dtor(struct ptdesc *ptdesc) { - ptlock_free(page); - __ClearPageTable(page); - dec_lruvec_page_state(page, NR_PAGETABLE); + struct folio *folio = ptdesc_folio(ptdesc); + + ptlock_free(ptdesc); + __folio_clear_pgtable(folio); + lruvec_stat_sub_folio(folio, NR_PAGETABLE); } pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp); @@ -2892,28 +2976,33 @@ static inline struct page *pmd_pgtable_page(pmd_t *pmd) return virt_to_page((void *)((unsigned long) pmd & mask)); } +static inline struct ptdesc *pmd_ptdesc(pmd_t *pmd) +{ + return page_ptdesc(pmd_pgtable_page(pmd)); +} + static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) { - return ptlock_ptr(pmd_pgtable_page(pmd)); + return ptlock_ptr(pmd_ptdesc(pmd)); } -static inline bool pmd_ptlock_init(struct page *page) +static inline bool pmd_ptlock_init(struct ptdesc *ptdesc) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - page->pmd_huge_pte = NULL; + ptdesc->pmd_huge_pte = NULL; #endif - return ptlock_init(page); + return ptlock_init(ptdesc); } -static inline void pmd_ptlock_free(struct page *page) +static inline void pmd_ptlock_free(struct ptdesc *ptdesc) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - VM_BUG_ON_PAGE(page->pmd_huge_pte, page); + VM_BUG_ON_PAGE(ptdesc->pmd_huge_pte, ptdesc_page(ptdesc)); #endif - ptlock_free(page); + ptlock_free(ptdesc); } -#define pmd_huge_pte(mm, pmd) (pmd_pgtable_page(pmd)->pmd_huge_pte) +#define pmd_huge_pte(mm, pmd) (pmd_ptdesc(pmd)->pmd_huge_pte) #else @@ -2922,8 +3011,8 @@ static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) return &mm->page_table_lock; } -static inline bool pmd_ptlock_init(struct page *page) { return true; } -static inline void pmd_ptlock_free(struct page *page) {} +static inline bool pmd_ptlock_init(struct ptdesc *ptdesc) { return true; } +static inline void pmd_ptlock_free(struct ptdesc *ptdesc) {} #define pmd_huge_pte(mm, pmd) ((mm)->pmd_huge_pte) @@ -2936,20 +3025,24 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd) return ptl; } -static inline bool pgtable_pmd_page_ctor(struct page *page) +static inline bool pagetable_pmd_ctor(struct ptdesc *ptdesc) { - if (!pmd_ptlock_init(page)) + struct folio *folio = ptdesc_folio(ptdesc); + + if (!pmd_ptlock_init(ptdesc)) return false; - __SetPageTable(page); - inc_lruvec_page_state(page, NR_PAGETABLE); + __folio_set_pgtable(folio); + lruvec_stat_add_folio(folio, NR_PAGETABLE); return true; } -static inline void pgtable_pmd_page_dtor(struct page *page) +static inline void pagetable_pmd_dtor(struct ptdesc *ptdesc) { - pmd_ptlock_free(page); - __ClearPageTable(page); - dec_lruvec_page_state(page, NR_PAGETABLE); + struct folio *folio = ptdesc_folio(ptdesc); + + pmd_ptlock_free(ptdesc); + __folio_clear_pgtable(folio); + lruvec_stat_sub_folio(folio, NR_PAGETABLE); } /* @@ -2971,6 +3064,22 @@ static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud) return ptl; } +static inline void pagetable_pud_ctor(struct ptdesc *ptdesc) +{ + struct folio *folio = ptdesc_folio(ptdesc); + + __folio_set_pgtable(folio); + lruvec_stat_add_folio(folio, NR_PAGETABLE); +} + +static inline void pagetable_pud_dtor(struct ptdesc *ptdesc) +{ + struct folio *folio = ptdesc_folio(ptdesc); + + __folio_clear_pgtable(folio); + lruvec_stat_sub_folio(folio, NR_PAGETABLE); +} + extern void __init pagecache_init(void); extern void free_initmem(void); @@ -3004,6 +3113,11 @@ static inline void mark_page_reserved(struct page *page) adjust_managed_page_count(page, -1); } +static inline void free_reserved_ptdesc(struct ptdesc *pt) +{ + free_reserved_page(ptdesc_page(pt)); +} + /* * Default method to free all the __init memory into the buddy system. * The freed pages will be poisoned with pattern "poison" if it's within @@ -3069,9 +3183,9 @@ extern void mem_init(void); extern void __init mmap_init(void); extern void __show_mem(unsigned int flags, nodemask_t *nodemask, int max_zone_idx); -static inline void show_mem(unsigned int flags, nodemask_t *nodemask) +static inline void show_mem(void) { - __show_mem(flags, nodemask, MAX_NR_ZONES - 1); + __show_mem(0, NULL, MAX_NR_ZONES - 1); } extern long si_mem_available(void); extern void si_meminfo(struct sysinfo * val); @@ -3130,22 +3244,73 @@ extern int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma, struct vm_area_struct *next); extern int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff); -extern struct vm_area_struct *vma_merge(struct vma_iterator *vmi, - struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, - unsigned long end, unsigned long vm_flags, struct anon_vma *, - struct file *, pgoff_t, struct mempolicy *, struct vm_userfaultfd_ctx, - struct anon_vma_name *); extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); -extern int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *, - unsigned long addr, int new_below); -extern int split_vma(struct vma_iterator *vmi, struct vm_area_struct *, - unsigned long addr, int new_below); extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); extern void unlink_file_vma(struct vm_area_struct *); extern struct vm_area_struct *copy_vma(struct vm_area_struct **, unsigned long addr, unsigned long len, pgoff_t pgoff, bool *need_rmap_locks); extern void exit_mmap(struct mm_struct *); +struct vm_area_struct *vma_modify(struct vma_iterator *vmi, + struct vm_area_struct *prev, + struct vm_area_struct *vma, + unsigned long start, unsigned long end, + unsigned long vm_flags, + struct mempolicy *policy, + struct vm_userfaultfd_ctx uffd_ctx, + struct anon_vma_name *anon_name); + +/* We are about to modify the VMA's flags. */ +static inline struct vm_area_struct +*vma_modify_flags(struct vma_iterator *vmi, + struct vm_area_struct *prev, + struct vm_area_struct *vma, + unsigned long start, unsigned long end, + unsigned long new_flags) +{ + return vma_modify(vmi, prev, vma, start, end, new_flags, + vma_policy(vma), vma->vm_userfaultfd_ctx, + anon_vma_name(vma)); +} + +/* We are about to modify the VMA's flags and/or anon_name. */ +static inline struct vm_area_struct +*vma_modify_flags_name(struct vma_iterator *vmi, + struct vm_area_struct *prev, + struct vm_area_struct *vma, + unsigned long start, + unsigned long end, + unsigned long new_flags, + struct anon_vma_name *new_name) +{ + return vma_modify(vmi, prev, vma, start, end, new_flags, + vma_policy(vma), vma->vm_userfaultfd_ctx, new_name); +} + +/* We are about to modify the VMA's memory policy. */ +static inline struct vm_area_struct +*vma_modify_policy(struct vma_iterator *vmi, + struct vm_area_struct *prev, + struct vm_area_struct *vma, + unsigned long start, unsigned long end, + struct mempolicy *new_pol) +{ + return vma_modify(vmi, prev, vma, start, end, vma->vm_flags, + new_pol, vma->vm_userfaultfd_ctx, anon_vma_name(vma)); +} + +/* We are about to modify the VMA's flags and/or uffd context. */ +static inline struct vm_area_struct +*vma_modify_flags_uffd(struct vma_iterator *vmi, + struct vm_area_struct *prev, + struct vm_area_struct *vma, + unsigned long start, unsigned long end, + unsigned long new_flags, + struct vm_userfaultfd_ctx new_ctx) +{ + return vma_modify(vmi, prev, vma, start, end, new_flags, + vma_policy(vma), new_ctx, anon_vma_name(vma)); +} static inline int check_data_rlimit(unsigned long rlim, unsigned long new, @@ -3193,7 +3358,8 @@ extern unsigned long mmap_region(struct file *file, unsigned long addr, struct list_head *uf); extern unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, - unsigned long pgoff, unsigned long *populate, struct list_head *uf); + vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate, + struct list_head *uf); extern int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf, bool unlock); @@ -3216,8 +3382,7 @@ static inline void mm_populate(unsigned long addr, unsigned long len) static inline void mm_populate(unsigned long addr, unsigned long len) {} #endif -/* These take the mm semaphore themselves */ -extern int __must_check vm_brk(unsigned long, unsigned long); +/* This takes the mm semaphore itself */ extern int __must_check vm_brk_flags(unsigned long, unsigned long, unsigned long); extern int vm_munmap(unsigned long, size_t); extern unsigned long __must_check vm_mmap(struct file *, unsigned long, @@ -3281,15 +3446,26 @@ struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr) return mtree_load(&mm->mm_mt, addr); } +static inline unsigned long stack_guard_start_gap(struct vm_area_struct *vma) +{ + if (vma->vm_flags & VM_GROWSDOWN) + return stack_guard_gap; + + /* See reasoning around the VM_SHADOW_STACK definition */ + if (vma->vm_flags & VM_SHADOW_STACK) + return PAGE_SIZE; + + return 0; +} + static inline unsigned long vm_start_gap(struct vm_area_struct *vma) { + unsigned long gap = stack_guard_start_gap(vma); unsigned long vm_start = vma->vm_start; - if (vma->vm_flags & VM_GROWSDOWN) { - vm_start -= stack_guard_gap; - if (vm_start > vma->vm_start) - vm_start = 0; - } + vm_start -= gap; + if (vm_start > vma->vm_start) + vm_start = 0; return vm_start; } @@ -3403,6 +3579,24 @@ static inline vm_fault_t vmf_error(int err) return VM_FAULT_SIGBUS; } +/* + * Convert errno to return value for ->page_mkwrite() calls. + * + * This should eventually be merged with vmf_error() above, but will need a + * careful audit of all vmf_error() callers. + */ +static inline vm_fault_t vmf_fs_error(int err) +{ + if (err == 0) + return VM_FAULT_LOCKED; + if (err == -EFAULT || err == -EAGAIN) + return VM_FAULT_NOPAGE; + if (err == -ENOMEM) + return VM_FAULT_OOM; + /* -ENOSPC, -EDQUOT, -EIO ... */ + return VM_FAULT_SIGBUS; +} + struct page *follow_page(struct vm_area_struct *vma, unsigned long address, unsigned int foll_flags); @@ -3509,8 +3703,8 @@ static inline bool debug_pagealloc_enabled(void) } /* - * For use in fast paths after init_debug_pagealloc() has run, or when a - * false negative result is not harmful when called too early. + * For use in fast paths after mem_debugging_and_hardening_init() has run, + * or when a false negative result is not harmful when called too early. */ static inline bool debug_pagealloc_enabled_static(void) { @@ -3665,13 +3859,32 @@ void vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap); #endif -#ifdef CONFIG_ARCH_WANT_OPTIMIZE_VMEMMAP -static inline bool vmemmap_can_optimize(struct vmem_altmap *altmap, - struct dev_pagemap *pgmap) +#define VMEMMAP_RESERVE_NR 2 +#ifdef CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP +static inline bool __vmemmap_can_optimize(struct vmem_altmap *altmap, + struct dev_pagemap *pgmap) { - return is_power_of_2(sizeof(struct page)) && - pgmap && (pgmap_vmemmap_nr(pgmap) > 1) && !altmap; + unsigned long nr_pages; + unsigned long nr_vmemmap_pages; + + if (!pgmap || !is_power_of_2(sizeof(struct page))) + return false; + + nr_pages = pgmap_vmemmap_nr(pgmap); + nr_vmemmap_pages = ((nr_pages * sizeof(struct page)) >> PAGE_SHIFT); + /* + * For vmemmap optimization with DAX we need minimum 2 vmemmap + * pages. See layout diagram in Documentation/mm/vmemmap_dedup.rst + */ + return !altmap && (nr_vmemmap_pages > VMEMMAP_RESERVE_NR); } +/* + * If we don't have an architecture override, use the generic rule + */ +#ifndef vmemmap_can_optimize +#define vmemmap_can_optimize __vmemmap_can_optimize +#endif + #else static inline bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap) @@ -3860,25 +4073,26 @@ static inline void mem_dump_obj(void *object) {} #endif /** - * seal_check_future_write - Check for F_SEAL_FUTURE_WRITE flag and handle it + * seal_check_write - Check for F_SEAL_WRITE or F_SEAL_FUTURE_WRITE flags and + * handle them. * @seals: the seals to check * @vma: the vma to operate on * - * Check whether F_SEAL_FUTURE_WRITE is set; if so, do proper check/handling on - * the vma flags. Return 0 if check pass, or <0 for errors. + * Check whether F_SEAL_WRITE or F_SEAL_FUTURE_WRITE are set; if so, do proper + * check/handling on the vma flags. Return 0 if check pass, or <0 for errors. */ -static inline int seal_check_future_write(int seals, struct vm_area_struct *vma) +static inline int seal_check_write(int seals, struct vm_area_struct *vma) { - if (seals & F_SEAL_FUTURE_WRITE) { + if (seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) { /* * New PROT_WRITE and MAP_SHARED mmaps are not allowed when - * "future write" seal active. + * write seals are active. */ if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE)) return -EPERM; /* - * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as + * Since an F_SEAL_[FUTURE_]WRITE sealed memfd can be mapped as * MAP_SHARED and read-only, take care to not allow mprotect to * revert protections on such mappings. Do this only for shared * mappings. For private mappings, don't need to mask @@ -3922,4 +4136,11 @@ static inline void accept_memory(phys_addr_t start, phys_addr_t end) #endif +static inline bool pfn_is_unaccepted_memory(unsigned long pfn) +{ + phys_addr_t paddr = pfn << PAGE_SHIFT; + + return range_contains_unaccepted_memory(paddr, paddr + PAGE_SIZE); +} + #endif /* _LINUX_MM_H */ diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 21d6c72bcc71..f4fe593c1400 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -4,6 +4,7 @@ #include <linux/atomic.h> #include <linux/huge_mm.h> +#include <linux/mm_types.h> #include <linux/swap.h> #include <linux/string.h> #include <linux/userfaultfd_k.h> @@ -231,22 +232,27 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, if (folio_test_unevictable(folio) || !lrugen->enabled) return false; /* - * There are three common cases for this page: - * 1. If it's hot, e.g., freshly faulted in or previously hot and - * migrated, add it to the youngest generation. - * 2. If it's cold but can't be evicted immediately, i.e., an anon page - * not in swapcache or a dirty page pending writeback, add it to the - * second oldest generation. - * 3. Everything else (clean, cold) is added to the oldest generation. + * There are four common cases for this page: + * 1. If it's hot, i.e., freshly faulted in, add it to the youngest + * generation, and it's protected over the rest below. + * 2. If it can't be evicted immediately, i.e., a dirty page pending + * writeback, add it to the second youngest generation. + * 3. If it should be evicted first, e.g., cold and clean from + * folio_rotate_reclaimable(), add it to the oldest generation. + * 4. Everything else falls between 2 & 3 above and is added to the + * second oldest generation if it's considered inactive, or the + * oldest generation otherwise. See lru_gen_is_active(). */ if (folio_test_active(folio)) seq = lrugen->max_seq; else if ((type == LRU_GEN_ANON && !folio_test_swapcache(folio)) || (folio_test_reclaim(folio) && (folio_test_dirty(folio) || folio_test_writeback(folio)))) - seq = lrugen->min_seq[type] + 1; - else + seq = lrugen->max_seq - 1; + else if (reclaiming || lrugen->min_seq[type] + MIN_NR_GENS >= lrugen->max_seq) seq = lrugen->min_seq[type]; + else + seq = lrugen->min_seq[type] + 1; gen = lru_gen_from_seq(seq); flags = (gen + 1UL) << LRU_GEN_PGOFF; @@ -352,15 +358,6 @@ void lruvec_del_folio(struct lruvec *lruvec, struct folio *folio) } #ifdef CONFIG_ANON_VMA_NAME -/* - * mmap_lock should be read-locked when calling anon_vma_name(). Caller should - * either keep holding the lock while using the returned pointer or it should - * raise anon_vma_name refcount before releasing the lock. - */ -extern struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma); -extern struct anon_vma_name *anon_vma_name_alloc(const char *name); -extern void anon_vma_name_free(struct kref *kref); - /* mmap_lock should be read-locked */ static inline void anon_vma_name_get(struct anon_vma_name *anon_name) { @@ -415,16 +412,6 @@ static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1, } #else /* CONFIG_ANON_VMA_NAME */ -static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma) -{ - return NULL; -} - -static inline struct anon_vma_name *anon_vma_name_alloc(const char *name) -{ - return NULL; -} - static inline void anon_vma_name_get(struct anon_vma_name *anon_name) {} static inline void anon_vma_name_put(struct anon_vma_name *anon_name) {} static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma, @@ -523,6 +510,27 @@ static inline bool mm_tlb_flush_nested(struct mm_struct *mm) return atomic_read(&mm->tlb_flush_pending) > 1; } +#ifdef CONFIG_MMU +/* + * Computes the pte marker to copy from the given source entry into dst_vma. + * If no marker should be copied, returns 0. + * The caller should insert a new pte created with make_pte_marker(). + */ +static inline pte_marker copy_pte_marker( + swp_entry_t entry, struct vm_area_struct *dst_vma) +{ + pte_marker srcm = pte_marker_get(entry); + /* Always copy error entries. */ + pte_marker dstm = srcm & PTE_MARKER_POISONED; + + /* Only copy PTE markers if UFFD register matches. */ + if ((srcm & PTE_MARKER_UFFD_WP) && userfaultfd_wp(dst_vma)) + dstm |= PTE_MARKER_UFFD_WP; + + return dstm; +} +#endif + /* * If this pte is wr-protected by uffd-wp in any form, arm the special pte to * replace a none pte. NOTE! This should only be called when *pte is already diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 7d30dc4ff0ff..957ce38768b2 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -125,36 +125,11 @@ struct page { struct page_pool *pp; unsigned long _pp_mapping_pad; unsigned long dma_addr; - union { - /** - * dma_addr_upper: might require a 64-bit - * value on 32-bit architectures. - */ - unsigned long dma_addr_upper; - /** - * For frag page support, not supported in - * 32-bit architectures with 64-bit DMA. - */ - atomic_long_t pp_frag_count; - }; + atomic_long_t pp_frag_count; }; struct { /* Tail pages of compound page */ unsigned long compound_head; /* Bit zero is set */ }; - struct { /* Page table pages */ - unsigned long _pt_pad_1; /* compound_head */ - pgtable_t pmd_huge_pte; /* protected by page->ptl */ - unsigned long _pt_pad_2; /* mapping */ - union { - struct mm_struct *pt_mm; /* x86 pgds only */ - atomic_t pt_frag_refcount; /* powerpc */ - }; -#if ALLOC_SPLIT_PTLOCKS - spinlock_t *ptl; -#else - spinlock_t ptl; -#endif - }; struct { /* ZONE_DEVICE pages */ /** @pgmap: Points to the hosting device page map. */ struct dev_pagemap *pgmap; @@ -213,6 +188,10 @@ struct page { not kmapped, ie. highmem) */ #endif /* WANT_PAGE_VIRTUAL */ +#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS + int _last_cpupid; +#endif + #ifdef CONFIG_KMSAN /* * KMSAN metadata for this page: @@ -224,10 +203,6 @@ struct page { struct page *kmsan_shadow; struct page *kmsan_origin; #endif - -#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS - int _last_cpupid; -#endif } _struct_page_alignment; /* @@ -262,6 +237,14 @@ static inline struct page *encoded_page_ptr(struct encoded_page *page) return (struct page *)(~ENCODE_PAGE_BITS & (unsigned long)page); } +/* + * A swap entry has to fit into a "unsigned long", as the entry is hidden + * in the "index" field of the swapper address space. + */ +typedef struct { + unsigned long val; +} swp_entry_t; + /** * struct folio - Represents a contiguous set of bytes. * @flags: Identical to the page flags. @@ -272,14 +255,14 @@ static inline struct page *encoded_page_ptr(struct encoded_page *page) * @index: Offset within the file, in units of pages. For anonymous memory, * this is the index from the beginning of the mmap. * @private: Filesystem per-folio data (see folio_attach_private()). - * Used for swp_entry_t if folio_test_swapcache(). + * @swap: Used for swp_entry_t if folio_test_swapcache(). * @_mapcount: Do not access this member directly. Use folio_mapcount() to * find out how many times this folio is mapped by userspace. * @_refcount: Do not access this member directly. Use folio_ref_count() * to find how many references there are to this folio. * @memcg_data: Memory Control Group data. - * @_folio_dtor: Which destructor to use for this folio. - * @_folio_order: Do not use directly, call folio_order(). + * @virtual: Virtual address in the kernel direct map. + * @_last_cpupid: IDs of last CPU and last process that accessed the folio. * @_entire_mapcount: Do not use directly, call folio_entire_mapcount(). * @_nr_pages_mapped: Do not use directly, call folio_mapcount(). * @_pincount: Do not use directly, call folio_maybe_dma_pinned(). @@ -317,12 +300,21 @@ struct folio { }; struct address_space *mapping; pgoff_t index; - void *private; + union { + void *private; + swp_entry_t swap; + }; atomic_t _mapcount; atomic_t _refcount; #ifdef CONFIG_MEMCG unsigned long memcg_data; #endif +#if defined(WANT_PAGE_VIRTUAL) + void *virtual; +#endif +#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS + int _last_cpupid; +#endif /* private: the union with struct page is transitional */ }; struct page page; @@ -331,9 +323,8 @@ struct folio { struct { unsigned long _flags_1; unsigned long _head_1; + unsigned long _folio_avail; /* public: */ - unsigned char _folio_dtor; - unsigned char _folio_order; atomic_t _entire_mapcount; atomic_t _nr_pages_mapped; atomic_t _pincount; @@ -379,6 +370,12 @@ FOLIO_MATCH(_refcount, _refcount); #ifdef CONFIG_MEMCG FOLIO_MATCH(memcg_data, memcg_data); #endif +#if defined(WANT_PAGE_VIRTUAL) +FOLIO_MATCH(virtual, virtual); +#endif +#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS +FOLIO_MATCH(_last_cpupid, _last_cpupid); +#endif #undef FOLIO_MATCH #define FOLIO_MATCH(pg, fl) \ static_assert(offsetof(struct folio, fl) == \ @@ -391,8 +388,89 @@ FOLIO_MATCH(compound_head, _head_1); offsetof(struct page, pg) + 2 * sizeof(struct page)) FOLIO_MATCH(flags, _flags_2); FOLIO_MATCH(compound_head, _head_2); +FOLIO_MATCH(flags, _flags_2a); +FOLIO_MATCH(compound_head, _head_2a); #undef FOLIO_MATCH +/** + * struct ptdesc - Memory descriptor for page tables. + * @__page_flags: Same as page flags. Unused for page tables. + * @pt_rcu_head: For freeing page table pages. + * @pt_list: List of used page tables. Used for s390 and x86. + * @_pt_pad_1: Padding that aliases with page's compound head. + * @pmd_huge_pte: Protected by ptdesc->ptl, used for THPs. + * @__page_mapping: Aliases with page->mapping. Unused for page tables. + * @pt_mm: Used for x86 pgds. + * @pt_frag_refcount: For fragmented page table tracking. Powerpc and s390 only. + * @_pt_pad_2: Padding to ensure proper alignment. + * @ptl: Lock for the page table. + * @__page_type: Same as page->page_type. Unused for page tables. + * @_refcount: Same as page refcount. Used for s390 page tables. + * @pt_memcg_data: Memcg data. Tracked for page tables here. + * + * This struct overlays struct page for now. Do not modify without a good + * understanding of the issues. + */ +struct ptdesc { + unsigned long __page_flags; + + union { + struct rcu_head pt_rcu_head; + struct list_head pt_list; + struct { + unsigned long _pt_pad_1; + pgtable_t pmd_huge_pte; + }; + }; + unsigned long __page_mapping; + + union { + struct mm_struct *pt_mm; + atomic_t pt_frag_refcount; + }; + + union { + unsigned long _pt_pad_2; +#if ALLOC_SPLIT_PTLOCKS + spinlock_t *ptl; +#else + spinlock_t ptl; +#endif + }; + unsigned int __page_type; + atomic_t _refcount; +#ifdef CONFIG_MEMCG + unsigned long pt_memcg_data; +#endif +}; + +#define TABLE_MATCH(pg, pt) \ + static_assert(offsetof(struct page, pg) == offsetof(struct ptdesc, pt)) +TABLE_MATCH(flags, __page_flags); +TABLE_MATCH(compound_head, pt_list); +TABLE_MATCH(compound_head, _pt_pad_1); +TABLE_MATCH(mapping, __page_mapping); +TABLE_MATCH(rcu_head, pt_rcu_head); +TABLE_MATCH(page_type, __page_type); +TABLE_MATCH(_refcount, _refcount); +#ifdef CONFIG_MEMCG +TABLE_MATCH(memcg_data, pt_memcg_data); +#endif +#undef TABLE_MATCH +static_assert(sizeof(struct ptdesc) <= sizeof(struct page)); + +#define ptdesc_page(pt) (_Generic((pt), \ + const struct ptdesc *: (const struct page *)(pt), \ + struct ptdesc *: (struct page *)(pt))) + +#define ptdesc_folio(pt) (_Generic((pt), \ + const struct ptdesc *: (const struct folio *)(pt), \ + struct ptdesc *: (struct folio *)(pt))) + +#define page_ptdesc(p) (_Generic((p), \ + const struct page *: (const struct ptdesc *)(p), \ + struct page *: (struct ptdesc *)(p))) + /* * Used for sizing the vmemmap region on some architectures */ @@ -471,14 +549,62 @@ struct anon_vma_name { char name[]; }; +#ifdef CONFIG_ANON_VMA_NAME +/* + * mmap_lock should be read-locked when calling anon_vma_name(). Caller should + * either keep holding the lock while using the returned pointer or it should + * raise anon_vma_name refcount before releasing the lock. + */ +struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma); +struct anon_vma_name *anon_vma_name_alloc(const char *name); +void anon_vma_name_free(struct kref *kref); +#else /* CONFIG_ANON_VMA_NAME */ +static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma) +{ + return NULL; +} + +static inline struct anon_vma_name *anon_vma_name_alloc(const char *name) +{ + return NULL; +} +#endif + struct vma_lock { struct rw_semaphore lock; }; struct vma_numab_state { + /* + * Initialised as time in 'jiffies' after which VMA + * should be scanned. Delays first scan of new VMA by at + * least sysctl_numa_balancing_scan_delay: + */ unsigned long next_scan; - unsigned long next_pid_reset; - unsigned long access_pids[2]; + + /* + * Time in jiffies when pids_active[] is reset to + * detect phase change behaviour: + */ + unsigned long pids_active_reset; + + /* + * Approximate tracking of PIDs that trapped a NUMA hinting + * fault. May produce false positives due to hash collisions. + * + * [0] Previous PID tracking + * [1] Current PID tracking + * + * Window moves after next_pid_reset has expired approximately + * every VMA_PID_RESET_PERIOD jiffies: + */ + unsigned long pids_active[2]; + + /* + * MM scan sequence ID when the VMA was last completely scanned. + * A VMA is not eligible for scanning if prev_scan_seq == numa_scan_seq + */ + int prev_scan_seq; }; /* @@ -587,6 +713,12 @@ struct vm_area_struct { struct vm_userfaultfd_ctx vm_userfaultfd_ctx; } __randomize_layout; +#ifdef CONFIG_NUMA +#define vma_policy(vma) ((vma)->vm_policy) +#else +#define vma_policy(vma) NULL +#endif + #ifdef CONFIG_SCHED_MM_CID struct mm_cid { u64 time; @@ -812,7 +944,7 @@ struct mm_struct { #ifdef CONFIG_KSM /* * Represent how many pages of this process are involved in KSM - * merging. + * merging (not including ksm_zero_pages). */ unsigned long ksm_merging_pages; /* @@ -820,7 +952,12 @@ struct mm_struct { * including merged and not merged. */ unsigned long ksm_rmap_items; -#endif + /* + * Represent how many empty pages are merged with kernel zero + * pages when enabling KSM use_zero_pages. + */ + unsigned long ksm_zero_pages; +#endif /* CONFIG_KSM */ #ifdef CONFIG_LRU_GEN struct { /* this mm_struct is on lru_gen_mm_list */ @@ -1105,7 +1242,8 @@ enum vm_fault_reason { { VM_FAULT_RETRY, "RETRY" }, \ { VM_FAULT_FALLBACK, "FALLBACK" }, \ { VM_FAULT_DONE_COW, "DONE_COW" }, \ - { VM_FAULT_NEEDDSYNC, "NEEDDSYNC" } + { VM_FAULT_NEEDDSYNC, "NEEDDSYNC" }, \ + { VM_FAULT_COMPLETED, "COMPLETED" } struct vm_special_mapping { const char *name; /* The name, e.g. "[vdso]". */ @@ -1139,14 +1277,6 @@ enum tlb_flush_reason { NR_TLB_FLUSH_REASONS, }; - /* - * A swap entry has to fit into a "unsigned long", as the entry is hidden - * in the "index" field of the swapper address space. - */ -typedef struct { - unsigned long val; -} swp_entry_t; - /** * enum fault_flag - Fault flag definitions. * @FAULT_FLAG_WRITE: Fault was a write fault. diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h index 5414b5c6a103..aa44fff8bb9d 100644 --- a/include/linux/mm_types_task.h +++ b/include/linux/mm_types_task.h @@ -52,8 +52,8 @@ struct tlbflush_unmap_batch { #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH /* * The arch code makes the following promise: generic code can modify a - * PTE, then call arch_tlbbatch_add_mm() (which internally provides all - * needed barriers), then call arch_tlbbatch_flush(), and the entries + * PTE, then call arch_tlbbatch_add_pending() (which internally provides + * all needed barriers), then call arch_tlbbatch_flush(), and the entries * will be flushed on all CPUs by the time that arch_tlbbatch_flush() * returns. */ diff --git a/include/linux/mman.h b/include/linux/mman.h index cee1e4b566d8..40d94411d492 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -15,6 +15,9 @@ #ifndef MAP_32BIT #define MAP_32BIT 0 #endif +#ifndef MAP_ABOVE4G +#define MAP_ABOVE4G 0 +#endif #ifndef MAP_HUGE_2MB #define MAP_HUGE_2MB 0 #endif @@ -50,6 +53,7 @@ | MAP_STACK \ | MAP_HUGETLB \ | MAP_32BIT \ + | MAP_ABOVE4G \ | MAP_HUGE_2MB \ | MAP_HUGE_1GB) diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index e05e167dbd16..8d38dcb6d044 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -73,6 +73,14 @@ static inline void mmap_assert_write_locked(struct mm_struct *mm) } #ifdef CONFIG_PER_VMA_LOCK +/* + * Drop all currently-held per-VMA locks. + * This is called from the mmap_lock implementation directly before releasing + * a write-locked mmap_lock (or downgrading it to read-locked). + * This should normally NOT be called manually from other places. + * If you want to call this manually anyway, keep in mind that this will release + * *all* VMA write locks, including ones from further up the stack. + */ static inline void vma_end_write_all(struct mm_struct *mm) { mmap_assert_write_locked(mm); @@ -118,16 +126,6 @@ static inline int mmap_write_lock_killable(struct mm_struct *mm) return ret; } -static inline bool mmap_write_trylock(struct mm_struct *mm) -{ - bool ret; - - __mmap_lock_trace_start_locking(mm, true); - ret = down_write_trylock(&mm->mmap_lock) != 0; - __mmap_lock_trace_acquire_returned(mm, true, ret); - return ret; -} - static inline void mmap_write_unlock(struct mm_struct *mm) { __mmap_lock_trace_released(mm, true); diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index daa2f40d9ce6..7b12eebc5586 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -295,7 +295,9 @@ struct mmc_card { #define MMC_QUIRK_BROKEN_HPI (1<<13) /* Disable broken HPI support */ #define MMC_QUIRK_BROKEN_SD_DISCARD (1<<14) /* Disable broken SD discard support */ #define MMC_QUIRK_BROKEN_SD_CACHE (1<<15) /* Disable broken SD cache support */ +#define MMC_QUIRK_BROKEN_CACHE_FLUSH (1<<16) /* Don't flush cache until the write has occurred */ + bool written_flag; /* Indicates eMMC has been written since power on */ bool reenable_cmdq; /* Re-enable Command Queue */ unsigned int erase_size; /* erase size in sectors */ diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 461d1543893b..2f445c651742 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -184,6 +184,12 @@ struct mmc_host_ops { /* Execute HS400 tuning depending host driver */ int (*execute_hs400_tuning)(struct mmc_host *host, struct mmc_card *card); + /* Optional callback to prepare for SD high-speed tuning */ + int (*prepare_sd_hs_tuning)(struct mmc_host *host, struct mmc_card *card); + + /* Optional callback to execute SD high-speed tuning */ + int (*execute_sd_hs_tuning)(struct mmc_host *host, struct mmc_card *card); + /* Prepare switch to DDR during the HS400 init sequence */ int (*hs400_prepare_ddr)(struct mmc_host *host); @@ -520,6 +526,7 @@ struct mmc_host { /* Host Software Queue support */ bool hsq_enabled; + int hsq_depth; u32 err_stats[MMC_ERR_MAX]; unsigned long private[] ____cacheline_aligned; @@ -665,6 +672,8 @@ static inline void mmc_debugfs_err_stats_inc(struct mmc_host *host, host->err_stats[stat] += 1; } +int mmc_sd_switch(struct mmc_card *card, int mode, int group, u8 value, u8 *resp); +int mmc_send_status(struct mmc_card *card, u32 *status); int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error); int mmc_send_abort_tuning(struct mmc_host *host, u32 opcode); int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd); diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 64a3e051c3c4..f349e08a9dfe 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -187,27 +187,27 @@ struct mmu_notifier_ops { const struct mmu_notifier_range *range); /* - * invalidate_range() is either called between - * invalidate_range_start() and invalidate_range_end() when the - * VM has to free pages that where unmapped, but before the - * pages are actually freed, or outside of _start()/_end() when - * a (remote) TLB is necessary. + * arch_invalidate_secondary_tlbs() is used to manage a non-CPU TLB + * which shares page-tables with the CPU. The + * invalidate_range_start()/end() callbacks should not be implemented as + * invalidate_secondary_tlbs() already catches the points in time when + * an external TLB needs to be flushed. * - * If invalidate_range() is used to manage a non-CPU TLB with - * shared page-tables, it not necessary to implement the - * invalidate_range_start()/end() notifiers, as - * invalidate_range() already catches the points in time when an - * external TLB range needs to be flushed. For more in depth - * discussion on this see Documentation/mm/mmu_notifier.rst + * This requires arch_invalidate_secondary_tlbs() to be called while + * holding the ptl spin-lock and therefore this callback is not allowed + * to sleep. * - * Note that this function might be called with just a sub-range - * of what was passed to invalidate_range_start()/end(), if - * called between those functions. + * This is called by architecture code whenever invalidating a TLB + * entry. It is assumed that any secondary TLB has the same rules for + * when invalidations are required. If this is not the case architecture + * code will need to call this explicitly when required for secondary + * TLB invalidation. */ - void (*invalidate_range)(struct mmu_notifier *subscription, - struct mm_struct *mm, - unsigned long start, - unsigned long end); + void (*arch_invalidate_secondary_tlbs)( + struct mmu_notifier *subscription, + struct mm_struct *mm, + unsigned long start, + unsigned long end); /* * These callbacks are used with the get/put interface to manage the @@ -395,10 +395,9 @@ extern int __mmu_notifier_test_young(struct mm_struct *mm, extern void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address, pte_t pte); extern int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *r); -extern void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *r, - bool only_end); -extern void __mmu_notifier_invalidate_range(struct mm_struct *mm, - unsigned long start, unsigned long end); +extern void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *r); +extern void __mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm, + unsigned long start, unsigned long end); extern bool mmu_notifier_range_update_to_read_only(const struct mmu_notifier_range *range); @@ -460,7 +459,14 @@ mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range) lock_map_release(&__mmu_notifier_invalidate_range_start_map); } -static inline int +/* + * This version of mmu_notifier_invalidate_range_start() avoids blocking, but it + * can return an error if a notifier can't proceed without blocking, in which + * case you're not allowed to modify PTEs in the specified range. + * + * This is mainly intended for OOM handling. + */ +static inline int __must_check mmu_notifier_invalidate_range_start_nonblock(struct mmu_notifier_range *range) { int ret = 0; @@ -481,21 +487,14 @@ mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range) might_sleep(); if (mm_has_notifiers(range->mm)) - __mmu_notifier_invalidate_range_end(range, false); -} - -static inline void -mmu_notifier_invalidate_range_only_end(struct mmu_notifier_range *range) -{ - if (mm_has_notifiers(range->mm)) - __mmu_notifier_invalidate_range_end(range, true); + __mmu_notifier_invalidate_range_end(range); } -static inline void mmu_notifier_invalidate_range(struct mm_struct *mm, - unsigned long start, unsigned long end) +static inline void mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm, + unsigned long start, unsigned long end) { if (mm_has_notifiers(mm)) - __mmu_notifier_invalidate_range(mm, start, end); + __mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end); } static inline void mmu_notifier_subscriptions_init(struct mm_struct *mm) @@ -582,45 +581,6 @@ static inline void mmu_notifier_range_init_owner( __young; \ }) -#define ptep_clear_flush_notify(__vma, __address, __ptep) \ -({ \ - unsigned long ___addr = __address & PAGE_MASK; \ - struct mm_struct *___mm = (__vma)->vm_mm; \ - pte_t ___pte; \ - \ - ___pte = ptep_clear_flush(__vma, __address, __ptep); \ - mmu_notifier_invalidate_range(___mm, ___addr, \ - ___addr + PAGE_SIZE); \ - \ - ___pte; \ -}) - -#define pmdp_huge_clear_flush_notify(__vma, __haddr, __pmd) \ -({ \ - unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \ - struct mm_struct *___mm = (__vma)->vm_mm; \ - pmd_t ___pmd; \ - \ - ___pmd = pmdp_huge_clear_flush(__vma, __haddr, __pmd); \ - mmu_notifier_invalidate_range(___mm, ___haddr, \ - ___haddr + HPAGE_PMD_SIZE); \ - \ - ___pmd; \ -}) - -#define pudp_huge_clear_flush_notify(__vma, __haddr, __pud) \ -({ \ - unsigned long ___haddr = __haddr & HPAGE_PUD_MASK; \ - struct mm_struct *___mm = (__vma)->vm_mm; \ - pud_t ___pud; \ - \ - ___pud = pudp_huge_clear_flush(__vma, __haddr, __pud); \ - mmu_notifier_invalidate_range(___mm, ___haddr, \ - ___haddr + HPAGE_PUD_SIZE); \ - \ - ___pud; \ -}) - /* * set_pte_at_notify() sets the pte _after_ running the notifier. * This is safe to start by updating the secondary MMUs, because the primary MMU @@ -711,12 +671,7 @@ void mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range) { } -static inline void -mmu_notifier_invalidate_range_only_end(struct mmu_notifier_range *range) -{ -} - -static inline void mmu_notifier_invalidate_range(struct mm_struct *mm, +static inline void mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm, unsigned long start, unsigned long end) { } diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 5e50b78d58ea..9db36e197712 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -505,33 +505,37 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw); * the old generation, is incremented when all its bins become empty. * * There are four operations: - * 1. MEMCG_LRU_HEAD, which moves an memcg to the head of a random bin in its + * 1. MEMCG_LRU_HEAD, which moves a memcg to the head of a random bin in its * current generation (old or young) and updates its "seg" to "head"; - * 2. MEMCG_LRU_TAIL, which moves an memcg to the tail of a random bin in its + * 2. MEMCG_LRU_TAIL, which moves a memcg to the tail of a random bin in its * current generation (old or young) and updates its "seg" to "tail"; - * 3. MEMCG_LRU_OLD, which moves an memcg to the head of a random bin in the old + * 3. MEMCG_LRU_OLD, which moves a memcg to the head of a random bin in the old * generation, updates its "gen" to "old" and resets its "seg" to "default"; - * 4. MEMCG_LRU_YOUNG, which moves an memcg to the tail of a random bin in the + * 4. MEMCG_LRU_YOUNG, which moves a memcg to the tail of a random bin in the * young generation, updates its "gen" to "young" and resets its "seg" to * "default". * * The events that trigger the above operations are: * 1. Exceeding the soft limit, which triggers MEMCG_LRU_HEAD; - * 2. The first attempt to reclaim an memcg below low, which triggers + * 2. The first attempt to reclaim a memcg below low, which triggers * MEMCG_LRU_TAIL; - * 3. The first attempt to reclaim an memcg below reclaimable size threshold, - * which triggers MEMCG_LRU_TAIL; - * 4. The second attempt to reclaim an memcg below reclaimable size threshold, - * which triggers MEMCG_LRU_YOUNG; - * 5. Attempting to reclaim an memcg below min, which triggers MEMCG_LRU_YOUNG; + * 3. The first attempt to reclaim a memcg offlined or below reclaimable size + * threshold, which triggers MEMCG_LRU_TAIL; + * 4. The second attempt to reclaim a memcg offlined or below reclaimable size + * threshold, which triggers MEMCG_LRU_YOUNG; + * 5. Attempting to reclaim a memcg below min, which triggers MEMCG_LRU_YOUNG; * 6. Finishing the aging on the eviction path, which triggers MEMCG_LRU_YOUNG; - * 7. Offlining an memcg, which triggers MEMCG_LRU_OLD. + * 7. Offlining a memcg, which triggers MEMCG_LRU_OLD. * - * Note that memcg LRU only applies to global reclaim, and the round-robin - * incrementing of their max_seq counters ensures the eventual fairness to all - * eligible memcgs. For memcg reclaim, it still relies on mem_cgroup_iter(). + * Notes: + * 1. Memcg LRU only applies to global reclaim, and the round-robin incrementing + * of their max_seq counters ensures the eventual fairness to all eligible + * memcgs. For memcg reclaim, it still relies on mem_cgroup_iter(). + * 2. There are only two valid generations: old (seq) and young (seq+1). + * MEMCG_NR_GENS is set to three so that when reading the generation counter + * locklessly, a stale value (seq-1) does not wraparound to young. */ -#define MEMCG_NR_GENS 2 +#define MEMCG_NR_GENS 3 #define MEMCG_NR_BINS 8 struct lru_gen_memcg { @@ -639,8 +643,6 @@ struct lruvec { #endif }; -/* Isolate unmapped pages */ -#define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x2) /* Isolate for asynchronous migration */ #define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x4) /* Isolate unevictable pages */ @@ -676,16 +678,34 @@ enum zone_watermarks { #define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost) #define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost) -/* Fields and list protected by pagesets local_lock in page_alloc.c */ +/* + * Flags used in pcp->flags field. + * + * PCPF_PREV_FREE_HIGH_ORDER: a high-order page is freed in the + * previous page freeing. To avoid to drain PCP for an accident + * high-order page freeing. + * + * PCPF_FREE_HIGH_BATCH: preserve "pcp->batch" pages in PCP before + * draining PCP for consecutive high-order pages freeing without + * allocation if data cache slice of CPU is large enough. To reduce + * zone lock contention and keep cache-hot pages reusing. + */ +#define PCPF_PREV_FREE_HIGH_ORDER BIT(0) +#define PCPF_FREE_HIGH_BATCH BIT(1) + struct per_cpu_pages { spinlock_t lock; /* Protects lists field */ int count; /* number of pages in the list */ int high; /* high watermark, emptying needed */ + int high_min; /* min high watermark */ + int high_max; /* max high watermark */ int batch; /* chunk size for buddy add/remove */ - short free_factor; /* batch scaling factor during free */ + u8 flags; /* protected by pcp->lock */ + u8 alloc_factor; /* batch scaling factor during allocate */ #ifdef CONFIG_NUMA - short expire; /* When 0, remote pagesets are drained */ + u8 expire; /* When 0, remote pagesets are drained */ #endif + short free_count; /* consecutive free count */ /* Lists of pages, one per migrate type stored on the pcp-lists */ struct list_head lists[NR_PCP_LISTS]; @@ -838,7 +858,8 @@ struct zone { * the high and batch values are copied to individual pagesets for * faster access */ - int pageset_high; + int pageset_high_min; + int pageset_high_max; int pageset_batch; #ifndef CONFIG_SPARSEMEM @@ -999,6 +1020,7 @@ enum zone_flags { * Cleared when kswapd is woken. */ ZONE_RECLAIM_ACTIVE, /* kswapd may be scanning the zone. */ + ZONE_BELOW_HIGH, /* zone is below high watermark. */ }; static inline unsigned long zone_managed_pages(struct zone *zone) diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index 057c89867aa2..b8da2db4ecd2 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -115,6 +115,9 @@ static inline bool vfsgid_eq_kgid(vfsgid_t vfsgid, kgid_t kgid) int vfsgid_in_group_p(vfsgid_t vfsgid); +struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap); +void mnt_idmap_put(struct mnt_idmap *idmap); + vfsuid_t make_vfsuid(struct mnt_idmap *idmap, struct user_namespace *fs_userns, kuid_t kuid); diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index b0678b093cb2..f458469c5ce5 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -935,6 +935,12 @@ enum { * struct cdx_device_id - CDX device identifier * @vendor: Vendor ID * @device: Device ID + * @subvendor: Subsystem vendor ID (or CDX_ANY_ID) + * @subdevice: Subsystem device ID (or CDX_ANY_ID) + * @class: Device class + * Most drivers do not need to specify class/class_mask + * as vendor/device is normally sufficient. + * @class_mask: Limit which sub-fields of the class field are compared. * @override_only: Match only when dev->driver_override is this driver. * * Type of entries in the "device Id" table for CDX devices supported by @@ -943,7 +949,15 @@ enum { struct cdx_device_id { __u16 vendor; __u16 device; + __u16 subvendor; + __u16 subdevice; + __u32 class; + __u32 class_mask; __u32 override_only; }; +struct vchiq_device_id { + char name[32]; +}; + #endif /* LINUX_MOD_DEVICETABLE_H */ diff --git a/include/linux/module_symbol.h b/include/linux/module_symbol.h index 7ace7ba30203..77c9895b9ddb 100644 --- a/include/linux/module_symbol.h +++ b/include/linux/module_symbol.h @@ -3,15 +3,13 @@ #define _LINUX_MODULE_SYMBOL_H /* This ignores the intensely annoying "mapping symbols" found in ELF files. */ -static inline int is_mapping_symbol(const char *str) +static inline bool is_mapping_symbol(const char *str) { if (str[0] == '.' && str[1] == 'L') return true; if (str[0] == 'L' && str[1] == '0') return true; - return str[0] == '$' && - (str[1] == 'a' || str[1] == 'd' || str[1] == 't' || str[1] == 'x') - && (str[2] == '\0' || str[2] == '.'); + return str[0] == '$'; } #endif /* _LINUX_MODULE_SYMBOL_H */ diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 03be088fb439..001b2ce83832 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -42,6 +42,11 @@ bool module_init_section(const char *name); */ bool module_exit_section(const char *name); +/* Describes whether within_module_init() will consider this an init section + * or not. This behaviour changes with CONFIG_MODULE_UNLOAD. + */ +bool module_init_layout_section(const char *sname); + /* * Apply the given relocation to the (simplified) ELF. Return -error * or 0. diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 962cd41a2cb5..4fa9726bc328 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -276,7 +276,7 @@ struct kparam_array read-only sections (which is part of respective UNIX ABI on these platforms). So 'const' makes no sense and even causes compile failures with some compilers. */ -#if defined(CONFIG_ALPHA) || defined(CONFIG_IA64) || defined(CONFIG_PPC64) +#if defined(CONFIG_ALPHA) || defined(CONFIG_PPC64) #define __moduleparam_const #else #define __moduleparam_const const @@ -293,7 +293,11 @@ struct kparam_array = { __param_str_##name, THIS_MODULE, ops, \ VERIFY_OCTAL_PERMISSIONS(perm), level, flags, { arg } } -/* Obsolete - use module_param_cb() */ +/* + * Useful for describing a set/get pair used only once (i.e. for this + * parameter). For repeated set/get pairs (i.e. the same struct + * kernel_param_ops), use module_param_cb() instead. + */ #define module_param_call(name, _set, _get, arg, perm) \ static const struct kernel_param_ops __param_ops_##name = \ { .flags = 0, .set = _set, .get = _get }; \ diff --git a/include/linux/mount.h b/include/linux/mount.h index 4f40b40306d0..ac3dd2876197 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -92,8 +92,8 @@ extern bool __mnt_is_readonly(struct vfsmount *mnt); extern bool mnt_may_suid(struct vfsmount *mnt); extern struct vfsmount *clone_private_mount(const struct path *path); -extern int __mnt_want_write(struct vfsmount *); -extern void __mnt_drop_write(struct vfsmount *); +int mnt_get_write_access(struct vfsmount *mnt); +void mnt_put_write_access(struct vfsmount *mnt); extern struct vfsmount *fc_mount(struct fs_context *fc); extern struct vfsmount *vfs_create_mount(struct fs_context *fc); diff --git a/include/linux/msi.h b/include/linux/msi.h index a50ea79522f8..ddace8c34dcf 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -547,12 +547,6 @@ enum { MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS = (1 << 5), /* Free MSI descriptors */ MSI_FLAG_FREE_MSI_DESCS = (1 << 6), - /* - * Quirk to handle MSI implementations which do not provide - * masking. Currently known to affect x86, but has to be partially - * handled in the core MSI code. - */ - MSI_FLAG_NOMASK_QUIRK = (1 << 7), /* Mask for the generic functionality */ MSI_GENERIC_FLAGS_MASK = GENMASK(15, 0), diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index d88bb56c18e2..947410faf9e2 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -287,7 +287,7 @@ struct cfi_private { unsigned long chipshift; /* Because they're of the same type */ const char *im_name; /* inter_module name for cmdset_setup */ unsigned long quirks; - struct flchip chips[]; /* per-chip data structure for each chip */ + struct flchip chips[] __counted_by(numchips); /* per-chip data structure for each chip */ }; uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs, diff --git a/include/linux/mtd/jedec.h b/include/linux/mtd/jedec.h index 0b6b59f7cfbd..56047a4e54c9 100644 --- a/include/linux/mtd/jedec.h +++ b/include/linux/mtd/jedec.h @@ -21,6 +21,9 @@ struct jedec_ecc_info { /* JEDEC features */ #define JEDEC_FEATURE_16_BIT_BUS (1 << 0) +/* JEDEC Optional Commands */ +#define JEDEC_OPT_CMD_READ_CACHE BIT(1) + struct nand_jedec_params { /* rev info and features block */ /* 'J' 'E' 'S' 'D' */ diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 7c58c44662b8..914a9f974baa 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -379,7 +379,7 @@ struct mtd_info { struct module *owner; struct device dev; - int usecount; + struct kref refcnt; struct mtd_debug_info dbg; struct nvmem_device *nvmem; struct nvmem_device *otp_user_nvmem; diff --git a/include/linux/mtd/onfi.h b/include/linux/mtd/onfi.h index a7376f9beddf..55ab2e4d62f9 100644 --- a/include/linux/mtd/onfi.h +++ b/include/linux/mtd/onfi.h @@ -55,6 +55,7 @@ #define ONFI_SUBFEATURE_PARAM_LEN 4 /* ONFI optional commands SET/GET FEATURES supported? */ +#define ONFI_OPT_CMD_READ_CACHE BIT(1) #define ONFI_OPT_CMD_SET_GET_FEATURES BIT(2) struct nand_onfi_params { diff --git a/include/linux/mtd/qinfo.h b/include/linux/mtd/qinfo.h index 2e3f43788d48..0421f12156b5 100644 --- a/include/linux/mtd/qinfo.h +++ b/include/linux/mtd/qinfo.h @@ -24,7 +24,7 @@ struct lpddr_private { struct qinfo_chip *qinfo; int numchips; unsigned long chipshift; - struct flchip chips[]; + struct flchip chips[] __counted_by(numchips); }; /* qinfo_query_info structure contains request information for diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 5159d692f9ce..c29ace15a053 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -225,6 +225,7 @@ struct gpio_desc; * struct nand_parameters - NAND generic parameters from the parameter page * @model: Model name * @supports_set_get_features: The NAND chip supports setting/getting features + * @supports_read_cache: The NAND chip supports read cache operations * @set_feature_list: Bitmap of features that can be set * @get_feature_list: Bitmap of features that can be get * @onfi: ONFI specific parameters @@ -233,6 +234,7 @@ struct nand_parameters { /* Generic parameters */ const char *model; bool supports_set_get_features; + bool supports_read_cache; DECLARE_BITMAP(set_feature_list, ONFI_FEATURE_NUMBER); DECLARE_BITMAP(get_feature_list, ONFI_FEATURE_NUMBER); @@ -1540,6 +1542,7 @@ int nand_reset_op(struct nand_chip *chip); int nand_readid_op(struct nand_chip *chip, u8 addr, void *buf, unsigned int len); int nand_status_op(struct nand_chip *chip, u8 *status); +int nand_exit_status_op(struct nand_chip *chip); int nand_erase_op(struct nand_chip *chip, unsigned int eraseblock); int nand_read_page_op(struct nand_chip *chip, unsigned int page, unsigned int offset_in_page, void *buf, unsigned int len); diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 3e285c09d16d..badb4c1ac079 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -263,6 +263,7 @@ struct spinand_manufacturer { extern const struct spinand_manufacturer alliancememory_spinand_manufacturer; extern const struct spinand_manufacturer ato_spinand_manufacturer; extern const struct spinand_manufacturer esmt_c8_spinand_manufacturer; +extern const struct spinand_manufacturer foresee_spinand_manufacturer; extern const struct spinand_manufacturer gigadevice_spinand_manufacturer; extern const struct spinand_manufacturer macronix_spinand_manufacturer; extern const struct spinand_manufacturer micron_spinand_manufacturer; diff --git a/include/linux/namei.h b/include/linux/namei.h index 1463cbda4888..3100371b5e32 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -92,6 +92,30 @@ extern struct dentry *lock_rename(struct dentry *, struct dentry *); extern struct dentry *lock_rename_child(struct dentry *, struct dentry *); extern void unlock_rename(struct dentry *, struct dentry *); +/** + * mode_strip_umask - handle vfs umask stripping + * @dir: parent directory of the new inode + * @mode: mode of the new inode to be created in @dir + * + * In most filesystems, umask stripping depends on whether or not the + * filesystem supports POSIX ACLs. If the filesystem doesn't support it umask + * stripping is done directly in here. If the filesystem does support POSIX + * ACLs umask stripping is deferred until the filesystem calls + * posix_acl_create(). + * + * Some filesystems (like NFSv4) also want to avoid umask stripping by the + * VFS, but don't support POSIX ACLs. Those filesystems can set SB_I_NOUMASK + * to get this effect without declaring that they support POSIX ACLs. + * + * Returns: mode + */ +static inline umode_t __must_check mode_strip_umask(const struct inode *dir, umode_t mode) +{ + if (!IS_POSIXACL(dir) && !(dir->i_sb->s_iflags & SB_I_NOUMASK)) + mode &= ~current_umask(); + return mode; +} + extern int __must_check nd_jump_link(const struct path *path); static inline void nd_terminate_link(void *name, size_t len, size_t maxlen) @@ -112,7 +136,7 @@ static inline void nd_terminate_link(void *name, size_t len, size_t maxlen) static inline bool retry_estale(const long error, const unsigned int flags) { - return error == -ESTALE && !(flags & LOOKUP_REVAL); + return unlikely(error == -ESTALE && !(flags & LOOKUP_REVAL)); } #endif /* _LINUX_NAMEI_H */ diff --git a/include/linux/net.h b/include/linux/net.h index 41c608c1b02c..c9b4a63791a4 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -123,7 +123,7 @@ struct socket { struct file *file; struct sock *sk; - const struct proto_ops *ops; + const struct proto_ops *ops; /* Might change with IPV6_ADDRFORM or MPTCP. */ struct socket_wq wq; }; diff --git a/include/linux/net_mm.h b/include/linux/net_mm.h deleted file mode 100644 index b298998bd5a0..000000000000 --- a/include/linux/net_mm.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifdef CONFIG_MMU - -#ifdef CONFIG_INET -extern const struct vm_operations_struct tcp_vm_ops; -static inline bool vma_is_tcp(const struct vm_area_struct *vma) -{ - return vma->vm_ops == &tcp_vm_ops; -} -#else -static inline bool vma_is_tcp(const struct vm_area_struct *vma) -{ - return false; -} -#endif /* CONFIG_INET*/ - -#endif /* CONFIG_MMU */ diff --git a/include/linux/net_tstamp.h b/include/linux/net_tstamp.h index fd67f3cc0c4b..eb01c37e71e0 100644 --- a/include/linux/net_tstamp.h +++ b/include/linux/net_tstamp.h @@ -5,12 +5,23 @@ #include <uapi/linux/net_tstamp.h> +enum hwtstamp_source { + HWTSTAMP_SOURCE_NETDEV, + HWTSTAMP_SOURCE_PHYLIB, +}; + /** * struct kernel_hwtstamp_config - Kernel copy of struct hwtstamp_config * * @flags: see struct hwtstamp_config * @tx_type: see struct hwtstamp_config * @rx_filter: see struct hwtstamp_config + * @ifr: pointer to ifreq structure from the original ioctl request, to pass to + * a legacy implementation of a lower driver + * @copied_to_user: request was passed to a legacy implementation which already + * copied the ioctl request back to user space + * @source: indication whether timestamps should come from the netdev or from + * an attached phylib PHY * * Prefer using this structure for in-kernel processing of hardware * timestamping configuration, over the inextensible struct hwtstamp_config @@ -20,6 +31,9 @@ struct kernel_hwtstamp_config { int flags; int tx_type; int rx_filter; + struct ifreq *ifr; + bool copied_to_user; + enum hwtstamp_source source; }; static inline void hwtstamp_config_to_kernel(struct kernel_hwtstamp_config *kernel_cfg, @@ -30,4 +44,20 @@ static inline void hwtstamp_config_to_kernel(struct kernel_hwtstamp_config *kern kernel_cfg->rx_filter = cfg->rx_filter; } +static inline void hwtstamp_config_from_kernel(struct hwtstamp_config *cfg, + const struct kernel_hwtstamp_config *kernel_cfg) +{ + cfg->flags = kernel_cfg->flags; + cfg->tx_type = kernel_cfg->tx_type; + cfg->rx_filter = kernel_cfg->rx_filter; +} + +static inline bool kernel_hwtstamp_config_changed(const struct kernel_hwtstamp_config *a, + const struct kernel_hwtstamp_config *b) +{ + return a->flags != b->flags || + a->tx_type != b->tx_type || + a->rx_filter != b->rx_filter; +} + #endif /* _LINUX_NET_TIMESTAMPING_H_ */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b828c7a75be2..2564e209465e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -40,7 +40,6 @@ #include <net/dcbnl.h> #endif #include <net/netprio_cgroup.h> -#include <net/xdp.h> #include <linux/netdev_features.h> #include <linux/neighbour.h> @@ -57,6 +56,7 @@ struct netpoll_info; struct device; struct ethtool_ops; +struct kernel_hwtstamp_config; struct phy_device; struct dsa_port; struct ip_tunnel_parm; @@ -76,7 +76,13 @@ struct udp_tunnel_nic_info; struct udp_tunnel_nic; struct bpf_prog; struct xdp_buff; +struct xdp_frame; +struct xdp_metadata_ops; struct xdp_md; +/* DPLL specific */ +struct dpll_pin; + +typedef u32 xdp_features_t; void synchronize_net(void); void netdev_set_default_ethtool_ops(struct net_device *dev, @@ -476,6 +482,29 @@ static inline bool napi_prefer_busy_poll(struct napi_struct *n) return test_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state); } +/** + * napi_is_scheduled - test if NAPI is scheduled + * @n: NAPI context + * + * This check is "best-effort". With no locking implemented, + * a NAPI can be scheduled or terminate right after this check + * and produce not precise results. + * + * NAPI_STATE_SCHED is an internal state, napi_is_scheduled + * should not be used normally and napi_schedule should be + * used instead. + * + * Use only if the driver really needs to check if a NAPI + * is scheduled for example in the context of delayed timer + * that can be skipped if a NAPI is already scheduled. + * + * Return True if NAPI is scheduled, False otherwise. + */ +static inline bool napi_is_scheduled(struct napi_struct *n) +{ + return test_bit(NAPI_STATE_SCHED, &n->state); +} + bool napi_schedule_prep(struct napi_struct *n); /** @@ -484,11 +513,18 @@ bool napi_schedule_prep(struct napi_struct *n); * * Schedule NAPI poll routine to be called if it is not already * running. + * Return true if we schedule a NAPI or false if not. + * Refer to napi_schedule_prep() for additional reason on why + * a NAPI might not be scheduled. */ -static inline void napi_schedule(struct napi_struct *n) +static inline bool napi_schedule(struct napi_struct *n) { - if (napi_schedule_prep(n)) + if (napi_schedule_prep(n)) { __napi_schedule(n); + return true; + } + + return false; } /** @@ -503,16 +539,6 @@ static inline void napi_schedule_irqoff(struct napi_struct *n) __napi_schedule_irqoff(n); } -/* Try to reschedule poll. Called by dev->poll() after napi_complete(). */ -static inline bool napi_reschedule(struct napi_struct *napi) -{ - if (napi_schedule_prep(napi)) { - __napi_schedule(napi); - return true; - } - return false; -} - /** * napi_complete_done - NAPI processing complete * @n: NAPI context @@ -782,32 +808,6 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id, #endif #endif /* CONFIG_RPS */ -/* This structure contains an instance of an RX queue. */ -struct netdev_rx_queue { - struct xdp_rxq_info xdp_rxq; -#ifdef CONFIG_RPS - struct rps_map __rcu *rps_map; - struct rps_dev_flow_table __rcu *rps_flow_table; -#endif - struct kobject kobj; - struct net_device *dev; - netdevice_tracker dev_tracker; - -#ifdef CONFIG_XDP_SOCKETS - struct xsk_buff_pool *pool; -#endif -} ____cacheline_aligned_in_smp; - -/* - * RX queue sysfs structures and functions. - */ -struct rx_queue_attribute { - struct attribute attr; - ssize_t (*show)(struct netdev_rx_queue *queue, char *buf); - ssize_t (*store)(struct netdev_rx_queue *queue, - const char *buf, size_t len); -}; - /* XPS map type and offset of the xps map within net_device->xps_maps[]. */ enum xps_map_type { XPS_CPUS = 0, @@ -939,6 +939,7 @@ struct net_device_path { u8 queue; u16 wcid; u8 bss; + u8 amsdu; } mtk_wdma; }; }; @@ -1309,9 +1310,7 @@ struct netdev_net_notifier { * struct net_device *dev, * const unsigned char *addr, u16 vid) * Deletes the FDB entry from dev coresponding to addr. - * int (*ndo_fdb_del_bulk)(struct ndmsg *ndm, struct nlattr *tb[], - * struct net_device *dev, - * u16 vid, + * int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev, * struct netlink_ext_ack *extack); * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, * struct net_device *dev, struct net_device *filter_dev, @@ -1418,6 +1417,16 @@ struct netdev_net_notifier { * Get hardware timestamp based on normal/adjustable time or free running * cycle counter. This function is required if physical clock supports a * free running cycle counter. + * + * int (*ndo_hwtstamp_get)(struct net_device *dev, + * struct kernel_hwtstamp_config *kernel_config); + * Get the currently configured hardware timestamping parameters for the + * NIC device. + * + * int (*ndo_hwtstamp_set)(struct net_device *dev, + * struct kernel_hwtstamp_config *kernel_config, + * struct netlink_ext_ack *extack); + * Change the hardware timestamping parameters for NIC device. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1576,10 +1585,8 @@ struct net_device_ops { struct net_device *dev, const unsigned char *addr, u16 vid, struct netlink_ext_ack *extack); - int (*ndo_fdb_del_bulk)(struct ndmsg *ndm, - struct nlattr *tb[], + int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev, - u16 vid, struct netlink_ext_ack *extack); int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, @@ -1602,6 +1609,10 @@ struct net_device_ops { int (*ndo_mdb_dump)(struct net_device *dev, struct sk_buff *skb, struct netlink_callback *cb); + int (*ndo_mdb_get)(struct net_device *dev, + struct nlattr *tb[], u32 portid, + u32 seq, + struct netlink_ext_ack *extack); int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh, u16 flags, @@ -1652,12 +1663,11 @@ struct net_device_ops { ktime_t (*ndo_get_tstamp)(struct net_device *dev, const struct skb_shared_hwtstamps *hwtstamps, bool cycles); -}; - -struct xdp_metadata_ops { - int (*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp); - int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash, - enum xdp_rss_hash_type *rss_type); + int (*ndo_hwtstamp_get)(struct net_device *dev, + struct kernel_hwtstamp_config *kernel_config); + int (*ndo_hwtstamp_set)(struct net_device *dev, + struct kernel_hwtstamp_config *kernel_config, + struct netlink_ext_ack *extack); }; /** @@ -1708,6 +1718,9 @@ struct xdp_metadata_ops { * @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with * skb_headlen(skb) == 0 (data starts from frag0) * @IFF_CHANGE_PROTO_DOWN: device supports setting carrier via IFLA_PROTO_DOWN + * @IFF_SEE_ALL_HWTSTAMP_REQUESTS: device wants to see calls to + * ndo_hwtstamp_set() for all timestamp requests regardless of source, + * even if those aren't HWTSTAMP_SOURCE_NETDEV. */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1743,6 +1756,7 @@ enum netdev_priv_flags { IFF_NO_ADDRCONF = BIT_ULL(30), IFF_TX_SKB_NO_LINEAR = BIT_ULL(31), IFF_CHANGE_PROTO_DOWN = BIT_ULL(32), + IFF_SEE_ALL_HWTSTAMP_REQUESTS = BIT_ULL(33), }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1783,6 +1797,13 @@ enum netdev_ml_priv_type { ML_PRIV_CAN, }; +enum netdev_stat_type { + NETDEV_PCPU_STAT_NONE, + NETDEV_PCPU_STAT_LSTATS, /* struct pcpu_lstats */ + NETDEV_PCPU_STAT_TSTATS, /* struct pcpu_sw_netstats */ + NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */ +}; + /** * struct net_device - The DEVICE structure. * @@ -1930,8 +1951,7 @@ enum netdev_ml_priv_type { * * @rx_handler: handler for received packets * @rx_handler_data: XXX: need comments on this one - * @miniq_ingress: ingress/clsact qdisc specific data for - * ingress processing + * @tcx_ingress: BPF & clsact qdisc specific data for ingress processing * @ingress_queue: XXX: need comments on this one * @nf_hooks_ingress: netfilter hooks executed for ingress packets * @broadcast: hw bcast address @@ -1952,8 +1972,7 @@ enum netdev_ml_priv_type { * @xps_maps: all CPUs/RXQs maps for XPS device * * @xps_maps: XXX: need comments on this one - * @miniq_egress: clsact qdisc specific data for - * egress processing + * @tcx_egress: BPF & clsact qdisc specific data for egress processing * @nf_hooks_egress: netfilter hooks executed for egress packets * @qdisc_hash: qdisc hash table * @watchdog_timeo: Represents the timeout that is used by @@ -1979,10 +1998,14 @@ enum netdev_ml_priv_type { * * @ml_priv: Mid-layer private * @ml_priv_type: Mid-layer private type - * @lstats: Loopback statistics - * @tstats: Tunnel statistics - * @dstats: Dummy statistics - * @vstats: Virtual ethernet statistics + * + * @pcpu_stat_type: Type of device statistics which the core should + * allocate/free: none, lstats, tstats, dstats. none + * means the driver is handling statistics allocation/ + * freeing internally. + * @lstats: Loopback statistics: packets, bytes + * @tstats: Tunnel statistics: RX/TX packets, RX/TX bytes + * @dstats: Dummy statistics: RX/TX/drop packets, RX/TX bytes * * @garp_port: GARP * @mrp_port: MRP @@ -2045,6 +2068,8 @@ enum netdev_ml_priv_type { * receive offload (GRO) * @gro_ipv4_max_size: Maximum size of aggregated packet in generic * receive offload (GRO), for IPv4. + * @xdp_zc_max_segs: Maximum number of segments supported by AF_XDP + * zero copy driver * * @dev_addr_shadow: Copy of @dev_addr to catch direct writes. * @linkwatch_dev_tracker: refcount tracker used by linkwatch. @@ -2058,6 +2083,9 @@ enum netdev_ml_priv_type { * SET_NETDEV_DEVLINK_PORT macro. This pointer is static * during the time netdevice is registered. * + * @dpll_pin: Pointer to the SyncE source pin of a DPLL subsystem, + * where the clock is recovered. + * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ @@ -2250,11 +2278,11 @@ struct net_device { #define GRO_MAX_SIZE (8 * 65535u) unsigned int gro_max_size; unsigned int gro_ipv4_max_size; + unsigned int xdp_zc_max_segs; rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; - -#ifdef CONFIG_NET_CLS_ACT - struct mini_Qdisc __rcu *miniq_ingress; +#ifdef CONFIG_NET_XGRESS + struct bpf_mprog_entry __rcu *tcx_ingress; #endif struct netdev_queue __rcu *ingress_queue; #ifdef CONFIG_NETFILTER_INGRESS @@ -2282,8 +2310,8 @@ struct net_device { #ifdef CONFIG_XPS struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX]; #endif -#ifdef CONFIG_NET_CLS_ACT - struct mini_Qdisc __rcu *miniq_egress; +#ifdef CONFIG_NET_XGRESS + struct bpf_mprog_entry __rcu *tcx_egress; #endif #ifdef CONFIG_NETFILTER_EGRESS struct nf_hook_entries __rcu *nf_hooks_egress; @@ -2337,6 +2365,7 @@ struct net_device { void *ml_priv; enum netdev_ml_priv_type ml_priv_type; + enum netdev_stat_type pcpu_stat_type:8; union { struct pcpu_lstats __percpu *lstats; struct pcpu_sw_netstats __percpu *tstats; @@ -2414,6 +2443,10 @@ struct net_device { struct rtnl_hw_stats64 *offload_xstats_l3; struct devlink_port *devlink_port; + +#if IS_ENABLED(CONFIG_DPLL) + struct dpll_pin *dpll_pin; +#endif }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -2734,6 +2767,16 @@ struct pcpu_sw_netstats { struct u64_stats_sync syncp; } __aligned(4 * sizeof(u64)); +struct pcpu_dstats { + u64 rx_packets; + u64 rx_bytes; + u64 rx_drops; + u64 tx_packets; + u64 tx_bytes; + u64 tx_drops; + struct u64_stats_sync syncp; +} __aligned(8 * sizeof(u64)); + struct pcpu_lstats { u64_stats_t packets; u64_stats_t bytes; @@ -3016,6 +3059,9 @@ extern rwlock_t dev_base_lock; /* Device list lock */ if (netdev_master_upper_dev_get_rcu(slave) == (bond)) #define net_device_entry(lh) list_entry(lh, struct net_device, dev_list) +#define for_each_netdev_dump(net, d, ifindex) \ + xa_for_each_start(&(net)->dev_by_index, (ifindex), (d), (ifindex)) + static inline struct net_device *next_net_device(struct net_device *dev) { struct list_head *lh; @@ -3130,8 +3176,6 @@ struct net_device *netdev_get_by_name(struct net *net, const char *name, netdevice_tracker *tracker, gfp_t gfp); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); struct net_device *dev_get_by_napi_id(unsigned int napi_id); -int dev_restart(struct net_device *dev); - static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, @@ -3830,24 +3874,6 @@ static inline int netif_set_real_num_rx_queues(struct net_device *dev, int netif_set_real_num_queues(struct net_device *dev, unsigned int txq, unsigned int rxq); -static inline struct netdev_rx_queue * -__netif_get_rx_queue(struct net_device *dev, unsigned int rxq) -{ - return dev->_rx + rxq; -} - -#ifdef CONFIG_SYSFS -static inline unsigned int get_netdev_rx_queue_index( - struct netdev_rx_queue *queue) -{ - struct net_device *dev = queue->dev; - int index = queue - dev->_rx; - - BUG_ON(index >= dev->num_rx_queues); - return index; -} -#endif - int netif_get_num_default_rss_queues(void); void dev_kfree_skb_irq_reason(struct sk_buff *skb, enum skb_drop_reason reason); @@ -3933,6 +3959,11 @@ int put_user_ifreq(struct ifreq *ifr, void __user *arg); int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, void __user *data, bool *need_copyout); int dev_ifconf(struct net *net, struct ifconf __user *ifc); +int generic_hwtstamp_get_lower(struct net_device *dev, + struct kernel_hwtstamp_config *kernel_cfg); +int generic_hwtstamp_set_lower(struct net_device *dev, + struct kernel_hwtstamp_config *kernel_cfg, + struct netlink_ext_ack *extack); int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *userdata); unsigned int dev_get_flags(const struct net_device *); int __dev_change_flags(struct net_device *dev, unsigned int flags, @@ -3961,6 +3992,18 @@ int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); int dev_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid, bool recurse); bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); +void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin); +void netdev_dpll_pin_clear(struct net_device *dev); + +static inline struct dpll_pin *netdev_dpll_pin(const struct net_device *dev) +{ +#if IS_ENABLED(CONFIG_DPLL) + return dev->dpll_pin; +#else + return NULL; +#endif +} + struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); @@ -4001,32 +4044,19 @@ static __always_inline bool __is_skb_forwardable(const struct net_device *dev, return false; } -struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev); - -static inline struct net_device_core_stats __percpu *dev_core_stats(struct net_device *dev) -{ - /* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */ - struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats); - - if (likely(p)) - return p; - - return netdev_core_stats_alloc(dev); -} +void netdev_core_stats_inc(struct net_device *dev, u32 offset); #define DEV_CORE_STATS_INC(FIELD) \ static inline void dev_core_stats_##FIELD##_inc(struct net_device *dev) \ { \ - struct net_device_core_stats __percpu *p; \ - \ - p = dev_core_stats(dev); \ - if (p) \ - this_cpu_inc(p->FIELD); \ + netdev_core_stats_inc(dev, \ + offsetof(struct net_device_core_stats, FIELD)); \ } DEV_CORE_STATS_INC(rx_dropped) DEV_CORE_STATS_INC(tx_dropped) DEV_CORE_STATS_INC(rx_nohandler) DEV_CORE_STATS_INC(rx_otherhost_dropped) +#undef DEV_CORE_STATS_INC static __always_inline int ____dev_forward_skb(struct net_device *dev, struct sk_buff *skb, @@ -5102,6 +5132,11 @@ static inline bool netif_is_ovs_port(const struct net_device *dev) return dev->priv_flags & IFF_OVS_DATAPATH; } +static inline bool netif_is_any_bridge_master(const struct net_device *dev) +{ + return netif_is_bridge_master(dev) || netif_is_ovs_master(dev); +} + static inline bool netif_is_any_bridge_port(const struct net_device *dev) { return netif_is_bridge_port(dev) || netif_is_ovs_port(dev); @@ -5230,5 +5265,6 @@ extern struct net_device *blackhole_netdev; #define DEV_STATS_INC(DEV, FIELD) atomic_long_inc(&(DEV)->stats.__##FIELD) #define DEV_STATS_ADD(DEV, FIELD, VAL) \ atomic_long_add((VAL), &(DEV)->stats.__##FIELD) +#define DEV_STATS_READ(DEV, FIELD) atomic_long_read(&(DEV)->stats.__##FIELD) #endif /* _LINUX_NETDEVICE_H */ diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index d4fed4c508ca..80900d910992 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -11,6 +11,7 @@ #include <linux/wait.h> #include <linux/list.h> #include <linux/static_key.h> +#include <linux/module.h> #include <linux/netfilter_defs.h> #include <linux/netdevice.h> #include <linux/sockptr.h> @@ -21,6 +22,16 @@ static inline int NF_DROP_GETERR(int verdict) return -(verdict >> NF_VERDICT_QBITS); } +static __always_inline int +NF_DROP_REASON(struct sk_buff *skb, enum skb_drop_reason reason, u32 err) +{ + BUILD_BUG_ON(err > 0xffff); + + kfree_skb_reason(skb, reason); + + return ((err << 16) | NF_STOLEN); +} + static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1, const union nf_inet_addr *a2) { @@ -481,6 +492,15 @@ struct nfnl_ct_hook { }; extern const struct nfnl_ct_hook __rcu *nfnl_ct_hook; +struct nf_defrag_hook { + struct module *owner; + int (*enable)(struct net *net); + void (*disable)(struct net *net); +}; + +extern const struct nf_defrag_hook __rcu *nf_defrag_v4_hook; +extern const struct nf_defrag_hook __rcu *nf_defrag_v6_hook; + /* * nf_skb_duplicated - TEE target has sent a packet * diff --git a/include/linux/netfilter/nf_conntrack_h323.h b/include/linux/netfilter/nf_conntrack_h323.h index 9e937f64a1ad..81286c499325 100644 --- a/include/linux/netfilter/nf_conntrack_h323.h +++ b/include/linux/netfilter/nf_conntrack_h323.h @@ -34,10 +34,6 @@ struct nf_ct_h323_master { int get_h225_addr(struct nf_conn *ct, unsigned char *data, TransportAddress *taddr, union nf_inet_addr *addr, __be16 *port); -void nf_conntrack_h245_expect(struct nf_conn *new, - struct nf_conntrack_expect *this); -void nf_conntrack_q931_expect(struct nf_conn *new, - struct nf_conntrack_expect *this); struct nfct_h323_nat_hooks { int (*set_h245_addr)(struct sk_buff *skb, unsigned int protoff, diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h index f33aa6021364..34ce5d2f37a2 100644 --- a/include/linux/netfilter/nf_conntrack_proto_gre.h +++ b/include/linux/netfilter/nf_conntrack_proto_gre.h @@ -25,7 +25,6 @@ struct nf_ct_gre_keymap { int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, struct nf_conntrack_tuple *t); -void nf_ct_gre_keymap_flush(struct net *net); /* delete keymap entries */ void nf_ct_gre_keymap_destroy(struct nf_conn *ct); diff --git a/include/linux/netfilter/nf_conntrack_sctp.h b/include/linux/netfilter/nf_conntrack_sctp.h index 625f491b95de..fb31312825ae 100644 --- a/include/linux/netfilter/nf_conntrack_sctp.h +++ b/include/linux/netfilter/nf_conntrack_sctp.h @@ -9,6 +9,7 @@ struct ip_ct_sctp { enum sctp_conntrack state; __be32 vtag[IP_CT_DIR_MAX]; + u8 init[IP_CT_DIR_MAX]; u8 last_dir; u8 flags; }; diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 9eec3f4f5351..75d7de34c908 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -50,6 +50,7 @@ struct netlink_kernel_cfg { struct mutex *cb_mutex; int (*bind)(struct net *net, int group); void (*unbind)(struct net *net, int group); + void (*release) (struct sock *sk, unsigned long *groups); }; struct sock *__netlink_kernel_create(struct net *net, int unit, @@ -227,6 +228,11 @@ bool netlink_strict_get_check(struct sk_buff *skb); int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock); int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid, __u32 group, gfp_t allocation); +int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, + __u32 portid, __u32 group, gfp_t allocation, + int (*filter)(struct sock *dsk, + struct sk_buff *skb, void *data), + void *filter_data); int netlink_set_err(struct sock *ssk, __u32 portid, __u32 group, int code); int netlink_register_notifier(struct notifier_block *nb); int netlink_unregister_notifier(struct notifier_block *nb); diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 730003c4f4af..c11c4db34639 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -150,7 +150,7 @@ enum nfs_opnum4 { OP_WRITE_SAME = 70, OP_CLONE = 71, - /* xattr support (RFC8726) */ + /* xattr support (RFC8276) */ OP_GETXATTR = 72, OP_SETXATTR = 73, OP_LISTXATTRS = 74, @@ -389,79 +389,203 @@ enum lock_type4 { NFS4_WRITEW_LT = 4 }; +/* + * Symbol names and values are from RFC 7531 Section 2. + * "XDR Description of NFSv4.0" + */ +enum { + FATTR4_SUPPORTED_ATTRS = 0, + FATTR4_TYPE = 1, + FATTR4_FH_EXPIRE_TYPE = 2, + FATTR4_CHANGE = 3, + FATTR4_SIZE = 4, + FATTR4_LINK_SUPPORT = 5, + FATTR4_SYMLINK_SUPPORT = 6, + FATTR4_NAMED_ATTR = 7, + FATTR4_FSID = 8, + FATTR4_UNIQUE_HANDLES = 9, + FATTR4_LEASE_TIME = 10, + FATTR4_RDATTR_ERROR = 11, + FATTR4_ACL = 12, + FATTR4_ACLSUPPORT = 13, + FATTR4_ARCHIVE = 14, + FATTR4_CANSETTIME = 15, + FATTR4_CASE_INSENSITIVE = 16, + FATTR4_CASE_PRESERVING = 17, + FATTR4_CHOWN_RESTRICTED = 18, + FATTR4_FILEHANDLE = 19, + FATTR4_FILEID = 20, + FATTR4_FILES_AVAIL = 21, + FATTR4_FILES_FREE = 22, + FATTR4_FILES_TOTAL = 23, + FATTR4_FS_LOCATIONS = 24, + FATTR4_HIDDEN = 25, + FATTR4_HOMOGENEOUS = 26, + FATTR4_MAXFILESIZE = 27, + FATTR4_MAXLINK = 28, + FATTR4_MAXNAME = 29, + FATTR4_MAXREAD = 30, + FATTR4_MAXWRITE = 31, + FATTR4_MIMETYPE = 32, + FATTR4_MODE = 33, + FATTR4_NO_TRUNC = 34, + FATTR4_NUMLINKS = 35, + FATTR4_OWNER = 36, + FATTR4_OWNER_GROUP = 37, + FATTR4_QUOTA_AVAIL_HARD = 38, + FATTR4_QUOTA_AVAIL_SOFT = 39, + FATTR4_QUOTA_USED = 40, + FATTR4_RAWDEV = 41, + FATTR4_SPACE_AVAIL = 42, + FATTR4_SPACE_FREE = 43, + FATTR4_SPACE_TOTAL = 44, + FATTR4_SPACE_USED = 45, + FATTR4_SYSTEM = 46, + FATTR4_TIME_ACCESS = 47, + FATTR4_TIME_ACCESS_SET = 48, + FATTR4_TIME_BACKUP = 49, + FATTR4_TIME_CREATE = 50, + FATTR4_TIME_DELTA = 51, + FATTR4_TIME_METADATA = 52, + FATTR4_TIME_MODIFY = 53, + FATTR4_TIME_MODIFY_SET = 54, + FATTR4_MOUNTED_ON_FILEID = 55, +}; + +/* + * Symbol names and values are from RFC 5662 Section 2. + * "XDR Description of NFSv4.1" + */ +enum { + FATTR4_DIR_NOTIF_DELAY = 56, + FATTR4_DIRENT_NOTIF_DELAY = 57, + FATTR4_DACL = 58, + FATTR4_SACL = 59, + FATTR4_CHANGE_POLICY = 60, + FATTR4_FS_STATUS = 61, + FATTR4_FS_LAYOUT_TYPES = 62, + FATTR4_LAYOUT_HINT = 63, + FATTR4_LAYOUT_TYPES = 64, + FATTR4_LAYOUT_BLKSIZE = 65, + FATTR4_LAYOUT_ALIGNMENT = 66, + FATTR4_FS_LOCATIONS_INFO = 67, + FATTR4_MDSTHRESHOLD = 68, + FATTR4_RETENTION_GET = 69, + FATTR4_RETENTION_SET = 70, + FATTR4_RETENTEVT_GET = 71, + FATTR4_RETENTEVT_SET = 72, + FATTR4_RETENTION_HOLD = 73, + FATTR4_MODE_SET_MASKED = 74, + FATTR4_SUPPATTR_EXCLCREAT = 75, + FATTR4_FS_CHARSET_CAP = 76, +}; + +/* + * Symbol names and values are from RFC 7863 Section 2. + * "XDR Description of NFSv4.2" + */ +enum { + FATTR4_CLONE_BLKSIZE = 77, + FATTR4_SPACE_FREED = 78, + FATTR4_CHANGE_ATTR_TYPE = 79, + FATTR4_SEC_LABEL = 80, +}; + +/* + * Symbol names and values are from RFC 8275 Section 5. + * "The mode_umask Attribute" + */ +enum { + FATTR4_MODE_UMASK = 81, +}; + +/* + * Symbol names and values are from RFC 8276 Section 8.6. + * "Numeric Values Assigned to Protocol Extensions" + */ +enum { + FATTR4_XATTR_SUPPORT = 82, +}; + +/* + * The following internal definitions enable processing the above + * attribute bits within 32-bit word boundaries. + */ /* Mandatory Attributes */ -#define FATTR4_WORD0_SUPPORTED_ATTRS (1UL << 0) -#define FATTR4_WORD0_TYPE (1UL << 1) -#define FATTR4_WORD0_FH_EXPIRE_TYPE (1UL << 2) -#define FATTR4_WORD0_CHANGE (1UL << 3) -#define FATTR4_WORD0_SIZE (1UL << 4) -#define FATTR4_WORD0_LINK_SUPPORT (1UL << 5) -#define FATTR4_WORD0_SYMLINK_SUPPORT (1UL << 6) -#define FATTR4_WORD0_NAMED_ATTR (1UL << 7) -#define FATTR4_WORD0_FSID (1UL << 8) -#define FATTR4_WORD0_UNIQUE_HANDLES (1UL << 9) -#define FATTR4_WORD0_LEASE_TIME (1UL << 10) -#define FATTR4_WORD0_RDATTR_ERROR (1UL << 11) +#define FATTR4_WORD0_SUPPORTED_ATTRS BIT(FATTR4_SUPPORTED_ATTRS) +#define FATTR4_WORD0_TYPE BIT(FATTR4_TYPE) +#define FATTR4_WORD0_FH_EXPIRE_TYPE BIT(FATTR4_FH_EXPIRE_TYPE) +#define FATTR4_WORD0_CHANGE BIT(FATTR4_CHANGE) +#define FATTR4_WORD0_SIZE BIT(FATTR4_SIZE) +#define FATTR4_WORD0_LINK_SUPPORT BIT(FATTR4_LINK_SUPPORT) +#define FATTR4_WORD0_SYMLINK_SUPPORT BIT(FATTR4_SYMLINK_SUPPORT) +#define FATTR4_WORD0_NAMED_ATTR BIT(FATTR4_NAMED_ATTR) +#define FATTR4_WORD0_FSID BIT(FATTR4_FSID) +#define FATTR4_WORD0_UNIQUE_HANDLES BIT(FATTR4_UNIQUE_HANDLES) +#define FATTR4_WORD0_LEASE_TIME BIT(FATTR4_LEASE_TIME) +#define FATTR4_WORD0_RDATTR_ERROR BIT(FATTR4_RDATTR_ERROR) /* Mandatory in NFSv4.1 */ -#define FATTR4_WORD2_SUPPATTR_EXCLCREAT (1UL << 11) +#define FATTR4_WORD2_SUPPATTR_EXCLCREAT BIT(FATTR4_SUPPATTR_EXCLCREAT - 64) /* Recommended Attributes */ -#define FATTR4_WORD0_ACL (1UL << 12) -#define FATTR4_WORD0_ACLSUPPORT (1UL << 13) -#define FATTR4_WORD0_ARCHIVE (1UL << 14) -#define FATTR4_WORD0_CANSETTIME (1UL << 15) -#define FATTR4_WORD0_CASE_INSENSITIVE (1UL << 16) -#define FATTR4_WORD0_CASE_PRESERVING (1UL << 17) -#define FATTR4_WORD0_CHOWN_RESTRICTED (1UL << 18) -#define FATTR4_WORD0_FILEHANDLE (1UL << 19) -#define FATTR4_WORD0_FILEID (1UL << 20) -#define FATTR4_WORD0_FILES_AVAIL (1UL << 21) -#define FATTR4_WORD0_FILES_FREE (1UL << 22) -#define FATTR4_WORD0_FILES_TOTAL (1UL << 23) -#define FATTR4_WORD0_FS_LOCATIONS (1UL << 24) -#define FATTR4_WORD0_HIDDEN (1UL << 25) -#define FATTR4_WORD0_HOMOGENEOUS (1UL << 26) -#define FATTR4_WORD0_MAXFILESIZE (1UL << 27) -#define FATTR4_WORD0_MAXLINK (1UL << 28) -#define FATTR4_WORD0_MAXNAME (1UL << 29) -#define FATTR4_WORD0_MAXREAD (1UL << 30) -#define FATTR4_WORD0_MAXWRITE (1UL << 31) -#define FATTR4_WORD1_MIMETYPE (1UL << 0) -#define FATTR4_WORD1_MODE (1UL << 1) -#define FATTR4_WORD1_NO_TRUNC (1UL << 2) -#define FATTR4_WORD1_NUMLINKS (1UL << 3) -#define FATTR4_WORD1_OWNER (1UL << 4) -#define FATTR4_WORD1_OWNER_GROUP (1UL << 5) -#define FATTR4_WORD1_QUOTA_HARD (1UL << 6) -#define FATTR4_WORD1_QUOTA_SOFT (1UL << 7) -#define FATTR4_WORD1_QUOTA_USED (1UL << 8) -#define FATTR4_WORD1_RAWDEV (1UL << 9) -#define FATTR4_WORD1_SPACE_AVAIL (1UL << 10) -#define FATTR4_WORD1_SPACE_FREE (1UL << 11) -#define FATTR4_WORD1_SPACE_TOTAL (1UL << 12) -#define FATTR4_WORD1_SPACE_USED (1UL << 13) -#define FATTR4_WORD1_SYSTEM (1UL << 14) -#define FATTR4_WORD1_TIME_ACCESS (1UL << 15) -#define FATTR4_WORD1_TIME_ACCESS_SET (1UL << 16) -#define FATTR4_WORD1_TIME_BACKUP (1UL << 17) -#define FATTR4_WORD1_TIME_CREATE (1UL << 18) -#define FATTR4_WORD1_TIME_DELTA (1UL << 19) -#define FATTR4_WORD1_TIME_METADATA (1UL << 20) -#define FATTR4_WORD1_TIME_MODIFY (1UL << 21) -#define FATTR4_WORD1_TIME_MODIFY_SET (1UL << 22) -#define FATTR4_WORD1_MOUNTED_ON_FILEID (1UL << 23) -#define FATTR4_WORD1_DACL (1UL << 26) -#define FATTR4_WORD1_SACL (1UL << 27) -#define FATTR4_WORD1_FS_LAYOUT_TYPES (1UL << 30) -#define FATTR4_WORD2_LAYOUT_TYPES (1UL << 0) -#define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1) -#define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4) -#define FATTR4_WORD2_CLONE_BLKSIZE (1UL << 13) -#define FATTR4_WORD2_CHANGE_ATTR_TYPE (1UL << 15) -#define FATTR4_WORD2_SECURITY_LABEL (1UL << 16) -#define FATTR4_WORD2_MODE_UMASK (1UL << 17) -#define FATTR4_WORD2_XATTR_SUPPORT (1UL << 18) +#define FATTR4_WORD0_ACL BIT(FATTR4_ACL) +#define FATTR4_WORD0_ACLSUPPORT BIT(FATTR4_ACLSUPPORT) +#define FATTR4_WORD0_ARCHIVE BIT(FATTR4_ARCHIVE) +#define FATTR4_WORD0_CANSETTIME BIT(FATTR4_CANSETTIME) +#define FATTR4_WORD0_CASE_INSENSITIVE BIT(FATTR4_CASE_INSENSITIVE) +#define FATTR4_WORD0_CASE_PRESERVING BIT(FATTR4_CASE_PRESERVING) +#define FATTR4_WORD0_CHOWN_RESTRICTED BIT(FATTR4_CHOWN_RESTRICTED) +#define FATTR4_WORD0_FILEHANDLE BIT(FATTR4_FILEHANDLE) +#define FATTR4_WORD0_FILEID BIT(FATTR4_FILEID) +#define FATTR4_WORD0_FILES_AVAIL BIT(FATTR4_FILES_AVAIL) +#define FATTR4_WORD0_FILES_FREE BIT(FATTR4_FILES_FREE) +#define FATTR4_WORD0_FILES_TOTAL BIT(FATTR4_FILES_TOTAL) +#define FATTR4_WORD0_FS_LOCATIONS BIT(FATTR4_FS_LOCATIONS) +#define FATTR4_WORD0_HIDDEN BIT(FATTR4_HIDDEN) +#define FATTR4_WORD0_HOMOGENEOUS BIT(FATTR4_HOMOGENEOUS) +#define FATTR4_WORD0_MAXFILESIZE BIT(FATTR4_MAXFILESIZE) +#define FATTR4_WORD0_MAXLINK BIT(FATTR4_MAXLINK) +#define FATTR4_WORD0_MAXNAME BIT(FATTR4_MAXNAME) +#define FATTR4_WORD0_MAXREAD BIT(FATTR4_MAXREAD) +#define FATTR4_WORD0_MAXWRITE BIT(FATTR4_MAXWRITE) + +#define FATTR4_WORD1_MIMETYPE BIT(FATTR4_MIMETYPE - 32) +#define FATTR4_WORD1_MODE BIT(FATTR4_MODE - 32) +#define FATTR4_WORD1_NO_TRUNC BIT(FATTR4_NO_TRUNC - 32) +#define FATTR4_WORD1_NUMLINKS BIT(FATTR4_NUMLINKS - 32) +#define FATTR4_WORD1_OWNER BIT(FATTR4_OWNER - 32) +#define FATTR4_WORD1_OWNER_GROUP BIT(FATTR4_OWNER_GROUP - 32) +#define FATTR4_WORD1_QUOTA_HARD BIT(FATTR4_QUOTA_AVAIL_HARD - 32) +#define FATTR4_WORD1_QUOTA_SOFT BIT(FATTR4_QUOTA_AVAIL_SOFT - 32) +#define FATTR4_WORD1_QUOTA_USED BIT(FATTR4_QUOTA_USED - 32) +#define FATTR4_WORD1_RAWDEV BIT(FATTR4_RAWDEV - 32) +#define FATTR4_WORD1_SPACE_AVAIL BIT(FATTR4_SPACE_AVAIL - 32) +#define FATTR4_WORD1_SPACE_FREE BIT(FATTR4_SPACE_FREE - 32) +#define FATTR4_WORD1_SPACE_TOTAL BIT(FATTR4_SPACE_TOTAL - 32) +#define FATTR4_WORD1_SPACE_USED BIT(FATTR4_SPACE_USED - 32) +#define FATTR4_WORD1_SYSTEM BIT(FATTR4_SYSTEM - 32) +#define FATTR4_WORD1_TIME_ACCESS BIT(FATTR4_TIME_ACCESS - 32) +#define FATTR4_WORD1_TIME_ACCESS_SET BIT(FATTR4_TIME_ACCESS_SET - 32) +#define FATTR4_WORD1_TIME_BACKUP BIT(FATTR4_TIME_BACKUP - 32) +#define FATTR4_WORD1_TIME_CREATE BIT(FATTR4_TIME_CREATE - 32) +#define FATTR4_WORD1_TIME_DELTA BIT(FATTR4_TIME_DELTA - 32) +#define FATTR4_WORD1_TIME_METADATA BIT(FATTR4_TIME_METADATA - 32) +#define FATTR4_WORD1_TIME_MODIFY BIT(FATTR4_TIME_MODIFY - 32) +#define FATTR4_WORD1_TIME_MODIFY_SET BIT(FATTR4_TIME_MODIFY_SET - 32) +#define FATTR4_WORD1_MOUNTED_ON_FILEID BIT(FATTR4_MOUNTED_ON_FILEID - 32) +#define FATTR4_WORD1_DACL BIT(FATTR4_DACL - 32) +#define FATTR4_WORD1_SACL BIT(FATTR4_SACL - 32) +#define FATTR4_WORD1_FS_LAYOUT_TYPES BIT(FATTR4_FS_LAYOUT_TYPES - 32) + +#define FATTR4_WORD2_LAYOUT_TYPES BIT(FATTR4_LAYOUT_TYPES - 64) +#define FATTR4_WORD2_LAYOUT_BLKSIZE BIT(FATTR4_LAYOUT_BLKSIZE - 64) +#define FATTR4_WORD2_MDSTHRESHOLD BIT(FATTR4_MDSTHRESHOLD - 64) +#define FATTR4_WORD2_CLONE_BLKSIZE BIT(FATTR4_CLONE_BLKSIZE - 64) +#define FATTR4_WORD2_CHANGE_ATTR_TYPE BIT(FATTR4_CHANGE_ATTR_TYPE - 64) +#define FATTR4_WORD2_SECURITY_LABEL BIT(FATTR4_SEC_LABEL - 64) +#define FATTR4_WORD2_MODE_UMASK BIT(FATTR4_MODE_UMASK - 64) +#define FATTR4_WORD2_XATTR_SUPPORT BIT(FATTR4_XATTR_SUPPORT - 64) /* MDS threshold bitmap bits */ #define THRESHOLD_RD (1UL << 0) diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 20eeba8b009d..cd797e00fe35 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -48,6 +48,7 @@ struct nfs_client { #define NFS_CS_NOPING 6 /* - don't ping on connect */ #define NFS_CS_DS 7 /* - Server is a DS */ #define NFS_CS_REUSEPORT 8 /* - reuse src port on reconnect */ +#define NFS_CS_PNFS 9 /* - Server used for pnfs */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ @@ -238,6 +239,7 @@ struct nfs_server { struct list_head delegations; struct list_head ss_copies; + unsigned long delegation_gen; unsigned long mig_gen; unsigned long mig_status; #define NFS_MIG_IN_TRANSITION (1) diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index aa9f4c6ebe26..1c315f854ea8 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -157,7 +157,9 @@ extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); extern struct nfs_page *nfs_page_group_lock_head(struct nfs_page *req); extern int nfs_page_group_lock_subrequests(struct nfs_page *head); -extern void nfs_join_page_group(struct nfs_page *head, struct inode *inode); +extern void nfs_join_page_group(struct nfs_page *head, + struct nfs_commit_info *cinfo, + struct inode *inode); extern int nfs_page_group_lock(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 12bbb5c63664..539b57fbf3ce 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1772,7 +1772,7 @@ struct nfs_rpc_ops { void (*rename_rpc_prepare)(struct rpc_task *task, struct nfs_renamedata *); int (*rename_done) (struct rpc_task *task, struct inode *old_dir, struct inode *new_dir); int (*link) (struct inode *, struct inode *, const struct qstr *); - int (*symlink) (struct inode *, struct dentry *, struct page *, + int (*symlink) (struct inode *, struct dentry *, struct folio *, unsigned int, struct iattr *); int (*mkdir) (struct inode *, struct dentry *, struct iattr *); int (*rmdir) (struct inode *, const struct qstr *); diff --git a/include/linux/nmi.h b/include/linux/nmi.h index e3e6a64b98e0..e92e378df000 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -157,31 +157,31 @@ static inline void touch_nmi_watchdog(void) #ifdef arch_trigger_cpumask_backtrace static inline bool trigger_all_cpu_backtrace(void) { - arch_trigger_cpumask_backtrace(cpu_online_mask, false); + arch_trigger_cpumask_backtrace(cpu_online_mask, -1); return true; } -static inline bool trigger_allbutself_cpu_backtrace(void) +static inline bool trigger_allbutcpu_cpu_backtrace(int exclude_cpu) { - arch_trigger_cpumask_backtrace(cpu_online_mask, true); + arch_trigger_cpumask_backtrace(cpu_online_mask, exclude_cpu); return true; } static inline bool trigger_cpumask_backtrace(struct cpumask *mask) { - arch_trigger_cpumask_backtrace(mask, false); + arch_trigger_cpumask_backtrace(mask, -1); return true; } static inline bool trigger_single_cpu_backtrace(int cpu) { - arch_trigger_cpumask_backtrace(cpumask_of(cpu), false); + arch_trigger_cpumask_backtrace(cpumask_of(cpu), -1); return true; } /* generic implementation */ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask, - bool exclude_self, + int exclude_cpu, void (*raise)(cpumask_t *mask)); bool nmi_cpu_backtrace(struct pt_regs *regs); @@ -190,7 +190,7 @@ static inline bool trigger_all_cpu_backtrace(void) { return false; } -static inline bool trigger_allbutself_cpu_backtrace(void) +static inline bool trigger_allbutcpu_cpu_backtrace(int exclude_cpu) { return false; } diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 86544707236a..45702bdcbceb 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -73,9 +73,7 @@ struct raw_notifier_head { struct srcu_notifier_head { struct mutex mutex; -#ifdef CONFIG_TREE_SRCU struct srcu_usage srcuu; -#endif struct srcu_struct srcu; struct notifier_block __rcu *head; }; @@ -106,7 +104,6 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); #define RAW_NOTIFIER_INIT(name) { \ .head = NULL } -#ifdef CONFIG_TREE_SRCU #define SRCU_NOTIFIER_INIT(name, pcpu) \ { \ .mutex = __MUTEX_INITIALIZER(name.mutex), \ @@ -114,14 +111,6 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); .srcuu = __SRCU_USAGE_INIT(name.srcuu), \ .srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \ } -#else -#define SRCU_NOTIFIER_INIT(name, pcpu) \ - { \ - .mutex = __MUTEX_INITIALIZER(name.mutex), \ - .head = NULL, \ - .srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \ - } -#endif #define ATOMIC_NOTIFIER_HEAD(name) \ struct atomic_notifier_head name = \ diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index fee881cded01..771cb0285872 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -29,7 +29,7 @@ struct fs_struct; * nsproxy is copied. */ struct nsproxy { - atomic_t count; + refcount_t count; struct uts_namespace *uts_ns; struct ipc_namespace *ipc_ns; struct mnt_namespace *mnt_ns; @@ -102,14 +102,13 @@ int __init nsproxy_cache_init(void); static inline void put_nsproxy(struct nsproxy *ns) { - if (atomic_dec_and_test(&ns->count)) { + if (refcount_dec_and_test(&ns->count)) free_nsproxy(ns); - } } static inline void get_nsproxy(struct nsproxy *ns) { - atomic_inc(&ns->count); + refcount_inc(&ns->count); } #endif diff --git a/include/linux/numa.h b/include/linux/numa.h index 59df211d051f..a904861de800 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -12,6 +12,7 @@ #define MAX_NUMNODES (1 << NODES_SHIFT) #define NUMA_NO_NODE (-1) +#define NUMA_NO_MEMBLK (-1) /* optionally keep NUMA memory info available post init */ #ifdef CONFIG_NUMA_KEEP_MEMINFO @@ -25,7 +26,7 @@ #include <asm/sparsemem.h> /* Generic implementation available */ -int numa_map_to_online_node(int node); +int numa_nearest_node(int node, unsigned int state); #ifndef memory_add_physaddr_to_nid static inline int memory_add_physaddr_to_nid(u64 start) @@ -43,11 +44,18 @@ static inline int phys_to_target_node(u64 start) return 0; } #endif +#ifndef numa_fill_memblks +static inline int __init numa_fill_memblks(u64 start, u64 end) +{ + return NUMA_NO_MEMBLK; +} +#endif #else /* !CONFIG_NUMA */ -static inline int numa_map_to_online_node(int node) +static inline int numa_nearest_node(int node, unsigned int state) { return NUMA_NO_NODE; } + static inline int memory_add_physaddr_to_nid(u64 start) { return 0; @@ -58,6 +66,8 @@ static inline int phys_to_target_node(u64 start) } #endif +#define numa_map_to_online_node(node) numa_nearest_node(node, N_ONLINE) + #ifdef CONFIG_HAVE_ARCH_NODE_DEV_GROUP extern const struct attribute_group arch_node_dev_group; #endif diff --git a/include/linux/nvme-auth.h b/include/linux/nvme-auth.h index dcb8030062dd..c1d0bc5d9624 100644 --- a/include/linux/nvme-auth.h +++ b/include/linux/nvme-auth.h @@ -9,9 +9,9 @@ #include <crypto/kpp.h> struct nvme_dhchap_key { - u8 *key; size_t len; u8 hash; + u8 key[]; }; u32 nvme_auth_get_seqnum(void); @@ -24,10 +24,13 @@ const char *nvme_auth_digest_name(u8 hmac_id); size_t nvme_auth_hmac_hash_len(u8 hmac_id); u8 nvme_auth_hmac_id(const char *hmac_name); +u32 nvme_auth_key_struct_size(u32 key_len); struct nvme_dhchap_key *nvme_auth_extract_key(unsigned char *secret, u8 key_hash); void nvme_auth_free_key(struct nvme_dhchap_key *key); -u8 *nvme_auth_transform_key(struct nvme_dhchap_key *key, char *nqn); +struct nvme_dhchap_key *nvme_auth_alloc_key(u32 len, u8 hash); +struct nvme_dhchap_key *nvme_auth_transform_key( + struct nvme_dhchap_key *key, char *nqn); int nvme_auth_generate_key(u8 *secret, struct nvme_dhchap_key **ret_key); int nvme_auth_augmented_challenge(u8 hmac_id, u8 *skey, size_t skey_len, u8 *challenge, u8 *aug, size_t hlen); diff --git a/include/linux/nvme-keyring.h b/include/linux/nvme-keyring.h new file mode 100644 index 000000000000..e10333d78dbb --- /dev/null +++ b/include/linux/nvme-keyring.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023 Hannes Reinecke, SUSE Labs + */ + +#ifndef _NVME_KEYRING_H +#define _NVME_KEYRING_H + +#if IS_ENABLED(CONFIG_NVME_KEYRING) + +key_serial_t nvme_tls_psk_default(struct key *keyring, + const char *hostnqn, const char *subnqn); + +key_serial_t nvme_keyring_id(void); + +#else + +static inline key_serial_t nvme_tls_psk_default(struct key *keyring, + const char *hostnqn, const char *subnqn) +{ + return 0; +} +static inline key_serial_t nvme_keyring_id(void) +{ + return 0; +} +#endif /* !CONFIG_NVME_KEYRING */ +#endif /* _NVME_KEYRING_H */ diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h index 57ebe1267f7f..e07e8978d691 100644 --- a/include/linux/nvme-tcp.h +++ b/include/linux/nvme-tcp.h @@ -18,6 +18,12 @@ enum nvme_tcp_pfv { NVME_TCP_PFV_1_0 = 0x0, }; +enum nvme_tcp_tls_cipher { + NVME_TCP_TLS_CIPHER_INVALID = 0, + NVME_TCP_TLS_CIPHER_SHA256 = 1, + NVME_TCP_TLS_CIPHER_SHA384 = 2, +}; + enum nvme_tcp_fatal_error_status { NVME_TCP_FES_INVALID_PDU_HDR = 0x01, NVME_TCP_FES_PDU_SEQ_ERR = 0x02, diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 26dd3f859d9d..44325c068b6a 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -108,6 +108,13 @@ enum { NVMF_RDMA_CMS_RDMA_CM = 1, /* Sockets based endpoint addressing */ }; +/* TSAS SECTYPE for TCP transport */ +enum { + NVMF_TCP_SECTYPE_NONE = 0, /* No Security */ + NVMF_TCP_SECTYPE_TLS12 = 1, /* TLSv1.2, NVMe-oF 1.1 and NVMe-TCP 3.6.1.1 */ + NVMF_TCP_SECTYPE_TLS13 = 2, /* TLSv1.3, NVMe-oF 1.1 and NVMe-TCP 3.6.1.1 */ +}; + #define NVME_AQ_DEPTH 32 #define NVME_NR_AEN_COMMANDS 1 #define NVME_AQ_BLK_MQ_DEPTH (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS) @@ -1493,6 +1500,9 @@ struct nvmf_disc_rsp_page_entry { __u16 pkey; __u8 resv10[246]; } rdma; + struct tcp { + __u8 sectype; + } tcp; } tsas; }; @@ -1722,7 +1732,7 @@ struct nvmf_auth_dhchap_success1_data { __u8 rsvd2; __u8 rvalid; __u8 rsvd3[7]; - /* 'hl' bytes of response value if 'rvalid' is set */ + /* 'hl' bytes of response value */ __u8 rval[]; }; diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h index fa030d93b768..6ec4b9743e25 100644 --- a/include/linux/nvmem-consumer.h +++ b/include/linux/nvmem-consumer.h @@ -43,6 +43,8 @@ enum { NVMEM_REMOVE, NVMEM_CELL_ADD, NVMEM_CELL_REMOVE, + NVMEM_LAYOUT_ADD, + NVMEM_LAYOUT_REMOVE, }; #if IS_ENABLED(CONFIG_NVMEM) @@ -125,6 +127,12 @@ static inline int nvmem_cell_write(struct nvmem_cell *cell, return -EOPNOTSUPP; } +static inline int nvmem_cell_read_u8(struct device *dev, + const char *cell_id, u8 *val) +{ + return -EOPNOTSUPP; +} + static inline int nvmem_cell_read_u16(struct device *dev, const char *cell_id, u16 *val) { @@ -256,7 +264,7 @@ static inline struct nvmem_device *of_nvmem_device_get(struct device_node *np, static inline struct device_node * of_nvmem_layout_get_container(struct nvmem_device *nvmem) { - return ERR_PTR(-EOPNOTSUPP); + return NULL; } #endif /* CONFIG_NVMEM && CONFIG_OF */ diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index dae26295e6be..e3930835235b 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -82,13 +82,13 @@ struct nvmem_cell_info { * @owner: Pointer to exporter module. Used for refcounting. * @cells: Optional array of pre-defined NVMEM cells. * @ncells: Number of elements in cells. + * @add_legacy_fixed_of_cells: Read fixed NVMEM cells from old OF syntax. * @keepout: Optional array of keepout ranges (sorted ascending by start). * @nkeepout: Number of elements in the keepout array. * @type: Type of the nvmem storage * @read_only: Device is read-only. * @root_only: Device is accessibly to root only. * @of_node: If given, this will be used instead of the parent's of_node. - * @no_of_node: Device should not use the parent's of_node even if it's !NULL. * @reg_read: Callback to read data. * @reg_write: Callback to write data. * @size: Device size. @@ -112,6 +112,7 @@ struct nvmem_config { struct module *owner; const struct nvmem_cell_info *cells; int ncells; + bool add_legacy_fixed_of_cells; const struct nvmem_keepout *keepout; unsigned int nkeepout; enum nvmem_type type; @@ -120,7 +121,6 @@ struct nvmem_config { bool ignore_wp; struct nvmem_layout *layout; struct device_node *of_node; - bool no_of_node; nvmem_reg_read_t reg_read; nvmem_reg_write_t reg_write; int size; diff --git a/include/linux/objpool.h b/include/linux/objpool.h new file mode 100644 index 000000000000..15aff4a17f0c --- /dev/null +++ b/include/linux/objpool.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_OBJPOOL_H +#define _LINUX_OBJPOOL_H + +#include <linux/types.h> +#include <linux/refcount.h> + +/* + * objpool: ring-array based lockless MPMC queue + * + * Copyright: wuqiang.matt@bytedance.com,mhiramat@kernel.org + * + * objpool is a scalable implementation of high performance queue for + * object allocation and reclamation, such as kretprobe instances. + * + * With leveraging percpu ring-array to mitigate hot spots of memory + * contention, it delivers near-linear scalability for high parallel + * scenarios. The objpool is best suited for the following cases: + * 1) Memory allocation or reclamation are prohibited or too expensive + * 2) Consumers are of different priorities, such as irqs and threads + * + * Limitations: + * 1) Maximum objects (capacity) is fixed after objpool creation + * 2) All pre-allocated objects are managed in percpu ring array, + * which consumes more memory than linked lists + */ + +/** + * struct objpool_slot - percpu ring array of objpool + * @head: head sequence of the local ring array (to retrieve at) + * @tail: tail sequence of the local ring array (to append at) + * @last: the last sequence number marked as ready for retrieve + * @mask: bits mask for modulo capacity to compute array indexes + * @entries: object entries on this slot + * + * Represents a cpu-local array-based ring buffer, its size is specialized + * during initialization of object pool. The percpu objpool node is to be + * allocated from local memory for NUMA system, and to be kept compact in + * continuous memory: CPU assigned number of objects are stored just after + * the body of objpool_node. + * + * Real size of the ring array is far too smaller than the value range of + * head and tail, typed as uint32_t: [0, 2^32), so only lower bits (mask) + * of head and tail are used as the actual position in the ring array. In + * general the ring array is acting like a small sliding window, which is + * always moving forward in the loop of [0, 2^32). + */ +struct objpool_slot { + uint32_t head; + uint32_t tail; + uint32_t last; + uint32_t mask; + void *entries[]; +} __packed; + +struct objpool_head; + +/* + * caller-specified callback for object initial setup, it's only called + * once for each object (just after the memory allocation of the object) + */ +typedef int (*objpool_init_obj_cb)(void *obj, void *context); + +/* caller-specified cleanup callback for objpool destruction */ +typedef int (*objpool_fini_cb)(struct objpool_head *head, void *context); + +/** + * struct objpool_head - object pooling metadata + * @obj_size: object size, aligned to sizeof(void *) + * @nr_objs: total objs (to be pre-allocated with objpool) + * @nr_cpus: local copy of nr_cpu_ids + * @capacity: max objs can be managed by one objpool_slot + * @gfp: gfp flags for kmalloc & vmalloc + * @ref: refcount of objpool + * @flags: flags for objpool management + * @cpu_slots: pointer to the array of objpool_slot + * @release: resource cleanup callback + * @context: caller-provided context + */ +struct objpool_head { + int obj_size; + int nr_objs; + int nr_cpus; + int capacity; + gfp_t gfp; + refcount_t ref; + unsigned long flags; + struct objpool_slot **cpu_slots; + objpool_fini_cb release; + void *context; +}; + +#define OBJPOOL_NR_OBJECT_MAX (1UL << 24) /* maximum numbers of total objects */ +#define OBJPOOL_OBJECT_SIZE_MAX (1UL << 16) /* maximum size of an object */ + +/** + * objpool_init() - initialize objpool and pre-allocated objects + * @pool: the object pool to be initialized, declared by caller + * @nr_objs: total objects to be pre-allocated by this object pool + * @object_size: size of an object (should be > 0) + * @gfp: flags for memory allocation (via kmalloc or vmalloc) + * @context: user context for object initialization callback + * @objinit: object initialization callback for extra setup + * @release: cleanup callback for extra cleanup task + * + * return value: 0 for success, otherwise error code + * + * All pre-allocated objects are to be zeroed after memory allocation. + * Caller could do extra initialization in objinit callback. objinit() + * will be called just after slot allocation and called only once for + * each object. After that the objpool won't touch any content of the + * objects. It's caller's duty to perform reinitialization after each + * pop (object allocation) or do clearance before each push (object + * reclamation). + */ +int objpool_init(struct objpool_head *pool, int nr_objs, int object_size, + gfp_t gfp, void *context, objpool_init_obj_cb objinit, + objpool_fini_cb release); + +/** + * objpool_pop() - allocate an object from objpool + * @pool: object pool + * + * return value: object ptr or NULL if failed + */ +void *objpool_pop(struct objpool_head *pool); + +/** + * objpool_push() - reclaim the object and return back to objpool + * @obj: object ptr to be pushed to objpool + * @pool: object pool + * + * return: 0 or error code (it fails only when user tries to push + * the same object multiple times or wrong "objects" into objpool) + */ +int objpool_push(void *obj, struct objpool_head *pool); + +/** + * objpool_drop() - discard the object and deref objpool + * @obj: object ptr to be discarded + * @pool: object pool + * + * return: 0 if objpool was released; -EAGAIN if there are still + * outstanding objects + * + * objpool_drop is normally for the release of outstanding objects + * after objpool cleanup (objpool_fini). Thinking of this example: + * kretprobe is unregistered and objpool_fini() is called to release + * all remained objects, but there are still objects being used by + * unfinished kretprobes (like blockable function: sys_accept). So + * only when the last outstanding object is dropped could the whole + * objpool be released along with the call of objpool_drop() + */ +int objpool_drop(void *obj, struct objpool_head *pool); + +/** + * objpool_free() - release objpool forcely (all objects to be freed) + * @pool: object pool to be released + */ +void objpool_free(struct objpool_head *pool); + +/** + * objpool_fini() - deref object pool (also releasing unused objects) + * @pool: object pool to be dereferenced + * + * objpool_fini() will try to release all remained free objects and + * then drop an extra reference of the objpool. If all objects are + * already returned to objpool (so called synchronous use cases), + * the objpool itself will be freed together. But if there are still + * outstanding objects (so called asynchronous use cases, such like + * blockable kretprobe), the objpool won't be released until all + * the outstanding objects are dropped, but the caller must assure + * there are no concurrent objpool_push() on the fly. Normally RCU + * is being required to make sure all ongoing objpool_push() must + * be finished before calling objpool_fini(), so does test_objpool, + * kretprobe or rethook + */ +void objpool_fini(struct objpool_head *pool); + +#endif /* _LINUX_OBJPOOL_H */ diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 03f82c2c2ebf..33212e93f4a6 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -48,13 +48,13 @@ #define ANNOTATE_NOENDBR \ "986: \n\t" \ ".pushsection .discard.noendbr\n\t" \ - ".long 986b - .\n\t" \ + ".long 986b\n\t" \ ".popsection\n\t" #define ASM_REACHABLE \ "998:\n\t" \ ".pushsection .discard.reachable\n\t" \ - ".long 998b - .\n\t" \ + ".long 998b\n\t" \ ".popsection\n\t" #else /* __ASSEMBLY__ */ @@ -66,7 +66,7 @@ #define ANNOTATE_INTRA_FUNCTION_CALL \ 999: \ .pushsection .discard.intra_function_calls; \ - .long 999b - .; \ + .long 999b; \ .popsection; /* @@ -118,7 +118,7 @@ .macro ANNOTATE_NOENDBR .Lhere_\@: .pushsection .discard.noendbr - .long .Lhere_\@ - . + .long .Lhere_\@ .popsection .endm @@ -130,7 +130,8 @@ * it will be ignored. */ .macro VALIDATE_UNRET_BEGIN -#if defined(CONFIG_NOINSTR_VALIDATION) && defined(CONFIG_CPU_UNRET_ENTRY) +#if defined(CONFIG_NOINSTR_VALIDATION) && \ + (defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO)) .Lhere_\@: .pushsection .discard.validate_unret .long .Lhere_\@ - . @@ -141,7 +142,7 @@ .macro REACHABLE .Lhere_\@: .pushsection .discard.reachable - .long .Lhere_\@ - . + .long .Lhere_\@ .popsection .endm diff --git a/include/linux/of.h b/include/linux/of.h index 6ecde0515677..6a9ddf20e79a 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -96,10 +96,12 @@ struct of_reconfig_data { struct property *old_prop; }; +extern const struct kobj_type of_node_ktype; +extern const struct fwnode_operations of_fwnode_ops; + /** * of_node_init - initialize a devicetree node * @node: Pointer to device node that has been created by kzalloc() - * @phandle_name: Name of property holding a phandle value * * On return the device_node refcount is set to one. Use of_node_put() * on @node when done to free the memory allocated for it. If the node @@ -107,9 +109,6 @@ struct of_reconfig_data { * whether to free the memory will be done by node->release(), which is * of_node_release(). */ -/* initialize a node */ -extern const struct kobj_type of_node_ktype; -extern const struct fwnode_operations of_fwnode_ops; static inline void of_node_init(struct device_node *node) { #if defined(CONFIG_OF_KOBJ) @@ -1580,6 +1579,29 @@ static inline int of_changeset_update_property(struct of_changeset *ocs, { return of_changeset_action(ocs, OF_RECONFIG_UPDATE_PROPERTY, np, prop); } + +struct device_node *of_changeset_create_node(struct of_changeset *ocs, + struct device_node *parent, + const char *full_name); +int of_changeset_add_prop_string(struct of_changeset *ocs, + struct device_node *np, + const char *prop_name, const char *str); +int of_changeset_add_prop_string_array(struct of_changeset *ocs, + struct device_node *np, + const char *prop_name, + const char **str_array, size_t sz); +int of_changeset_add_prop_u32_array(struct of_changeset *ocs, + struct device_node *np, + const char *prop_name, + const u32 *array, size_t sz); +static inline int of_changeset_add_prop_u32(struct of_changeset *ocs, + struct device_node *np, + const char *prop_name, + const u32 val) +{ + return of_changeset_add_prop_u32_array(ocs, np, prop_name, &val, 1); +} + #else /* CONFIG_OF_DYNAMIC */ static inline int of_reconfig_notifier_register(struct notifier_block *nb) { @@ -1645,7 +1667,7 @@ struct of_overlay_notify_data { #ifdef CONFIG_OF_OVERLAY int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size, - int *ovcs_id); + int *ovcs_id, struct device_node *target_base); int of_overlay_remove(int *ovcs_id); int of_overlay_remove_all(void); @@ -1654,8 +1676,8 @@ int of_overlay_notifier_unregister(struct notifier_block *nb); #else -static inline int of_overlay_fdt_apply(void *overlay_fdt, u32 overlay_fdt_size, - int *ovcs_id) +static inline int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size, + int *ovcs_id, struct device_node *target_base) { return -ENOTSUPP; } diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index d8045bcfc35e..fadfea575485 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -127,10 +127,4 @@ static inline int devm_of_platform_populate(struct device *dev) static inline void devm_of_platform_depopulate(struct device *dev) { } #endif -#if defined(CONFIG_OF_DYNAMIC) && defined(CONFIG_OF_ADDRESS) -extern void of_platform_register_reconfig_notifier(void); -#else -static inline void of_platform_register_reconfig_notifier(void) { } -#endif - #endif /* _LINUX_OF_PLATFORM_H */ diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h index 0f4a8903922a..3921fbed0b28 100644 --- a/include/linux/oid_registry.h +++ b/include/linux/oid_registry.h @@ -17,12 +17,10 @@ * build_OID_registry.pl to generate the data for look_up_OID(). */ enum OID { - OID_id_dsa_with_sha1, /* 1.2.840.10030.4.3 */ OID_id_dsa, /* 1.2.840.10040.4.1 */ OID_id_ecPublicKey, /* 1.2.840.10045.2.1 */ OID_id_prime192v1, /* 1.2.840.10045.3.1.1 */ OID_id_prime256v1, /* 1.2.840.10045.3.1.7 */ - OID_id_ecdsa_with_sha1, /* 1.2.840.10045.4.1 */ OID_id_ecdsa_with_sha224, /* 1.2.840.10045.4.3.1 */ OID_id_ecdsa_with_sha256, /* 1.2.840.10045.4.3.2 */ OID_id_ecdsa_with_sha384, /* 1.2.840.10045.4.3.3 */ @@ -30,10 +28,6 @@ enum OID { /* PKCS#1 {iso(1) member-body(2) us(840) rsadsi(113549) pkcs(1) pkcs-1(1)} */ OID_rsaEncryption, /* 1.2.840.113549.1.1.1 */ - OID_md2WithRSAEncryption, /* 1.2.840.113549.1.1.2 */ - OID_md3WithRSAEncryption, /* 1.2.840.113549.1.1.3 */ - OID_md4WithRSAEncryption, /* 1.2.840.113549.1.1.4 */ - OID_sha1WithRSAEncryption, /* 1.2.840.113549.1.1.5 */ OID_sha256WithRSAEncryption, /* 1.2.840.113549.1.1.11 */ OID_sha384WithRSAEncryption, /* 1.2.840.113549.1.1.12 */ OID_sha512WithRSAEncryption, /* 1.2.840.113549.1.1.13 */ @@ -49,11 +43,6 @@ enum OID { OID_smimeCapabilites, /* 1.2.840.113549.1.9.15 */ OID_smimeAuthenticatedAttrs, /* 1.2.840.113549.1.9.16.2.11 */ - /* {iso(1) member-body(2) us(840) rsadsi(113549) digestAlgorithm(2)} */ - OID_md2, /* 1.2.840.113549.2.2 */ - OID_md4, /* 1.2.840.113549.2.4 */ - OID_md5, /* 1.2.840.113549.2.5 */ - OID_mskrb5, /* 1.2.840.48018.1.2.2 */ OID_krb5, /* 1.2.840.113554.1.2.2 */ OID_krb5u2u, /* 1.2.840.113554.1.2.2.3 */ @@ -67,6 +56,7 @@ enum OID { OID_msOutlookExpress, /* 1.3.6.1.4.1.311.16.4 */ OID_ntlmssp, /* 1.3.6.1.4.1.311.2.2.10 */ + OID_negoex, /* 1.3.6.1.4.1.311.2.2.30 */ OID_spnego, /* 1.3.6.1.5.5.2 */ @@ -74,7 +64,6 @@ enum OID { OID_PKU2U, /* 1.3.5.1.5.2.7 */ OID_Scram, /* 1.3.6.1.5.5.14 */ OID_certAuthInfoAccess, /* 1.3.6.1.5.5.7.1.1 */ - OID_sha1, /* 1.3.14.3.2.26 */ OID_id_ansip384r1, /* 1.3.132.0.34 */ OID_sha256, /* 2.16.840.1.101.3.4.2.1 */ OID_sha384, /* 2.16.840.1.101.3.4.2.2 */ @@ -140,6 +129,17 @@ enum OID { OID_TPMImportableKey, /* 2.23.133.10.1.4 */ OID_TPMSealedData, /* 2.23.133.10.1.5 */ + /* CSOR FIPS-202 SHA-3 */ + OID_sha3_256, /* 2.16.840.1.101.3.4.2.8 */ + OID_sha3_384, /* 2.16.840.1.101.3.4.2.9 */ + OID_sha3_512, /* 2.16.840.1.101.3.4.2.10 */ + OID_id_ecdsa_with_sha3_256, /* 2.16.840.1.101.3.4.3.10 */ + OID_id_ecdsa_with_sha3_384, /* 2.16.840.1.101.3.4.3.11 */ + OID_id_ecdsa_with_sha3_512, /* 2.16.840.1.101.3.4.3.12 */ + OID_id_rsassa_pkcs1_v1_5_with_sha3_256, /* 2.16.840.1.101.3.4.3.14 */ + OID_id_rsassa_pkcs1_v1_5_with_sha3_384, /* 2.16.840.1.101.3.4.3.15 */ + OID_id_rsassa_pkcs1_v1_5_with_sha3_512, /* 2.16.840.1.101.3.4.3.16 */ + OID__NR }; diff --git a/include/linux/overflow.h b/include/linux/overflow.h index f9b60313eaea..7b5cf4a5cd19 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -309,4 +309,39 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) #define struct_size_t(type, member, count) \ struct_size((type *)NULL, member, count) +/** + * _DEFINE_FLEX() - helper macro for DEFINE_FLEX() family. + * Enables caller macro to pass (different) initializer. + * + * @type: structure type name, including "struct" keyword. + * @name: Name for a variable to define. + * @member: Name of the array member. + * @count: Number of elements in the array; must be compile-time const. + * @initializer: initializer expression (could be empty for no init). + */ +#define _DEFINE_FLEX(type, name, member, count, initializer) \ + _Static_assert(__builtin_constant_p(count), \ + "onstack flex array members require compile-time const count"); \ + union { \ + u8 bytes[struct_size_t(type, member, count)]; \ + type obj; \ + } name##_u initializer; \ + type *name = (type *)&name##_u + +/** + * DEFINE_FLEX() - Define an on-stack instance of structure with a trailing + * flexible array member. + * + * @type: structure type name, including "struct" keyword. + * @name: Name for a variable to define. + * @member: Name of the array member. + * @count: Number of elements in the array; must be compile-time const. + * + * Define a zeroed, on-stack, instance of @type structure with a trailing + * flexible array member. + * Use __struct_size(@name) to get compile-time size of it afterwards. + */ +#define DEFINE_FLEX(type, name, member, count) \ + _DEFINE_FLEX(type, name, member, count, = {}) + #endif /* __LINUX_OVERFLOW_H */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 92a2063a0a23..a88e64acebfe 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -99,13 +99,15 @@ */ enum pageflags { PG_locked, /* Page is locked. Don't touch. */ + PG_writeback, /* Page is under writeback */ PG_referenced, PG_uptodate, PG_dirty, PG_lru, + PG_head, /* Must be in bit 6 */ + PG_waiters, /* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */ PG_active, PG_workingset, - PG_waiters, /* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */ PG_error, PG_slab, PG_owner_priv_1, /* Owner use. If pagecache, fs may use*/ @@ -113,8 +115,6 @@ enum pageflags { PG_reserved, PG_private, /* If pagecache, has fs-private data */ PG_private_2, /* If pagecache, has fs aux data */ - PG_writeback, /* Page is under writeback */ - PG_head, /* A head page */ PG_mappedtodisk, /* Has blocks allocated on-disk */ PG_reclaim, /* To be reclaimed asap */ PG_swapbacked, /* Page is backed by RAM/swap */ @@ -171,15 +171,6 @@ enum pageflags { /* Remapped by swiotlb-xen. */ PG_xen_remapped = PG_owner_priv_1, -#ifdef CONFIG_MEMORY_FAILURE - /* - * Compound pages. Stored in first tail page's flags. - * Indicates that at least one subpage is hwpoisoned in the - * THP. - */ - PG_has_hwpoisoned = PG_error, -#endif - /* non-lru isolated movable page */ PG_isolated = PG_reclaim, @@ -190,6 +181,17 @@ enum pageflags { /* For self-hosted memmap pages */ PG_vmemmap_self_hosted = PG_owner_priv_1, #endif + + /* + * Flags only valid for compound pages. Stored in first tail page's + * flags word. Cannot use the first 8 flags or any flag marked as + * PF_ANY. + */ + + /* At least one page in this folio has the hwpoison flag set */ + PG_has_hwpoisoned = PG_error, + PG_hugetlb = PG_active, + PG_large_rmappable = PG_workingset, /* anon or file-backed */ }; #define PAGEFLAGS_MASK ((1UL << NR_PAGEFLAGS) - 1) @@ -691,6 +693,25 @@ TESTPAGEFLAG_FALSE(Ksm, ksm) u64 stable_page_flags(struct page *page); /** + * folio_xor_flags_has_waiters - Change some folio flags. + * @folio: The folio. + * @mask: Bits set in this word will be changed. + * + * This must only be used for flags which are changed with the folio + * lock held. For example, it is unsafe to use for PG_dirty as that + * can be set without the folio lock held. It can also only be used + * on flags which are in the range 0-6 as some of the implementations + * only affect those bits. + * + * Return: Whether there are tasks waiting on the folio. + */ +static inline bool folio_xor_flags_has_waiters(struct folio *folio, + unsigned long mask) +{ + return xor_unlock_is_negative_byte(mask, folio_flags(folio, 0)); +} + +/** * folio_test_uptodate - Is this folio up to date? * @folio: The folio. * @@ -806,13 +827,32 @@ static inline void ClearPageCompound(struct page *page) BUG_ON(!PageHead(page)); ClearPageHead(page); } +PAGEFLAG(LargeRmappable, large_rmappable, PF_SECOND) +#else +TESTPAGEFLAG_FALSE(LargeRmappable, large_rmappable) #endif #define PG_head_mask ((1UL << PG_head)) #ifdef CONFIG_HUGETLB_PAGE int PageHuge(struct page *page); -bool folio_test_hugetlb(struct folio *folio); +SETPAGEFLAG(HugeTLB, hugetlb, PF_SECOND) +CLEARPAGEFLAG(HugeTLB, hugetlb, PF_SECOND) + +/** + * folio_test_hugetlb - Determine if the folio belongs to hugetlbfs + * @folio: The folio to test. + * + * Context: Any context. Caller should have a reference on the folio to + * prevent it from being turned into a tail page. + * Return: True for hugetlbfs folios, false for anon folios or folios + * belonging to other filesystems. + */ +static inline bool folio_test_hugetlb(struct folio *folio) +{ + return folio_test_large(folio) && + test_bit(PG_hugetlb, folio_flags(folio, 1)); +} #else TESTPAGEFLAG_FALSE(Huge, hugetlb) #endif @@ -832,11 +872,6 @@ static inline int PageTransHuge(struct page *page) return PageHead(page); } -static inline bool folio_test_transhuge(struct folio *folio) -{ - return folio_test_head(folio); -} - /* * PageTransCompound returns true for both transparent huge pages * and hugetlbfs pages, so it should only be called when it's known @@ -908,6 +943,8 @@ static inline bool is_page_hwpoison(struct page *page) #define PageType(page, flag) \ ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) +#define folio_test_type(folio, flag) \ + ((folio->page.page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) static inline int page_type_has_type(unsigned int page_type) { @@ -919,27 +956,41 @@ static inline int page_has_type(struct page *page) return page_type_has_type(page->page_type); } -#define PAGE_TYPE_OPS(uname, lname) \ -static __always_inline int Page##uname(struct page *page) \ +#define PAGE_TYPE_OPS(uname, lname, fname) \ +static __always_inline int Page##uname(const struct page *page) \ { \ return PageType(page, PG_##lname); \ } \ +static __always_inline int folio_test_##fname(const struct folio *folio)\ +{ \ + return folio_test_type(folio, PG_##lname); \ +} \ static __always_inline void __SetPage##uname(struct page *page) \ { \ VM_BUG_ON_PAGE(!PageType(page, 0), page); \ page->page_type &= ~PG_##lname; \ } \ +static __always_inline void __folio_set_##fname(struct folio *folio) \ +{ \ + VM_BUG_ON_FOLIO(!folio_test_type(folio, 0), folio); \ + folio->page.page_type &= ~PG_##lname; \ +} \ static __always_inline void __ClearPage##uname(struct page *page) \ { \ VM_BUG_ON_PAGE(!Page##uname(page), page); \ page->page_type |= PG_##lname; \ -} +} \ +static __always_inline void __folio_clear_##fname(struct folio *folio) \ +{ \ + VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \ + folio->page.page_type |= PG_##lname; \ +} \ /* * PageBuddy() indicates that the page is free and in the buddy system * (see mm/page_alloc.c). */ -PAGE_TYPE_OPS(Buddy, buddy) +PAGE_TYPE_OPS(Buddy, buddy, buddy) /* * PageOffline() indicates that the page is logically offline although the @@ -963,7 +1014,7 @@ PAGE_TYPE_OPS(Buddy, buddy) * pages should check PageOffline() and synchronize with such drivers using * page_offline_freeze()/page_offline_thaw(). */ -PAGE_TYPE_OPS(Offline, offline) +PAGE_TYPE_OPS(Offline, offline, offline) extern void page_offline_freeze(void); extern void page_offline_thaw(void); @@ -973,12 +1024,12 @@ extern void page_offline_end(void); /* * Marks pages in use as page tables. */ -PAGE_TYPE_OPS(Table, table) +PAGE_TYPE_OPS(Table, table, pgtable) /* * Marks guardpages used with debug_pagealloc. */ -PAGE_TYPE_OPS(Guard, guard) +PAGE_TYPE_OPS(Guard, guard, guard) extern bool is_free_buddy_page(struct page *page); @@ -1040,6 +1091,14 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) #define PAGE_FLAGS_CHECK_AT_PREP \ ((PAGEFLAGS_MASK & ~__PG_HWPOISON) | LRU_GEN_MASK | LRU_REFS_MASK) +/* + * Flags stored in the second page of a compound page. They may overlap + * the CHECK_AT_FREE flags above, so need to be cleared. + */ +#define PAGE_FLAGS_SECOND \ + (0xffUL /* order */ | 1UL << PG_has_hwpoisoned | \ + 1UL << PG_hugetlb | 1UL << PG_large_rmappable) + #define PAGE_FLAGS_PRIVATE \ (1UL << PG_private | 1UL << PG_private_2) /** diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index 67314f648aeb..be98564191e6 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -8,6 +8,7 @@ struct pglist_data; +#ifdef CONFIG_PAGE_EXTENSION /** * struct page_ext_operations - per page_ext client operations * @offset: Offset to the client's data within page_ext. Offset is returned to @@ -29,8 +30,6 @@ struct page_ext_operations { bool need_shared_flags; }; -#ifdef CONFIG_PAGE_EXTENSION - /* * The page_ext_flags users must set need_shared_flags to true. */ @@ -82,6 +81,12 @@ static inline void page_ext_init(void) extern struct page_ext *page_ext_get(struct page *page); extern void page_ext_put(struct page_ext *page_ext); +static inline void *page_ext_data(struct page_ext *page_ext, + struct page_ext_operations *ops) +{ + return (void *)(page_ext) + ops->offset; +} + static inline struct page_ext *page_ext_next(struct page_ext *curr) { void *next = curr; diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h index 5cb7bd2078ec..d8f344840643 100644 --- a/include/linux/page_idle.h +++ b/include/linux/page_idle.h @@ -144,9 +144,4 @@ static inline void set_page_idle(struct page *page) { folio_set_idle(page_folio(page)); } - -static inline void clear_page_idle(struct page *page) -{ - folio_clear_idle(page_folio(page)); -} #endif /* _LINUX_MM_PAGE_IDLE_H */ diff --git a/include/linux/page_table_check.h b/include/linux/page_table_check.h index 01e16c7696ec..6722941c7cb8 100644 --- a/include/linux/page_table_check.h +++ b/include/linux/page_table_check.h @@ -14,18 +14,13 @@ extern struct static_key_true page_table_check_disabled; extern struct page_ext_operations page_table_check_ops; void __page_table_check_zero(struct page *page, unsigned int order); -void __page_table_check_pte_clear(struct mm_struct *mm, unsigned long addr, - pte_t pte); -void __page_table_check_pmd_clear(struct mm_struct *mm, unsigned long addr, - pmd_t pmd); -void __page_table_check_pud_clear(struct mm_struct *mm, unsigned long addr, - pud_t pud); -void __page_table_check_pte_set(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte); -void __page_table_check_pmd_set(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp, pmd_t pmd); -void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr, - pud_t *pudp, pud_t pud); +void __page_table_check_pte_clear(struct mm_struct *mm, pte_t pte); +void __page_table_check_pmd_clear(struct mm_struct *mm, pmd_t pmd); +void __page_table_check_pud_clear(struct mm_struct *mm, pud_t pud); +void __page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte, + unsigned int nr); +void __page_table_check_pmd_set(struct mm_struct *mm, pmd_t *pmdp, pmd_t pmd); +void __page_table_check_pud_set(struct mm_struct *mm, pud_t *pudp, pud_t pud); void __page_table_check_pte_clear_range(struct mm_struct *mm, unsigned long addr, pmd_t pmd); @@ -46,61 +41,55 @@ static inline void page_table_check_free(struct page *page, unsigned int order) __page_table_check_zero(page, order); } -static inline void page_table_check_pte_clear(struct mm_struct *mm, - unsigned long addr, pte_t pte) +static inline void page_table_check_pte_clear(struct mm_struct *mm, pte_t pte) { if (static_branch_likely(&page_table_check_disabled)) return; - __page_table_check_pte_clear(mm, addr, pte); + __page_table_check_pte_clear(mm, pte); } -static inline void page_table_check_pmd_clear(struct mm_struct *mm, - unsigned long addr, pmd_t pmd) +static inline void page_table_check_pmd_clear(struct mm_struct *mm, pmd_t pmd) { if (static_branch_likely(&page_table_check_disabled)) return; - __page_table_check_pmd_clear(mm, addr, pmd); + __page_table_check_pmd_clear(mm, pmd); } -static inline void page_table_check_pud_clear(struct mm_struct *mm, - unsigned long addr, pud_t pud) +static inline void page_table_check_pud_clear(struct mm_struct *mm, pud_t pud) { if (static_branch_likely(&page_table_check_disabled)) return; - __page_table_check_pud_clear(mm, addr, pud); + __page_table_check_pud_clear(mm, pud); } -static inline void page_table_check_pte_set(struct mm_struct *mm, - unsigned long addr, pte_t *ptep, - pte_t pte) +static inline void page_table_check_ptes_set(struct mm_struct *mm, + pte_t *ptep, pte_t pte, unsigned int nr) { if (static_branch_likely(&page_table_check_disabled)) return; - __page_table_check_pte_set(mm, addr, ptep, pte); + __page_table_check_ptes_set(mm, ptep, pte, nr); } -static inline void page_table_check_pmd_set(struct mm_struct *mm, - unsigned long addr, pmd_t *pmdp, +static inline void page_table_check_pmd_set(struct mm_struct *mm, pmd_t *pmdp, pmd_t pmd) { if (static_branch_likely(&page_table_check_disabled)) return; - __page_table_check_pmd_set(mm, addr, pmdp, pmd); + __page_table_check_pmd_set(mm, pmdp, pmd); } -static inline void page_table_check_pud_set(struct mm_struct *mm, - unsigned long addr, pud_t *pudp, +static inline void page_table_check_pud_set(struct mm_struct *mm, pud_t *pudp, pud_t pud) { if (static_branch_likely(&page_table_check_disabled)) return; - __page_table_check_pud_set(mm, addr, pudp, pud); + __page_table_check_pud_set(mm, pudp, pud); } static inline void page_table_check_pte_clear_range(struct mm_struct *mm, @@ -123,35 +112,29 @@ static inline void page_table_check_free(struct page *page, unsigned int order) { } -static inline void page_table_check_pte_clear(struct mm_struct *mm, - unsigned long addr, pte_t pte) +static inline void page_table_check_pte_clear(struct mm_struct *mm, pte_t pte) { } -static inline void page_table_check_pmd_clear(struct mm_struct *mm, - unsigned long addr, pmd_t pmd) +static inline void page_table_check_pmd_clear(struct mm_struct *mm, pmd_t pmd) { } -static inline void page_table_check_pud_clear(struct mm_struct *mm, - unsigned long addr, pud_t pud) +static inline void page_table_check_pud_clear(struct mm_struct *mm, pud_t pud) { } -static inline void page_table_check_pte_set(struct mm_struct *mm, - unsigned long addr, pte_t *ptep, - pte_t pte) +static inline void page_table_check_ptes_set(struct mm_struct *mm, + pte_t *ptep, pte_t pte, unsigned int nr) { } -static inline void page_table_check_pmd_set(struct mm_struct *mm, - unsigned long addr, pmd_t *pmdp, +static inline void page_table_check_pmd_set(struct mm_struct *mm, pmd_t *pmdp, pmd_t pmd) { } -static inline void page_table_check_pud_set(struct mm_struct *mm, - unsigned long addr, pud_t *pudp, +static inline void page_table_check_pud_set(struct mm_struct *mm, pud_t *pudp, pud_t pud) { } diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 716953ee1ebd..06142ff7f9ce 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -203,6 +203,9 @@ enum mapping_flags { /* writeback related tags are not used */ AS_NO_WRITEBACK_TAGS = 5, AS_LARGE_FOLIO_SUPPORT = 6, + AS_RELEASE_ALWAYS, /* Call ->release_folio(), even if no private data */ + AS_STABLE_WRITES, /* must wait for writeback before modifying + folio contents */ }; /** @@ -273,6 +276,36 @@ static inline int mapping_use_writeback_tags(struct address_space *mapping) return !test_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags); } +static inline bool mapping_release_always(const struct address_space *mapping) +{ + return test_bit(AS_RELEASE_ALWAYS, &mapping->flags); +} + +static inline void mapping_set_release_always(struct address_space *mapping) +{ + set_bit(AS_RELEASE_ALWAYS, &mapping->flags); +} + +static inline void mapping_clear_release_always(struct address_space *mapping) +{ + clear_bit(AS_RELEASE_ALWAYS, &mapping->flags); +} + +static inline bool mapping_stable_writes(const struct address_space *mapping) +{ + return test_bit(AS_STABLE_WRITES, &mapping->flags); +} + +static inline void mapping_set_stable_writes(struct address_space *mapping) +{ + set_bit(AS_STABLE_WRITES, &mapping->flags); +} + +static inline void mapping_clear_stable_writes(struct address_space *mapping) +{ + clear_bit(AS_STABLE_WRITES, &mapping->flags); +} + static inline gfp_t mapping_gfp_mask(struct address_space * mapping) { return mapping->gfp_mask; @@ -373,23 +406,31 @@ static inline struct address_space *folio_file_mapping(struct folio *folio) return folio->mapping; } -static inline struct address_space *page_file_mapping(struct page *page) -{ - return folio_file_mapping(page_folio(page)); -} - -/* - * For file cache pages, return the address_space, otherwise return NULL +/** + * folio_flush_mapping - Find the file mapping this folio belongs to. + * @folio: The folio. + * + * For folios which are in the page cache, return the mapping that this + * page belongs to. Anonymous folios return NULL, even if they're in + * the swap cache. Other kinds of folio also return NULL. + * + * This is ONLY used by architecture cache flushing code. If you aren't + * writing cache flushing code, you want either folio_mapping() or + * folio_file_mapping(). */ -static inline struct address_space *page_mapping_file(struct page *page) +static inline struct address_space *folio_flush_mapping(struct folio *folio) { - struct folio *folio = page_folio(page); - if (unlikely(folio_test_swapcache(folio))) return NULL; + return folio_mapping(folio); } +static inline struct address_space *page_file_mapping(struct page *page) +{ + return folio_file_mapping(page_folio(page)); +} + /** * folio_inode - Get the host inode for this folio. * @folio: The folio. @@ -470,6 +511,19 @@ static inline void *detach_page_private(struct page *page) return folio_detach_private(page_folio(page)); } +/* + * There are some parts of the kernel which assume that PMD entries + * are exactly HPAGE_PMD_ORDER. Those should be fixed, but until then, + * limit the maximum allocation order to PMD size. I'm not aware of any + * assumptions about maximum order if THP are disabled, but 8 seems like + * a good order (that's 1MB if you're using 4kB pages) + */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER +#else +#define MAX_PAGECACHE_ORDER 8 +#endif + #ifdef CONFIG_NUMA struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order); #else @@ -501,22 +555,69 @@ pgoff_t page_cache_next_miss(struct address_space *mapping, pgoff_t page_cache_prev_miss(struct address_space *mapping, pgoff_t index, unsigned long max_scan); -#define FGP_ACCESSED 0x00000001 -#define FGP_LOCK 0x00000002 -#define FGP_CREAT 0x00000004 -#define FGP_WRITE 0x00000008 -#define FGP_NOFS 0x00000010 -#define FGP_NOWAIT 0x00000020 -#define FGP_FOR_MMAP 0x00000040 -#define FGP_STABLE 0x00000080 +/** + * typedef fgf_t - Flags for getting folios from the page cache. + * + * Most users of the page cache will not need to use these flags; + * there are convenience functions such as filemap_get_folio() and + * filemap_lock_folio(). For users which need more control over exactly + * what is done with the folios, these flags to __filemap_get_folio() + * are available. + * + * * %FGP_ACCESSED - The folio will be marked accessed. + * * %FGP_LOCK - The folio is returned locked. + * * %FGP_CREAT - If no folio is present then a new folio is allocated, + * added to the page cache and the VM's LRU list. The folio is + * returned locked. + * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the + * folio is already in cache. If the folio was allocated, unlock it + * before returning so the caller can do the same dance. + * * %FGP_WRITE - The folio will be written to by the caller. + * * %FGP_NOFS - __GFP_FS will get cleared in gfp. + * * %FGP_NOWAIT - Don't block on the folio lock. + * * %FGP_STABLE - Wait for the folio to be stable (finished writeback) + * * %FGP_WRITEBEGIN - The flags to use in a filesystem write_begin() + * implementation. + */ +typedef unsigned int __bitwise fgf_t; + +#define FGP_ACCESSED ((__force fgf_t)0x00000001) +#define FGP_LOCK ((__force fgf_t)0x00000002) +#define FGP_CREAT ((__force fgf_t)0x00000004) +#define FGP_WRITE ((__force fgf_t)0x00000008) +#define FGP_NOFS ((__force fgf_t)0x00000010) +#define FGP_NOWAIT ((__force fgf_t)0x00000020) +#define FGP_FOR_MMAP ((__force fgf_t)0x00000040) +#define FGP_STABLE ((__force fgf_t)0x00000080) +#define FGF_GET_ORDER(fgf) (((__force unsigned)fgf) >> 26) /* top 6 bits */ #define FGP_WRITEBEGIN (FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE) +/** + * fgf_set_order - Encode a length in the fgf_t flags. + * @size: The suggested size of the folio to create. + * + * The caller of __filemap_get_folio() can use this to suggest a preferred + * size for the folio that is created. If there is already a folio at + * the index, it will be returned, no matter what its size. If a folio + * is freshly created, it may be of a different size than requested + * due to alignment constraints, memory pressure, or the presence of + * other folios at nearby indices. + */ +static inline fgf_t fgf_set_order(size_t size) +{ + unsigned int shift = ilog2(size); + + if (shift <= PAGE_SHIFT) + return 0; + return (__force fgf_t)((shift - PAGE_SHIFT) << 26); +} + void *filemap_get_entry(struct address_space *mapping, pgoff_t index); struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, - int fgp_flags, gfp_t gfp); + fgf_t fgp_flags, gfp_t gfp); struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, - int fgp_flags, gfp_t gfp); + fgf_t fgp_flags, gfp_t gfp); /** * filemap_get_folio - Find and get a folio. @@ -590,7 +691,7 @@ static inline struct page *find_get_page(struct address_space *mapping, } static inline struct page *find_get_page_flags(struct address_space *mapping, - pgoff_t offset, int fgp_flags) + pgoff_t offset, fgf_t fgp_flags) { return pagecache_get_page(mapping, offset, fgp_flags, 0); } @@ -705,9 +806,6 @@ static inline pgoff_t folio_next_index(struct folio *folio) */ static inline struct page *folio_file_page(struct folio *folio, pgoff_t index) { - /* HugeTLBfs indexes the page cache in units of hpage_size */ - if (folio_test_hugetlb(folio)) - return &folio->page; return folio_page(folio, index & (folio_nr_pages(folio) - 1)); } @@ -723,9 +821,6 @@ static inline struct page *folio_file_page(struct folio *folio, pgoff_t index) */ static inline bool folio_contains(struct folio *folio, pgoff_t index) { - /* HugeTLBfs indexes the page cache in units of hpage_size */ - if (folio_test_hugetlb(folio)) - return folio->index == index; return index - folio_index(folio) < folio_nr_pages(folio); } @@ -783,10 +878,9 @@ static inline struct folio *read_mapping_folio(struct address_space *mapping, } /* - * Get index of the page within radix-tree (but not for hugetlb pages). - * (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE) + * Get the offset in PAGE_SIZE (even for hugetlb pages). */ -static inline pgoff_t page_to_index(struct page *page) +static inline pgoff_t page_to_pgoff(struct page *page) { struct page *head; @@ -801,19 +895,6 @@ static inline pgoff_t page_to_index(struct page *page) return head->index + page - head; } -extern pgoff_t hugetlb_basepage_index(struct page *page); - -/* - * Get the offset in PAGE_SIZE (even for hugetlb pages). - * (TODO: hugetlb pages should have ->index in PAGE_SIZE) - */ -static inline pgoff_t page_to_pgoff(struct page *page) -{ - if (unlikely(PageHuge(page))) - return hugetlb_basepage_index(page); - return page_to_index(page); -} - /* * Return byte-offset into filesystem object for page. */ @@ -850,24 +931,16 @@ static inline loff_t folio_file_pos(struct folio *folio) /* * Get the offset in PAGE_SIZE (even for hugetlb folios). - * (TODO: hugetlb folios should have ->index in PAGE_SIZE) */ static inline pgoff_t folio_pgoff(struct folio *folio) { - if (unlikely(folio_test_hugetlb(folio))) - return hugetlb_basepage_index(&folio->page); return folio->index; } -extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma, - unsigned long address); - static inline pgoff_t linear_page_index(struct vm_area_struct *vma, unsigned long address) { pgoff_t pgoff; - if (unlikely(is_vm_hugetlb_page(vma))) - return linear_hugepage_index(vma, address); pgoff = (address - vma->vm_start) >> PAGE_SHIFT; pgoff += vma->vm_pgoff; return pgoff; @@ -900,8 +973,7 @@ static inline bool wake_page_match(struct wait_page_queue *wait_page, void __folio_lock(struct folio *folio); int __folio_lock_killable(struct folio *folio); -bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm, - unsigned int flags); +vm_fault_t __folio_lock_or_retry(struct folio *folio, struct vm_fault *vmf); void unlock_page(struct page *page); void folio_unlock(struct folio *folio); @@ -1005,11 +1077,13 @@ static inline int folio_lock_killable(struct folio *folio) * Return value and mmap_lock implications depend on flags; see * __folio_lock_or_retry(). */ -static inline bool folio_lock_or_retry(struct folio *folio, - struct mm_struct *mm, unsigned int flags) +static inline vm_fault_t folio_lock_or_retry(struct folio *folio, + struct vm_fault *vmf) { might_sleep(); - return folio_trylock(folio) || __folio_lock_or_retry(folio, mm, flags); + if (!folio_trylock(folio)) + return __folio_lock_or_retry(folio, vmf); + return 0; } /* @@ -1044,11 +1118,7 @@ static inline void wait_on_page_locked(struct page *page) folio_wait_locked(page_folio(page)); } -static inline int wait_on_page_locked_killable(struct page *page) -{ - return folio_wait_locked_killable(page_folio(page)); -} - +void folio_end_read(struct folio *folio, bool success); void wait_on_page_writeback(struct page *page); void folio_wait_writeback(struct folio *folio); int folio_wait_writeback_killable(struct folio *folio); diff --git a/include/linux/parport.h b/include/linux/parport.h index 999eddd619b7..fff39bc30629 100644 --- a/include/linux/parport.h +++ b/include/linux/parport.h @@ -180,8 +180,6 @@ struct ieee1284_info { struct semaphore irq; }; -#define PARPORT_NAME_MAX_LEN 15 - /* A parallel port */ struct parport { unsigned long base; /* base address */ diff --git a/include/linux/pci.h b/include/linux/pci.h index c69a2cc1f412..dea043bc1e38 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -23,7 +23,7 @@ #ifndef LINUX_PCI_H #define LINUX_PCI_H - +#include <linux/args.h> #include <linux/mod_devicetable.h> #include <linux/types.h> @@ -366,8 +366,8 @@ struct pci_dev { pci_power_t current_state; /* Current operating state. In ACPI, this is D0-D3, D0 being fully functional, and D3 being off. */ - unsigned int imm_ready:1; /* Supports Immediate Readiness */ u8 pm_cap; /* PM capability offset */ + unsigned int imm_ready:1; /* Supports Immediate Readiness */ unsigned int pme_support:5; /* Bitmask of states from which PME# can be generated */ unsigned int pme_poll:1; /* Poll device's PME status bit */ @@ -392,9 +392,9 @@ struct pci_dev { #ifdef CONFIG_PCIEASPM struct pcie_link_state *link_state; /* ASPM link state */ + u16 l1ss; /* L1SS Capability pointer */ unsigned int ltr_path:1; /* Latency Tolerance Reporting supported from root to here */ - u16 l1ss; /* L1SS Capability pointer */ #endif unsigned int pasid_no_tlp:1; /* PASID works without TLP Prefix */ unsigned int eetlp_prefix_path:1; /* End-to-End TLP Prefix */ @@ -464,12 +464,13 @@ struct pci_dev { unsigned int no_vf_scan:1; /* Don't scan for VFs after IOV enablement */ unsigned int no_command_memory:1; /* No PCI_COMMAND_MEMORY */ unsigned int rom_bar_overlap:1; /* ROM BAR disable broken */ + unsigned int rom_attr_enabled:1; /* Display of ROM attribute enabled? */ pci_dev_flags_t dev_flags; atomic_t enable_cnt; /* pci_enable_device has been called */ + spinlock_t pcie_cap_lock; /* Protects RMW ops in capability accessors */ u32 saved_config_space[16]; /* Config space saved at suspend time */ struct hlist_head saved_cap_space; - int rom_attr_enabled; /* Display of ROM attribute enabled? */ struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */ struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */ @@ -712,6 +713,30 @@ static inline bool pci_is_bridge(struct pci_dev *dev) dev->hdr_type == PCI_HEADER_TYPE_CARDBUS; } +/** + * pci_is_vga - check if the PCI device is a VGA device + * + * The PCI Code and ID Assignment spec, r1.15, secs 1.4 and 1.1, define + * VGA Base Class and Sub-Classes: + * + * 03 00 PCI_CLASS_DISPLAY_VGA VGA-compatible or 8514-compatible + * 00 01 PCI_CLASS_NOT_DEFINED_VGA VGA-compatible (before Class Code) + * + * Return true if the PCI device is a VGA device and uses the legacy VGA + * resources ([mem 0xa0000-0xbffff], [io 0x3b0-0x3bb], [io 0x3c0-0x3df] and + * aliases). + */ +static inline bool pci_is_vga(struct pci_dev *pdev) +{ + if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) + return true; + + if ((pdev->class >> 8) == PCI_CLASS_NOT_DEFINED_VGA) + return true; + + return false; +} + #define for_each_pci_bridge(dev, bus) \ list_for_each_entry(dev, &bus->devices, bus_list) \ if (!pci_is_bridge(dev)) {} else @@ -1180,6 +1205,8 @@ struct pci_dev *pci_get_slot(struct pci_bus *bus, unsigned int devfn); struct pci_dev *pci_get_domain_bus_and_slot(int domain, unsigned int bus, unsigned int devfn); struct pci_dev *pci_get_class(unsigned int class, struct pci_dev *from); +struct pci_dev *pci_get_base_class(unsigned int class, struct pci_dev *from); + int pci_dev_present(const struct pci_device_id *ids); int pci_bus_read_config_byte(struct pci_bus *bus, unsigned int devfn, @@ -1217,11 +1244,40 @@ int pcie_capability_read_word(struct pci_dev *dev, int pos, u16 *val); int pcie_capability_read_dword(struct pci_dev *dev, int pos, u32 *val); int pcie_capability_write_word(struct pci_dev *dev, int pos, u16 val); int pcie_capability_write_dword(struct pci_dev *dev, int pos, u32 val); -int pcie_capability_clear_and_set_word(struct pci_dev *dev, int pos, - u16 clear, u16 set); +int pcie_capability_clear_and_set_word_unlocked(struct pci_dev *dev, int pos, + u16 clear, u16 set); +int pcie_capability_clear_and_set_word_locked(struct pci_dev *dev, int pos, + u16 clear, u16 set); int pcie_capability_clear_and_set_dword(struct pci_dev *dev, int pos, u32 clear, u32 set); +/** + * pcie_capability_clear_and_set_word - RMW accessor for PCI Express Capability Registers + * @dev: PCI device structure of the PCI Express device + * @pos: PCI Express Capability Register + * @clear: Clear bitmask + * @set: Set bitmask + * + * Perform a Read-Modify-Write (RMW) operation using @clear and @set + * bitmasks on PCI Express Capability Register at @pos. Certain PCI Express + * Capability Registers are accessed concurrently in RMW fashion, hence + * require locking which is handled transparently to the caller. + */ +static inline int pcie_capability_clear_and_set_word(struct pci_dev *dev, + int pos, + u16 clear, u16 set) +{ + switch (pos) { + case PCI_EXP_LNKCTL: + case PCI_EXP_RTCTL: + return pcie_capability_clear_and_set_word_locked(dev, pos, + clear, set); + default: + return pcie_capability_clear_and_set_word_unlocked(dev, pos, + clear, set); + } +} + static inline int pcie_capability_set_word(struct pci_dev *dev, int pos, u16 set) { @@ -1403,7 +1459,6 @@ void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge); void pci_assign_unassigned_bus_resources(struct pci_bus *bus); void pci_assign_unassigned_root_bus_resources(struct pci_bus *bus); int pci_reassign_bridge_resources(struct pci_dev *bridge, unsigned long type); -void pdev_enable_device(struct pci_dev *); int pci_enable_resources(struct pci_dev *, int mask); void pci_assign_irq(struct pci_dev *dev); struct resource *pci_find_resource(struct pci_dev *dev, struct resource *res); @@ -1595,6 +1650,8 @@ struct msix_entry { u16 entry; /* Driver uses to specify entry, OS writes */ }; +struct msi_domain_template; + #ifdef CONFIG_PCI_MSI int pci_msi_vec_count(struct pci_dev *dev); void pci_disable_msi(struct pci_dev *dev); @@ -1627,6 +1684,11 @@ void pci_msix_free_irq(struct pci_dev *pdev, struct msi_map map); void pci_free_irq_vectors(struct pci_dev *dev); int pci_irq_vector(struct pci_dev *dev, unsigned int nr); const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, int vec); +bool pci_create_ims_domain(struct pci_dev *pdev, const struct msi_domain_template *template, + unsigned int hwsize, void *data); +struct msi_map pci_ims_alloc_irq(struct pci_dev *pdev, union msi_instance_cookie *icookie, + const struct irq_affinity_desc *affdesc); +void pci_ims_free_irq(struct pci_dev *pdev, struct msi_map map); #else static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; } @@ -1690,6 +1752,25 @@ static inline const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, { return cpu_possible_mask; } + +static inline bool pci_create_ims_domain(struct pci_dev *pdev, + const struct msi_domain_template *template, + unsigned int hwsize, void *data) +{ return false; } + +static inline struct msi_map pci_ims_alloc_irq(struct pci_dev *pdev, + union msi_instance_cookie *icookie, + const struct irq_affinity_desc *affdesc) +{ + struct msi_map map = { .index = -ENOSYS, }; + + return map; +} + +static inline void pci_ims_free_irq(struct pci_dev *pdev, struct msi_map map) +{ +} + #endif /** @@ -1748,6 +1829,7 @@ extern bool pcie_ports_native; int pci_disable_link_state(struct pci_dev *pdev, int state); int pci_disable_link_state_locked(struct pci_dev *pdev, int state); int pci_enable_link_state(struct pci_dev *pdev, int state); +int pci_enable_link_state_locked(struct pci_dev *pdev, int state); void pcie_no_aspm(void); bool pcie_aspm_support_enabled(void); bool pcie_aspm_enabled(struct pci_dev *pdev); @@ -1758,6 +1840,8 @@ static inline int pci_disable_link_state_locked(struct pci_dev *pdev, int state) { return 0; } static inline int pci_enable_link_state(struct pci_dev *pdev, int state) { return 0; } +static inline int pci_enable_link_state_locked(struct pci_dev *pdev, int state) +{ return 0; } static inline void pcie_no_aspm(void) { } static inline bool pcie_aspm_support_enabled(void) { return false; } static inline bool pcie_aspm_enabled(struct pci_dev *pdev) { return false; } @@ -1895,6 +1979,9 @@ static inline struct pci_dev *pci_get_class(unsigned int class, struct pci_dev *from) { return NULL; } +static inline struct pci_dev *pci_get_base_class(unsigned int class, + struct pci_dev *from) +{ return NULL; } static inline int pci_dev_present(const struct pci_device_id *ids) { return 0; } @@ -2260,6 +2347,11 @@ int pcibios_alloc_irq(struct pci_dev *dev); void pcibios_free_irq(struct pci_dev *dev); resource_size_t pcibios_default_alignment(void); +#if !defined(HAVE_PCI_MMAP) && !defined(ARCH_GENERIC_PCI_MMAP_RESOURCE) +extern int pci_create_resource_files(struct pci_dev *dev); +extern void pci_remove_resource_files(struct pci_dev *dev); +#endif + #if defined(CONFIG_PCI_MMCONFIG) || defined(CONFIG_ACPI_MCFG) void __init pci_mmcfg_early_init(void); void __init pci_mmcfg_late_init(void); @@ -2582,14 +2674,6 @@ static inline bool pci_is_thunderbolt_attached(struct pci_dev *pdev) void pci_uevent_ers(struct pci_dev *pdev, enum pci_ers_result err_type); #endif -struct msi_domain_template; - -bool pci_create_ims_domain(struct pci_dev *pdev, const struct msi_domain_template *template, - unsigned int hwsize, void *data); -struct msi_map pci_ims_alloc_irq(struct pci_dev *pdev, union msi_instance_cookie *icookie, - const struct irq_affinity_desc *affdesc); -void pci_ims_free_irq(struct pci_dev *pdev, struct msi_map map); - #include <linux/dma-mapping.h> #define pci_printk(level, pdev, fmt, arg...) \ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 2dc75df1437f..275799b5f535 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -180,6 +180,8 @@ #define PCI_DEVICE_ID_BERKOM_A4T 0xffa4 #define PCI_DEVICE_ID_BERKOM_SCITEL_QUADRO 0xffa8 +#define PCI_VENDOR_ID_ITTIM 0x0b48 + #define PCI_VENDOR_ID_COMPAQ 0x0e11 #define PCI_DEVICE_ID_COMPAQ_TOKENRING 0x0508 #define PCI_DEVICE_ID_COMPAQ_TACHYON 0xa0fc @@ -576,7 +578,11 @@ #define PCI_DEVICE_ID_AMD_19H_M60H_DF_F3 0x14e3 #define PCI_DEVICE_ID_AMD_19H_M70H_DF_F3 0x14f3 #define PCI_DEVICE_ID_AMD_19H_M78H_DF_F3 0x12fb +#define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F3 0x12c3 +#define PCI_DEVICE_ID_AMD_1AH_M20H_DF_F3 0x16fb #define PCI_DEVICE_ID_AMD_MI200_DF_F3 0x14d3 +#define PCI_DEVICE_ID_AMD_MI300_DF_F3 0x152b +#define PCI_DEVICE_ID_AMD_VANGOGH_USB 0x163a #define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703 #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 @@ -1760,6 +1766,10 @@ #define PCI_SUBDEVICE_ID_AT_2700FX 0x2701 #define PCI_SUBDEVICE_ID_AT_2701FX 0x2703 +#define PCI_VENDOR_ID_ASIX 0x125b +#define PCI_DEVICE_ID_ASIX_AX99100 0x9100 +#define PCI_DEVICE_ID_ASIX_AX99100_LB 0x9110 + #define PCI_VENDOR_ID_ESS 0x125d #define PCI_DEVICE_ID_ESS_ESS1968 0x1968 #define PCI_DEVICE_ID_ESS_ESS1978 0x1978 @@ -2644,6 +2654,7 @@ #define PCI_VENDOR_ID_INTEL 0x8086 #define PCI_DEVICE_ID_INTEL_EESSC 0x0008 +#define PCI_DEVICE_ID_INTEL_HDA_CML_LP 0x02c8 #define PCI_DEVICE_ID_INTEL_PXHD_0 0x0320 #define PCI_DEVICE_ID_INTEL_PXHD_1 0x0321 #define PCI_DEVICE_ID_INTEL_PXH_0 0x0329 @@ -2659,8 +2670,10 @@ #define PCI_DEVICE_ID_INTEL_82424 0x0483 #define PCI_DEVICE_ID_INTEL_82378 0x0484 #define PCI_DEVICE_ID_INTEL_82425 0x0486 +#define PCI_DEVICE_ID_INTEL_HDA_CML_H 0x06c8 #define PCI_DEVICE_ID_INTEL_MRST_SD0 0x0807 #define PCI_DEVICE_ID_INTEL_MRST_SD1 0x0808 +#define PCI_DEVICE_ID_INTEL_HDA_OAKTRAIL 0x080a #define PCI_DEVICE_ID_INTEL_MFD_SD 0x0820 #define PCI_DEVICE_ID_INTEL_MFD_SDIO1 0x0821 #define PCI_DEVICE_ID_INTEL_MFD_SDIO2 0x0822 @@ -2670,15 +2683,19 @@ #define PCI_DEVICE_ID_INTEL_QUARK_X1000_ILB 0x095e #define PCI_DEVICE_ID_INTEL_I960 0x0960 #define PCI_DEVICE_ID_INTEL_I960RM 0x0962 +#define PCI_DEVICE_ID_INTEL_HDA_HSW_0 0x0a0c +#define PCI_DEVICE_ID_INTEL_HDA_HSW_2 0x0c0c #define PCI_DEVICE_ID_INTEL_CENTERTON_ILB 0x0c60 +#define PCI_DEVICE_ID_INTEL_HDA_HSW_3 0x0d0c +#define PCI_DEVICE_ID_INTEL_HDA_BYT 0x0f04 +#define PCI_DEVICE_ID_INTEL_SST_BYT 0x0f28 #define PCI_DEVICE_ID_INTEL_8257X_SOL 0x1062 #define PCI_DEVICE_ID_INTEL_82573E_SOL 0x1085 #define PCI_DEVICE_ID_INTEL_82573L_SOL 0x108f #define PCI_DEVICE_ID_INTEL_82815_MC 0x1130 #define PCI_DEVICE_ID_INTEL_82815_CGC 0x1132 +#define PCI_DEVICE_ID_INTEL_SST_TNG 0x119a #define PCI_DEVICE_ID_INTEL_82092AA_0 0x1221 -#define PCI_DEVICE_ID_INTEL_7505_0 0x2550 -#define PCI_DEVICE_ID_INTEL_7205_0 0x255d #define PCI_DEVICE_ID_INTEL_82437 0x122d #define PCI_DEVICE_ID_INTEL_82371FB_0 0x122e #define PCI_DEVICE_ID_INTEL_82371FB_1 0x1230 @@ -2704,20 +2721,26 @@ #define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_2C_BRIDGE 0x1576 #define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_4C_NHI 0x1577 #define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_4C_BRIDGE 0x1578 +#define PCI_DEVICE_ID_INTEL_HDA_BDW 0x160c #define PCI_DEVICE_ID_INTEL_80960_RP 0x1960 #define PCI_DEVICE_ID_INTEL_QAT_C3XXX 0x19e2 #define PCI_DEVICE_ID_INTEL_QAT_C3XXX_VF 0x19e3 #define PCI_DEVICE_ID_INTEL_82840_HB 0x1a21 #define PCI_DEVICE_ID_INTEL_82845_HB 0x1a30 #define PCI_DEVICE_ID_INTEL_IOAT 0x1a38 +#define PCI_DEVICE_ID_INTEL_HDA_CPT 0x1c20 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN 0x1c41 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX 0x1c5f +#define PCI_DEVICE_ID_INTEL_HDA_PBG 0x1d20 #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_0 0x1d40 #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_1 0x1d41 +#define PCI_DEVICE_ID_INTEL_HDA_PPT 0x1e20 #define PCI_DEVICE_ID_INTEL_PANTHERPOINT_XHCI 0x1e31 #define PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MIN 0x1e40 #define PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MAX 0x1e5f #define PCI_DEVICE_ID_INTEL_VMD_201D 0x201d +#define PCI_DEVICE_ID_INTEL_HDA_BSW 0x2284 +#define PCI_DEVICE_ID_INTEL_SST_BSW 0x22a8 #define PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MIN 0x2310 #define PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MAX 0x231f #define PCI_DEVICE_ID_INTEL_82801AA_0 0x2410 @@ -2772,6 +2795,8 @@ #define PCI_DEVICE_ID_INTEL_82850_HB 0x2530 #define PCI_DEVICE_ID_INTEL_82860_HB 0x2531 #define PCI_DEVICE_ID_INTEL_E7501_MCH 0x254c +#define PCI_DEVICE_ID_INTEL_7505_0 0x2550 +#define PCI_DEVICE_ID_INTEL_7205_0 0x255d #define PCI_DEVICE_ID_INTEL_82845G_HB 0x2560 #define PCI_DEVICE_ID_INTEL_82845G_IG 0x2562 #define PCI_DEVICE_ID_INTEL_82865_HB 0x2570 @@ -2793,12 +2818,14 @@ #define PCI_DEVICE_ID_INTEL_ICH6_0 0x2640 #define PCI_DEVICE_ID_INTEL_ICH6_1 0x2641 #define PCI_DEVICE_ID_INTEL_ICH6_2 0x2642 +#define PCI_DEVICE_ID_INTEL_HDA_ICH6 0x2668 #define PCI_DEVICE_ID_INTEL_ICH6_16 0x266a #define PCI_DEVICE_ID_INTEL_ICH6_17 0x266d #define PCI_DEVICE_ID_INTEL_ICH6_18 0x266e #define PCI_DEVICE_ID_INTEL_ICH6_19 0x266f #define PCI_DEVICE_ID_INTEL_ESB2_0 0x2670 #define PCI_DEVICE_ID_INTEL_ESB2_14 0x2698 +#define PCI_DEVICE_ID_INTEL_HDA_ESB2 0x269a #define PCI_DEVICE_ID_INTEL_ESB2_17 0x269b #define PCI_DEVICE_ID_INTEL_ESB2_18 0x269e #define PCI_DEVICE_ID_INTEL_82945G_HB 0x2770 @@ -2806,11 +2833,12 @@ #define PCI_DEVICE_ID_INTEL_3000_HB 0x2778 #define PCI_DEVICE_ID_INTEL_82945GM_HB 0x27a0 #define PCI_DEVICE_ID_INTEL_82945GM_IG 0x27a2 +#define PCI_DEVICE_ID_INTEL_ICH7_30 0x27b0 #define PCI_DEVICE_ID_INTEL_ICH7_0 0x27b8 #define PCI_DEVICE_ID_INTEL_ICH7_1 0x27b9 -#define PCI_DEVICE_ID_INTEL_ICH7_30 0x27b0 #define PCI_DEVICE_ID_INTEL_TGP_LPC 0x27bc #define PCI_DEVICE_ID_INTEL_ICH7_31 0x27bd +#define PCI_DEVICE_ID_INTEL_HDA_ICH7 0x27d8 #define PCI_DEVICE_ID_INTEL_ICH7_17 0x27da #define PCI_DEVICE_ID_INTEL_ICH7_19 0x27dd #define PCI_DEVICE_ID_INTEL_ICH7_20 0x27de @@ -2821,17 +2849,20 @@ #define PCI_DEVICE_ID_INTEL_ICH8_3 0x2814 #define PCI_DEVICE_ID_INTEL_ICH8_4 0x2815 #define PCI_DEVICE_ID_INTEL_ICH8_5 0x283e +#define PCI_DEVICE_ID_INTEL_HDA_ICH8 0x284b #define PCI_DEVICE_ID_INTEL_ICH8_6 0x2850 #define PCI_DEVICE_ID_INTEL_VMD_28C0 0x28c0 #define PCI_DEVICE_ID_INTEL_ICH9_0 0x2910 -#define PCI_DEVICE_ID_INTEL_ICH9_1 0x2917 #define PCI_DEVICE_ID_INTEL_ICH9_2 0x2912 #define PCI_DEVICE_ID_INTEL_ICH9_3 0x2913 #define PCI_DEVICE_ID_INTEL_ICH9_4 0x2914 -#define PCI_DEVICE_ID_INTEL_ICH9_5 0x2919 -#define PCI_DEVICE_ID_INTEL_ICH9_6 0x2930 #define PCI_DEVICE_ID_INTEL_ICH9_7 0x2916 +#define PCI_DEVICE_ID_INTEL_ICH9_1 0x2917 #define PCI_DEVICE_ID_INTEL_ICH9_8 0x2918 +#define PCI_DEVICE_ID_INTEL_ICH9_5 0x2919 +#define PCI_DEVICE_ID_INTEL_ICH9_6 0x2930 +#define PCI_DEVICE_ID_INTEL_HDA_ICH9_0 0x293e +#define PCI_DEVICE_ID_INTEL_HDA_ICH9_1 0x293f #define PCI_DEVICE_ID_INTEL_I7_MCR 0x2c18 #define PCI_DEVICE_ID_INTEL_I7_MC_TAD 0x2c19 #define PCI_DEVICE_ID_INTEL_I7_MC_RAS 0x2c1a @@ -2848,8 +2879,8 @@ #define PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR 0x2c31 #define PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK 0x2c32 #define PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC 0x2c33 -#define PCI_DEVICE_ID_INTEL_I7_NONCORE 0x2c41 #define PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT 0x2c40 +#define PCI_DEVICE_ID_INTEL_I7_NONCORE 0x2c41 #define PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE 0x2c50 #define PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT 0x2c51 #define PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2 0x2c70 @@ -2883,6 +2914,7 @@ #define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2 0x2db1 #define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2 0x2db2 #define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2 0x2db3 +#define PCI_DEVICE_ID_INTEL_HDA_GML 0x3198 #define PCI_DEVICE_ID_INTEL_82855PM_HB 0x3340 #define PCI_DEVICE_ID_INTEL_IOAT_TBG4 0x3429 #define PCI_DEVICE_ID_INTEL_IOAT_TBG5 0x342a @@ -2893,12 +2925,13 @@ #define PCI_DEVICE_ID_INTEL_IOAT_TBG1 0x3431 #define PCI_DEVICE_ID_INTEL_IOAT_TBG2 0x3432 #define PCI_DEVICE_ID_INTEL_IOAT_TBG3 0x3433 +#define PCI_DEVICE_ID_INTEL_HDA_ICL_LP 0x34c8 #define PCI_DEVICE_ID_INTEL_82830_HB 0x3575 #define PCI_DEVICE_ID_INTEL_82830_CGC 0x3577 -#define PCI_DEVICE_ID_INTEL_82854_HB 0x358c -#define PCI_DEVICE_ID_INTEL_82854_IG 0x358e #define PCI_DEVICE_ID_INTEL_82855GM_HB 0x3580 #define PCI_DEVICE_ID_INTEL_82855GM_IG 0x3582 +#define PCI_DEVICE_ID_INTEL_82854_HB 0x358c +#define PCI_DEVICE_ID_INTEL_82854_IG 0x358e #define PCI_DEVICE_ID_INTEL_E7520_MCH 0x3590 #define PCI_DEVICE_ID_INTEL_E7320_MCH 0x3592 #define PCI_DEVICE_ID_INTEL_MCH_PA 0x3595 @@ -2908,11 +2941,11 @@ #define PCI_DEVICE_ID_INTEL_MCH_PC 0x3599 #define PCI_DEVICE_ID_INTEL_MCH_PC1 0x359a #define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e +#define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b +#define PCI_DEVICE_ID_INTEL_FBD_CNB 0x360c #define PCI_DEVICE_ID_INTEL_I7300_MCH_ERR 0x360c #define PCI_DEVICE_ID_INTEL_I7300_MCH_FB0 0x360f #define PCI_DEVICE_ID_INTEL_I7300_MCH_FB1 0x3610 -#define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b -#define PCI_DEVICE_ID_INTEL_FBD_CNB 0x360c #define PCI_DEVICE_ID_INTEL_IOAT_JSF0 0x3710 #define PCI_DEVICE_ID_INTEL_IOAT_JSF1 0x3711 #define PCI_DEVICE_ID_INTEL_IOAT_JSF2 0x3712 @@ -2925,14 +2958,19 @@ #define PCI_DEVICE_ID_INTEL_IOAT_JSF9 0x3719 #define PCI_DEVICE_ID_INTEL_QAT_C62X 0x37c8 #define PCI_DEVICE_ID_INTEL_QAT_C62X_VF 0x37c9 +#define PCI_DEVICE_ID_INTEL_HDA_ICL_N 0x38c8 #define PCI_DEVICE_ID_INTEL_ICH10_0 0x3a14 #define PCI_DEVICE_ID_INTEL_ICH10_1 0x3a16 #define PCI_DEVICE_ID_INTEL_ICH10_2 0x3a18 #define PCI_DEVICE_ID_INTEL_ICH10_3 0x3a1a #define PCI_DEVICE_ID_INTEL_ICH10_4 0x3a30 +#define PCI_DEVICE_ID_INTEL_HDA_ICH10_0 0x3a3e #define PCI_DEVICE_ID_INTEL_ICH10_5 0x3a60 +#define PCI_DEVICE_ID_INTEL_HDA_ICH10_1 0x3a6e #define PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MIN 0x3b00 #define PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MAX 0x3b1f +#define PCI_DEVICE_ID_INTEL_HDA_5_3400_SERIES_0 0x3b56 +#define PCI_DEVICE_ID_INTEL_HDA_5_3400_SERIES_1 0x3b57 #define PCI_DEVICE_ID_INTEL_IOAT_SNB0 0x3c20 #define PCI_DEVICE_ID_INTEL_IOAT_SNB1 0x3c21 #define PCI_DEVICE_ID_INTEL_IOAT_SNB2 0x3c22 @@ -2943,16 +2981,12 @@ #define PCI_DEVICE_ID_INTEL_IOAT_SNB7 0x3c27 #define PCI_DEVICE_ID_INTEL_IOAT_SNB8 0x3c2e #define PCI_DEVICE_ID_INTEL_IOAT_SNB9 0x3c2f -#define PCI_DEVICE_ID_INTEL_UNC_HA 0x3c46 -#define PCI_DEVICE_ID_INTEL_UNC_IMC0 0x3cb0 -#define PCI_DEVICE_ID_INTEL_UNC_IMC1 0x3cb1 -#define PCI_DEVICE_ID_INTEL_UNC_IMC2 0x3cb4 -#define PCI_DEVICE_ID_INTEL_UNC_IMC3 0x3cb5 #define PCI_DEVICE_ID_INTEL_UNC_QPI0 0x3c41 #define PCI_DEVICE_ID_INTEL_UNC_QPI1 0x3c42 #define PCI_DEVICE_ID_INTEL_UNC_R2PCIE 0x3c43 #define PCI_DEVICE_ID_INTEL_UNC_R3QPI0 0x3c44 #define PCI_DEVICE_ID_INTEL_UNC_R3QPI1 0x3c45 +#define PCI_DEVICE_ID_INTEL_UNC_HA 0x3c46 #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS 0x3c71 /* 15.1 */ #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR0 0x3c72 /* 16.2 */ #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR1 0x3c73 /* 16.3 */ @@ -2964,17 +2998,40 @@ #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1 0x3cab /* 15.3 */ #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2 0x3cac /* 15.4 */ #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3 0x3cad /* 15.5 */ +#define PCI_DEVICE_ID_INTEL_UNC_IMC0 0x3cb0 +#define PCI_DEVICE_ID_INTEL_UNC_IMC1 0x3cb1 +#define PCI_DEVICE_ID_INTEL_UNC_IMC2 0x3cb4 +#define PCI_DEVICE_ID_INTEL_UNC_IMC3 0x3cb5 #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO 0x3cb8 /* 17.0 */ #define PCI_DEVICE_ID_INTEL_JAKETOWN_UBOX 0x3ce0 #define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0 0x3cf4 /* 12.6 */ #define PCI_DEVICE_ID_INTEL_SBRIDGE_BR 0x3cf5 /* 13.6 */ #define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1 0x3cf6 /* 12.7 */ +#define PCI_DEVICE_ID_INTEL_HDA_ICL_H 0x3dc8 #define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f #define PCI_DEVICE_ID_INTEL_5400_ERR 0x4030 #define PCI_DEVICE_ID_INTEL_5400_FBD0 0x4035 #define PCI_DEVICE_ID_INTEL_5400_FBD1 0x4036 +#define PCI_DEVICE_ID_INTEL_HDA_TGL_H 0x43c8 +#define PCI_DEVICE_ID_INTEL_HDA_DG1 0x490d +#define PCI_DEVICE_ID_INTEL_HDA_EHL_0 0x4b55 +#define PCI_DEVICE_ID_INTEL_HDA_EHL_3 0x4b58 +#define PCI_DEVICE_ID_INTEL_HDA_JSL_N 0x4dc8 +#define PCI_DEVICE_ID_INTEL_HDA_DG2_0 0x4f90 +#define PCI_DEVICE_ID_INTEL_HDA_DG2_1 0x4f91 +#define PCI_DEVICE_ID_INTEL_HDA_DG2_2 0x4f92 #define PCI_DEVICE_ID_INTEL_EP80579_0 0x5031 #define PCI_DEVICE_ID_INTEL_EP80579_1 0x5032 +#define PCI_DEVICE_ID_INTEL_HDA_ADL_P 0x51c8 +#define PCI_DEVICE_ID_INTEL_HDA_ADL_PS 0x51c9 +#define PCI_DEVICE_ID_INTEL_HDA_RPL_P_0 0x51ca +#define PCI_DEVICE_ID_INTEL_HDA_RPL_P_1 0x51cb +#define PCI_DEVICE_ID_INTEL_HDA_ADL_M 0x51cc +#define PCI_DEVICE_ID_INTEL_HDA_ADL_PX 0x51cd +#define PCI_DEVICE_ID_INTEL_HDA_RPL_M 0x51ce +#define PCI_DEVICE_ID_INTEL_HDA_RPL_PX 0x51cf +#define PCI_DEVICE_ID_INTEL_HDA_ADL_N 0x54c8 +#define PCI_DEVICE_ID_INTEL_HDA_APL 0x5a98 #define PCI_DEVICE_ID_INTEL_5100_16 0x65f0 #define PCI_DEVICE_ID_INTEL_5100_19 0x65f3 #define PCI_DEVICE_ID_INTEL_5100_21 0x65f5 @@ -3008,8 +3065,13 @@ #define PCI_DEVICE_ID_INTEL_82443GX_0 0x71a0 #define PCI_DEVICE_ID_INTEL_82443GX_2 0x71a2 #define PCI_DEVICE_ID_INTEL_82372FB_1 0x7601 +#define PCI_DEVICE_ID_INTEL_HDA_RPL_S 0x7a50 +#define PCI_DEVICE_ID_INTEL_HDA_ADL_S 0x7ad0 +#define PCI_DEVICE_ID_INTEL_HDA_MTL 0x7e28 +#define PCI_DEVICE_ID_INTEL_HDA_ARL_S 0x7f50 #define PCI_DEVICE_ID_INTEL_SCH_LPC 0x8119 #define PCI_DEVICE_ID_INTEL_SCH_IDE 0x811a +#define PCI_DEVICE_ID_INTEL_HDA_POULSBO 0x811b #define PCI_DEVICE_ID_INTEL_E6XX_CU 0x8183 #define PCI_DEVICE_ID_INTEL_ITC_LPC 0x8186 #define PCI_DEVICE_ID_INTEL_82454GX 0x84c4 @@ -3018,9 +3080,31 @@ #define PCI_DEVICE_ID_INTEL_82454NX 0x84cb #define PCI_DEVICE_ID_INTEL_84460GX 0x84ea #define PCI_DEVICE_ID_INTEL_IXP4XX 0x8500 +#define PCI_DEVICE_ID_INTEL_HDA_LPT 0x8c20 +#define PCI_DEVICE_ID_INTEL_HDA_9_SERIES 0x8ca0 +#define PCI_DEVICE_ID_INTEL_HDA_WBG_0 0x8d20 +#define PCI_DEVICE_ID_INTEL_HDA_WBG_1 0x8d21 #define PCI_DEVICE_ID_INTEL_IXP2800 0x9004 +#define PCI_DEVICE_ID_INTEL_HDA_LKF 0x98c8 #define PCI_DEVICE_ID_INTEL_VMD_9A0B 0x9a0b +#define PCI_DEVICE_ID_INTEL_HDA_LPT_LP_0 0x9c20 +#define PCI_DEVICE_ID_INTEL_HDA_LPT_LP_1 0x9c21 +#define PCI_DEVICE_ID_INTEL_HDA_WPT_LP 0x9ca0 +#define PCI_DEVICE_ID_INTEL_HDA_SKL_LP 0x9d70 +#define PCI_DEVICE_ID_INTEL_HDA_KBL_LP 0x9d71 +#define PCI_DEVICE_ID_INTEL_HDA_CNL_LP 0x9dc8 +#define PCI_DEVICE_ID_INTEL_HDA_TGL_LP 0xa0c8 +#define PCI_DEVICE_ID_INTEL_HDA_SKL 0xa170 +#define PCI_DEVICE_ID_INTEL_HDA_KBL 0xa171 +#define PCI_DEVICE_ID_INTEL_HDA_LBG_0 0xa1f0 +#define PCI_DEVICE_ID_INTEL_HDA_LBG_1 0xa270 +#define PCI_DEVICE_ID_INTEL_HDA_KBL_H 0xa2f0 +#define PCI_DEVICE_ID_INTEL_HDA_CNL_H 0xa348 +#define PCI_DEVICE_ID_INTEL_HDA_CML_S 0xa3f0 +#define PCI_DEVICE_ID_INTEL_HDA_LNL_P 0xa828 #define PCI_DEVICE_ID_INTEL_S21152BB 0xb152 +#define PCI_DEVICE_ID_INTEL_HDA_CML_R 0xf0c8 +#define PCI_DEVICE_ID_INTEL_HDA_RKL_S 0xf1c8 #define PCI_VENDOR_ID_WANGXUN 0x8088 diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index ff99cf7a5d0d..da3a6c30f6d2 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -20,12 +20,20 @@ #define DW_AN_C37_1000BASEX 4 #define DW_10GBASER 5 +/* device vendor OUI */ +#define DW_OUI_WX 0x0018fc80 + +/* dev_flag */ +#define DW_DEV_TXGBE BIT(0) + struct xpcs_id; struct dw_xpcs { struct mdio_device *mdiodev; const struct xpcs_id *id; struct phylink_pcs pcs; + phy_interface_t interface; + int dev_flag; }; int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface); diff --git a/include/linux/pds/pds_adminq.h b/include/linux/pds/pds_adminq.h index bcba7fda3cc9..4b4e9a98b37b 100644 --- a/include/linux/pds/pds_adminq.h +++ b/include/linux/pds/pds_adminq.h @@ -818,6 +818,367 @@ struct pds_vdpa_set_features_cmd { __le64 features; }; +#define PDS_LM_DEVICE_STATE_LENGTH 65536 +#define PDS_LM_CHECK_DEVICE_STATE_LENGTH(X) \ + PDS_CORE_SIZE_CHECK(union, PDS_LM_DEVICE_STATE_LENGTH, X) + +/* + * enum pds_lm_cmd_opcode - Live Migration Device commands + */ +enum pds_lm_cmd_opcode { + PDS_LM_CMD_HOST_VF_STATUS = 1, + + /* Device state commands */ + PDS_LM_CMD_STATE_SIZE = 16, + PDS_LM_CMD_SUSPEND = 18, + PDS_LM_CMD_SUSPEND_STATUS = 19, + PDS_LM_CMD_RESUME = 20, + PDS_LM_CMD_SAVE = 21, + PDS_LM_CMD_RESTORE = 22, + + /* Dirty page tracking commands */ + PDS_LM_CMD_DIRTY_STATUS = 32, + PDS_LM_CMD_DIRTY_ENABLE = 33, + PDS_LM_CMD_DIRTY_DISABLE = 34, + PDS_LM_CMD_DIRTY_READ_SEQ = 35, + PDS_LM_CMD_DIRTY_WRITE_ACK = 36, +}; + +/** + * struct pds_lm_cmd - generic command + * @opcode: Opcode + * @rsvd: Word boundary padding + * @vf_id: VF id + * @rsvd2: Structure padding to 60 Bytes + */ +struct pds_lm_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 rsvd2[56]; +}; + +/** + * struct pds_lm_state_size_cmd - STATE_SIZE command + * @opcode: Opcode + * @rsvd: Word boundary padding + * @vf_id: VF id + */ +struct pds_lm_state_size_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; +}; + +/** + * struct pds_lm_state_size_comp - STATE_SIZE command completion + * @status: Status of the command (enum pds_core_status_code) + * @rsvd: Word boundary padding + * @comp_index: Index in the desc ring for which this is the completion + * @size: Size of the device state + * @rsvd2: Word boundary padding + * @color: Color bit + */ +struct pds_lm_state_size_comp { + u8 status; + u8 rsvd; + __le16 comp_index; + union { + __le64 size; + u8 rsvd2[11]; + } __packed; + u8 color; +}; + +enum pds_lm_suspend_resume_type { + PDS_LM_SUSPEND_RESUME_TYPE_FULL = 0, + PDS_LM_SUSPEND_RESUME_TYPE_P2P = 1, +}; + +/** + * struct pds_lm_suspend_cmd - SUSPEND command + * @opcode: Opcode PDS_LM_CMD_SUSPEND + * @rsvd: Word boundary padding + * @vf_id: VF id + * @type: Type of suspend (enum pds_lm_suspend_resume_type) + */ +struct pds_lm_suspend_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 type; +}; + +/** + * struct pds_lm_suspend_status_cmd - SUSPEND status command + * @opcode: Opcode PDS_AQ_CMD_LM_SUSPEND_STATUS + * @rsvd: Word boundary padding + * @vf_id: VF id + * @type: Type of suspend (enum pds_lm_suspend_resume_type) + */ +struct pds_lm_suspend_status_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 type; +}; + +/** + * struct pds_lm_resume_cmd - RESUME command + * @opcode: Opcode PDS_LM_CMD_RESUME + * @rsvd: Word boundary padding + * @vf_id: VF id + * @type: Type of resume (enum pds_lm_suspend_resume_type) + */ +struct pds_lm_resume_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 type; +}; + +/** + * struct pds_lm_sg_elem - Transmit scatter-gather (SG) descriptor element + * @addr: DMA address of SG element data buffer + * @len: Length of SG element data buffer, in bytes + * @rsvd: Word boundary padding + */ +struct pds_lm_sg_elem { + __le64 addr; + __le32 len; + __le16 rsvd[2]; +}; + +/** + * struct pds_lm_save_cmd - SAVE command + * @opcode: Opcode PDS_LM_CMD_SAVE + * @rsvd: Word boundary padding + * @vf_id: VF id + * @rsvd2: Word boundary padding + * @sgl_addr: IOVA address of the SGL to dma the device state + * @num_sge: Total number of SG elements + */ +struct pds_lm_save_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 rsvd2[4]; + __le64 sgl_addr; + __le32 num_sge; +} __packed; + +/** + * struct pds_lm_restore_cmd - RESTORE command + * @opcode: Opcode PDS_LM_CMD_RESTORE + * @rsvd: Word boundary padding + * @vf_id: VF id + * @rsvd2: Word boundary padding + * @sgl_addr: IOVA address of the SGL to dma the device state + * @num_sge: Total number of SG elements + */ +struct pds_lm_restore_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 rsvd2[4]; + __le64 sgl_addr; + __le32 num_sge; +} __packed; + +/** + * union pds_lm_dev_state - device state information + * @words: Device state words + */ +union pds_lm_dev_state { + __le32 words[PDS_LM_DEVICE_STATE_LENGTH / sizeof(__le32)]; +}; + +enum pds_lm_host_vf_status { + PDS_LM_STA_NONE = 0, + PDS_LM_STA_IN_PROGRESS, + PDS_LM_STA_MAX, +}; + +/** + * struct pds_lm_dirty_region_info - Memory region info for STATUS and ENABLE + * @dma_base: Base address of the DMA-contiguous memory region + * @page_count: Number of pages in the memory region + * @page_size_log2: Log2 page size in the memory region + * @rsvd: Word boundary padding + */ +struct pds_lm_dirty_region_info { + __le64 dma_base; + __le32 page_count; + u8 page_size_log2; + u8 rsvd[3]; +}; + +/** + * struct pds_lm_dirty_status_cmd - DIRTY_STATUS command + * @opcode: Opcode PDS_LM_CMD_DIRTY_STATUS + * @rsvd: Word boundary padding + * @vf_id: VF id + * @max_regions: Capacity of the region info buffer + * @rsvd2: Word boundary padding + * @regions_dma: DMA address of the region info buffer + * + * The minimum of max_regions (from the command) and num_regions (from the + * completion) of struct pds_lm_dirty_region_info will be written to + * regions_dma. + * + * The max_regions may be zero, in which case regions_dma is ignored. In that + * case, the completion will only report the maximum number of regions + * supported by the device, and the number of regions currently enabled. + */ +struct pds_lm_dirty_status_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 max_regions; + u8 rsvd2[3]; + __le64 regions_dma; +} __packed; + +/** + * enum pds_lm_dirty_bmp_type - Type of dirty page bitmap + * @PDS_LM_DIRTY_BMP_TYPE_NONE: No bitmap / disabled + * @PDS_LM_DIRTY_BMP_TYPE_SEQ_ACK: Seq/Ack bitmap representation + */ +enum pds_lm_dirty_bmp_type { + PDS_LM_DIRTY_BMP_TYPE_NONE = 0, + PDS_LM_DIRTY_BMP_TYPE_SEQ_ACK = 1, +}; + +/** + * struct pds_lm_dirty_status_comp - STATUS command completion + * @status: Status of the command (enum pds_core_status_code) + * @rsvd: Word boundary padding + * @comp_index: Index in the desc ring for which this is the completion + * @max_regions: Maximum number of regions supported by the device + * @num_regions: Number of regions currently enabled + * @bmp_type: Type of dirty bitmap representation + * @rsvd2: Word boundary padding + * @bmp_type_mask: Mask of supported bitmap types, bit index per type + * @rsvd3: Word boundary padding + * @color: Color bit + * + * This completion descriptor is used for STATUS, ENABLE, and DISABLE. + */ +struct pds_lm_dirty_status_comp { + u8 status; + u8 rsvd; + __le16 comp_index; + u8 max_regions; + u8 num_regions; + u8 bmp_type; + u8 rsvd2; + __le32 bmp_type_mask; + u8 rsvd3[3]; + u8 color; +}; + +/** + * struct pds_lm_dirty_enable_cmd - DIRTY_ENABLE command + * @opcode: Opcode PDS_LM_CMD_DIRTY_ENABLE + * @rsvd: Word boundary padding + * @vf_id: VF id + * @bmp_type: Type of dirty bitmap representation + * @num_regions: Number of entries in the region info buffer + * @rsvd2: Word boundary padding + * @regions_dma: DMA address of the region info buffer + * + * The num_regions must be nonzero, and less than or equal to the maximum + * number of regions supported by the device. + * + * The memory regions should not overlap. + * + * The information should be initialized by the driver. The device may modify + * the information on successful completion, such as by size-aligning the + * number of pages in a region. + * + * The modified number of pages will be greater than or equal to the page count + * given in the enable command, and at least as coarsly aligned as the given + * value. For example, the count might be aligned to a multiple of 64, but + * if the value is already a multiple of 128 or higher, it will not change. + * If the driver requires its own minimum alignment of the number of pages, the + * driver should account for that already in the region info of this command. + * + * This command uses struct pds_lm_dirty_status_comp for its completion. + */ +struct pds_lm_dirty_enable_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 bmp_type; + u8 num_regions; + u8 rsvd2[2]; + __le64 regions_dma; +} __packed; + +/** + * struct pds_lm_dirty_disable_cmd - DIRTY_DISABLE command + * @opcode: Opcode PDS_LM_CMD_DIRTY_DISABLE + * @rsvd: Word boundary padding + * @vf_id: VF id + * + * Dirty page tracking will be disabled. This may be called in any state, as + * long as dirty page tracking is supported by the device, to ensure that dirty + * page tracking is disabled. + * + * This command uses struct pds_lm_dirty_status_comp for its completion. On + * success, num_regions will be zero. + */ +struct pds_lm_dirty_disable_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; +}; + +/** + * struct pds_lm_dirty_seq_ack_cmd - DIRTY_READ_SEQ or _WRITE_ACK command + * @opcode: Opcode PDS_LM_CMD_DIRTY_[READ_SEQ|WRITE_ACK] + * @rsvd: Word boundary padding + * @vf_id: VF id + * @off_bytes: Byte offset in the bitmap + * @len_bytes: Number of bytes to transfer + * @num_sge: Number of DMA scatter gather elements + * @rsvd2: Word boundary padding + * @sgl_addr: DMA address of scatter gather list + * + * Read bytes from the SEQ bitmap, or write bytes into the ACK bitmap. + * + * This command treats the entire bitmap as a byte buffer. It does not + * distinguish between guest memory regions. The driver should refer to the + * number of pages in each region, according to PDS_LM_CMD_DIRTY_STATUS, to + * determine the region boundaries in the bitmap. Each region will be + * represented by exactly the number of bits as the page count for that region, + * immediately following the last bit of the previous region. + */ +struct pds_lm_dirty_seq_ack_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + __le32 off_bytes; + __le32 len_bytes; + __le16 num_sge; + u8 rsvd2[2]; + __le64 sgl_addr; +} __packed; + +/** + * struct pds_lm_host_vf_status_cmd - HOST_VF_STATUS command + * @opcode: Opcode PDS_LM_CMD_HOST_VF_STATUS + * @rsvd: Word boundary padding + * @vf_id: VF id + * @status: Current LM status of host VF driver (enum pds_lm_host_status) + */ +struct pds_lm_host_vf_status_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 status; +}; + union pds_core_adminq_cmd { u8 opcode; u8 bytes[64]; @@ -844,6 +1205,17 @@ union pds_core_adminq_cmd { struct pds_vdpa_vq_init_cmd vdpa_vq_init; struct pds_vdpa_vq_reset_cmd vdpa_vq_reset; + struct pds_lm_suspend_cmd lm_suspend; + struct pds_lm_suspend_status_cmd lm_suspend_status; + struct pds_lm_resume_cmd lm_resume; + struct pds_lm_state_size_cmd lm_state_size; + struct pds_lm_save_cmd lm_save; + struct pds_lm_restore_cmd lm_restore; + struct pds_lm_host_vf_status_cmd lm_host_vf_status; + struct pds_lm_dirty_status_cmd lm_dirty_status; + struct pds_lm_dirty_enable_cmd lm_dirty_enable; + struct pds_lm_dirty_disable_cmd lm_dirty_disable; + struct pds_lm_dirty_seq_ack_cmd lm_dirty_seq_ack; }; union pds_core_adminq_comp { @@ -868,6 +1240,9 @@ union pds_core_adminq_comp { struct pds_vdpa_vq_init_comp vdpa_vq_init; struct pds_vdpa_vq_reset_comp vdpa_vq_reset; + + struct pds_lm_state_size_comp lm_state_size; + struct pds_lm_dirty_status_comp lm_dirty_status; }; #ifndef __CHECKER__ diff --git a/include/linux/pds/pds_common.h b/include/linux/pds/pds_common.h index 435c8e8161c2..30581e2e04cc 100644 --- a/include/linux/pds/pds_common.h +++ b/include/linux/pds/pds_common.h @@ -34,16 +34,19 @@ enum pds_core_vif_types { #define PDS_DEV_TYPE_CORE_STR "Core" #define PDS_DEV_TYPE_VDPA_STR "vDPA" -#define PDS_DEV_TYPE_VFIO_STR "VFio" +#define PDS_DEV_TYPE_VFIO_STR "vfio" #define PDS_DEV_TYPE_ETH_STR "Eth" #define PDS_DEV_TYPE_RDMA_STR "RDMA" #define PDS_DEV_TYPE_LM_STR "LM" #define PDS_VDPA_DEV_NAME PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_VDPA_STR +#define PDS_VFIO_LM_DEV_NAME PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_LM_STR "." PDS_DEV_TYPE_VFIO_STR + +struct pdsc; int pdsc_register_notify(struct notifier_block *nb); void pdsc_unregister_notify(struct notifier_block *nb); void *pdsc_get_pf_struct(struct pci_dev *vf_pdev); -int pds_client_register(struct pci_dev *pf_pdev, char *devname); -int pds_client_unregister(struct pci_dev *pf_pdev, u16 client_id); +int pds_client_register(struct pdsc *pf, char *devname); +int pds_client_unregister(struct pdsc *pf, u16 client_id); #endif /* _PDS_COMMON_H_ */ diff --git a/include/linux/pds/pds_core_if.h b/include/linux/pds/pds_core_if.h index e838a2b90440..17a87c1a55d7 100644 --- a/include/linux/pds/pds_core_if.h +++ b/include/linux/pds/pds_core_if.h @@ -79,6 +79,7 @@ enum pds_core_status_code { PDS_RC_EVFID = 31, /* VF ID does not exist */ PDS_RC_BAD_FW = 32, /* FW file is invalid or corrupted */ PDS_RC_ECLIENT = 33, /* No such client id */ + PDS_RC_BAD_PCI = 255, /* Broken PCI when reading status */ }; /** diff --git a/include/linux/peci.h b/include/linux/peci.h index 06e6ef935297..9b3d36aff431 100644 --- a/include/linux/peci.h +++ b/include/linux/peci.h @@ -42,13 +42,13 @@ struct peci_controller_ops { */ struct peci_controller { struct device dev; - struct peci_controller_ops *ops; + const struct peci_controller_ops *ops; struct mutex bus_lock; /* held for the duration of xfer */ u8 id; }; struct peci_controller *devm_peci_controller_add(struct device *parent, - struct peci_controller_ops *ops); + const struct peci_controller_ops *ops); static inline struct peci_controller *to_peci_controller(void *d) { diff --git a/include/linux/percpu.h b/include/linux/percpu.h index b3b458442330..8c677f185901 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -35,6 +35,12 @@ #define PCPU_BITMAP_BLOCK_BITS (PCPU_BITMAP_BLOCK_SIZE >> \ PCPU_MIN_ALLOC_SHIFT) +#ifdef CONFIG_RANDOM_KMALLOC_CACHES +#define PERCPU_DYNAMIC_SIZE_SHIFT 12 +#else +#define PERCPU_DYNAMIC_SIZE_SHIFT 10 +#endif + /* * Percpu allocator can serve percpu allocations before slab is * initialized which allows slab to depend on the percpu allocator. @@ -42,7 +48,7 @@ * for this. Keep PERCPU_DYNAMIC_RESERVE equal to or larger than * PERCPU_DYNAMIC_EARLY_SIZE. */ -#define PERCPU_DYNAMIC_EARLY_SIZE (20 << 10) +#define PERCPU_DYNAMIC_EARLY_SIZE (20 << PERCPU_DYNAMIC_SIZE_SHIFT) /* * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy @@ -56,9 +62,9 @@ * intelligent way to determine this would be nice. */ #if BITS_PER_LONG > 32 -#define PERCPU_DYNAMIC_RESERVE (28 << 10) +#define PERCPU_DYNAMIC_RESERVE (28 << PERCPU_DYNAMIC_SIZE_SHIFT) #else -#define PERCPU_DYNAMIC_RESERVE (20 << 10) +#define PERCPU_DYNAMIC_RESERVE (20 << PERCPU_DYNAMIC_SIZE_SHIFT) #endif extern void *pcpu_base_addr; @@ -126,6 +132,7 @@ extern void __init setup_per_cpu_areas(void); extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) __alloc_size(1); extern void __percpu *__alloc_percpu(size_t size, size_t align) __alloc_size(1); extern void free_percpu(void __percpu *__pdata); +extern size_t pcpu_alloc_size(void __percpu *__pdata); DEFINE_FREE(free_percpu, void __percpu *, free_percpu(_T)) diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 75b73c83bc9d..3a44dd1e33d2 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -30,22 +30,35 @@ struct percpu_counter { extern int percpu_counter_batch; -int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp, - struct lock_class_key *key); +int __percpu_counter_init_many(struct percpu_counter *fbc, s64 amount, + gfp_t gfp, u32 nr_counters, + struct lock_class_key *key); -#define percpu_counter_init(fbc, value, gfp) \ +#define percpu_counter_init_many(fbc, value, gfp, nr_counters) \ ({ \ static struct lock_class_key __key; \ \ - __percpu_counter_init(fbc, value, gfp, &__key); \ + __percpu_counter_init_many(fbc, value, gfp, nr_counters,\ + &__key); \ }) -void percpu_counter_destroy(struct percpu_counter *fbc); + +#define percpu_counter_init(fbc, value, gfp) \ + percpu_counter_init_many(fbc, value, gfp, 1) + +void percpu_counter_destroy_many(struct percpu_counter *fbc, u32 nr_counters); +static inline void percpu_counter_destroy(struct percpu_counter *fbc) +{ + percpu_counter_destroy_many(fbc, 1); +} + void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch); s64 __percpu_counter_sum(struct percpu_counter *fbc); int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch); +bool __percpu_counter_limited_add(struct percpu_counter *fbc, s64 limit, + s64 amount, s32 batch); void percpu_counter_sync(struct percpu_counter *fbc); static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) @@ -58,6 +71,13 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) percpu_counter_add_batch(fbc, amount, percpu_counter_batch); } +static inline bool +percpu_counter_limited_add(struct percpu_counter *fbc, s64 limit, s64 amount) +{ + return __percpu_counter_limited_add(fbc, limit, amount, + percpu_counter_batch); +} + /* * With percpu_counter_add_local() and percpu_counter_sub_local(), counts * are accumulated in local per cpu counter and not in fbc->count until @@ -116,11 +136,27 @@ struct percpu_counter { s64 count; }; +static inline int percpu_counter_init_many(struct percpu_counter *fbc, + s64 amount, gfp_t gfp, + u32 nr_counters) +{ + u32 i; + + for (i = 0; i < nr_counters; i++) + fbc[i].count = amount; + + return 0; +} + static inline int percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp) { - fbc->count = amount; - return 0; + return percpu_counter_init_many(fbc, amount, gfp, 1); +} + +static inline void percpu_counter_destroy_many(struct percpu_counter *fbc, + u32 nr_counters) +{ } static inline void percpu_counter_destroy(struct percpu_counter *fbc) @@ -158,6 +194,27 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount) local_irq_restore(flags); } +static inline bool +percpu_counter_limited_add(struct percpu_counter *fbc, s64 limit, s64 amount) +{ + unsigned long flags; + bool good = false; + s64 count; + + if (amount == 0) + return true; + + local_irq_save(flags); + count = fbc->count + amount; + if ((amount > 0 && count <= limit) || + (amount < 0 && count >= limit)) { + fbc->count = count; + good = true; + } + local_irq_restore(flags); + return good; +} + /* non-SMP percpu_counter_add_local is the same with percpu_counter_add */ static inline void percpu_counter_add_local(struct percpu_counter *fbc, s64 amount) diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index a0801f68762b..143fbc10ecfe 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -187,5 +187,6 @@ void armpmu_free_irq(int irq, int cpu); #endif /* CONFIG_ARM_PMU */ #define ARMV8_SPE_PDEV_NAME "arm,spe-v1" +#define ARMV8_TRBE_PDEV_NAME "arm,trbe" #endif /* __ARM_PMU_H__ */ diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index e3899bd77f5c..9c226adf938a 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -234,9 +234,12 @@ /* * Event filters for PMUv3 */ -#define ARMV8_PMU_EXCLUDE_EL1 (1U << 31) -#define ARMV8_PMU_EXCLUDE_EL0 (1U << 30) -#define ARMV8_PMU_INCLUDE_EL2 (1U << 27) +#define ARMV8_PMU_EXCLUDE_EL1 (1U << 31) +#define ARMV8_PMU_EXCLUDE_EL0 (1U << 30) +#define ARMV8_PMU_EXCLUDE_NS_EL1 (1U << 29) +#define ARMV8_PMU_EXCLUDE_NS_EL0 (1U << 28) +#define ARMV8_PMU_INCLUDE_EL2 (1U << 27) +#define ARMV8_PMU_EXCLUDE_EL3 (1U << 26) /* * PMUSERENR: user enable reg diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h index 43fc892aa7d9..43282e22ebe1 100644 --- a/include/linux/perf/riscv_pmu.h +++ b/include/linux/perf/riscv_pmu.h @@ -6,8 +6,8 @@ * */ -#ifndef _ASM_RISCV_PERF_EVENT_H -#define _ASM_RISCV_PERF_EVENT_H +#ifndef _RISCV_PMU_H +#define _RISCV_PMU_H #include <linux/perf_event.h> #include <linux/ptrace.h> @@ -21,7 +21,7 @@ #define RISCV_MAX_COUNTERS 64 #define RISCV_OP_UNSUPP (-EOPNOTSUPP) -#define RISCV_PMU_PDEV_NAME "riscv-pmu" +#define RISCV_PMU_SBI_PDEV_NAME "riscv-pmu-sbi" #define RISCV_PMU_LEGACY_PDEV_NAME "riscv-pmu-legacy" #define RISCV_PMU_STOP_FLAG_RESET 1 @@ -55,6 +55,10 @@ struct riscv_pmu { void (*ctr_start)(struct perf_event *event, u64 init_val); void (*ctr_stop)(struct perf_event *event, unsigned long flag); int (*event_map)(struct perf_event *event, u64 *config); + void (*event_init)(struct perf_event *event); + void (*event_mapped)(struct perf_event *event, struct mm_struct *mm); + void (*event_unmapped)(struct perf_event *event, struct mm_struct *mm); + uint8_t (*csr_index)(struct perf_event *event); struct cpu_hw_events __percpu *hw_events; struct hlist_node node; @@ -81,4 +85,4 @@ int riscv_pmu_get_hpm_info(u32 *hw_ctr_width, u32 *num_hw_ctr); #endif /* CONFIG_RISCV_PMU */ -#endif /* _ASM_RISCV_PERF_EVENT_H */ +#endif /* _RISCV_PMU_H */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2166a69e3bf2..5547ba68e6e4 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -288,10 +288,9 @@ struct perf_event_pmu_context; #define PERF_PMU_CAP_EXTENDED_REGS 0x0008 #define PERF_PMU_CAP_EXCLUSIVE 0x0010 #define PERF_PMU_CAP_ITRACE 0x0020 -#define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x0040 -#define PERF_PMU_CAP_NO_EXCLUDE 0x0080 -#define PERF_PMU_CAP_AUX_OUTPUT 0x0100 -#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0200 +#define PERF_PMU_CAP_NO_EXCLUDE 0x0040 +#define PERF_PMU_CAP_AUX_OUTPUT 0x0080 +#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100 struct perf_output_handle; @@ -445,7 +444,8 @@ struct pmu { /* * Will return the value for perf_event_mmap_page::index for this event, - * if no implementation is provided it will default to: event->hw.idx + 1. + * if no implementation is provided it will default to 0 (see + * perf_event_idx_default). */ int (*event_idx) (struct perf_event *event); /*optional */ @@ -704,6 +704,7 @@ struct perf_event { /* The cumulative AND of all event_caps for events in this group. */ int group_caps; + unsigned int group_generation; struct perf_event *group_leader; /* * event->pmu will always point to pmu in which this event belongs. @@ -842,11 +843,11 @@ struct perf_event { }; /* - * ,-----------------------[1:n]----------------------. - * V V - * perf_event_context <-[1:n]-> perf_event_pmu_context <--- perf_event - * ^ ^ | | - * `--------[1:n]---------' `-[n:1]-> pmu <-[1:n]-' + * ,-----------------------[1:n]------------------------. + * V V + * perf_event_context <-[1:n]-> perf_event_pmu_context <-[1:n]- perf_event + * | | + * `--[n:1]-> pmu <-[1:n]--' * * * struct perf_event_pmu_context lifetime is refcount based and RCU freed @@ -864,6 +865,9 @@ struct perf_event { * ctx->mutex pinning the configuration. Since we hold a reference on * group_leader (through the filedesc) it can't go away, therefore it's * associated pmu_ctx must exist and cannot change due to ctx->mutex. + * + * perf_event holds a refcount on perf_event_context + * perf_event holds a refcount on perf_event_pmu_context */ struct perf_event_pmu_context { struct pmu *pmu; @@ -878,6 +882,7 @@ struct perf_event_pmu_context { unsigned int embedded : 1; unsigned int nr_events; + unsigned int nr_cgroups; atomic_t refcount; /* event <-> epc */ struct rcu_head rcu_head; @@ -1194,7 +1199,8 @@ struct perf_sample_data { PERF_MEM_S(LVL, NA) |\ PERF_MEM_S(SNOOP, NA) |\ PERF_MEM_S(LOCK, NA) |\ - PERF_MEM_S(TLB, NA)) + PERF_MEM_S(TLB, NA) |\ + PERF_MEM_S(LVLNUM, NA)) static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr, u64 period) @@ -1316,15 +1322,31 @@ extern int perf_event_output(struct perf_event *event, struct pt_regs *regs); static inline bool -is_default_overflow_handler(struct perf_event *event) +__is_default_overflow_handler(perf_overflow_handler_t overflow_handler) { - if (likely(event->overflow_handler == perf_event_output_forward)) + if (likely(overflow_handler == perf_event_output_forward)) return true; - if (unlikely(event->overflow_handler == perf_event_output_backward)) + if (unlikely(overflow_handler == perf_event_output_backward)) return true; return false; } +#define is_default_overflow_handler(event) \ + __is_default_overflow_handler((event)->overflow_handler) + +#ifdef CONFIG_BPF_SYSCALL +static inline bool uses_default_overflow_handler(struct perf_event *event) +{ + if (likely(is_default_overflow_handler(event))) + return true; + + return __is_default_overflow_handler(event->orig_overflow_handler); +} +#else +#define uses_default_overflow_handler(event) \ + is_default_overflow_handler(event) +#endif + extern void perf_event_header__init_id(struct perf_event_header *header, struct perf_sample_data *data, @@ -1556,7 +1578,7 @@ extern int sysctl_perf_cpu_time_max_percent; extern void perf_sample_event_took(u64 sample_len_ns); -int perf_proc_update_handler(struct ctl_table *table, int write, +int perf_event_max_sample_rate_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); @@ -1860,10 +1882,6 @@ extern void arch_perf_update_userpage(struct perf_event *event, struct perf_event_mmap_page *userpg, u64 now); -#ifdef CONFIG_MMU -extern __weak u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr); -#endif - /* * Snapshot branch stack on software events. * diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 5063b482e34f..af7639c3b0a3 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -5,6 +5,9 @@ #include <linux/pfn.h> #include <asm/pgtable.h> +#define PMD_ORDER (PMD_SHIFT - PAGE_SHIFT) +#define PUD_ORDER (PUD_SHIFT - PAGE_SHIFT) + #ifndef __ASSEMBLY__ #ifdef CONFIG_MMU @@ -63,7 +66,6 @@ static inline unsigned long pte_index(unsigned long address) { return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); } -#define pte_index pte_index #ifndef pmd_index static inline unsigned long pmd_index(unsigned long address) @@ -99,7 +101,7 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) ((pte_t *)kmap_local_page(pmd_page(*(pmd))) + pte_index((address))) #define pte_unmap(pte) do { \ kunmap_local((pte)); \ - /* rcu_read_unlock() to be added later */ \ + rcu_read_unlock(); \ } while (0) #else static inline pte_t *__pte_map(pmd_t *pmd, unsigned long address) @@ -108,10 +110,12 @@ static inline pte_t *__pte_map(pmd_t *pmd, unsigned long address) } static inline void pte_unmap(pte_t *pte) { - /* rcu_read_unlock() to be added later */ + rcu_read_unlock(); } #endif +void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable); + /* Find an entry in the second-level page table.. */ #ifndef pmd_offset static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) @@ -180,6 +184,68 @@ static inline int pmd_young(pmd_t pmd) } #endif +/* + * A facility to provide lazy MMU batching. This allows PTE updates and + * page invalidations to be delayed until a call to leave lazy MMU mode + * is issued. Some architectures may benefit from doing this, and it is + * beneficial for both shadow and direct mode hypervisors, which may batch + * the PTE updates which happen during this window. Note that using this + * interface requires that read hazards be removed from the code. A read + * hazard could result in the direct mode hypervisor case, since the actual + * write to the page tables may not yet have taken place, so reads though + * a raw PTE pointer after it has been modified are not guaranteed to be + * up to date. This mode can only be entered and left under the protection of + * the page table locks for all page tables which may be modified. In the UP + * case, this is required so that preemption is disabled, and in the SMP case, + * it must synchronize the delayed page table writes properly on other CPUs. + */ +#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE +#define arch_enter_lazy_mmu_mode() do {} while (0) +#define arch_leave_lazy_mmu_mode() do {} while (0) +#define arch_flush_lazy_mmu_mode() do {} while (0) +#endif + +#ifndef set_ptes + +#ifndef pte_next_pfn +static inline pte_t pte_next_pfn(pte_t pte) +{ + return __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT)); +} +#endif + +/** + * set_ptes - Map consecutive pages to a contiguous range of addresses. + * @mm: Address space to map the pages into. + * @addr: Address to map the first page at. + * @ptep: Page table pointer for the first entry. + * @pte: Page table entry for the first page. + * @nr: Number of pages to map. + * + * May be overridden by the architecture, or the architecture can define + * set_pte() and PFN_PTE_SHIFT. + * + * Context: The caller holds the page table lock. The pages all belong + * to the same folio. The PTEs are all in the same PMD. + */ +static inline void set_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, unsigned int nr) +{ + page_table_check_ptes_set(mm, ptep, pte, nr); + + arch_enter_lazy_mmu_mode(); + for (;;) { + set_pte(ptep, pte); + if (--nr == 0) + break; + ptep++; + pte = pte_next_pfn(pte); + } + arch_leave_lazy_mmu_mode(); +} +#endif +#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1) + #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, @@ -313,6 +379,20 @@ static inline bool arch_has_hw_pte_young(void) } #endif +#ifndef arch_check_zapped_pte +static inline void arch_check_zapped_pte(struct vm_area_struct *vma, + pte_t pte) +{ +} +#endif + +#ifndef arch_check_zapped_pmd +static inline void arch_check_zapped_pmd(struct vm_area_struct *vma, + pmd_t pmd) +{ +} +#endif + #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long address, @@ -320,7 +400,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, { pte_t pte = ptep_get(ptep); pte_clear(mm, address, ptep); - page_table_check_pte_clear(mm, address, pte); + page_table_check_pte_clear(mm, pte); return pte; } #endif @@ -390,6 +470,7 @@ static inline pmd_t pmdp_get_lockless(pmd_t *pmdp) return pmd; } #define pmdp_get_lockless pmdp_get_lockless +#define pmdp_get_lockless_sync() tlb_remove_table_sync_one() #endif /* CONFIG_PGTABLE_LEVELS > 2 */ #endif /* CONFIG_GUP_GET_PXX_LOW_HIGH */ @@ -408,6 +489,9 @@ static inline pmd_t pmdp_get_lockless(pmd_t *pmdp) { return pmdp_get(pmdp); } +static inline void pmdp_get_lockless_sync(void) +{ +} #endif #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -419,7 +503,7 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, pmd_t pmd = *pmdp; pmd_clear(pmdp); - page_table_check_pmd_clear(mm, address, pmd); + page_table_check_pmd_clear(mm, pmd); return pmd; } @@ -432,7 +516,7 @@ static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm, pud_t pud = *pudp; pud_clear(pudp); - page_table_check_pud_clear(mm, address, pud); + page_table_check_pud_clear(mm, pud); return pud; } @@ -450,11 +534,11 @@ static inline pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma, #endif #ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL -static inline pud_t pudp_huge_get_and_clear_full(struct mm_struct *mm, +static inline pud_t pudp_huge_get_and_clear_full(struct vm_area_struct *vma, unsigned long address, pud_t *pudp, int full) { - return pudp_huge_get_and_clear(mm, address, pudp); + return pudp_huge_get_and_clear(vma->vm_mm, address, pudp); } #endif #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -515,6 +599,20 @@ extern pud_t pudp_huge_clear_flush(struct vm_area_struct *vma, pud_t *pudp); #endif +#ifndef pte_mkwrite +static inline pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma) +{ + return pte_mkwrite_novma(pte); +} +#endif + +#if defined(CONFIG_ARCH_WANT_PMD_MKWRITE) && !defined(pmd_mkwrite) +static inline pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) +{ + return pmd_mkwrite_novma(pmd); +} +#endif + #ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT struct mm_struct; static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep) @@ -558,6 +656,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, #endif #ifndef __HAVE_ARCH_PUDP_SET_WRPROTECT #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD +#ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline void pudp_set_wrprotect(struct mm_struct *mm, unsigned long address, pud_t *pudp) { @@ -571,6 +670,7 @@ static inline void pudp_set_wrprotect(struct mm_struct *mm, { BUILD_BUG(); } +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ #endif @@ -693,11 +793,14 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) { return pmd_val(pmd_a) == pmd_val(pmd_b); } +#endif +#ifndef pud_same static inline int pud_same(pud_t pud_a, pud_t pud_b) { return pud_val(pud_a) == pud_val(pud_b); } +#define pud_same pud_same #endif #ifndef __HAVE_ARCH_P4D_SAME @@ -1041,27 +1144,6 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) #endif /* - * A facility to provide lazy MMU batching. This allows PTE updates and - * page invalidations to be delayed until a call to leave lazy MMU mode - * is issued. Some architectures may benefit from doing this, and it is - * beneficial for both shadow and direct mode hypervisors, which may batch - * the PTE updates which happen during this window. Note that using this - * interface requires that read hazards be removed from the code. A read - * hazard could result in the direct mode hypervisor case, since the actual - * write to the page tables may not yet have taken place, so reads though - * a raw PTE pointer after it has been modified are not guaranteed to be - * up to date. This mode can only be entered and left under the protection of - * the page table locks for all page tables which may be modified. In the UP - * case, this is required so that preemption is disabled, and in the SMP case, - * it must synchronize the delayed page table writes properly on other CPUs. - */ -#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE -#define arch_enter_lazy_mmu_mode() do {} while (0) -#define arch_leave_lazy_mmu_mode() do {} while (0) -#define arch_flush_lazy_mmu_mode() do {} while (0) -#endif - -/* * A facility to provide batching of the reload of page tables and * other process state with the actual context switch code for * paravirtualized guests. By convention, only one of the batched @@ -1322,12 +1404,16 @@ static inline int pud_trans_unstable(pud_t *pud) #ifndef CONFIG_NUMA_BALANCING /* - * Technically a PTE can be PROTNONE even when not doing NUMA balancing but - * the only case the kernel cares is for NUMA balancing and is only ever set - * when the VMA is accessible. For PROT_NONE VMAs, the PTEs are not marked - * _PAGE_PROTNONE so by default, implement the helper as "always no". It - * is the responsibility of the caller to distinguish between PROT_NONE - * protections and NUMA hinting fault protections. + * In an inaccessible (PROT_NONE) VMA, pte_protnone() may indicate "yes". It is + * perfectly valid to indicate "no" in that case, which is why our default + * implementation defaults to "always no". + * + * In an accessible VMA, however, pte_protnone() reliably indicates PROT_NONE + * page protection due to NUMA hinting. NUMA hinting faults only apply in + * accessible VMAs. + * + * So, to reliably identify PROT_NONE PTEs that require a NUMA hinting fault, + * looking at the VMA accessibility is sufficient. */ static inline int pte_protnone(pte_t pte) { @@ -1499,6 +1585,9 @@ typedef unsigned int pgtbl_mod_mask; #define has_transparent_hugepage() IS_BUILTIN(CONFIG_TRANSPARENT_HUGEPAGE) #endif +#ifndef has_transparent_pud_hugepage +#define has_transparent_pud_hugepage() IS_BUILTIN(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) +#endif /* * On some architectures it depends on the mm if the p4d/pud or pmd * layer of the page table hierarchy is folded or not. diff --git a/include/linux/phy.h b/include/linux/phy.h index 11c1e91563d4..3cc52826f18e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -110,6 +110,7 @@ extern const int phy_10gbit_features_array[1]; * @PHY_INTERFACE_MODE_XGMII: 10 gigabit media-independent interface * @PHY_INTERFACE_MODE_XLGMII:40 gigabit media-independent interface * @PHY_INTERFACE_MODE_MOCA: Multimedia over Coax + * @PHY_INTERFACE_MODE_PSGMII: Penta SGMII * @PHY_INTERFACE_MODE_QSGMII: Quad SGMII * @PHY_INTERFACE_MODE_TRGMII: Turbo RGMII * @PHY_INTERFACE_MODE_100BASEX: 100 BaseX @@ -147,6 +148,7 @@ typedef enum { PHY_INTERFACE_MODE_XGMII, PHY_INTERFACE_MODE_XLGMII, PHY_INTERFACE_MODE_MOCA, + PHY_INTERFACE_MODE_PSGMII, PHY_INTERFACE_MODE_QSGMII, PHY_INTERFACE_MODE_TRGMII, PHY_INTERFACE_MODE_100BASEX, @@ -254,6 +256,8 @@ static inline const char *phy_modes(phy_interface_t interface) return "xlgmii"; case PHY_INTERFACE_MODE_MOCA: return "moca"; + case PHY_INTERFACE_MODE_PSGMII: + return "psgmii"; case PHY_INTERFACE_MODE_QSGMII: return "qsgmii"; case PHY_INTERFACE_MODE_TRGMII: @@ -298,6 +302,7 @@ static inline const char *phy_modes(phy_interface_t interface) #define MII_BUS_ID_SIZE 61 struct device; +struct kernel_hwtstamp_config; struct phylink; struct sfp_bus; struct sfp_upstream_ops; @@ -1104,6 +1109,39 @@ struct phy_driver { int (*led_blink_set)(struct phy_device *dev, u8 index, unsigned long *delay_on, unsigned long *delay_off); + /** + * @led_hw_is_supported: Can the HW support the given rules. + * @dev: PHY device which has the LED + * @index: Which LED of the PHY device + * @rules The core is interested in these rules + * + * Return 0 if yes, -EOPNOTSUPP if not, or an error code. + */ + int (*led_hw_is_supported)(struct phy_device *dev, u8 index, + unsigned long rules); + /** + * @led_hw_control_set: Set the HW to control the LED + * @dev: PHY device which has the LED + * @index: Which LED of the PHY device + * @rules The rules used to control the LED + * + * Returns 0, or a an error code. + */ + int (*led_hw_control_set)(struct phy_device *dev, u8 index, + unsigned long rules); + /** + * @led_hw_control_get: Get how the HW is controlling the LED + * @dev: PHY device which has the LED + * @index: Which LED of the PHY device + * @rules Pointer to the rules used to control the LED + * + * Set *@rules to how the HW is currently blinking. Returns 0 + * on success, or a error code if the current blinking cannot + * be represented in rules, or some other error happens. + */ + int (*led_hw_control_get)(struct phy_device *dev, u8 index, + unsigned long *rules); + }; #define to_phy_driver(d) container_of(to_mdio_common_driver(d), \ struct phy_driver, mdiodrv) @@ -1698,6 +1736,7 @@ void phy_detach(struct phy_device *phydev); void phy_start(struct phy_device *phydev); void phy_stop(struct phy_device *phydev); int phy_config_aneg(struct phy_device *phydev); +int _phy_start_aneg(struct phy_device *phydev); int phy_start_aneg(struct phy_device *phydev); int phy_aneg_done(struct phy_device *phydev); int phy_speed_down(struct phy_device *phydev, bool sync); @@ -1731,10 +1770,6 @@ int phy_start_cable_test_tdr(struct phy_device *phydev, } #endif -int phy_cable_test_result(struct phy_device *phydev, u8 pair, u16 result); -int phy_cable_test_fault_length(struct phy_device *phydev, u8 pair, - u16 cm); - static inline void phy_device_reset(struct phy_device *phydev, int value) { mdio_device_reset(&phydev->mdio, value); @@ -1826,6 +1861,7 @@ int genphy_c45_an_config_aneg(struct phy_device *phydev); int genphy_c45_an_disable_aneg(struct phy_device *phydev); int genphy_c45_read_mdix(struct phy_device *phydev); int genphy_c45_pma_read_abilities(struct phy_device *phydev); +int genphy_c45_pma_baset1_read_abilities(struct phy_device *phydev); int genphy_c45_read_eee_abilities(struct phy_device *phydev); int genphy_c45_pma_baset1_read_master_slave(struct phy_device *phydev); int genphy_c45_read_status(struct phy_device *phydev); @@ -1954,6 +1990,12 @@ int phy_ethtool_set_plca_cfg(struct phy_device *phydev, int phy_ethtool_get_plca_status(struct phy_device *phydev, struct phy_plca_status *plca_st); +int __phy_hwtstamp_get(struct phy_device *phydev, + struct kernel_hwtstamp_config *config); +int __phy_hwtstamp_set(struct phy_device *phydev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); + static inline int phy_package_read(struct phy_device *phydev, u32 regnum) { struct phy_package_shared *shared = phydev->shared; diff --git a/include/linux/phylib_stubs.h b/include/linux/phylib_stubs.h new file mode 100644 index 000000000000..1279f48c8a70 --- /dev/null +++ b/include/linux/phylib_stubs.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Stubs for the Network PHY library + */ + +#include <linux/rtnetlink.h> + +struct kernel_hwtstamp_config; +struct netlink_ext_ack; +struct phy_device; + +#if IS_ENABLED(CONFIG_PHYLIB) + +extern const struct phylib_stubs *phylib_stubs; + +struct phylib_stubs { + int (*hwtstamp_get)(struct phy_device *phydev, + struct kernel_hwtstamp_config *config); + int (*hwtstamp_set)(struct phy_device *phydev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); +}; + +static inline int phy_hwtstamp_get(struct phy_device *phydev, + struct kernel_hwtstamp_config *config) +{ + /* phylib_register_stubs() and phylib_unregister_stubs() + * also run under rtnl_lock(). + */ + ASSERT_RTNL(); + + if (!phylib_stubs) + return -EOPNOTSUPP; + + return phylib_stubs->hwtstamp_get(phydev, config); +} + +static inline int phy_hwtstamp_set(struct phy_device *phydev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) +{ + /* phylib_register_stubs() and phylib_unregister_stubs() + * also run under rtnl_lock(). + */ + ASSERT_RTNL(); + + if (!phylib_stubs) + return -EOPNOTSUPP; + + return phylib_stubs->hwtstamp_set(phydev, config, extack); +} + +#else + +static inline int phy_hwtstamp_get(struct phy_device *phydev, + struct kernel_hwtstamp_config *config) +{ + return -EOPNOTSUPP; +} + +static inline int phy_hwtstamp_set(struct phy_device *phydev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + +#endif diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 1817940a3418..875439ab45de 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -9,6 +9,7 @@ struct device_node; struct ethtool_cmd; struct fwnode_handle; struct net_device; +struct phylink; enum { MLO_PAUSE_NONE, @@ -200,8 +201,6 @@ enum phylink_op_type { * struct phylink_config - PHYLINK configuration structure * @dev: a pointer to a struct device associated with the MAC * @type: operation type of PHYLINK instance - * @legacy_pre_march2020: driver has not been updated for March 2020 updates - * (See commit 7cceb599d15d ("net: phylink: avoid mac_config calls") * @poll_fixed_state: if true, starts link_poll, * if MAC link is at %MLO_AN_FIXED mode. * @mac_managed_pm: if true, indicate the MAC driver is responsible for PHY PM. @@ -215,7 +214,6 @@ enum phylink_op_type { struct phylink_config { struct device *dev; enum phylink_op_type type; - bool legacy_pre_march2020; bool poll_fixed_state; bool mac_managed_pm; bool ovr_an_inband; @@ -225,35 +223,31 @@ struct phylink_config { unsigned long mac_capabilities; }; +void phylink_limit_mac_speed(struct phylink_config *config, u32 max_speed); + /** * struct phylink_mac_ops - MAC operations structure. - * @validate: Validate and update the link configuration. + * @mac_get_caps: Get MAC capabilities for interface mode. * @mac_select_pcs: Select a PCS for the interface mode. - * @mac_pcs_get_state: Read the current link state from the hardware. * @mac_prepare: prepare for a major reconfiguration of the interface. * @mac_config: configure the MAC for the selected mode and state. * @mac_finish: finish a major reconfiguration of the interface. - * @mac_an_restart: restart 802.3z BaseX autonegotiation. * @mac_link_down: take the link down. * @mac_link_up: allow the link to come up. * * The individual methods are described more fully below. */ struct phylink_mac_ops { - void (*validate)(struct phylink_config *config, - unsigned long *supported, - struct phylink_link_state *state); + unsigned long (*mac_get_caps)(struct phylink_config *config, + phy_interface_t interface); struct phylink_pcs *(*mac_select_pcs)(struct phylink_config *config, phy_interface_t interface); - void (*mac_pcs_get_state)(struct phylink_config *config, - struct phylink_link_state *state); int (*mac_prepare)(struct phylink_config *config, unsigned int mode, phy_interface_t iface); void (*mac_config)(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state); int (*mac_finish)(struct phylink_config *config, unsigned int mode, phy_interface_t iface); - void (*mac_an_restart)(struct phylink_config *config); void (*mac_link_down)(struct phylink_config *config, unsigned int mode, phy_interface_t interface); void (*mac_link_up)(struct phylink_config *config, @@ -264,39 +258,17 @@ struct phylink_mac_ops { #if 0 /* For kernel-doc purposes only. */ /** - * validate - Validate and update the link configuration + * mac_get_caps: Get MAC capabilities for interface mode. * @config: a pointer to a &struct phylink_config. - * @supported: ethtool bitmask for supported link modes. - * @state: a pointer to a &struct phylink_link_state. - * - * Clear bits in the @supported and @state->advertising masks that - * are not supportable by the MAC. - * - * Note that the PHY may be able to transform from one connection - * technology to another, so, eg, don't clear 1000BaseX just - * because the MAC is unable to BaseX mode. This is more about - * clearing unsupported speeds and duplex settings. The port modes - * should not be cleared; phylink_set_port_modes() will help with this. + * @interface: PHY interface mode. * - * When @config->supported_interfaces has been set, phylink will iterate - * over the supported interfaces to determine the full capability of the - * MAC. The validation function must not print errors if @state->interface - * is set to an unexpected value. - * - * When @config->supported_interfaces is empty, phylink will call this - * function with @state->interface set to %PHY_INTERFACE_MODE_NA, and - * expects the MAC driver to return all supported link modes. - * - * If the @state->interface mode is not supported, then the @supported - * mask must be cleared. - * - * This member is optional; if not set, the generic validator will be - * used making use of @config->mac_capabilities and - * @config->supported_interfaces to determine which link modes are - * supported. + * Optional method. When not provided, config->mac_capabilities will be used. + * When implemented, this returns the MAC capabilities for the specified + * interface mode where there is some special handling required by the MAC + * driver (e.g. not supporting half-duplex in certain interface modes.) */ -void validate(struct phylink_config *config, unsigned long *supported, - struct phylink_link_state *state); +unsigned long mac_get_caps(struct phylink_config *config, + phy_interface_t interface); /** * mac_select_pcs: Select a PCS for the interface mode. * @config: a pointer to a &struct phylink_config. @@ -314,25 +286,6 @@ struct phylink_pcs *mac_select_pcs(struct phylink_config *config, phy_interface_t interface); /** - * mac_pcs_get_state() - Read the current inband link state from the hardware - * @config: a pointer to a &struct phylink_config. - * @state: a pointer to a &struct phylink_link_state. - * - * Read the current inband link state from the MAC PCS, reporting the - * current speed in @state->speed, duplex mode in @state->duplex, pause - * mode in @state->pause using the %MLO_PAUSE_RX and %MLO_PAUSE_TX bits, - * negotiation completion state in @state->an_complete, and link up state - * in @state->link. If possible, @state->lp_advertising should also be - * populated. - * - * Note: This is a legacy method. This function will not be called unless - * legacy_pre_march2020 is set in &struct phylink_config and there is no - * PCS attached. - */ -void mac_pcs_get_state(struct phylink_config *config, - struct phylink_link_state *state); - -/** * mac_prepare() - prepare to change the PHY interface mode * @config: a pointer to a &struct phylink_config. * @mode: one of %MLO_AN_FIXED, %MLO_AN_PHY, %MLO_AN_INBAND. @@ -368,17 +321,9 @@ int mac_prepare(struct phylink_config *config, unsigned int mode, * guaranteed to be correct, and so any mac_config() implementation must * never reference these fields. * - * Note: For legacy March 2020 drivers (drivers with legacy_pre_march2020 set - * in their &phylnk_config and which don't have a PCS), this function will be - * called on each link up event, and to also change the in-band advert. For - * non-legacy drivers, it will only be called to reconfigure the MAC for a - * "major" change in e.g. interface mode. It will not be called for changes - * in speed, duplex or pause modes or to change the in-band advertisement. - * In any case, it is strongly preferred that speed, duplex and pause settings - * are handled in the mac_link_up() method and not in this method. - * - * (this requires a rewrite - please refer to mac_link_up() for situations - * where the PCS and MAC are not tightly integrated.) + * This will only be called to reconfigure the MAC for a "major" change in + * e.g. interface mode. It will not be called for changes in speed, duplex + * or pause modes or to change the in-band advertisement. * * In all negotiation modes, as defined by @mode, @state->pause indicates the * pause settings which should be applied as follows. If %MLO_PAUSE_AN is not @@ -410,7 +355,7 @@ int mac_prepare(struct phylink_config *config, unsigned int mode, * 1000base-X or Cisco SGMII mode depending on the @state->interface * mode). In both cases, link state management (whether the link * is up or not) is performed by the MAC, and reported via the - * mac_pcs_get_state() callback. Changes in link state must be made + * pcs_get_state() callback. Changes in link state must be made * by calling phylink_mac_change(). * * Interface mode specific details are mentioned below. @@ -459,16 +404,6 @@ int mac_finish(struct phylink_config *config, unsigned int mode, phy_interface_t iface); /** - * mac_an_restart() - restart 802.3z BaseX autonegotiation - * @config: a pointer to a &struct phylink_config. - * - * Note: This is a legacy method. This function will not be called unless - * legacy_pre_march2020 is set in &struct phylink_config and there is no - * PCS attached. - */ -void mac_an_restart(struct phylink_config *config); - -/** * mac_link_down() - take the link down * @config: a pointer to a &struct phylink_config. * @mode: link autonegotiation mode @@ -520,14 +455,19 @@ struct phylink_pcs_ops; /** * struct phylink_pcs - PHYLINK PCS instance * @ops: a pointer to the &struct phylink_pcs_ops structure + * @phylink: pointer to &struct phylink_config * @neg_mode: provide PCS neg mode via "mode" argument * @poll: poll the PCS for link changes * * This structure is designed to be embedded within the PCS private data, * and will be passed between phylink and the PCS. + * + * The @phylink member is private to phylink and must not be touched by + * the PCS driver. */ struct phylink_pcs { const struct phylink_pcs_ops *ops; + struct phylink *phylink; bool neg_mode; bool poll; }; @@ -535,6 +475,10 @@ struct phylink_pcs { /** * struct phylink_pcs_ops - MAC PCS operations structure. * @pcs_validate: validate the link configuration. + * @pcs_enable: enable the PCS. + * @pcs_disable: disable the PCS. + * @pcs_pre_config: pre-mac_config method (for errata) + * @pcs_post_config: post-mac_config method (for arrata) * @pcs_get_state: read the current MAC PCS link state from the hardware. * @pcs_config: configure the MAC PCS for the selected mode and state. * @pcs_an_restart: restart 802.3z BaseX autonegotiation. @@ -544,6 +488,12 @@ struct phylink_pcs { struct phylink_pcs_ops { int (*pcs_validate)(struct phylink_pcs *pcs, unsigned long *supported, const struct phylink_link_state *state); + int (*pcs_enable)(struct phylink_pcs *pcs); + void (*pcs_disable)(struct phylink_pcs *pcs); + void (*pcs_pre_config)(struct phylink_pcs *pcs, + phy_interface_t interface); + int (*pcs_post_config)(struct phylink_pcs *pcs, + phy_interface_t interface); void (*pcs_get_state)(struct phylink_pcs *pcs, struct phylink_link_state *state); int (*pcs_config)(struct phylink_pcs *pcs, unsigned int neg_mode, @@ -574,6 +524,18 @@ int pcs_validate(struct phylink_pcs *pcs, unsigned long *supported, const struct phylink_link_state *state); /** + * pcs_enable() - enable the PCS. + * @pcs: a pointer to a &struct phylink_pcs. + */ +int pcs_enable(struct phylink_pcs *pcs); + +/** + * pcs_disable() - disable the PCS. + * @pcs: a pointer to a &struct phylink_pcs. + */ +void pcs_disable(struct phylink_pcs *pcs); + +/** * pcs_get_state() - Read the current inband link state from the hardware * @pcs: a pointer to a &struct phylink_pcs. * @state: a pointer to a &struct phylink_link_state. @@ -585,8 +547,8 @@ int pcs_validate(struct phylink_pcs *pcs, unsigned long *supported, * in @state->link. If possible, @state->lp_advertising should also be * populated. * - * When present, this overrides mac_pcs_get_state() in &struct - * phylink_mac_ops. + * When present, this overrides pcs_get_state() in &struct + * phylink_pcs_ops. */ void pcs_get_state(struct phylink_pcs *pcs, struct phylink_link_state *state); @@ -615,7 +577,7 @@ void pcs_get_state(struct phylink_pcs *pcs, * * The %neg_mode argument should be tested via the phylink_mode_*() family of * functions, or for PCS that set pcs->neg_mode true, should be tested - * against the %PHYLINK_PCS_NEG_* definitions. + * against the PHYLINK_PCS_NEG_* definitions. */ int pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, const unsigned long *advertising, @@ -645,23 +607,12 @@ void pcs_an_restart(struct phylink_pcs *pcs); * * The %mode argument should be tested via the phylink_mode_*() family of * functions, or for PCS that set pcs->neg_mode true, should be tested - * against the %PHYLINK_PCS_NEG_* definitions. + * against the PHYLINK_PCS_NEG_* definitions. */ void pcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, int speed, int duplex); #endif -void phylink_caps_to_linkmodes(unsigned long *linkmodes, unsigned long caps); -unsigned long phylink_get_capabilities(phy_interface_t interface, - unsigned long mac_capabilities, - int rate_matching); -void phylink_validate_mask_caps(unsigned long *supported, - struct phylink_link_state *state, - unsigned long caps); -void phylink_generic_validate(struct phylink_config *config, - unsigned long *supported, - struct phylink_link_state *state); - struct phylink *phylink_create(struct phylink_config *, const struct fwnode_handle *, phy_interface_t, @@ -677,6 +628,7 @@ int phylink_fwnode_phy_connect(struct phylink *pl, void phylink_disconnect_phy(struct phylink *); void phylink_mac_change(struct phylink *, bool up); +void phylink_pcs_change(struct phylink_pcs *, bool up); void phylink_start(struct phylink *); void phylink_stop(struct phylink *); diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index c758809d5bcf..f9f9931e02d6 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -17,18 +17,10 @@ struct fs_pin; #if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) -/* - * sysctl for vm.memfd_noexec - * 0: memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL - * acts like MFD_EXEC was set. - * 1: memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL - * acts like MFD_NOEXEC_SEAL was set. - * 2: memfd_create() without MFD_NOEXEC_SEAL will be - * rejected. - */ -#define MEMFD_NOEXEC_SCOPE_EXEC 0 -#define MEMFD_NOEXEC_SCOPE_NOEXEC_SEAL 1 -#define MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED 2 +/* modes for vm.memfd_noexec sysctl */ +#define MEMFD_NOEXEC_SCOPE_EXEC 0 /* MFD_EXEC implied if unset */ +#define MEMFD_NOEXEC_SCOPE_NOEXEC_SEAL 1 /* MFD_NOEXEC_SEAL implied if unset */ +#define MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED 2 /* same as 1, except MFD_EXEC rejected */ #endif struct pid_namespace { @@ -47,7 +39,6 @@ struct pid_namespace { int reboot; /* group exit code if this pidns was rebooted */ struct ns_common ns; #if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) - /* sysctl for vm.memfd_noexec */ int memfd_noexec_scope; #endif } __randomize_layout; @@ -64,6 +55,23 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) return ns; } +#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) +static inline int pidns_memfd_noexec_scope(struct pid_namespace *ns) +{ + int scope = MEMFD_NOEXEC_SCOPE_EXEC; + + for (; ns; ns = ns->parent) + scope = max(scope, READ_ONCE(ns->memfd_noexec_scope)); + + return scope; +} +#else +static inline int pidns_memfd_noexec_scope(struct pid_namespace *ns) +{ + return 0; +} +#endif + extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct user_namespace *user_ns, struct pid_namespace *ns); extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); @@ -78,6 +86,11 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) return ns; } +static inline int pidns_memfd_noexec_scope(struct pid_namespace *ns) +{ + return 0; +} + static inline struct pid_namespace *copy_pid_ns(unsigned long flags, struct user_namespace *user_ns, struct pid_namespace *ns) { diff --git a/include/linux/pinctrl/consumer.h b/include/linux/pinctrl/consumer.h index 4729d54e8995..73de70362b98 100644 --- a/include/linux/pinctrl/consumer.h +++ b/include/linux/pinctrl/consumer.h @@ -17,6 +17,7 @@ #include <linux/pinctrl/pinctrl-state.h> struct device; +struct gpio_chip; /* This struct is private to the core and should be regarded as a cookie */ struct pinctrl; @@ -25,27 +26,30 @@ struct pinctrl_state; #ifdef CONFIG_PINCTRL /* External interface to pin control */ -extern bool pinctrl_gpio_can_use_line(unsigned gpio); -extern int pinctrl_gpio_request(unsigned gpio); -extern void pinctrl_gpio_free(unsigned gpio); -extern int pinctrl_gpio_direction_input(unsigned gpio); -extern int pinctrl_gpio_direction_output(unsigned gpio); -extern int pinctrl_gpio_set_config(unsigned gpio, unsigned long config); - -extern struct pinctrl * __must_check pinctrl_get(struct device *dev); -extern void pinctrl_put(struct pinctrl *p); -extern struct pinctrl_state * __must_check pinctrl_lookup_state(struct pinctrl *p, - const char *name); -extern int pinctrl_select_state(struct pinctrl *p, struct pinctrl_state *s); - -extern struct pinctrl * __must_check devm_pinctrl_get(struct device *dev); -extern void devm_pinctrl_put(struct pinctrl *p); -extern int pinctrl_select_default_state(struct device *dev); +bool pinctrl_gpio_can_use_line(struct gpio_chip *gc, unsigned int offset); +int pinctrl_gpio_request(struct gpio_chip *gc, unsigned int offset); +void pinctrl_gpio_free(struct gpio_chip *gc, unsigned int offset); +int pinctrl_gpio_direction_input(struct gpio_chip *gc, + unsigned int offset); +int pinctrl_gpio_direction_output(struct gpio_chip *gc, + unsigned int offset); +int pinctrl_gpio_set_config(struct gpio_chip *gc, unsigned int offset, + unsigned long config); + +struct pinctrl * __must_check pinctrl_get(struct device *dev); +void pinctrl_put(struct pinctrl *p); +struct pinctrl_state * __must_check pinctrl_lookup_state(struct pinctrl *p, + const char *name); +int pinctrl_select_state(struct pinctrl *p, struct pinctrl_state *s); + +struct pinctrl * __must_check devm_pinctrl_get(struct device *dev); +void devm_pinctrl_put(struct pinctrl *p); +int pinctrl_select_default_state(struct device *dev); #ifdef CONFIG_PM -extern int pinctrl_pm_select_default_state(struct device *dev); -extern int pinctrl_pm_select_sleep_state(struct device *dev); -extern int pinctrl_pm_select_idle_state(struct device *dev); +int pinctrl_pm_select_default_state(struct device *dev); +int pinctrl_pm_select_sleep_state(struct device *dev); +int pinctrl_pm_select_idle_state(struct device *dev); #else static inline int pinctrl_pm_select_default_state(struct device *dev) { @@ -63,31 +67,38 @@ static inline int pinctrl_pm_select_idle_state(struct device *dev) #else /* !CONFIG_PINCTRL */ -static inline bool pinctrl_gpio_can_use_line(unsigned gpio) +static inline bool +pinctrl_gpio_can_use_line(struct gpio_chip *gc, unsigned int offset) { return true; } -static inline int pinctrl_gpio_request(unsigned gpio) +static inline int +pinctrl_gpio_request(struct gpio_chip *gc, unsigned int offset) { return 0; } -static inline void pinctrl_gpio_free(unsigned gpio) +static inline void +pinctrl_gpio_free(struct gpio_chip *gc, unsigned int offset) { } -static inline int pinctrl_gpio_direction_input(unsigned gpio) +static inline int +pinctrl_gpio_direction_input(struct gpio_chip *gc, unsigned int offset) { return 0; } -static inline int pinctrl_gpio_direction_output(unsigned gpio) +static inline int +pinctrl_gpio_direction_output(struct gpio_chip *gc, unsigned int offset) { return 0; } -static inline int pinctrl_gpio_set_config(unsigned gpio, unsigned long config) +static inline int +pinctrl_gpio_set_config(struct gpio_chip *gc, unsigned int offset, + unsigned long config) { return 0; } diff --git a/include/linux/pinctrl/machine.h b/include/linux/pinctrl/machine.h index 0639b36f43c5..ee8803f6ad07 100644 --- a/include/linux/pinctrl/machine.h +++ b/include/linux/pinctrl/machine.h @@ -11,7 +11,7 @@ #ifndef __LINUX_PINCTRL_MACHINE_H #define __LINUX_PINCTRL_MACHINE_H -#include <linux/kernel.h> /* ARRAY_SIZE() */ +#include <linux/array_size.h> #include <linux/pinctrl/pinctrl-state.h> diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 02e0086b10f6..8ff23bf5a819 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -62,9 +62,6 @@ struct pipe_inode_info { unsigned int tail; unsigned int max_usage; unsigned int ring_size; -#ifdef CONFIG_WATCH_QUEUE - bool note_loss; -#endif unsigned int nr_accounted; unsigned int readers; unsigned int writers; @@ -72,6 +69,9 @@ struct pipe_inode_info { unsigned int r_counter; unsigned int w_counter; bool poll_usage; +#ifdef CONFIG_WATCH_QUEUE + bool note_loss; +#endif struct page *tmp_page; struct fasync_struct *fasync_readers; struct fasync_struct *fasync_writers; @@ -125,6 +125,22 @@ struct pipe_buf_operations { }; /** + * pipe_has_watch_queue - Check whether the pipe is a watch_queue, + * i.e. it was created with O_NOTIFICATION_PIPE + * @pipe: The pipe to check + * + * Return: true if pipe is a watch queue, false otherwise. + */ +static inline bool pipe_has_watch_queue(const struct pipe_inode_info *pipe) +{ +#ifdef CONFIG_WATCH_QUEUE + return pipe->watch_queue != NULL; +#else + return false; +#endif +} + +/** * pipe_empty - Return true if the pipe is empty * @head: The pipe ring head pointer * @tail: The pipe ring tail pointer @@ -269,10 +285,10 @@ bool pipe_is_unprivileged_user(void); /* for F_SETPIPE_SZ and F_GETPIPE_SZ */ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots); -long pipe_fcntl(struct file *, unsigned int, unsigned long arg); +long pipe_fcntl(struct file *, unsigned int, unsigned int arg); struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice); int create_pipe_files(struct file **, int); -unsigned int round_pipe_size(unsigned long size); +unsigned int round_pipe_size(unsigned int size); #endif diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h index 80cb00db42a4..79594aeb160d 100644 --- a/include/linux/pktcdvd.h +++ b/include/linux/pktcdvd.h @@ -154,7 +154,9 @@ struct packet_stacked_data struct pktcdvd_device { - struct block_device *bdev; /* dev attached */ + struct bdev_handle *bdev_handle; /* dev attached */ + /* handle acquired for bdev during pkt_open_dev() */ + struct bdev_handle *open_bdev_handle; dev_t pkt_dev; /* our dev */ struct packet_settings settings; struct packet_stats stats; diff --git a/include/linux/platform_data/bd6107.h b/include/linux/platform_data/bd6107.h index 54a06a4d2618..596ca4f95cfa 100644 --- a/include/linux/platform_data/bd6107.h +++ b/include/linux/platform_data/bd6107.h @@ -8,7 +8,7 @@ struct device; struct bd6107_platform_data { - struct device *fbdev; + struct device *dev; unsigned int def_value; }; diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index ab721cf13a98..7dae17b62a4d 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -4436,8 +4436,20 @@ struct ec_response_i2c_passthru_protect { * These commands are for sending and receiving message via HDMI CEC */ +#define EC_CEC_MAX_PORTS 16 + #define MAX_CEC_MSG_LEN 16 +/* + * Helper macros for packing/unpacking cec_events. + * bits[27:0] : bitmask of events from enum mkbp_cec_event + * bits[31:28]: port number + */ +#define EC_MKBP_EVENT_CEC_PACK(events, port) \ + (((events) & GENMASK(27, 0)) | (((port) & 0xf) << 28)) +#define EC_MKBP_EVENT_CEC_GET_EVENTS(event) ((event) & GENMASK(27, 0)) +#define EC_MKBP_EVENT_CEC_GET_PORT(event) (((event) >> 28) & 0xf) + /* CEC message from the AP to be written on the CEC bus */ #define EC_CMD_CEC_WRITE_MSG 0x00B8 @@ -4449,19 +4461,54 @@ struct ec_params_cec_write { uint8_t msg[MAX_CEC_MSG_LEN]; } __ec_align1; +/** + * struct ec_params_cec_write_v1 - Message to write to the CEC bus + * @port: CEC port to write the message on + * @msg_len: length of msg in bytes + * @msg: message content to write to the CEC bus + */ +struct ec_params_cec_write_v1 { + uint8_t port; + uint8_t msg_len; + uint8_t msg[MAX_CEC_MSG_LEN]; +} __ec_align1; + +/* CEC message read from a CEC bus reported back to the AP */ +#define EC_CMD_CEC_READ_MSG 0x00B9 + +/** + * struct ec_params_cec_read - Read a message from the CEC bus + * @port: CEC port to read a message on + */ +struct ec_params_cec_read { + uint8_t port; +} __ec_align1; + +/** + * struct ec_response_cec_read - Message read from the CEC bus + * @msg_len: length of msg in bytes + * @msg: message content read from the CEC bus + */ +struct ec_response_cec_read { + uint8_t msg_len; + uint8_t msg[MAX_CEC_MSG_LEN]; +} __ec_align1; + /* Set various CEC parameters */ #define EC_CMD_CEC_SET 0x00BA /** * struct ec_params_cec_set - CEC parameters set * @cmd: parameter type, can be CEC_CMD_ENABLE or CEC_CMD_LOGICAL_ADDRESS + * @port: CEC port to set the parameter on * @val: in case cmd is CEC_CMD_ENABLE, this field can be 0 to disable CEC * or 1 to enable CEC functionality, in case cmd is * CEC_CMD_LOGICAL_ADDRESS, this field encodes the requested logical * address between 0 and 15 or 0xff to unregister */ struct ec_params_cec_set { - uint8_t cmd; /* enum cec_command */ + uint8_t cmd : 4; /* enum cec_command */ + uint8_t port : 4; uint8_t val; } __ec_align1; @@ -4471,9 +4518,11 @@ struct ec_params_cec_set { /** * struct ec_params_cec_get - CEC parameters get * @cmd: parameter type, can be CEC_CMD_ENABLE or CEC_CMD_LOGICAL_ADDRESS + * @port: CEC port to get the parameter on */ struct ec_params_cec_get { - uint8_t cmd; /* enum cec_command */ + uint8_t cmd : 4; /* enum cec_command */ + uint8_t port : 4; } __ec_align1; /** @@ -4487,6 +4536,17 @@ struct ec_response_cec_get { uint8_t val; } __ec_align1; +/* Get the number of CEC ports */ +#define EC_CMD_CEC_PORT_COUNT 0x00C1 + +/** + * struct ec_response_cec_port_count - CEC port count response + * @port_count: number of CEC ports + */ +struct ec_response_cec_port_count { + uint8_t port_count; +} __ec_align1; + /* CEC parameters command */ enum cec_command { /* CEC reading, writing and events enable */ @@ -4501,6 +4561,8 @@ enum mkbp_cec_event { EC_MKBP_CEC_SEND_OK = BIT(0), /* Outgoing message was not acknowledged */ EC_MKBP_CEC_SEND_FAILED = BIT(1), + /* Incoming message can be read out by AP */ + EC_MKBP_CEC_HAVE_DATA = BIT(2), }; /*****************************************************************************/ diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 4f9f756bc17c..8865e350c12a 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -258,7 +258,7 @@ bool cros_ec_check_features(struct cros_ec_dev *ec, int feature); int cros_ec_get_sensor_count(struct cros_ec_dev *ec); -int cros_ec_cmd(struct cros_ec_device *ec_dev, unsigned int version, int command, void *outdata, +int cros_ec_cmd(struct cros_ec_device *ec_dev, unsigned int version, int command, const void *outdata, size_t outsize, void *indata, size_t insize); /** diff --git a/include/linux/platform_data/gpio-omap.h b/include/linux/platform_data/gpio-omap.h index f377817ce75c..cdd8cfb424f5 100644 --- a/include/linux/platform_data/gpio-omap.h +++ b/include/linux/platform_data/gpio-omap.h @@ -144,9 +144,6 @@ #define OMAP_MAX_GPIO_LINES 192 -#define OMAP_MPUIO(nr) (OMAP_MAX_GPIO_LINES + (nr)) -#define OMAP_GPIO_IS_MPUIO(nr) ((nr) >= OMAP_MAX_GPIO_LINES) - #ifndef __ASSEMBLER__ struct omap_gpio_reg_offs { u16 revision; diff --git a/include/linux/platform_data/gpio_backlight.h b/include/linux/platform_data/gpio_backlight.h index 1a8b5b1946fe..323fbf5f7613 100644 --- a/include/linux/platform_data/gpio_backlight.h +++ b/include/linux/platform_data/gpio_backlight.h @@ -8,7 +8,7 @@ struct device; struct gpio_backlight_platform_data { - struct device *fbdev; + struct device *dev; }; #endif diff --git a/include/linux/platform_data/gsc_hwmon.h b/include/linux/platform_data/gsc_hwmon.h index f2781aa7eff8..70e8a6bec0f6 100644 --- a/include/linux/platform_data/gsc_hwmon.h +++ b/include/linux/platform_data/gsc_hwmon.h @@ -40,6 +40,6 @@ struct gsc_hwmon_platform_data { unsigned int resolution; unsigned int vreference; unsigned int fan_base; - struct gsc_hwmon_channel channels[]; + struct gsc_hwmon_channel channels[] __counted_by(nchannels); }; #endif diff --git a/include/linux/platform_data/hirschmann-hellcreek.h b/include/linux/platform_data/hirschmann-hellcreek.h index 6a000df5541f..8748680e9e3c 100644 --- a/include/linux/platform_data/hirschmann-hellcreek.h +++ b/include/linux/platform_data/hirschmann-hellcreek.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: (GPL-2.0 or MIT) */ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ /* * Hirschmann Hellcreek TSN switch platform data. * diff --git a/include/linux/platform_data/lv5207lp.h b/include/linux/platform_data/lv5207lp.h index c9da8d402750..95d85c1394bc 100644 --- a/include/linux/platform_data/lv5207lp.h +++ b/include/linux/platform_data/lv5207lp.h @@ -8,7 +8,7 @@ struct device; struct lv5207lp_platform_data { - struct device *fbdev; + struct device *dev; unsigned int max_value; unsigned int def_value; }; diff --git a/include/linux/platform_data/omap-twl4030.h b/include/linux/platform_data/omap-twl4030.h index 0dd851ea1c72..7fcb55fe21c9 100644 --- a/include/linux/platform_data/omap-twl4030.h +++ b/include/linux/platform_data/omap-twl4030.h @@ -37,9 +37,6 @@ struct omap_tw4030_pdata { bool has_digimic0; bool has_digimic1; u8 has_linein; - - /* Jack detect GPIO or <= 0 if it is not implemented */ - int jack_detect; }; #endif /* _OMAP_TWL4030_H_ */ diff --git a/include/linux/platform_data/pca953x.h b/include/linux/platform_data/pca953x.h index 96c1a14ab365..3c3787c4d96c 100644 --- a/include/linux/platform_data/pca953x.h +++ b/include/linux/platform_data/pca953x.h @@ -11,21 +11,8 @@ struct pca953x_platform_data { /* number of the first GPIO */ unsigned gpio_base; - /* initial polarity inversion setting */ - u32 invert; - /* interrupt base */ int irq_base; - - void *context; /* param to setup/teardown */ - - int (*setup)(struct i2c_client *client, - unsigned gpio, unsigned ngpio, - void *context); - void (*teardown)(struct i2c_client *client, - unsigned gpio, unsigned ngpio, - void *context); - const char *const *names; }; #endif /* _LINUX_PCA953X_H */ diff --git a/include/linux/platform_data/rtc-ds2404.h b/include/linux/platform_data/rtc-ds2404.h deleted file mode 100644 index 22c53825528f..000000000000 --- a/include/linux/platform_data/rtc-ds2404.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * ds2404.h - platform data structure for the DS2404 RTC. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2012 Sven Schnelle <svens@stackframe.org> - */ - -#ifndef __LINUX_DS2404_H -#define __LINUX_DS2404_H - -struct ds2404_platform_data { - - unsigned int gpio_rst; - unsigned int gpio_clk; - unsigned int gpio_dq; -}; -#endif diff --git a/include/linux/platform_data/shmob_drm.h b/include/linux/platform_data/shmob_drm.h index d661399b217d..6c19d4fbbe39 100644 --- a/include/linux/platform_data/shmob_drm.h +++ b/include/linux/platform_data/shmob_drm.h @@ -10,7 +10,7 @@ #ifndef __SHMOB_DRM_H__ #define __SHMOB_DRM_H__ -#include <drm/drm_mode.h> +#include <video/videomode.h> enum shmob_drm_clk_source { SHMOB_DRM_CLK_BUS, @@ -18,72 +18,21 @@ enum shmob_drm_clk_source { SHMOB_DRM_CLK_EXTERNAL, }; -enum shmob_drm_interface { - SHMOB_DRM_IFACE_RGB8, /* 24bpp, 8:8:8 */ - SHMOB_DRM_IFACE_RGB9, /* 18bpp, 9:9 */ - SHMOB_DRM_IFACE_RGB12A, /* 24bpp, 12:12 */ - SHMOB_DRM_IFACE_RGB12B, /* 12bpp */ - SHMOB_DRM_IFACE_RGB16, /* 16bpp */ - SHMOB_DRM_IFACE_RGB18, /* 18bpp */ - SHMOB_DRM_IFACE_RGB24, /* 24bpp */ - SHMOB_DRM_IFACE_YUV422, /* 16bpp */ - SHMOB_DRM_IFACE_SYS8A, /* 24bpp, 8:8:8 */ - SHMOB_DRM_IFACE_SYS8B, /* 18bpp, 8:8:2 */ - SHMOB_DRM_IFACE_SYS8C, /* 18bpp, 2:8:8 */ - SHMOB_DRM_IFACE_SYS8D, /* 16bpp, 8:8 */ - SHMOB_DRM_IFACE_SYS9, /* 18bpp, 9:9 */ - SHMOB_DRM_IFACE_SYS12, /* 24bpp, 12:12 */ - SHMOB_DRM_IFACE_SYS16A, /* 16bpp */ - SHMOB_DRM_IFACE_SYS16B, /* 18bpp, 16:2 */ - SHMOB_DRM_IFACE_SYS16C, /* 18bpp, 2:16 */ - SHMOB_DRM_IFACE_SYS18, /* 18bpp */ - SHMOB_DRM_IFACE_SYS24, /* 24bpp */ -}; - -struct shmob_drm_backlight_data { - const char *name; - int max_brightness; - int (*get_brightness)(void); - int (*set_brightness)(int brightness); -}; - struct shmob_drm_panel_data { unsigned int width_mm; /* Panel width in mm */ unsigned int height_mm; /* Panel height in mm */ - struct drm_mode_modeinfo mode; + struct videomode mode; }; -struct shmob_drm_sys_interface_data { - unsigned int read_latch:6; - unsigned int read_setup:8; - unsigned int read_cycle:8; - unsigned int read_strobe:8; - unsigned int write_setup:8; - unsigned int write_cycle:8; - unsigned int write_strobe:8; - unsigned int cs_setup:3; - unsigned int vsync_active_high:1; - unsigned int vsync_dir_input:1; -}; - -#define SHMOB_DRM_IFACE_FL_DWPOL (1 << 0) /* Rising edge dot clock data latch */ -#define SHMOB_DRM_IFACE_FL_DIPOL (1 << 1) /* Active low display enable */ -#define SHMOB_DRM_IFACE_FL_DAPOL (1 << 2) /* Active low display data */ -#define SHMOB_DRM_IFACE_FL_HSCNT (1 << 3) /* Disable HSYNC during VBLANK */ -#define SHMOB_DRM_IFACE_FL_DWCNT (1 << 4) /* Disable dotclock during blanking */ - struct shmob_drm_interface_data { - enum shmob_drm_interface interface; - struct shmob_drm_sys_interface_data sys; + unsigned int bus_fmt; /* MEDIA_BUS_FMT_* */ unsigned int clk_div; - unsigned int flags; }; struct shmob_drm_platform_data { enum shmob_drm_clk_source clk_source; struct shmob_drm_interface_data iface; struct shmob_drm_panel_data panel; - struct shmob_drm_backlight_data backlight; }; #endif /* __SHMOB_DRM_H__ */ diff --git a/include/linux/platform_data/video-mx3fb.h b/include/linux/platform_data/video-mx3fb.h deleted file mode 100644 index d03dc322a616..000000000000 --- a/include/linux/platform_data/video-mx3fb.h +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2008 - * Guennadi Liakhovetski, DENX Software Engineering, <lg@denx.de> - */ - -#ifndef __ASM_ARCH_MX3FB_H__ -#define __ASM_ARCH_MX3FB_H__ - -#include <linux/device.h> -#include <linux/fb.h> - -/* Proprietary FB_SYNC_ flags */ -#define FB_SYNC_OE_ACT_HIGH 0x80000000 -#define FB_SYNC_CLK_INVERT 0x40000000 -#define FB_SYNC_DATA_INVERT 0x20000000 -#define FB_SYNC_CLK_IDLE_EN 0x10000000 -#define FB_SYNC_SHARP_MODE 0x08000000 -#define FB_SYNC_SWAP_RGB 0x04000000 -#define FB_SYNC_CLK_SEL_EN 0x02000000 - -/* - * Specify the way your display is connected. The IPU can arbitrarily - * map the internal colors to the external data lines. We only support - * the following mappings at the moment. - */ -enum disp_data_mapping { - /* blue -> d[0..5], green -> d[6..11], red -> d[12..17] */ - IPU_DISP_DATA_MAPPING_RGB666, - /* blue -> d[0..4], green -> d[5..10], red -> d[11..15] */ - IPU_DISP_DATA_MAPPING_RGB565, - /* blue -> d[0..7], green -> d[8..15], red -> d[16..23] */ - IPU_DISP_DATA_MAPPING_RGB888, -}; - -/** - * struct mx3fb_platform_data - mx3fb platform data - * - * @dma_dev: pointer to the dma-device, used for dma-slave connection - * @mode: pointer to a platform-provided per mxc_register_fb() videomode - */ -struct mx3fb_platform_data { - struct device *dma_dev; - const char *name; - const struct fb_videomode *mode; - int num_modes; - enum disp_data_mapping disp_data_fmt; -}; - -#endif diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h index 28234dc9fa6a..ab1c7deff118 100644 --- a/include/linux/platform_data/x86/asus-wmi.h +++ b/include/linux/platform_data/x86/asus-wmi.h @@ -58,6 +58,10 @@ #define ASUS_WMI_DEVID_KBD_BACKLIGHT 0x00050021 #define ASUS_WMI_DEVID_LIGHT_SENSOR 0x00050022 /* ?? */ #define ASUS_WMI_DEVID_LIGHTBAR 0x00050025 +/* This can only be used to disable the screen, not re-enable */ +#define ASUS_WMI_DEVID_SCREENPAD_POWER 0x00050031 +/* Writing a brightness re-enables the screen if disabled */ +#define ASUS_WMI_DEVID_SCREENPAD_LIGHT 0x00050032 #define ASUS_WMI_DEVID_FAN_BOOST_MODE 0x00110018 #define ASUS_WMI_DEVID_THROTTLE_THERMAL_POLICY 0x00120075 @@ -66,6 +70,7 @@ #define ASUS_WMI_DEVID_CAMERA 0x00060013 #define ASUS_WMI_DEVID_LID_FLIP 0x00060062 #define ASUS_WMI_DEVID_LID_FLIP_ROG 0x00060077 +#define ASUS_WMI_DEVID_MINI_LED_MODE 0x0005001E /* Storage */ #define ASUS_WMI_DEVID_CARDREADER 0x00080013 @@ -80,8 +85,19 @@ #define ASUS_WMI_DEVID_FAN_CTRL 0x00110012 /* deprecated */ #define ASUS_WMI_DEVID_CPU_FAN_CTRL 0x00110013 #define ASUS_WMI_DEVID_GPU_FAN_CTRL 0x00110014 +#define ASUS_WMI_DEVID_MID_FAN_CTRL 0x00110031 #define ASUS_WMI_DEVID_CPU_FAN_CURVE 0x00110024 #define ASUS_WMI_DEVID_GPU_FAN_CURVE 0x00110025 +#define ASUS_WMI_DEVID_MID_FAN_CURVE 0x00110032 + +/* Tunables for AUS ROG laptops */ +#define ASUS_WMI_DEVID_PPT_PL2_SPPT 0x001200A0 +#define ASUS_WMI_DEVID_PPT_PL1_SPL 0x001200A3 +#define ASUS_WMI_DEVID_PPT_APU_SPPT 0x001200B0 +#define ASUS_WMI_DEVID_PPT_PLAT_SPPT 0x001200B1 +#define ASUS_WMI_DEVID_PPT_FPPT 0x001200C1 +#define ASUS_WMI_DEVID_NV_DYN_BOOST 0x001200C0 +#define ASUS_WMI_DEVID_NV_THERM_TARGET 0x001200C2 /* Power */ #define ASUS_WMI_DEVID_PROCESSOR_STATE 0x00120012 @@ -95,7 +111,15 @@ /* Keyboard dock */ #define ASUS_WMI_DEVID_KBD_DOCK 0x00120063 -/* dgpu on/off */ +/* Charging mode - 1=Barrel, 2=USB */ +#define ASUS_WMI_DEVID_CHARGE_MODE 0x0012006C + +/* MCU powersave mode */ +#define ASUS_WMI_DEVID_MCU_POWERSAVE 0x001200E2 + +/* epu is connected? 1 == true */ +#define ASUS_WMI_DEVID_EGPU_CONNECTED 0x00090018 +/* egpu on/off */ #define ASUS_WMI_DEVID_EGPU 0x00090019 /* dgpu on/off */ diff --git a/include/linux/platform_data/x86/simatic-ipc-base.h b/include/linux/platform_data/x86/simatic-ipc-base.h index 57d6a10dfc9e..2d7f7120ba6b 100644 --- a/include/linux/platform_data/x86/simatic-ipc-base.h +++ b/include/linux/platform_data/x86/simatic-ipc-base.h @@ -2,7 +2,7 @@ /* * Siemens SIMATIC IPC drivers * - * Copyright (c) Siemens AG, 2018-2021 + * Copyright (c) Siemens AG, 2018-2023 * * Authors: * Henning Schild <henning.schild@siemens.com> @@ -20,6 +20,9 @@ #define SIMATIC_IPC_DEVICE_127E 3 #define SIMATIC_IPC_DEVICE_227E 4 #define SIMATIC_IPC_DEVICE_227G 5 +#define SIMATIC_IPC_DEVICE_BX_21A 6 +#define SIMATIC_IPC_DEVICE_BX_39A 7 +#define SIMATIC_IPC_DEVICE_BX_59A 8 struct simatic_ipc_platform { u8 devmode; diff --git a/include/linux/platform_data/x86/simatic-ipc.h b/include/linux/platform_data/x86/simatic-ipc.h index a48bb5240977..8d8b3b919674 100644 --- a/include/linux/platform_data/x86/simatic-ipc.h +++ b/include/linux/platform_data/x86/simatic-ipc.h @@ -2,7 +2,7 @@ /* * Siemens SIMATIC IPC drivers * - * Copyright (c) Siemens AG, 2018-2021 + * Copyright (c) Siemens AG, 2018-2023 * * Authors: * Henning Schild <henning.schild@siemens.com> @@ -32,8 +32,12 @@ enum simatic_ipc_station_ids { SIMATIC_IPC_IPC477E = 0x00000A02, SIMATIC_IPC_IPC127E = 0x00000D01, SIMATIC_IPC_IPC227G = 0x00000F01, + SIMATIC_IPC_IPC277G = 0x00000F02, SIMATIC_IPC_IPCBX_39A = 0x00001001, SIMATIC_IPC_IPCPX_39A = 0x00001002, + SIMATIC_IPC_IPCBX_21A = 0x00001101, + SIMATIC_IPC_IPCBX_56A = 0x00001201, + SIMATIC_IPC_IPCBX_59A = 0x00001202, }; static inline u32 simatic_ipc_get_station_id(u8 *data, int max_len) diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index b845fd83f429..7a41c72c1959 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -63,6 +63,8 @@ extern struct resource *platform_get_mem_or_io(struct platform_device *, extern struct device * platform_find_device_by_driver(struct device *start, const struct device_driver *drv); + +#ifdef CONFIG_HAS_IOMEM extern void __iomem * devm_platform_get_and_ioremap_resource(struct platform_device *pdev, unsigned int index, struct resource **res); @@ -72,6 +74,32 @@ devm_platform_ioremap_resource(struct platform_device *pdev, extern void __iomem * devm_platform_ioremap_resource_byname(struct platform_device *pdev, const char *name); +#else + +static inline void __iomem * +devm_platform_get_and_ioremap_resource(struct platform_device *pdev, + unsigned int index, struct resource **res) +{ + return ERR_PTR(-EINVAL); +} + + +static inline void __iomem * +devm_platform_ioremap_resource(struct platform_device *pdev, + unsigned int index) +{ + return ERR_PTR(-EINVAL); +} + +static inline void __iomem * +devm_platform_ioremap_resource_byname(struct platform_device *pdev, + const char *name) +{ + return ERR_PTR(-EINVAL); +} + +#endif + extern int platform_get_irq(struct platform_device *, unsigned int); extern int platform_get_irq_optional(struct platform_device *, unsigned int); extern int platform_irq_count(struct platform_device *); diff --git a/include/linux/pm.h b/include/linux/pm.h index badad7d11f4f..92a4f69de0e8 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -374,24 +374,39 @@ const struct dev_pm_ops name = { \ RUNTIME_PM_OPS(runtime_suspend_fn, runtime_resume_fn, idle_fn) \ } -#ifdef CONFIG_PM -#define _EXPORT_DEV_PM_OPS(name, license, ns) \ +#define _EXPORT_PM_OPS(name, license, ns) \ const struct dev_pm_ops name; \ __EXPORT_SYMBOL(name, license, ns); \ const struct dev_pm_ops name -#define EXPORT_PM_FN_GPL(name) EXPORT_SYMBOL_GPL(name) -#define EXPORT_PM_FN_NS_GPL(name, ns) EXPORT_SYMBOL_NS_GPL(name, ns) -#else -#define _EXPORT_DEV_PM_OPS(name, license, ns) \ + +#define _DISCARD_PM_OPS(name, license, ns) \ static __maybe_unused const struct dev_pm_ops __static_##name + +#ifdef CONFIG_PM +#define _EXPORT_DEV_PM_OPS(name, license, ns) _EXPORT_PM_OPS(name, license, ns) +#define EXPORT_PM_FN_GPL(name) EXPORT_SYMBOL_GPL(name) +#define EXPORT_PM_FN_NS_GPL(name, ns) EXPORT_SYMBOL_NS_GPL(name, ns) +#else +#define _EXPORT_DEV_PM_OPS(name, license, ns) _DISCARD_PM_OPS(name, license, ns) #define EXPORT_PM_FN_GPL(name) #define EXPORT_PM_FN_NS_GPL(name, ns) #endif -#define EXPORT_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "", "") -#define EXPORT_GPL_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "GPL", "") -#define EXPORT_NS_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "", #ns) -#define EXPORT_NS_GPL_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "GPL", #ns) +#ifdef CONFIG_PM_SLEEP +#define _EXPORT_DEV_SLEEP_PM_OPS(name, license, ns) _EXPORT_PM_OPS(name, license, ns) +#else +#define _EXPORT_DEV_SLEEP_PM_OPS(name, license, ns) _DISCARD_PM_OPS(name, license, ns) +#endif + +#define EXPORT_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "", "") +#define EXPORT_GPL_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "GPL", "") +#define EXPORT_NS_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "", #ns) +#define EXPORT_NS_GPL_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "GPL", #ns) + +#define EXPORT_DEV_SLEEP_PM_OPS(name) _EXPORT_DEV_SLEEP_PM_OPS(name, "", "") +#define EXPORT_GPL_DEV_SLEEP_PM_OPS(name) _EXPORT_DEV_SLEEP_PM_OPS(name, "GPL", "") +#define EXPORT_NS_DEV_SLEEP_PM_OPS(name, ns) _EXPORT_DEV_SLEEP_PM_OPS(name, "", #ns) +#define EXPORT_NS_GPL_DEV_SLEEP_PM_OPS(name, ns) _EXPORT_DEV_SLEEP_PM_OPS(name, "GPL", #ns) /* * Use this if you want to use the same suspend and resume callbacks for suspend @@ -404,19 +419,19 @@ const struct dev_pm_ops name = { \ _DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL) #define EXPORT_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ - EXPORT_DEV_PM_OPS(name) = { \ + EXPORT_DEV_SLEEP_PM_OPS(name) = { \ SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ } #define EXPORT_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ - EXPORT_GPL_DEV_PM_OPS(name) = { \ + EXPORT_GPL_DEV_SLEEP_PM_OPS(name) = { \ SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ } #define EXPORT_NS_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn, ns) \ - EXPORT_NS_DEV_PM_OPS(name, ns) = { \ + EXPORT_NS_DEV_SLEEP_PM_OPS(name, ns) = { \ SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ } #define EXPORT_NS_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn, ns) \ - EXPORT_NS_GPL_DEV_PM_OPS(name, ns) = { \ + EXPORT_NS_GPL_DEV_SLEEP_PM_OPS(name, ns) = { \ SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ } @@ -448,6 +463,15 @@ const struct dev_pm_ops __maybe_unused name = { \ SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ } +/* + * Use this if you want to have the suspend and resume callbacks be called + * with IRQs disabled. + */ +#define DEFINE_NOIRQ_DEV_PM_OPS(name, suspend_fn, resume_fn) \ +const struct dev_pm_ops name = { \ + NOIRQ_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ +} + #define pm_ptr(_ptr) PTR_IF(IS_ENABLED(CONFIG_PM), (_ptr)) #define pm_sleep_ptr(_ptr) PTR_IF(IS_ENABLED(CONFIG_PM_SLEEP), (_ptr)) @@ -710,6 +734,7 @@ extern void dev_pm_put_subsys_data(struct device *dev); * @activate: Called before executing probe routines for bus types and drivers. * @sync: Called after successful driver probe. * @dismiss: Called after unsuccessful driver probe and after driver removal. + * @set_performance_state: Called to request a new performance state. * * Power domains provide callbacks that are executed during system suspend, * hibernation, system resume and during runtime PM transitions instead of @@ -722,6 +747,7 @@ struct dev_pm_domain { int (*activate)(struct device *dev); void (*sync)(struct device *dev); void (*dismiss)(struct device *dev); + int (*set_performance_state)(struct device *dev, unsigned int state); }; /* diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index f776fb93eaa0..34663d0d5c55 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -61,6 +61,10 @@ * GENPD_FLAG_MIN_RESIDENCY: Enable the genpd governor to consider its * components' next wakeup when determining the * optimal idle state. + * + * GENPD_FLAG_OPP_TABLE_FW: The genpd provider supports performance states, + * but its corresponding OPP tables are not + * described in DT, but are given directly by FW. */ #define GENPD_FLAG_PM_CLK (1U << 0) #define GENPD_FLAG_IRQ_SAFE (1U << 1) @@ -69,6 +73,7 @@ #define GENPD_FLAG_CPU_DOMAIN (1U << 4) #define GENPD_FLAG_RPM_ALWAYS_ON (1U << 5) #define GENPD_FLAG_MIN_RESIDENCY (1U << 6) +#define GENPD_FLAG_OPP_TABLE_FW (1U << 7) enum gpd_status { GENPD_STATE_ON = 0, /* PM domain is on */ @@ -430,6 +435,7 @@ struct device *dev_pm_domain_attach_by_name(struct device *dev, void dev_pm_domain_detach(struct device *dev, bool power_off); int dev_pm_domain_start(struct device *dev); void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd); +int dev_pm_domain_set_performance_state(struct device *dev, unsigned int state); #else static inline int dev_pm_domain_attach(struct device *dev, bool power_on) { @@ -452,6 +458,11 @@ static inline int dev_pm_domain_start(struct device *dev) } static inline void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd) {} +static inline int dev_pm_domain_set_performance_state(struct device *dev, + unsigned int state) +{ + return 0; +} #endif #endif /* _LINUX_PM_DOMAIN_H */ diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index dc1fb5890792..ccd97bcef269 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -92,6 +92,18 @@ struct dev_pm_opp_config { struct device ***virt_devs; }; +/** + * struct dev_pm_opp_data - The data to use to initialize an OPP. + * @level: The performance level for the OPP. + * @freq: The clock rate in Hz for the OPP. + * @u_volt: The voltage in uV for the OPP. + */ +struct dev_pm_opp_data { + unsigned int level; + unsigned long freq; + unsigned long u_volt; +}; + #if defined(CONFIG_PM_OPP) struct opp_table *dev_pm_opp_get_opp_table(struct device *dev); @@ -103,7 +115,7 @@ int dev_pm_opp_get_supplies(struct dev_pm_opp *opp, struct dev_pm_opp_supply *su unsigned long dev_pm_opp_get_power(struct dev_pm_opp *opp); -unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp); +unsigned long dev_pm_opp_get_freq_indexed(struct dev_pm_opp *opp, u32 index); unsigned int dev_pm_opp_get_level(struct dev_pm_opp *opp); @@ -121,16 +133,31 @@ unsigned long dev_pm_opp_get_suspend_opp_freq(struct device *dev); struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, unsigned long freq, bool available); + +struct dev_pm_opp * +dev_pm_opp_find_freq_exact_indexed(struct device *dev, unsigned long freq, + u32 index, bool available); + struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, unsigned long *freq); +struct dev_pm_opp *dev_pm_opp_find_freq_floor_indexed(struct device *dev, + unsigned long *freq, u32 index); + +struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, + unsigned long *freq); + +struct dev_pm_opp *dev_pm_opp_find_freq_ceil_indexed(struct device *dev, + unsigned long *freq, u32 index); + struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev, unsigned int level); + struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, unsigned int *level); -struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, - unsigned long *freq); +struct dev_pm_opp *dev_pm_opp_find_level_floor(struct device *dev, + unsigned long *level); struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev, unsigned int *bw, int index); @@ -140,8 +167,8 @@ struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev, void dev_pm_opp_put(struct dev_pm_opp *opp); -int dev_pm_opp_add(struct device *dev, unsigned long freq, - unsigned long u_volt); +int dev_pm_opp_add_dynamic(struct device *dev, struct dev_pm_opp_data *opp); + void dev_pm_opp_remove(struct device *dev, unsigned long freq); void dev_pm_opp_remove_all_dynamic(struct device *dev); @@ -200,7 +227,7 @@ static inline unsigned long dev_pm_opp_get_power(struct dev_pm_opp *opp) return 0; } -static inline unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp) +static inline unsigned long dev_pm_opp_get_freq_indexed(struct dev_pm_opp *opp, u32 index) { return 0; } @@ -247,26 +274,27 @@ static inline unsigned long dev_pm_opp_get_suspend_opp_freq(struct device *dev) return 0; } -static inline struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev, - unsigned int level) +static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, + unsigned long freq, bool available) { return ERR_PTR(-EOPNOTSUPP); } -static inline struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, - unsigned int *level) +static inline struct dev_pm_opp * +dev_pm_opp_find_freq_exact_indexed(struct device *dev, unsigned long freq, + u32 index, bool available) { return ERR_PTR(-EOPNOTSUPP); } -static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, - unsigned long freq, bool available) +static inline struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, + unsigned long *freq) { return ERR_PTR(-EOPNOTSUPP); } -static inline struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, - unsigned long *freq) +static inline struct dev_pm_opp * +dev_pm_opp_find_freq_floor_indexed(struct device *dev, unsigned long *freq, u32 index) { return ERR_PTR(-EOPNOTSUPP); } @@ -277,6 +305,30 @@ static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, return ERR_PTR(-EOPNOTSUPP); } +static inline struct dev_pm_opp * +dev_pm_opp_find_freq_ceil_indexed(struct device *dev, unsigned long *freq, u32 index) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev, + unsigned int level) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, + unsigned int *level) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline struct dev_pm_opp *dev_pm_opp_find_level_floor(struct device *dev, + unsigned long *level) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev, unsigned int *bw, int index) { @@ -291,8 +343,8 @@ static inline struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev, static inline void dev_pm_opp_put(struct dev_pm_opp *opp) {} -static inline int dev_pm_opp_add(struct device *dev, unsigned long freq, - unsigned long u_volt) +static inline int +dev_pm_opp_add_dynamic(struct device *dev, struct dev_pm_opp_data *opp) { return -EOPNOTSUPP; } @@ -488,6 +540,17 @@ static inline int dev_pm_opp_of_find_icc_paths(struct device *dev, struct opp_ta /* OPP Configuration helpers */ +static inline int dev_pm_opp_add(struct device *dev, unsigned long freq, + unsigned long u_volt) +{ + struct dev_pm_opp_data data = { + .freq = freq, + .u_volt = u_volt, + }; + + return dev_pm_opp_add_dynamic(dev, &data); +} + /* Regulators helpers */ static inline int dev_pm_opp_set_regulators(struct device *dev, const char * const names[]) @@ -631,4 +694,9 @@ static inline void dev_pm_opp_put_prop_name(int token) dev_pm_opp_clear_config(token); } +static inline unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp) +{ + return dev_pm_opp_get_freq_indexed(opp, 0); +} + #endif /* __LINUX_OPP_H__ */ diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 9a8151a2bdea..7c9b35448563 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -85,8 +85,6 @@ extern void pm_runtime_irq_safe(struct device *dev); extern void __pm_runtime_use_autosuspend(struct device *dev, bool use); extern void pm_runtime_set_autosuspend_delay(struct device *dev, int delay); extern u64 pm_runtime_autosuspend_expiration(struct device *dev); -extern void pm_runtime_update_max_time_suspended(struct device *dev, - s64 delta_ns); extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable); extern void pm_runtime_get_suppliers(struct device *dev); extern void pm_runtime_put_suppliers(struct device *dev); diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index 77f4849e3418..6eb9adaef52b 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -194,6 +194,16 @@ static inline void pm_wakeup_dev_event(struct device *dev, unsigned int msec, #endif /* !CONFIG_PM_SLEEP */ +static inline bool device_awake_path(struct device *dev) +{ + return device_wakeup_path(dev); +} + +static inline void device_set_awake_path(struct device *dev) +{ + device_set_wakeup_path(dev); +} + static inline void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec) { return pm_wakeup_ws_event(ws, msec, false); diff --git a/include/linux/pnp.h b/include/linux/pnp.h index c2a7cfbca713..267fb8a4fb6e 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -291,7 +291,7 @@ static inline void pnp_set_drvdata(struct pnp_dev *pdev, void *data) struct pnp_fixup { char id[7]; - void (*quirk_function) (struct pnp_dev * dev); /* fixup function */ + void (*quirk_function) (struct pnp_dev *dev); /* fixup function */ }; /* config parameters */ @@ -419,8 +419,8 @@ struct pnp_protocol { /* protocol specific suspend/resume */ bool (*can_wakeup) (struct pnp_dev *dev); - int (*suspend) (struct pnp_dev * dev, pm_message_t state); - int (*resume) (struct pnp_dev * dev); + int (*suspend) (struct pnp_dev *dev, pm_message_t state); + int (*resume) (struct pnp_dev *dev); /* used by pnp layer only (look but don't touch) */ unsigned char number; /* protocol number */ @@ -492,7 +492,7 @@ static inline int pnp_start_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_stop_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_activate_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_disable_dev(struct pnp_dev *dev) { return -ENODEV; } -static inline int pnp_range_reserved(resource_size_t start, resource_size_t end) { return 0;} +static inline int pnp_range_reserved(resource_size_t start, resource_size_t end) { return 0; } /* protocol helpers */ static inline int pnp_is_active(struct pnp_dev *dev) { return 0; } diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h index 468328b1e1dd..ef8619f48920 100644 --- a/include/linux/posix-clock.h +++ b/include/linux/posix-clock.h @@ -14,6 +14,7 @@ #include <linux/rwsem.h> struct posix_clock; +struct posix_clock_context; /** * struct posix_clock_operations - functional interface to the clock @@ -50,18 +51,18 @@ struct posix_clock_operations { /* * Optional character device methods: */ - long (*ioctl) (struct posix_clock *pc, - unsigned int cmd, unsigned long arg); + long (*ioctl)(struct posix_clock_context *pccontext, unsigned int cmd, + unsigned long arg); - int (*open) (struct posix_clock *pc, fmode_t f_mode); + int (*open)(struct posix_clock_context *pccontext, fmode_t f_mode); - __poll_t (*poll) (struct posix_clock *pc, - struct file *file, poll_table *wait); + __poll_t (*poll)(struct posix_clock_context *pccontext, struct file *file, + poll_table *wait); - int (*release) (struct posix_clock *pc); + int (*release)(struct posix_clock_context *pccontext); - ssize_t (*read) (struct posix_clock *pc, - uint flags, char __user *buf, size_t cnt); + ssize_t (*read)(struct posix_clock_context *pccontext, uint flags, + char __user *buf, size_t cnt); }; /** @@ -91,6 +92,24 @@ struct posix_clock { }; /** + * struct posix_clock_context - represents clock file operations context + * + * @clk: Pointer to the clock + * @private_clkdata: Pointer to user data + * + * Drivers should use struct posix_clock_context during specific character + * device file operation methods to access the posix clock. + * + * Drivers can store a private data structure during the open operation + * if they have specific information that is required in other file + * operations. + */ +struct posix_clock_context { + struct posix_clock *clk; + void *private_clkdata; +}; + +/** * posix_clock_register() - register a new clock * @clk: Pointer to the clock. Caller must provide 'ops' field * @dev: Pointer to the initialized device. Caller must provide diff --git a/include/linux/power/power_on_reason.h b/include/linux/power/power_on_reason.h new file mode 100644 index 000000000000..95a1ec0c403c --- /dev/null +++ b/include/linux/power/power_on_reason.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Author: Kamel Bouhra <kamel.bouhara@bootlin.com> + */ + +#ifndef POWER_ON_REASON_H +#define POWER_ON_REASON_H + +#define POWER_ON_REASON_REGULAR "regular power-up" +#define POWER_ON_REASON_RTC "RTC wakeup" +#define POWER_ON_REASON_WATCHDOG "watchdog timeout" +#define POWER_ON_REASON_SOFTWARE "software reset" +#define POWER_ON_REASON_RST_BTN "reset button action" +#define POWER_ON_REASON_CPU_CLK_FAIL "CPU clock failure" +#define POWER_ON_REASON_XTAL_FAIL "crystal oscillator failure" +#define POWER_ON_REASON_BROWN_OUT "brown-out reset" +#define POWER_ON_REASON_UNKNOWN "unknown reason" + +#endif /* POWER_ON_REASON_H */ diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index a427f13c757f..c0992a77feea 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -767,7 +767,6 @@ struct power_supply_battery_info { int bti_resistance_tolerance; }; -extern struct atomic_notifier_head power_supply_notifier; extern int power_supply_reg_notifier(struct notifier_block *nb); extern void power_supply_unreg_notifier(struct notifier_block *nb); #if IS_ENABLED(CONFIG_POWER_SUPPLY) diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 1424670df161..9aa6358a1a16 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -99,14 +99,21 @@ static __always_inline unsigned char interrupt_context_level(void) return level; } +/* + * These macro definitions avoid redundant invocations of preempt_count() + * because such invocations would result in redundant loads given that + * preempt_count() is commonly implemented with READ_ONCE(). + */ + #define nmi_count() (preempt_count() & NMI_MASK) #define hardirq_count() (preempt_count() & HARDIRQ_MASK) #ifdef CONFIG_PREEMPT_RT # define softirq_count() (current->softirq_disable_cnt & SOFTIRQ_MASK) +# define irq_count() ((preempt_count() & (NMI_MASK | HARDIRQ_MASK)) | softirq_count()) #else # define softirq_count() (preempt_count() & SOFTIRQ_MASK) +# define irq_count() (preempt_count() & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_MASK)) #endif -#define irq_count() (nmi_count() | hardirq_count() | softirq_count()) /* * Macros to retrieve the current execution context: @@ -119,7 +126,11 @@ static __always_inline unsigned char interrupt_context_level(void) #define in_nmi() (nmi_count()) #define in_hardirq() (hardirq_count()) #define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) -#define in_task() (!(in_nmi() | in_hardirq() | in_serving_softirq())) +#ifdef CONFIG_PREEMPT_RT +# define in_task() (!((preempt_count() & (NMI_MASK | HARDIRQ_MASK)) | in_serving_softirq())) +#else +# define in_task() (!(preempt_count() & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) +#endif /* * The following macros are deprecated and should not be used in new code: diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 253f2676d93a..de407e7c3b55 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -159,6 +159,7 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns, #endif /* CONFIG_PROC_PID_ARCH_STATUS */ void arch_report_meminfo(struct seq_file *m); +void arch_proc_pid_thread_features(struct seq_file *m, struct task_struct *task); #else /* CONFIG_PROC_FS */ diff --git a/include/linux/property.h b/include/linux/property.h index 8c3c6685a2ae..9f2585d705a8 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -10,6 +10,7 @@ #ifndef _LINUX_PROPERTY_H_ #define _LINUX_PROPERTY_H_ +#include <linux/args.h> #include <linux/bits.h> #include <linux/fwnode.h> #include <linux/stddef.h> @@ -288,7 +289,7 @@ struct software_node_ref_args { #define SOFTWARE_NODE_REFERENCE(_ref_, ...) \ (const struct software_node_ref_args) { \ .node = _ref_, \ - .nargs = ARRAY_SIZE(((u64[]){ 0, ##__VA_ARGS__ })) - 1, \ + .nargs = COUNT_ARGS(__VA_ARGS__), \ .args = { __VA_ARGS__ }, \ } diff --git a/include/linux/pseudo_fs.h b/include/linux/pseudo_fs.h index eceda1d1407a..730f77381d55 100644 --- a/include/linux/pseudo_fs.h +++ b/include/linux/pseudo_fs.h @@ -5,7 +5,7 @@ struct pseudo_fs_context { const struct super_operations *ops; - const struct xattr_handler **xattr; + const struct xattr_handler * const *xattr; const struct dentry_operations *dops; unsigned long magic; }; diff --git a/include/linux/psp-platform-access.h b/include/linux/psp-platform-access.h index 75da8f5f7ad8..c1dc87fc536b 100644 --- a/include/linux/psp-platform-access.h +++ b/include/linux/psp-platform-access.h @@ -8,6 +8,10 @@ enum psp_platform_access_msg { PSP_CMD_NONE = 0x0, PSP_I2C_REQ_BUS_CMD = 0x64, + PSP_DYNAMIC_BOOST_GET_NONCE, + PSP_DYNAMIC_BOOST_SET_UID, + PSP_DYNAMIC_BOOST_GET_PARAMETER, + PSP_DYNAMIC_BOOST_SET_PARAMETER, }; struct psp_req_buffer_hdr { diff --git a/include/linux/ptp_mock.h b/include/linux/ptp_mock.h new file mode 100644 index 000000000000..72eb401034d9 --- /dev/null +++ b/include/linux/ptp_mock.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Mock-up PTP Hardware Clock driver for virtual network devices + * + * Copyright 2023 NXP + */ + +#ifndef _PTP_MOCK_H_ +#define _PTP_MOCK_H_ + +struct device; +struct mock_phc; + +#if IS_ENABLED(CONFIG_PTP_1588_CLOCK_MOCK) + +struct mock_phc *mock_phc_create(struct device *dev); +void mock_phc_destroy(struct mock_phc *phc); +int mock_phc_index(struct mock_phc *phc); + +#else + +static inline struct mock_phc *mock_phc_create(struct device *dev) +{ + return NULL; +} + +static inline void mock_phc_destroy(struct mock_phc *phc) +{ +} + +static inline int mock_phc_index(struct mock_phc *phc) +{ + return -1; +} + +#endif + +#endif /* _PTP_MOCK_H_ */ diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 04ae1d9073a7..cda3597b84f2 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -41,8 +41,8 @@ struct pwm_args { }; enum { - PWMF_REQUESTED = 1 << 0, - PWMF_EXPORTED = 1 << 1, + PWMF_REQUESTED = 0, + PWMF_EXPORTED = 1, }; /* @@ -71,7 +71,6 @@ struct pwm_state { * @hwpwm: per-chip relative index of the PWM device * @pwm: global index of the PWM device * @chip: PWM chip providing this PWM device - * @chip_data: chip-private data associated with the PWM device * @args: PWM arguments * @state: last applied state * @last: last implemented state (for PWM_DEBUG) @@ -82,7 +81,6 @@ struct pwm_device { unsigned int hwpwm; unsigned int pwm; struct pwm_chip *chip; - void *chip_data; struct pwm_args args; struct pwm_state state; @@ -267,7 +265,6 @@ struct pwm_capture { * @get_state: get the current PWM state. This function is only * called once per PWM device when the PWM chip is * registered. - * @owner: helps prevent removal of modules exporting active PWMs */ struct pwm_ops { int (*request)(struct pwm_chip *chip, struct pwm_device *pwm); @@ -278,13 +275,13 @@ struct pwm_ops { const struct pwm_state *state); int (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state); - struct module *owner; }; /** * struct pwm_chip - abstract a PWM controller * @dev: device providing the PWMs * @ops: callbacks for this PWM controller + * @owner: module providing this chip * @base: number of first PWM controlled by this chip * @npwm: number of PWMs controlled by this chip * @of_xlate: request a PWM device given a device tree PWM specifier @@ -295,10 +292,11 @@ struct pwm_ops { struct pwm_chip { struct device *dev; const struct pwm_ops *ops; + struct module *owner; int base; unsigned int npwm; - struct pwm_device * (*of_xlate)(struct pwm_chip *pc, + struct pwm_device * (*of_xlate)(struct pwm_chip *chip, const struct of_phandle_args *args); unsigned int of_pwm_n_cells; @@ -383,21 +381,21 @@ static inline void pwm_disable(struct pwm_device *pwm) /* PWM provider APIs */ int pwm_capture(struct pwm_device *pwm, struct pwm_capture *result, unsigned long timeout); -int pwm_set_chip_data(struct pwm_device *pwm, void *data); -void *pwm_get_chip_data(struct pwm_device *pwm); -int pwmchip_add(struct pwm_chip *chip); +int __pwmchip_add(struct pwm_chip *chip, struct module *owner); +#define pwmchip_add(chip) __pwmchip_add(chip, THIS_MODULE) void pwmchip_remove(struct pwm_chip *chip); -int devm_pwmchip_add(struct device *dev, struct pwm_chip *chip); +int __devm_pwmchip_add(struct device *dev, struct pwm_chip *chip, struct module *owner); +#define devm_pwmchip_add(dev, chip) __devm_pwmchip_add(dev, chip, THIS_MODULE) struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip, unsigned int index, const char *label); -struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *pc, +struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *chip, const struct of_phandle_args *args); -struct pwm_device *of_pwm_single_xlate(struct pwm_chip *pc, +struct pwm_device *of_pwm_single_xlate(struct pwm_chip *chip, const struct of_phandle_args *args); struct pwm_device *pwm_get(struct device *dev, const char *con_id); @@ -445,16 +443,6 @@ static inline int pwm_capture(struct pwm_device *pwm, return -EINVAL; } -static inline int pwm_set_chip_data(struct pwm_device *pwm, void *data) -{ - return -EINVAL; -} - -static inline void *pwm_get_chip_data(struct pwm_device *pwm) -{ - return NULL; -} - static inline int pwmchip_add(struct pwm_chip *chip) { return -EINVAL; diff --git a/include/linux/qed/qed_fcoe_if.h b/include/linux/qed/qed_fcoe_if.h index 90e3045b2dcb..0d3b6ed21628 100644 --- a/include/linux/qed/qed_fcoe_if.h +++ b/include/linux/qed/qed_fcoe_if.h @@ -67,9 +67,6 @@ struct qed_fcoe_cb_ops { u32 (*get_login_failures)(void *cookie); }; -void qed_fcoe_set_pf_params(struct qed_dev *cdev, - struct qed_fcoe_pf_params *params); - /** * struct qed_fcoe_ops - qed FCoE operations. * @common: common operations pointer diff --git a/include/linux/quota.h b/include/linux/quota.h index fd692b4a41d5..07071e64abf3 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -285,7 +285,9 @@ static inline void dqstats_dec(unsigned int type) #define DQ_FAKE_B 3 /* no limits only usage */ #define DQ_READ_B 4 /* dquot was read into memory */ #define DQ_ACTIVE_B 5 /* dquot is active (dquot_release not called) */ -#define DQ_LASTSET_B 6 /* Following 6 bits (see QIF_) are reserved\ +#define DQ_RELEASING_B 6 /* dquot is in releasing_dquots list waiting + * to be cleaned up */ +#define DQ_LASTSET_B 7 /* Following 6 bits (see QIF_) are reserved\ * for the mask of entries set via SETQUOTA\ * quotactl. They are set under dq_data_lock\ * and the quota format handling dquot can\ diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 11a4becff3a9..4fa4ef0a173a 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -57,7 +57,7 @@ static inline bool dquot_is_busy(struct dquot *dquot) { if (test_bit(DQ_MOD_B, &dquot->dq_flags)) return true; - if (atomic_read(&dquot->dq_count) > 1) + if (atomic_read(&dquot->dq_count) > 0) return true; return false; } diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index f29aaaf2eb21..98030accf641 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -84,8 +84,6 @@ extern const struct raid6_calls raid6_intx1; extern const struct raid6_calls raid6_intx2; extern const struct raid6_calls raid6_intx4; extern const struct raid6_calls raid6_intx8; -extern const struct raid6_calls raid6_intx16; -extern const struct raid6_calls raid6_intx32; extern const struct raid6_calls raid6_mmxx1; extern const struct raid6_calls raid6_mmxx2; extern const struct raid6_calls raid6_sse1x1; @@ -108,6 +106,8 @@ extern const struct raid6_calls raid6_vpermxor1; extern const struct raid6_calls raid6_vpermxor2; extern const struct raid6_calls raid6_vpermxor4; extern const struct raid6_calls raid6_vpermxor8; +extern const struct raid6_calls raid6_lsx; +extern const struct raid6_calls raid6_lasx; struct raid6_recov_calls { void (*data2)(int, size_t, int, int, void **); @@ -123,6 +123,8 @@ extern const struct raid6_recov_calls raid6_recov_avx2; extern const struct raid6_recov_calls raid6_recov_avx512; extern const struct raid6_recov_calls raid6_recov_s390xc; extern const struct raid6_recov_calls raid6_recov_neon; +extern const struct raid6_recov_calls raid6_recov_lsx; +extern const struct raid6_recov_calls raid6_recov_lasx; extern const struct raid6_calls raid6_neonx1; extern const struct raid6_calls raid6_neonx2; diff --git a/include/linux/range.h b/include/linux/range.h index 7efb6a9b069b..6ad0b73cb7ad 100644 --- a/include/linux/range.h +++ b/include/linux/range.h @@ -31,12 +31,4 @@ int clean_sort_range(struct range *range, int az); void sort_range(struct range *range, int nr_range); -#define MAX_RESOURCE ((resource_size_t)~0) -static inline resource_size_t cap_resource(u64 val) -{ - if (val > MAX_RESOURCE) - return MAX_RESOURCE; - - return val; -} #endif diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h index 7ee7ed5de722..6dbc5a1bf6a8 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h @@ -60,6 +60,32 @@ rb_insert_augmented_cached(struct rb_node *node, rb_insert_augmented(node, &root->rb_root, augment); } +static __always_inline struct rb_node * +rb_add_augmented_cached(struct rb_node *node, struct rb_root_cached *tree, + bool (*less)(struct rb_node *, const struct rb_node *), + const struct rb_augment_callbacks *augment) +{ + struct rb_node **link = &tree->rb_root.rb_node; + struct rb_node *parent = NULL; + bool leftmost = true; + + while (*link) { + parent = *link; + if (less(node, parent)) { + link = &parent->rb_left; + } else { + link = &parent->rb_right; + leftmost = false; + } + } + + rb_link_node(node, parent, link); + augment->propagate(parent, NULL); /* suboptimal */ + rb_insert_augmented_cached(node, tree, leftmost, augment); + + return leftmost ? node : NULL; +} + /* * Template for declaring augmented rbtree callbacks (generic case) * diff --git a/include/linux/rcu_notifier.h b/include/linux/rcu_notifier.h new file mode 100644 index 000000000000..ebf371364581 --- /dev/null +++ b/include/linux/rcu_notifier.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Read-Copy Update notifiers, initially RCU CPU stall notifier. + * Separate from rcupdate.h to avoid #include loops. + * + * Copyright (C) 2023 Paul E. McKenney. + */ + +#ifndef __LINUX_RCU_NOTIFIER_H +#define __LINUX_RCU_NOTIFIER_H + +// Actions for RCU CPU stall notifier calls. +#define RCU_STALL_NOTIFY_NORM 1 +#define RCU_STALL_NOTIFY_EXP 2 + +#ifdef CONFIG_RCU_STALL_COMMON + +#include <linux/notifier.h> +#include <linux/types.h> + +int rcu_stall_chain_notifier_register(struct notifier_block *n); +int rcu_stall_chain_notifier_unregister(struct notifier_block *n); + +#else // #ifdef CONFIG_RCU_STALL_COMMON + +// No RCU CPU stall warnings in Tiny RCU. +static inline int rcu_stall_chain_notifier_register(struct notifier_block *n) { return -EEXIST; } +static inline int rcu_stall_chain_notifier_unregister(struct notifier_block *n) { return -ENOENT; } + +#endif // #else // #ifdef CONFIG_RCU_STALL_COMMON + +#endif /* __LINUX_RCU_NOTIFIER_H */ diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index ba4c00dd8005..89186c499dd4 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -101,7 +101,7 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, { struct hlist_nulls_node *first = h->first; - n->next = first; + WRITE_ONCE(n->next, first); WRITE_ONCE(n->pprev, &h->first); rcu_assign_pointer(hlist_nulls_first_rcu(h), n); if (!is_a_nulls(first)) @@ -137,7 +137,7 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, last = i; if (last) { - n->next = last->next; + WRITE_ONCE(n->next, last->next); n->pprev = &last->next; rcu_assign_pointer(hlist_nulls_next_rcu(last), n); } else { diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 5e5f920ade90..f7206b2623c9 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -122,8 +122,6 @@ static inline void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func) void rcu_init(void); extern int rcu_scheduler_active; void rcu_sched_clock_irq(int user); -void rcu_report_dead(unsigned int cpu); -void rcutree_migrate_callbacks(int cpu); #ifdef CONFIG_TASKS_RCU_GENERIC void rcu_init_tasks_generic(void); diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h index 9bc8cbb33340..eda493200663 100644 --- a/include/linux/rcupdate_trace.h +++ b/include/linux/rcupdate_trace.h @@ -87,6 +87,7 @@ static inline void rcu_read_unlock_trace(void) void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func); void synchronize_rcu_tasks_trace(void); void rcu_barrier_tasks_trace(void); +struct task_struct *get_rcu_tasks_trace_gp_kthread(void); #else /* * The BPF JIT forms these addresses even when it doesn't call these diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h index 699b938358bf..5e0f74f2f8ca 100644 --- a/include/linux/rcupdate_wait.h +++ b/include/linux/rcupdate_wait.h @@ -42,6 +42,11 @@ do { \ * call_srcu() function, with this wrapper supplying the pointer to the * corresponding srcu_struct. * + * Note that call_rcu_hurry() should be used instead of call_rcu() + * because in kernels built with CONFIG_RCU_LAZY=y the delay between the + * invocation of call_rcu() and that of the corresponding RCU callback + * can be multiple seconds. + * * The first argument tells Tiny RCU's _wait_rcu_gp() not to * bother waiting for RCU. The reason for this is because anywhere * synchronize_rcu_mult() can be called is automatically already a full diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 7f17acf29dda..d9ac7b136aea 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -138,6 +138,8 @@ static inline int rcu_needs_cpu(void) return 0; } +static inline void rcu_request_urgent_qs_task(struct task_struct *t) { } + /* * Take advantage of the fact that there is only one CPU, which * allows us to ignore virtualization-based context switches. @@ -169,6 +171,6 @@ static inline void rcu_all_qs(void) { barrier(); } #define rcutree_offline_cpu NULL #define rcutree_dead_cpu NULL #define rcutree_dying_cpu NULL -static inline void rcu_cpu_starting(unsigned int cpu) { } +static inline void rcutree_report_cpu_starting(unsigned int cpu) { } #endif /* __LINUX_RCUTINY_H */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 56bccb5a8fde..254244202ea9 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -21,6 +21,7 @@ void rcu_softirq_qs(void); void rcu_note_context_switch(bool preempt); int rcu_needs_cpu(void); void rcu_cpu_stall_reset(void); +void rcu_request_urgent_qs_task(struct task_struct *t); /* * Note a virtualization-based context switch. This is simply a @@ -36,7 +37,6 @@ void synchronize_rcu_expedited(void); void kvfree_call_rcu(struct rcu_head *head, void *ptr); void rcu_barrier(void); -bool rcu_eqs_special_set(int cpu); void rcu_momentary_dyntick_idle(void); void kfree_rcu_scheduler_running(void); bool rcu_gp_might_be_stalled(void); @@ -110,9 +110,21 @@ void rcu_all_qs(void); /* RCUtree hotplug events */ int rcutree_prepare_cpu(unsigned int cpu); int rcutree_online_cpu(unsigned int cpu); -int rcutree_offline_cpu(unsigned int cpu); +void rcutree_report_cpu_starting(unsigned int cpu); + +#ifdef CONFIG_HOTPLUG_CPU int rcutree_dead_cpu(unsigned int cpu); int rcutree_dying_cpu(unsigned int cpu); -void rcu_cpu_starting(unsigned int cpu); +int rcutree_offline_cpu(unsigned int cpu); +#else +#define rcutree_dead_cpu NULL +#define rcutree_dying_cpu NULL +#define rcutree_offline_cpu NULL +#endif + +void rcutree_migrate_callbacks(int cpu); + +/* Called from hotplug and also arm64 early secondary boot failure */ +void rcutree_report_cpu_dead(void); #endif /* __LINUX_RCUTREE_H */ diff --git a/include/linux/reboot.h b/include/linux/reboot.h index 2b6bb593be5b..c4cc3b89ced1 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -129,11 +129,14 @@ enum sys_off_mode { * @cb_data: User's callback data. * @cmd: Command string. Currently used only by the sys-off restart mode, * NULL otherwise. + * @dev: Device of the sys-off handler. Only if known (devm_register_*), + * NULL otherwise. */ struct sys_off_data { int mode; void *cb_data; const char *cmd; + struct device *dev; }; struct sys_off_handler * diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 8fc0b3ebce44..c9182a47736e 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1287,6 +1287,7 @@ int regcache_drop_region(struct regmap *map, unsigned int min, void regcache_cache_only(struct regmap *map, bool enable); void regcache_cache_bypass(struct regmap *map, bool enable); void regcache_mark_dirty(struct regmap *map); +bool regcache_reg_cached(struct regmap *map, unsigned int reg); bool regmap_check_range_table(struct regmap *map, unsigned int reg, const struct regmap_access_table *table); diff --git a/include/linux/regulator/db8500-prcmu.h b/include/linux/regulator/db8500-prcmu.h index f90df9ee703e..d58ff273157e 100644 --- a/include/linux/regulator/db8500-prcmu.h +++ b/include/linux/regulator/db8500-prcmu.h @@ -35,10 +35,4 @@ enum db8500_regulator_id { DB8500_NUM_REGULATORS }; -/* - * Exported interface for CPUIdle only. This function is called with all - * interrupts turned off. - */ -int power_state_active_is_enabled(void); - #endif diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index c6ef7d68eb9a..4b7eceb3828b 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -292,11 +292,12 @@ enum regulator_type { * @ramp_delay: Time to settle down after voltage change (unit: uV/us) * @min_dropout_uV: The minimum dropout voltage this regulator can handle * @linear_ranges: A constant table of possible voltage ranges. - * @linear_range_selectors: A constant table of voltage range selectors. - * If pickable ranges are used each range must - * have corresponding selector here. + * @linear_range_selectors_bitfield: A constant table of voltage range + * selectors as bitfield values. If + * pickable ranges are used each range + * must have corresponding selector here. * @n_linear_ranges: Number of entries in the @linear_ranges (and in - * linear_range_selectors if used) table(s). + * linear_range_selectors_bitfield if used) table(s). * @volt_table: Voltage mapping table (if table based mapping) * @curr_table: Current limit mapping table (if table based mapping) * @@ -384,7 +385,7 @@ struct regulator_desc { int min_dropout_uV; const struct linear_range *linear_ranges; - const unsigned int *linear_range_selectors; + const unsigned int *linear_range_selectors_bitfield; int n_linear_ranges; diff --git a/include/linux/regulator/mt6358-regulator.h b/include/linux/regulator/mt6358-regulator.h index c71a6a9fce7a..562386f9b80e 100644 --- a/include/linux/regulator/mt6358-regulator.h +++ b/include/linux/regulator/mt6358-regulator.h @@ -86,6 +86,9 @@ enum { MT6366_ID_VMC, MT6366_ID_VAUD28, MT6366_ID_VSIM2, + MT6366_ID_VM18, + MT6366_ID_VMDDR, + MT6366_ID_VSRAM_CORE, MT6366_ID_RG_MAX, }; diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index fe8978eb69f1..b4795698d8c2 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -690,6 +690,10 @@ int rproc_detach(struct rproc *rproc); int rproc_set_firmware(struct rproc *rproc, const char *fw_name); void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type); void *rproc_da_to_va(struct rproc *rproc, u64 da, size_t len, bool *is_iomem); + +/* from remoteproc_coredump.c */ +void rproc_coredump_cleanup(struct rproc *rproc); +void rproc_coredump(struct rproc *rproc); void rproc_coredump_using_sections(struct rproc *rproc); int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size); int rproc_coredump_add_custom_segment(struct rproc *rproc, diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 8334eeacfec5..66942d7fba7f 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -94,7 +94,7 @@ struct rdt_domain { * zero CBM. * @shareable_bits: Bitmask of shareable resource with other * executing entities - * @arch_has_sparse_bitmaps: True if a bitmap like f00f is valid. + * @arch_has_sparse_bitmasks: True if a bitmask like f00f is valid. * @arch_has_per_cpu_cfg: True if QOS_CFG register for this cache * level has CPU scope. */ @@ -102,7 +102,7 @@ struct resctrl_cache { unsigned int cbm_len; unsigned int min_cbm_bits; unsigned int shareable_bits; - bool arch_has_sparse_bitmaps; + bool arch_has_sparse_bitmasks; bool arch_has_per_cpu_cfg; }; diff --git a/include/linux/resume_user_mode.h b/include/linux/resume_user_mode.h index 285189454449..f8f3e958e9cf 100644 --- a/include/linux/resume_user_mode.h +++ b/include/linux/resume_user_mode.h @@ -55,7 +55,7 @@ static inline void resume_user_mode_work(struct pt_regs *regs) } #endif - mem_cgroup_handle_over_high(); + mem_cgroup_handle_over_high(GFP_KERNEL); blkcg_maybe_throttle_current(); rseq_handle_notify_resume(NULL, regs); diff --git a/include/linux/rethook.h b/include/linux/rethook.h index 26b6f3c81a76..ba60962805f6 100644 --- a/include/linux/rethook.h +++ b/include/linux/rethook.h @@ -6,11 +6,10 @@ #define _LINUX_RETHOOK_H #include <linux/compiler.h> -#include <linux/freelist.h> +#include <linux/objpool.h> #include <linux/kallsyms.h> #include <linux/llist.h> #include <linux/rcupdate.h> -#include <linux/refcount.h> struct rethook_node; @@ -29,15 +28,18 @@ typedef void (*rethook_handler_t) (struct rethook_node *, void *, unsigned long, */ struct rethook { void *data; - rethook_handler_t handler; - struct freelist_head pool; - refcount_t ref; + /* + * To avoid sparse warnings, this uses a raw function pointer with + * __rcu, instead of rethook_handler_t. But this must be same as + * rethook_handler_t. + */ + void (__rcu *handler) (struct rethook_node *, void *, unsigned long, struct pt_regs *); + struct objpool_head pool; struct rcu_head rcu; }; /** * struct rethook_node - The rethook shadow-stack entry node. - * @freelist: The freelist, linked to struct rethook::pool. * @rcu: The rcu_head for deferred freeing. * @llist: The llist, linked to a struct task_struct::rethooks. * @rethook: The pointer to the struct rethook. @@ -48,20 +50,16 @@ struct rethook { * on each entry of the shadow stack. */ struct rethook_node { - union { - struct freelist_node freelist; - struct rcu_head rcu; - }; + struct rcu_head rcu; struct llist_node llist; struct rethook *rethook; unsigned long ret_addr; unsigned long frame; }; -struct rethook *rethook_alloc(void *data, rethook_handler_t handler); +struct rethook *rethook_alloc(void *data, rethook_handler_t handler, int size, int num); void rethook_stop(struct rethook *rh); void rethook_free(struct rethook *rh); -void rethook_add_node(struct rethook *rh, struct rethook_node *node); struct rethook_node *rethook_try_get(struct rethook *rh); void rethook_recycle(struct rethook_node *node); void rethook_hook(struct rethook_node *node, struct pt_regs *regs, bool mcount); @@ -98,4 +96,3 @@ void rethook_flush_task(struct task_struct *tk); #endif #endif - diff --git a/include/linux/rmap.h b/include/linux/rmap.h index b87d01660412..b26fe858fd44 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -189,7 +189,7 @@ typedef int __bitwise rmap_t; /* * rmap interfaces called when adding or removing pte of page */ -void page_move_anon_rmap(struct page *, struct vm_area_struct *); +void folio_move_anon_rmap(struct folio *, struct vm_area_struct *); void page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long address, rmap_t flags); void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, @@ -198,10 +198,12 @@ void folio_add_new_anon_rmap(struct folio *, struct vm_area_struct *, unsigned long address); void page_add_file_rmap(struct page *, struct vm_area_struct *, bool compound); +void folio_add_file_rmap_range(struct folio *, struct page *, unsigned int nr, + struct vm_area_struct *, bool compound); void page_remove_rmap(struct page *, struct vm_area_struct *, bool compound); -void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *, +void hugepage_add_anon_rmap(struct folio *, struct vm_area_struct *, unsigned long address, rmap_t flags); void hugepage_add_new_anon_rmap(struct folio *, struct vm_area_struct *, unsigned long address); @@ -477,7 +479,6 @@ struct anon_vma *folio_lock_anon_vma_read(struct folio *folio, #define anon_vma_init() do {} while (0) #define anon_vma_prepare(vma) (0) -#define anon_vma_link(vma) do {} while (0) static inline int folio_referenced(struct folio *folio, int is_locked, struct mem_cgroup *memcg, diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index 523c98b96cb4..90d8e4475f80 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -64,12 +64,14 @@ struct rpmsg_device { }; typedef int (*rpmsg_rx_cb_t)(struct rpmsg_device *, void *, int, void *, u32); +typedef int (*rpmsg_flowcontrol_cb_t)(struct rpmsg_device *, void *, bool); /** * struct rpmsg_endpoint - binds a local rpmsg address to its user * @rpdev: rpmsg channel device * @refcount: when this drops to zero, the ept is deallocated * @cb: rx callback handler + * @flow_cb: remote flow control callback handler * @cb_lock: must be taken before accessing/changing @cb * @addr: local rpmsg address * @priv: private data for the driver's use @@ -92,6 +94,7 @@ struct rpmsg_endpoint { struct rpmsg_device *rpdev; struct kref refcount; rpmsg_rx_cb_t cb; + rpmsg_flowcontrol_cb_t flow_cb; struct mutex cb_lock; u32 addr; void *priv; @@ -106,6 +109,7 @@ struct rpmsg_endpoint { * @probe: invoked when a matching rpmsg channel (i.e. device) is found * @remove: invoked when the rpmsg channel is removed * @callback: invoked when an inbound message is received on the channel + * @flowcontrol: invoked when remote side flow control request is received */ struct rpmsg_driver { struct device_driver drv; @@ -113,6 +117,7 @@ struct rpmsg_driver { int (*probe)(struct rpmsg_device *dev); void (*remove)(struct rpmsg_device *dev); int (*callback)(struct rpmsg_device *, void *, int, void *, u32); + int (*flowcontrol)(struct rpmsg_device *, void *, bool); }; static inline u16 rpmsg16_to_cpu(struct rpmsg_device *rpdev, __rpmsg16 val) @@ -192,6 +197,8 @@ __poll_t rpmsg_poll(struct rpmsg_endpoint *ept, struct file *filp, ssize_t rpmsg_get_mtu(struct rpmsg_endpoint *ept); +int rpmsg_set_flow_control(struct rpmsg_endpoint *ept, bool pause, u32 dst); + #else static inline int rpmsg_register_device_override(struct rpmsg_device *rpdev, @@ -316,6 +323,14 @@ static inline ssize_t rpmsg_get_mtu(struct rpmsg_endpoint *ept) return -ENXIO; } +static inline int rpmsg_set_flow_control(struct rpmsg_endpoint *ept, bool pause, u32 dst) +{ + /* This shouldn't be possible */ + WARN_ON(1); + + return -ENXIO; +} + #endif /* IS_ENABLED(CONFIG_RPMSG) */ /* use a macro to avoid include chaining to get THIS_MODULE */ diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 1fd9c6a21ebe..5f8e438a0312 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -146,6 +146,7 @@ struct rtc_device { time64_t range_min; timeu64_t range_max; + timeu64_t alarm_offset_max; time64_t start_secs; time64_t offset_secs; bool set_start_time; @@ -224,6 +225,23 @@ static inline bool is_leap_year(unsigned int year) return (!(year % 4) && (year % 100)) || !(year % 400); } +/** + * rtc_bound_alarmtime() - Return alarm time bound by rtc limit + * @rtc: Pointer to rtc device structure + * @requested: Requested alarm timeout + * + * Return: Alarm timeout bound by maximum alarm time supported by rtc. + */ +static inline ktime_t rtc_bound_alarmtime(struct rtc_device *rtc, + ktime_t requested) +{ + if (rtc->alarm_offset_max && + rtc->alarm_offset_max * MSEC_PER_SEC < ktime_to_ms(requested)) + return ms_to_ktime(rtc->alarm_offset_max * MSEC_PER_SEC); + + return requested; +} + #define devm_rtc_register_device(device) \ __devm_rtc_register_device(THIS_MODULE, device) diff --git a/include/linux/sched.h b/include/linux/sched.h index 609bde814cb0..292c31697248 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -63,7 +63,6 @@ struct robust_list_head; struct root_domain; struct rq; struct sched_attr; -struct sched_param; struct seq_file; struct sighand_struct; struct signal_struct; @@ -75,14 +74,14 @@ struct user_event_mm; * Task state bitmask. NOTE! These bits are also * encoded in fs/proc/array.c: get_task_state(). * - * We have two separate sets of flags: task->state + * We have two separate sets of flags: task->__state * is about runnability, while task->exit_state are * about the task exiting. Confusing, but this way * modifying one set can't modify the other one by * mistake. */ -/* Used in tsk->state: */ +/* Used in tsk->__state: */ #define TASK_RUNNING 0x00000000 #define TASK_INTERRUPTIBLE 0x00000001 #define TASK_UNINTERRUPTIBLE 0x00000002 @@ -92,7 +91,7 @@ struct user_event_mm; #define EXIT_DEAD 0x00000010 #define EXIT_ZOMBIE 0x00000020 #define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) -/* Used in tsk->state again: */ +/* Used in tsk->__state again: */ #define TASK_PARKED 0x00000040 #define TASK_DEAD 0x00000080 #define TASK_WAKEKILL 0x00000100 @@ -173,7 +172,7 @@ struct user_event_mm; #endif /* - * set_current_state() includes a barrier so that the write of current->state + * set_current_state() includes a barrier so that the write of current->__state * is correctly serialised wrt the caller's subsequent test of whether to * actually sleep: * @@ -196,9 +195,9 @@ struct user_event_mm; * wake_up_state(p, TASK_UNINTERRUPTIBLE); * * where wake_up_state()/try_to_wake_up() executes a full memory barrier before - * accessing p->state. + * accessing p->__state. * - * Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is, + * Wakeup will do: if (@state & p->__state) p->__state = TASK_RUNNING, that is, * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING). * @@ -370,6 +369,10 @@ extern struct root_domain def_root_domain; extern struct mutex sched_domains_mutex; #endif +struct sched_param { + int sched_priority; +}; + struct sched_info { #ifdef CONFIG_SCHED_INFO /* Cumulative counters: */ @@ -549,13 +552,18 @@ struct sched_entity { /* For load-balancing: */ struct load_weight load; struct rb_node run_node; + u64 deadline; + u64 min_deadline; + struct list_head group_node; unsigned int on_rq; u64 exec_start; u64 sum_exec_runtime; - u64 vruntime; u64 prev_sum_exec_runtime; + u64 vruntime; + s64 vlag; + u64 slice; u64 nr_migrations; @@ -745,10 +753,8 @@ struct task_struct { #endif unsigned int __state; -#ifdef CONFIG_PREEMPT_RT /* saved state for "spinlock sleepers" */ unsigned int saved_state; -#endif /* * This begins the randomizable portion of task_struct. Only @@ -870,6 +876,7 @@ struct task_struct { struct mm_struct *mm; struct mm_struct *active_mm; + struct address_space *faults_disabled_mapping; int exit_state; int exit_code; @@ -906,6 +913,9 @@ struct task_struct { * ->sched_remote_wakeup gets used, so it can be in this word. */ unsigned sched_remote_wakeup:1; +#ifdef CONFIG_RT_MUTEXES + unsigned sched_rt_mutex:1; +#endif /* Bit to tell LSMs we're in execve(): */ unsigned in_execve:1; @@ -997,7 +1007,6 @@ struct task_struct { /* PID/PID hash table linkage. */ struct pid *thread_pid; struct hlist_node pid_links[PIDTYPE_MAX]; - struct list_head thread_group; struct list_head thread_node; struct completion *vfork_done; @@ -1438,6 +1447,10 @@ struct task_struct { struct mem_cgroup *active_memcg; #endif +#ifdef CONFIG_MEMCG_KMEM + struct obj_cgroup *objcg; +#endif + #ifdef CONFIG_BLK_CGROUP struct gendisk *throttle_disk; #endif @@ -1666,7 +1679,7 @@ static inline unsigned int __task_state_index(unsigned int tsk_state, BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX); - if (tsk_state == TASK_IDLE) + if ((tsk_state & TASK_IDLE) == TASK_IDLE) state = TASK_REPORT_IDLE; /* @@ -1674,7 +1687,7 @@ static inline unsigned int __task_state_index(unsigned int tsk_state, * to userspace, we can make this appear as if the task has gone through * a regular rt_mutex_lock() call. */ - if (tsk_state == TASK_RTLOCK_WAIT) + if (tsk_state & TASK_RTLOCK_WAIT) state = TASK_UNINTERRUPTIBLE; return fls(state); @@ -1853,7 +1866,17 @@ extern int task_can_attach(struct task_struct *p); extern int dl_bw_alloc(int cpu, u64 dl_bw); extern void dl_bw_free(int cpu, u64 dl_bw); #ifdef CONFIG_SMP + +/* do_set_cpus_allowed() - consider using set_cpus_allowed_ptr() instead */ extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); + +/** + * set_cpus_allowed_ptr - set CPU affinity mask of a task + * @p: the task + * @new_mask: CPU affinity mask + * + * Return: zero if successful, or a negative error code + */ extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); extern int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node); extern void release_user_cpus_ptr(struct task_struct *p); @@ -2433,9 +2456,11 @@ extern void sched_core_free(struct task_struct *tsk); extern void sched_core_fork(struct task_struct *p); extern int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type, unsigned long uaddr); +extern int sched_core_idle_cpu(int cpu); #else static inline void sched_core_free(struct task_struct *tsk) { } static inline void sched_core_fork(struct task_struct *p) { } +static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); } #endif extern void sched_set_stop_task(int cpu, struct task_struct *stop); diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index 0ee96ea7a0e9..02f5090ffea2 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -71,6 +71,7 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ #define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ #define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ +#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) #define MMF_OOM_REAP_QUEUED 25 /* mm was queued for oom_reaper */ #define MMF_MULTIPROCESS 26 /* mm is shared between processes */ /* @@ -85,10 +86,22 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_HAS_MDWE 28 #define MMF_HAS_MDWE_MASK (1 << MMF_HAS_MDWE) -#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) + +#define MMF_HAS_MDWE_NO_INHERIT 29 + +#define MMF_VM_MERGE_ANY 30 +#define MMF_VM_MERGE_ANY_MASK (1 << MMF_VM_MERGE_ANY) #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ - MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK) + MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK |\ + MMF_VM_MERGE_ANY_MASK) + +static inline unsigned long mmf_init_flags(unsigned long flags) +{ + if (flags & (1UL << MMF_HAS_MDWE_NO_INHERIT)) + flags &= ~((1UL << MMF_HAS_MDWE) | + (1UL << MMF_HAS_MDWE_NO_INHERIT)); + return flags & MMF_INIT_MASK; +} -#define MMF_VM_MERGE_ANY 29 #endif /* _LINUX_SCHED_COREDUMP_H */ diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h index 7c83d4d5a971..df3aca89d4f5 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h @@ -1,4 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_SCHED_DEADLINE_H +#define _LINUX_SCHED_DEADLINE_H /* * SCHED_DEADLINE tasks has negative priorities, reflecting @@ -34,3 +36,5 @@ extern void dl_add_task_root_domain(struct task_struct *p); extern void dl_clear_root_domain(struct root_domain *rd); #endif /* CONFIG_SMP */ + +#endif /* _LINUX_SCHED_DEADLINE_H */ diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 8d89c8c4fac1..9a19f1b42f64 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -403,6 +403,10 @@ DECLARE_PER_CPU(struct mem_cgroup *, int_active_memcg); * __GFP_ACCOUNT allocations till the end of the scope will be charged to the * given memcg. * + * Please, make sure that caller has a reference to the passed memcg structure, + * so its lifetime is guaranteed to exceed the scope between two + * set_active_memcg() calls. + * * NOTE: This function can nest. Users must save the return value and * reset the previous value after their own charging scope is over. */ diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h index 3988762efe15..52b22c5c396d 100644 --- a/include/linux/sched/numa_balancing.h +++ b/include/linux/sched/numa_balancing.h @@ -15,13 +15,23 @@ #define TNF_FAULT_LOCAL 0x08 #define TNF_MIGRATE_FAIL 0x10 +enum numa_vmaskip_reason { + NUMAB_SKIP_UNSUITABLE, + NUMAB_SKIP_SHARED_RO, + NUMAB_SKIP_INACCESSIBLE, + NUMAB_SKIP_SCAN_DELAY, + NUMAB_SKIP_PID_INACTIVE, + NUMAB_SKIP_IGNORE_PID, + NUMAB_SKIP_SEQ_COMPLETED, +}; + #ifdef CONFIG_NUMA_BALANCING extern void task_numa_fault(int last_node, int node, int pages, int flags); extern pid_t task_numa_group_id(struct task_struct *p); extern void set_numabalancing_state(bool enabled); extern void task_numa_free(struct task_struct *p, bool final); -extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page, - int src_nid, int dst_cpu); +bool should_numa_migrate_memory(struct task_struct *p, struct folio *folio, + int src_nid, int dst_cpu); #else static inline void task_numa_fault(int last_node, int node, int pages, int flags) @@ -38,7 +48,7 @@ static inline void task_numa_free(struct task_struct *p, bool final) { } static inline bool should_numa_migrate_memory(struct task_struct *p, - struct page *page, int src_nid, int dst_cpu) + struct folio *folio, int src_nid, int dst_cpu) { return true; } diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index 994c25640e15..b2b9e6eb9683 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -30,6 +30,10 @@ static inline bool task_is_realtime(struct task_struct *tsk) } #ifdef CONFIG_RT_MUTEXES +extern void rt_mutex_pre_schedule(void); +extern void rt_mutex_schedule(void); +extern void rt_mutex_post_schedule(void); + /* * Must hold either p->pi_lock or task_rq(p)->lock. */ diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h index fad77b5172e2..a8b28647aafc 100644 --- a/include/linux/sched/sd_flags.h +++ b/include/linux/sched/sd_flags.h @@ -110,6 +110,13 @@ SD_FLAG(SD_ASYM_CPUCAPACITY_FULL, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS) SD_FLAG(SD_SHARE_CPUCAPACITY, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) /* + * Domain members share CPU cluster (LLC tags or L2 cache) + * + * NEEDS_GROUPS: Clusters are shared between groups. + */ +SD_FLAG(SD_CLUSTER, SDF_NEEDS_GROUPS) + +/* * Domain members share CPU package resources (i.e. caches) * * SHARED_CHILD: Set from the base domain up until spanned CPUs no longer share diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 669e8cff40c7..3499c1a8b929 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -303,20 +303,11 @@ static inline void kernel_signal_stop(void) schedule(); } -#ifdef __ia64__ -# define ___ARCH_SI_IA64(_a1, _a2, _a3) , _a1, _a2, _a3 -#else -# define ___ARCH_SI_IA64(_a1, _a2, _a3) -#endif -int force_sig_fault_to_task(int sig, int code, void __user *addr - ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) - , struct task_struct *t); -int force_sig_fault(int sig, int code, void __user *addr - ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr)); -int send_sig_fault(int sig, int code, void __user *addr - ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) - , struct task_struct *t); +int force_sig_fault_to_task(int sig, int code, void __user *addr, + struct task_struct *t); +int force_sig_fault(int sig, int code, void __user *addr); +int send_sig_fault(int sig, int code, void __user *addr, struct task_struct *t); int force_sig_mceerr(int code, void __user *, short); int send_sig_mceerr(int code, void __user *, short, struct task_struct *); @@ -649,17 +640,15 @@ extern void flush_itimer_signals(void); extern bool current_is_single_threaded(void); /* - * Careful: do_each_thread/while_each_thread is a double loop so - * 'break' will not work as expected - use goto instead. + * Without tasklist/siglock it is only rcu-safe if g can't exit/exec, + * otherwise next_thread(t) will never reach g after list_del_rcu(g). */ -#define do_each_thread(g, t) \ - for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do - #define while_each_thread(g, t) \ while ((t = next_thread(t)) != g) #define __for_each_thread(signal, t) \ - list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node) + list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node, \ + lockdep_is_held(&tasklist_lock)) #define for_each_thread(p, t) \ __for_each_thread((p)->signal, t) @@ -718,15 +707,26 @@ bool same_thread_group(struct task_struct *p1, struct task_struct *p2) return p1->signal == p2->signal; } -static inline struct task_struct *next_thread(const struct task_struct *p) +/* + * returns NULL if p is the last thread in the thread group + */ +static inline struct task_struct *__next_thread(struct task_struct *p) +{ + return list_next_or_null_rcu(&p->signal->thread_head, + &p->thread_node, + struct task_struct, + thread_node); +} + +static inline struct task_struct *next_thread(struct task_struct *p) { - return list_entry_rcu(p->thread_group.next, - struct task_struct, thread_group); + return __next_thread(p) ?: p->group_leader; } static inline int thread_group_empty(struct task_struct *p) { - return list_empty(&p->thread_group); + return thread_group_leader(p) && + list_is_last(&p->thread_node, &p->signal->thread_head); } #define delay_group_leader(p) \ diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h index 59d3736c454c..fb1e295e7e63 100644 --- a/include/linux/sched/smt.h +++ b/include/linux/sched/smt.h @@ -17,4 +17,4 @@ static inline bool sched_smt_active(void) { return false; } void arch_smt_update(void); -#endif +#endif /* _LINUX_SCHED_SMT_H */ diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index dd35ce28bb90..a23af225c898 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -118,11 +118,47 @@ static inline struct task_struct *get_task_struct(struct task_struct *t) } extern void __put_task_struct(struct task_struct *t); +extern void __put_task_struct_rcu_cb(struct rcu_head *rhp); static inline void put_task_struct(struct task_struct *t) { - if (refcount_dec_and_test(&t->usage)) + if (!refcount_dec_and_test(&t->usage)) + return; + + /* + * In !RT, it is always safe to call __put_task_struct(). + * Under RT, we can only call it in preemptible context. + */ + if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible()) { + static DEFINE_WAIT_OVERRIDE_MAP(put_task_map, LD_WAIT_SLEEP); + + lock_map_acquire_try(&put_task_map); __put_task_struct(t); + lock_map_release(&put_task_map); + return; + } + + /* + * under PREEMPT_RT, we can't call put_task_struct + * in atomic context because it will indirectly + * acquire sleeping locks. + * + * call_rcu() will schedule delayed_put_task_struct_rcu() + * to be called in process context. + * + * __put_task_struct() is called when + * refcount_dec_and_test(&t->usage) succeeds. + * + * This means that it can't "conflict" with + * put_task_struct_rcu_user() which abuses ->rcu the same + * way; rcu_users has a reference so task->usage can't be + * zero after rcu_users 1 -> 0 transition. + * + * delayed_free_task() also uses ->rcu, but it is only called + * when it fails to fork a process. Therefore, there is no + * way it can conflict with put_task_struct(). + */ + call_rcu(&t->rcu, __put_task_struct_rcu_cb); } DEFINE_FREE(put_task, struct task_struct *, if (_T) put_task_struct(_T)) diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 67b573d5bf28..de545ba85218 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -45,7 +45,7 @@ static inline int cpu_smt_flags(void) #ifdef CONFIG_SCHED_CLUSTER static inline int cpu_cluster_flags(void) { - return SD_SHARE_PKG_RESOURCES; + return SD_CLUSTER | SD_SHARE_PKG_RESOURCES; } #endif @@ -109,8 +109,6 @@ struct sched_domain { u64 max_newidle_lb_cost; unsigned long last_decay_max_lb_cost; - u64 avg_scan_cost; /* select_idle_sibling */ - #ifdef CONFIG_SCHEDSTATS /* load_balance() stats */ unsigned int lb_count[CPU_MAX_IDLE_TYPES]; @@ -179,6 +177,7 @@ cpumask_var_t *alloc_sched_domains(unsigned int ndoms); void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); bool cpus_share_cache(int this_cpu, int that_cpu); +bool cpus_share_resources(int this_cpu, int that_cpu); typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); typedef int (*sched_domain_flags_f)(void); @@ -232,6 +231,11 @@ static inline bool cpus_share_cache(int this_cpu, int that_cpu) return true; } +static inline bool cpus_share_resources(int this_cpu, int that_cpu) +{ + return true; +} + #endif /* !CONFIG_SMP */ #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) diff --git a/include/linux/sched/types.h b/include/linux/sched/types.h index 3c3e049224ae..969aaf5ef9d6 100644 --- a/include/linux/sched/types.h +++ b/include/linux/sched/types.h @@ -20,4 +20,4 @@ struct task_cputime { unsigned long long sum_exec_runtime; }; -#endif +#endif /* _LINUX_SCHED_TYPES_H */ diff --git a/include/linux/sched/vhost_task.h b/include/linux/sched/vhost_task.h index 837a23624a66..bc60243d43b3 100644 --- a/include/linux/sched/vhost_task.h +++ b/include/linux/sched/vhost_task.h @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_VHOST_TASK_H -#define _LINUX_VHOST_TASK_H - +#ifndef _LINUX_SCHED_VHOST_TASK_H +#define _LINUX_SCHED_VHOST_TASK_H struct vhost_task; @@ -11,4 +10,4 @@ void vhost_task_start(struct vhost_task *vtsk); void vhost_task_stop(struct vhost_task *vtsk); void vhost_task_wake(struct vhost_task *vtsk); -#endif +#endif /* _LINUX_SCHED_VHOST_TASK_H */ diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index e6fe4f73ffe6..f2f05fb42d28 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -58,6 +58,8 @@ struct scmi_clock_info { u64 step_size; } range; }; + int num_parents; + u32 *parents; }; enum scmi_power_scale { @@ -80,6 +82,11 @@ struct scmi_protocol_handle; * @rate_set: set the clock rate of a clock * @enable: enables the specified clock * @disable: disables the specified clock + * @state_get: get the status of the specified clock + * @config_oem_get: get the value of an OEM specific clock config + * @config_oem_set: set the value of an OEM specific clock config + * @parent_get: get the parent id of a clk + * @parent_set: set the parent of a clock */ struct scmi_clk_proto_ops { int (*count_get)(const struct scmi_protocol_handle *ph); @@ -90,22 +97,36 @@ struct scmi_clk_proto_ops { u64 *rate); int (*rate_set)(const struct scmi_protocol_handle *ph, u32 clk_id, u64 rate); - int (*enable)(const struct scmi_protocol_handle *ph, u32 clk_id); - int (*disable)(const struct scmi_protocol_handle *ph, u32 clk_id); - int (*enable_atomic)(const struct scmi_protocol_handle *ph, u32 clk_id); - int (*disable_atomic)(const struct scmi_protocol_handle *ph, - u32 clk_id); + int (*enable)(const struct scmi_protocol_handle *ph, u32 clk_id, + bool atomic); + int (*disable)(const struct scmi_protocol_handle *ph, u32 clk_id, + bool atomic); + int (*state_get)(const struct scmi_protocol_handle *ph, u32 clk_id, + bool *enabled, bool atomic); + int (*config_oem_get)(const struct scmi_protocol_handle *ph, u32 clk_id, + u8 oem_type, u32 *oem_val, u32 *attributes, + bool atomic); + int (*config_oem_set)(const struct scmi_protocol_handle *ph, u32 clk_id, + u8 oem_type, u32 oem_val, bool atomic); + int (*parent_get)(const struct scmi_protocol_handle *ph, u32 clk_id, u32 *parent_id); + int (*parent_set)(const struct scmi_protocol_handle *ph, u32 clk_id, u32 parent_id); +}; + +struct scmi_perf_domain_info { + char name[SCMI_MAX_STR_SIZE]; + bool set_perf; }; /** * struct scmi_perf_proto_ops - represents the various operations provided * by SCMI Performance Protocol * + * @num_domains_get: gets the number of supported performance domains + * @info_get: get the information of a performance domain * @limits_set: sets limits on the performance level of a domain * @limits_get: gets limits on the performance level of a domain * @level_set: sets the performance level of a domain * @level_get: gets the performance level of a domain - * @device_domain_id: gets the scmi domain id for a given device * @transition_latency_get: gets the DVFS transition latency for a given device * @device_opps_add: adds all the OPPs for a given device * @freq_set: sets the frequency for a given device using sustained frequency @@ -120,6 +141,9 @@ struct scmi_clk_proto_ops { * or in some other (abstract) scale */ struct scmi_perf_proto_ops { + int (*num_domains_get)(const struct scmi_protocol_handle *ph); + const struct scmi_perf_domain_info __must_check *(*info_get) + (const struct scmi_protocol_handle *ph, u32 domain); int (*limits_set)(const struct scmi_protocol_handle *ph, u32 domain, u32 max_perf, u32 min_perf); int (*limits_get)(const struct scmi_protocol_handle *ph, u32 domain, @@ -128,11 +152,10 @@ struct scmi_perf_proto_ops { u32 level, bool poll); int (*level_get)(const struct scmi_protocol_handle *ph, u32 domain, u32 *level, bool poll); - int (*device_domain_id)(struct device *dev); int (*transition_latency_get)(const struct scmi_protocol_handle *ph, - struct device *dev); + u32 domain); int (*device_opps_add)(const struct scmi_protocol_handle *ph, - struct device *dev); + struct device *dev, u32 domain); int (*freq_set)(const struct scmi_protocol_handle *ph, u32 domain, unsigned long rate, bool poll); int (*freq_get)(const struct scmi_protocol_handle *ph, u32 domain, @@ -140,7 +163,7 @@ struct scmi_perf_proto_ops { int (*est_power_get)(const struct scmi_protocol_handle *ph, u32 domain, unsigned long *rate, unsigned long *power); bool (*fast_switch_possible)(const struct scmi_protocol_handle *ph, - struct device *dev); + u32 domain); enum scmi_power_scale (*power_scale_get)(const struct scmi_protocol_handle *ph); }; diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h index 988528b5da43..35f3a4a8ceb1 100644 --- a/include/linux/secretmem.h +++ b/include/linux/secretmem.h @@ -6,24 +6,23 @@ extern const struct address_space_operations secretmem_aops; -static inline bool page_is_secretmem(struct page *page) +static inline bool folio_is_secretmem(struct folio *folio) { struct address_space *mapping; /* - * Using page_mapping() is quite slow because of the actual call - * instruction and repeated compound_head(page) inside the - * page_mapping() function. + * Using folio_mapping() is quite slow because of the actual call + * instruction. * We know that secretmem pages are not compound and LRU so we can * save a couple of cycles here. */ - if (PageCompound(page) || !PageLRU(page)) + if (folio_test_large(folio) || !folio_test_lru(folio)) return false; mapping = (struct address_space *) - ((unsigned long)page->mapping & ~PAGE_MAPPING_FLAGS); + ((unsigned long)folio->mapping & ~PAGE_MAPPING_FLAGS); - if (!mapping || mapping != page->mapping) + if (!mapping || mapping != folio->mapping) return false; return mapping->a_ops == &secretmem_aops; @@ -39,7 +38,7 @@ static inline bool vma_is_secretmem(struct vm_area_struct *vma) return false; } -static inline bool page_is_secretmem(struct page *page) +static inline bool folio_is_secretmem(struct folio *folio) { return false; } diff --git a/include/linux/security.h b/include/linux/security.h index 32828502f09e..1d1df326c881 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -145,12 +145,13 @@ extern int cap_capable(const struct cred *cred, struct user_namespace *ns, extern int cap_settime(const struct timespec64 *ts, const struct timezone *tz); extern int cap_ptrace_access_check(struct task_struct *child, unsigned int mode); extern int cap_ptrace_traceme(struct task_struct *parent); -extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); +extern int cap_capget(const struct task_struct *target, kernel_cap_t *effective, + kernel_cap_t *inheritable, kernel_cap_t *permitted); extern int cap_capset(struct cred *new, const struct cred *old, const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted); -extern int cap_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file); +extern int cap_bprm_creds_from_file(struct linux_binprm *bprm, const struct file *file); int cap_inode_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); int cap_inode_removexattr(struct mnt_idmap *idmap, @@ -268,10 +269,10 @@ int security_binder_transaction(const struct cred *from, int security_binder_transfer_binder(const struct cred *from, const struct cred *to); int security_binder_transfer_file(const struct cred *from, - const struct cred *to, struct file *file); + const struct cred *to, const struct file *file); int security_ptrace_access_check(struct task_struct *child, unsigned int mode); int security_ptrace_traceme(struct task_struct *parent); -int security_capget(struct task_struct *target, +int security_capget(const struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); @@ -283,16 +284,17 @@ int security_capable(const struct cred *cred, struct user_namespace *ns, int cap, unsigned int opts); -int security_quotactl(int cmds, int type, int id, struct super_block *sb); +int security_quotactl(int cmds, int type, int id, const struct super_block *sb); int security_quota_on(struct dentry *dentry); int security_syslog(int type); int security_settime64(const struct timespec64 *ts, const struct timezone *tz); int security_vm_enough_memory_mm(struct mm_struct *mm, long pages); int security_bprm_creds_for_exec(struct linux_binprm *bprm); -int security_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file); +int security_bprm_creds_from_file(struct linux_binprm *bprm, const struct file *file); int security_bprm_check(struct linux_binprm *bprm); -void security_bprm_committing_creds(struct linux_binprm *bprm); -void security_bprm_committed_creds(struct linux_binprm *bprm); +void security_bprm_committing_creds(const struct linux_binprm *bprm); +void security_bprm_committed_creds(const struct linux_binprm *bprm); +int security_fs_context_submount(struct fs_context *fc, struct super_block *reference); int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc); int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param); int security_sb_alloc(struct super_block *sb); @@ -302,7 +304,7 @@ void security_free_mnt_opts(void **mnt_opts); int security_sb_eat_lsm_opts(char *options, void **mnt_opts); int security_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts); int security_sb_remount(struct super_block *sb, void *mnt_opts); -int security_sb_kern_mount(struct super_block *sb); +int security_sb_kern_mount(const struct super_block *sb); int security_sb_show_options(struct seq_file *m, struct super_block *sb); int security_sb_statfs(struct dentry *dentry); int security_sb_mount(const char *dev_name, const struct path *path, @@ -537,7 +539,7 @@ static inline int security_binder_transfer_binder(const struct cred *from, static inline int security_binder_transfer_file(const struct cred *from, const struct cred *to, - struct file *file) + const struct file *file) { return 0; } @@ -553,7 +555,7 @@ static inline int security_ptrace_traceme(struct task_struct *parent) return cap_ptrace_traceme(parent); } -static inline int security_capget(struct task_struct *target, +static inline int security_capget(const struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) @@ -579,7 +581,7 @@ static inline int security_capable(const struct cred *cred, } static inline int security_quotactl(int cmds, int type, int id, - struct super_block *sb) + const struct super_block *sb) { return 0; } @@ -611,7 +613,7 @@ static inline int security_bprm_creds_for_exec(struct linux_binprm *bprm) } static inline int security_bprm_creds_from_file(struct linux_binprm *bprm, - struct file *file) + const struct file *file) { return cap_bprm_creds_from_file(bprm, file); } @@ -621,14 +623,19 @@ static inline int security_bprm_check(struct linux_binprm *bprm) return 0; } -static inline void security_bprm_committing_creds(struct linux_binprm *bprm) +static inline void security_bprm_committing_creds(const struct linux_binprm *bprm) { } -static inline void security_bprm_committed_creds(struct linux_binprm *bprm) +static inline void security_bprm_committed_creds(const struct linux_binprm *bprm) { } +static inline int security_fs_context_submount(struct fs_context *fc, + struct super_block *reference) +{ + return 0; +} static inline int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc) { @@ -1439,7 +1446,8 @@ int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u int security_sk_alloc(struct sock *sk, int family, gfp_t priority); void security_sk_free(struct sock *sk); void security_sk_clone(const struct sock *sk, struct sock *newsk); -void security_sk_classify_flow(struct sock *sk, struct flowi_common *flic); +void security_sk_classify_flow(const struct sock *sk, + struct flowi_common *flic); void security_req_classify_flow(const struct request_sock *req, struct flowi_common *flic); void security_sock_graft(struct sock*sk, struct socket *parent); @@ -1597,7 +1605,7 @@ static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) { } -static inline void security_sk_classify_flow(struct sock *sk, +static inline void security_sk_classify_flow(const struct sock *sk, struct flowi_common *flic) { } diff --git a/include/linux/sed-opal-key.h b/include/linux/sed-opal-key.h new file mode 100644 index 000000000000..0ca03054e8f6 --- /dev/null +++ b/include/linux/sed-opal-key.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * SED key operations. + * + * Copyright (C) 2023 IBM Corporation + * + * These are the accessor functions (read/write) for SED Opal + * keys. Specific keystores can provide overrides. + * + */ + +#include <linux/kernel.h> + +#ifdef CONFIG_PSERIES_PLPKS_SED +int sed_read_key(char *keyname, char *key, u_int *keylen); +int sed_write_key(char *keyname, char *key, u_int keylen); +#else +static inline +int sed_read_key(char *keyname, char *key, u_int *keylen) { + return -EOPNOTSUPP; +} +static inline +int sed_write_key(char *keyname, char *key, u_int keylen) { + return -EOPNOTSUPP; +} +#endif diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h index bbae1e52ab4f..2ac50822554e 100644 --- a/include/linux/sed-opal.h +++ b/include/linux/sed-opal.h @@ -25,6 +25,9 @@ bool opal_unlock_from_suspend(struct opal_dev *dev); struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv); int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *ioctl_ptr); +#define OPAL_AUTH_KEY "opal-boot-pin" +#define OPAL_AUTH_KEY_PREV "opal-boot-pin-prev" + static inline bool is_sed_ioctl(unsigned int cmd) { switch (cmd) { @@ -47,6 +50,8 @@ static inline bool is_sed_ioctl(unsigned int cmd) case IOC_OPAL_GET_STATUS: case IOC_OPAL_GET_LR_STATUS: case IOC_OPAL_GET_GEOMETRY: + case IOC_OPAL_DISCOVERY: + case IOC_OPAL_REVERT_LSP: return true; } return false; diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index 515d7fcb9634..5fb1f12c33f9 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -14,19 +14,25 @@ * @buffer: pointer to the buffer * @size: size of the buffer * @len: the amount of data inside the buffer - * @readpos: The next position to read in the buffer. */ struct seq_buf { char *buffer; size_t size; size_t len; - loff_t readpos; }; +#define DECLARE_SEQ_BUF(NAME, SIZE) \ + char __ ## NAME ## _buffer[SIZE] = ""; \ + struct seq_buf NAME = { \ + .buffer = &__ ## NAME ## _buffer, \ + .size = SIZE, \ + } + static inline void seq_buf_clear(struct seq_buf *s) { s->len = 0; - s->readpos = 0; + if (s->size) + s->buffer[0] = '\0'; } static inline void @@ -39,7 +45,7 @@ seq_buf_init(struct seq_buf *s, char *buf, unsigned int size) /* * seq_buf have a buffer that might overflow. When this happens - * the len and size are set to be equal. + * len is set to be greater than size. */ static inline bool seq_buf_has_overflowed(struct seq_buf *s) @@ -72,8 +78,8 @@ static inline unsigned int seq_buf_used(struct seq_buf *s) } /** - * seq_buf_terminate - Make sure buffer is nul terminated - * @s: the seq_buf descriptor to terminate. + * seq_buf_str - get %NUL-terminated C string from seq_buf + * @s: the seq_buf handle * * This makes sure that the buffer in @s is nul terminated and * safe to read as a string. @@ -84,16 +90,20 @@ static inline unsigned int seq_buf_used(struct seq_buf *s) * * After this function is called, s->buffer is safe to use * in string operations. + * + * Returns @s->buf after making sure it is terminated. */ -static inline void seq_buf_terminate(struct seq_buf *s) +static inline const char *seq_buf_str(struct seq_buf *s) { if (WARN_ON(s->size == 0)) - return; + return ""; if (seq_buf_buffer_left(s)) s->buffer[s->len] = 0; else s->buffer[s->size - 1] = 0; + + return s->buffer; } /** @@ -143,7 +153,7 @@ extern __printf(2, 0) int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args); extern int seq_buf_print_seq(struct seq_file *m, struct seq_buf *s); extern int seq_buf_to_user(struct seq_buf *s, char __user *ubuf, - int cnt); + size_t start, int cnt); extern int seq_buf_puts(struct seq_buf *s, const char *str); extern int seq_buf_putc(struct seq_buf *s, unsigned char c); extern int seq_buf_putmem(struct seq_buf *s, const void *mem, unsigned int len); diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index bd023dd38ae6..234bcdb1fba4 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -207,6 +207,21 @@ static const struct file_operations __name ## _fops = { \ .release = single_release, \ } +#define DEFINE_SHOW_STORE_ATTRIBUTE(__name) \ +static int __name ## _open(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, __name ## _show, inode->i_private); \ +} \ + \ +static const struct file_operations __name ## _fops = { \ + .owner = THIS_MODULE, \ + .open = __name ## _open, \ + .read = seq_read, \ + .write = __name ## _write, \ + .llseek = seq_lseek, \ + .release = single_release, \ +} + #define DEFINE_PROC_SHOW_ATTRIBUTE(__name) \ static int __name ## _open(struct inode *inode, struct file *file) \ { \ @@ -249,18 +264,19 @@ static inline void seq_show_option(struct seq_file *m, const char *name, /** * seq_show_option_n - display mount options with appropriate escapes - * where @value must be a specific length. + * where @value must be a specific length (i.e. + * not NUL-terminated). * @m: the seq_file handle * @name: the mount option name * @value: the mount option name's value, cannot be NULL - * @length: the length of @value to display + * @length: the exact length of @value to display, must be constant expression * * This is a macro since this uses "length" to define the size of the * stack buffer. */ #define seq_show_option_n(m, name, value, length) { \ char val_buf[length + 1]; \ - strncpy(val_buf, value, length); \ + memcpy(val_buf, value, length); \ val_buf[length] = '\0'; \ seq_show_option(m, name, val_buf); \ } diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 987a59d977c5..e92f9d5577ba 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -191,11 +191,9 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) * @lockname: "LOCKNAME" part of seqcount_LOCKNAME_t * @locktype: LOCKNAME canonical C data type * @preemptible: preemptibility of above locktype - * @lockmember: argument for lockdep_assert_held() - * @lockbase: associated lock release function (prefix only) - * @lock_acquire: associated lock acquisition function (full call) + * @lockbase: prefix for associated lock/unlock */ -#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockmember, lockbase, lock_acquire) \ +#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockbase) \ typedef struct seqcount_##lockname { \ seqcount_t seqcount; \ __SEQ_LOCK(locktype *lock); \ @@ -207,6 +205,12 @@ __seqprop_##lockname##_ptr(seqcount_##lockname##_t *s) \ return &s->seqcount; \ } \ \ +static __always_inline const seqcount_t * \ +__seqprop_##lockname##_const_ptr(const seqcount_##lockname##_t *s) \ +{ \ + return &s->seqcount; \ +} \ + \ static __always_inline unsigned \ __seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s) \ { \ @@ -216,7 +220,7 @@ __seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s) \ return seq; \ \ if (preemptible && unlikely(seq & 1)) { \ - __SEQ_LOCK(lock_acquire); \ + __SEQ_LOCK(lockbase##_lock(s->lock)); \ __SEQ_LOCK(lockbase##_unlock(s->lock)); \ \ /* \ @@ -242,7 +246,7 @@ __seqprop_##lockname##_preemptible(const seqcount_##lockname##_t *s) \ static __always_inline void \ __seqprop_##lockname##_assert(const seqcount_##lockname##_t *s) \ { \ - __SEQ_LOCK(lockdep_assert_held(lockmember)); \ + __SEQ_LOCK(lockdep_assert_held(s->lock)); \ } /* @@ -254,6 +258,11 @@ static inline seqcount_t *__seqprop_ptr(seqcount_t *s) return s; } +static inline const seqcount_t *__seqprop_const_ptr(const seqcount_t *s) +{ + return s; +} + static inline unsigned __seqprop_sequence(const seqcount_t *s) { return READ_ONCE(s->sequence); @@ -271,10 +280,10 @@ static inline void __seqprop_assert(const seqcount_t *s) #define __SEQ_RT IS_ENABLED(CONFIG_PREEMPT_RT) -SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, s->lock, raw_spin, raw_spin_lock(s->lock)) -SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, s->lock, spin, spin_lock(s->lock)) -SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, s->lock, read, read_lock(s->lock)) -SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex, mutex_lock(s->lock)) +SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, raw_spin) +SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, spin) +SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, read) +SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) /* * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t @@ -294,19 +303,20 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex #define SEQCNT_WW_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) #define __seqprop_case(s, lockname, prop) \ - seqcount_##lockname##_t: __seqprop_##lockname##_##prop((void *)(s)) + seqcount_##lockname##_t: __seqprop_##lockname##_##prop #define __seqprop(s, prop) _Generic(*(s), \ - seqcount_t: __seqprop_##prop((void *)(s)), \ + seqcount_t: __seqprop_##prop, \ __seqprop_case((s), raw_spinlock, prop), \ __seqprop_case((s), spinlock, prop), \ __seqprop_case((s), rwlock, prop), \ __seqprop_case((s), mutex, prop)) -#define seqprop_ptr(s) __seqprop(s, ptr) -#define seqprop_sequence(s) __seqprop(s, sequence) -#define seqprop_preemptible(s) __seqprop(s, preemptible) -#define seqprop_assert(s) __seqprop(s, assert) +#define seqprop_ptr(s) __seqprop(s, ptr)(s) +#define seqprop_const_ptr(s) __seqprop(s, const_ptr)(s) +#define seqprop_sequence(s) __seqprop(s, sequence)(s) +#define seqprop_preemptible(s) __seqprop(s, preemptible)(s) +#define seqprop_assert(s) __seqprop(s, assert)(s) /** * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier @@ -355,7 +365,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex */ #define read_seqcount_begin(s) \ ({ \ - seqcount_lockdep_reader_access(seqprop_ptr(s)); \ + seqcount_lockdep_reader_access(seqprop_const_ptr(s)); \ raw_read_seqcount_begin(s); \ }) @@ -421,7 +431,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex * Return: true if a read section retry is required, else false */ #define __read_seqcount_retry(s, start) \ - do___read_seqcount_retry(seqprop_ptr(s), start) + do___read_seqcount_retry(seqprop_const_ptr(s), start) static inline int do___read_seqcount_retry(const seqcount_t *s, unsigned start) { @@ -441,7 +451,7 @@ static inline int do___read_seqcount_retry(const seqcount_t *s, unsigned start) * Return: true if a read section retry is required, else false */ #define read_seqcount_retry(s, start) \ - do_read_seqcount_retry(seqprop_ptr(s), start) + do_read_seqcount_retry(seqprop_const_ptr(s), start) static inline int do_read_seqcount_retry(const seqcount_t *s, unsigned start) { @@ -512,8 +522,8 @@ do { \ static inline void do_write_seqcount_begin_nested(seqcount_t *s, int subclass) { - do_raw_write_seqcount_begin(s); seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); + do_raw_write_seqcount_begin(s); } /** @@ -574,7 +584,7 @@ static inline void do_write_seqcount_end(seqcount_t *s) * via WRITE_ONCE): a) to ensure the writes become visible to other threads * atomically, avoiding compiler optimizations; b) to document which writes are * meant to propagate to the reader critical section. This is necessary because - * neither writes before and after the barrier are enclosed in a seq-writer + * neither writes before nor after the barrier are enclosed in a seq-writer * critical section that would ensure readers are aware of ongoing writes:: * * seqcount_t seq; @@ -864,7 +874,7 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) } /* - * For all seqlock_t write side functions, use the the internal + * For all seqlock_t write side functions, use the internal * do_write_seqcount_begin() instead of generic write_seqcount_begin(). * This way, no redundant lockdep_assert_held() checks are added. */ diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index a156d2ed8d9e..89f7b6c63598 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -570,7 +570,7 @@ struct uart_port { struct serial_port_device *port_dev; /* serial core port device */ unsigned long sysrq; /* sysrq timeout */ - unsigned int sysrq_ch; /* char for sysrq */ + u8 sysrq_ch; /* char for sysrq */ unsigned char has_sysrq; unsigned char sysrq_seq; /* index in sysrq_toggle_seq */ @@ -588,6 +588,85 @@ struct uart_port { void *private_data; /* generic platform data pointer */ }; +/** + * uart_port_lock - Lock the UART port + * @up: Pointer to UART port structure + */ +static inline void uart_port_lock(struct uart_port *up) +{ + spin_lock(&up->lock); +} + +/** + * uart_port_lock_irq - Lock the UART port and disable interrupts + * @up: Pointer to UART port structure + */ +static inline void uart_port_lock_irq(struct uart_port *up) +{ + spin_lock_irq(&up->lock); +} + +/** + * uart_port_lock_irqsave - Lock the UART port, save and disable interrupts + * @up: Pointer to UART port structure + * @flags: Pointer to interrupt flags storage + */ +static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) +{ + spin_lock_irqsave(&up->lock, *flags); +} + +/** + * uart_port_trylock - Try to lock the UART port + * @up: Pointer to UART port structure + * + * Returns: True if lock was acquired, false otherwise + */ +static inline bool uart_port_trylock(struct uart_port *up) +{ + return spin_trylock(&up->lock); +} + +/** + * uart_port_trylock_irqsave - Try to lock the UART port, save and disable interrupts + * @up: Pointer to UART port structure + * @flags: Pointer to interrupt flags storage + * + * Returns: True if lock was acquired, false otherwise + */ +static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long *flags) +{ + return spin_trylock_irqsave(&up->lock, *flags); +} + +/** + * uart_port_unlock - Unlock the UART port + * @up: Pointer to UART port structure + */ +static inline void uart_port_unlock(struct uart_port *up) +{ + spin_unlock(&up->lock); +} + +/** + * uart_port_unlock_irq - Unlock the UART port and re-enable interrupts + * @up: Pointer to UART port structure + */ +static inline void uart_port_unlock_irq(struct uart_port *up) +{ + spin_unlock_irq(&up->lock); +} + +/** + * uart_port_unlock_irqrestore - Unlock the UART port, restore interrupts + * @up: Pointer to UART port structure + * @flags: The saved interrupt flags for restore + */ +static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags) +{ + spin_unlock_irqrestore(&up->lock, flags); +} + static inline int serial_port_in(struct uart_port *up, int offset) { return up->serial_in(up, offset); @@ -904,16 +983,16 @@ void uart_handle_dcd_change(struct uart_port *uport, bool active); void uart_handle_cts_change(struct uart_port *uport, bool active); void uart_insert_char(struct uart_port *port, unsigned int status, - unsigned int overrun, unsigned int ch, unsigned int flag); + unsigned int overrun, u8 ch, u8 flag); void uart_xchar_out(struct uart_port *uport, int offset); #ifdef CONFIG_MAGIC_SYSRQ_SERIAL #define SYSRQ_TIMEOUT (HZ * 5) -bool uart_try_toggle_sysrq(struct uart_port *port, unsigned int ch); +bool uart_try_toggle_sysrq(struct uart_port *port, u8 ch); -static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) +static inline int uart_handle_sysrq_char(struct uart_port *port, u8 ch) { if (!port->sysrq) return 0; @@ -932,7 +1011,7 @@ static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch return 0; } -static inline int uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch) +static inline int uart_prepare_sysrq_char(struct uart_port *port, u8 ch) { if (!port->sysrq) return 0; @@ -953,17 +1032,17 @@ static inline int uart_prepare_sysrq_char(struct uart_port *port, unsigned int c static inline void uart_unlock_and_check_sysrq(struct uart_port *port) { - int sysrq_ch; + u8 sysrq_ch; if (!port->has_sysrq) { - spin_unlock(&port->lock); + uart_port_unlock(port); return; } sysrq_ch = port->sysrq_ch; port->sysrq_ch = 0; - spin_unlock(&port->lock); + uart_port_unlock(port); if (sysrq_ch) handle_sysrq(sysrq_ch); @@ -972,38 +1051,38 @@ static inline void uart_unlock_and_check_sysrq(struct uart_port *port) static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port, unsigned long flags) { - int sysrq_ch; + u8 sysrq_ch; if (!port->has_sysrq) { - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); return; } sysrq_ch = port->sysrq_ch; port->sysrq_ch = 0; - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); if (sysrq_ch) handle_sysrq(sysrq_ch); } #else /* CONFIG_MAGIC_SYSRQ_SERIAL */ -static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) +static inline int uart_handle_sysrq_char(struct uart_port *port, u8 ch) { return 0; } -static inline int uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch) +static inline int uart_prepare_sysrq_char(struct uart_port *port, u8 ch) { return 0; } static inline void uart_unlock_and_check_sysrq(struct uart_port *port) { - spin_unlock(&port->lock); + uart_port_unlock(port); } static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port, unsigned long flags) { - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); } #endif /* CONFIG_MAGIC_SYSRQ_SERIAL */ diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 9029abd29b1c..2caa6b86106a 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -13,20 +13,32 @@ /* inode in-kernel data */ +#ifdef CONFIG_TMPFS_QUOTA +#define SHMEM_MAXQUOTAS 2 +#endif + struct shmem_inode_info { spinlock_t lock; unsigned int seals; /* shmem seals */ unsigned long flags; unsigned long alloced; /* data pages alloced to file */ unsigned long swapped; /* subtotal assigned to swap */ - pgoff_t fallocend; /* highest fallocate endindex */ - struct list_head shrinklist; /* shrinkable hpage inodes */ - struct list_head swaplist; /* chain of maybes on swap */ + union { + struct offset_ctx dir_offsets; /* stable directory offsets */ + struct { + struct list_head shrinklist; /* shrinkable hpage inodes */ + struct list_head swaplist; /* chain of maybes on swap */ + }; + }; + struct timespec64 i_crtime; /* file creation time */ struct shared_policy policy; /* NUMA memory alloc policy */ struct simple_xattrs xattrs; /* list of xattrs */ + pgoff_t fallocend; /* highest fallocate endindex */ + unsigned int fsflags; /* for FS_IOC_[SG]ETFLAGS */ atomic_t stop_eviction; /* hold when working on inode */ - struct timespec64 i_crtime; /* file creation time */ - unsigned int fsflags; /* flags for FS_IOC_[SG]ETFLAGS */ +#ifdef CONFIG_TMPFS_QUOTA + struct dquot *i_dquot[MAXQUOTAS]; +#endif struct inode vfs_inode; }; @@ -35,11 +47,18 @@ struct shmem_inode_info { (FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL) #define SHMEM_FL_INHERITED (FS_NODUMP_FL | FS_NOATIME_FL) +struct shmem_quota_limits { + qsize_t usrquota_bhardlimit; /* Default user quota block hard limit */ + qsize_t usrquota_ihardlimit; /* Default user quota inode hard limit */ + qsize_t grpquota_bhardlimit; /* Default group quota block hard limit */ + qsize_t grpquota_ihardlimit; /* Default group quota inode hard limit */ +}; + struct shmem_sb_info { unsigned long max_blocks; /* How many blocks are allowed */ struct percpu_counter used_blocks; /* How many are allocated */ unsigned long max_inodes; /* How many inodes are allowed */ - unsigned long free_inodes; /* How many are left for allocation */ + unsigned long free_ispace; /* How much ispace left for allocation */ raw_spinlock_t stat_lock; /* Serialize shmem_sb_info changes */ umode_t mode; /* Mount mode for root directory */ unsigned char huge; /* Whether to try for hugepages */ @@ -53,6 +72,7 @@ struct shmem_sb_info { spinlock_t shrinklist_lock; /* Protects shrinklist */ struct list_head shrinklist; /* List of shinkable inodes */ unsigned long shrinklist_len; /* Length of shrinklist */ + struct shmem_quota_limits qlimits; /* Default quota limits */ }; static inline struct shmem_inode_info *SHMEM_I(struct inode *inode) @@ -172,4 +192,17 @@ extern int shmem_mfill_atomic_pte(pmd_t *dst_pmd, #endif /* CONFIG_SHMEM */ #endif /* CONFIG_USERFAULTFD */ +/* + * Used space is stored as unsigned 64-bit value in bytes but + * quota core supports only signed 64-bit values so use that + * as a limit + */ +#define SHMEM_QUOTA_MAX_SPC_LIMIT 0x7fffffffffffffffLL /* 2^63-1 */ +#define SHMEM_QUOTA_MAX_INO_LIMIT 0x7fffffffffffffffLL + +#ifdef CONFIG_TMPFS_QUOTA +extern const struct dquot_operations shmem_quota_operations; +extern struct quota_format_type shmem_quota_format; +#endif /* CONFIG_TMPFS_QUOTA */ + #endif diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 224293b2dd06..1a00be90d93a 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -4,6 +4,25 @@ #include <linux/atomic.h> #include <linux/types.h> +#include <linux/refcount.h> +#include <linux/completion.h> + +#define SHRINKER_UNIT_BITS BITS_PER_LONG + +/* + * Bitmap and deferred work of shrinker::id corresponding to memcg-aware + * shrinkers, which have elements charged to the memcg. + */ +struct shrinker_info_unit { + atomic_long_t nr_deferred[SHRINKER_UNIT_BITS]; + DECLARE_BITMAP(map, SHRINKER_UNIT_BITS); +}; + +struct shrinker_info { + struct rcu_head rcu; + int map_nr_max; + struct shrinker_info_unit *unit[]; +}; /* * This struct is used to pass information from page reclaim to the shrinkers. @@ -70,6 +89,19 @@ struct shrinker { int seeks; /* seeks to recreate an obj */ unsigned flags; + /* + * The reference count of this shrinker. Registered shrinker have an + * initial refcount of 1, then the lookup operations are now allowed + * to use it via shrinker_try_get(). Later in the unregistration step, + * the initial refcount will be discarded, and will free the shrinker + * asynchronously via RCU after its refcount reaches 0. + */ + refcount_t refcount; + struct completion done; /* use to wait for refcount to reach 0 */ + struct rcu_head rcu; + + void *private_data; + /* These are for internal use */ struct list_head list; #ifdef CONFIG_MEMCG @@ -86,48 +118,39 @@ struct shrinker { }; #define DEFAULT_SEEKS 2 /* A good number if you don't know better. */ -/* Flags */ -#define SHRINKER_REGISTERED (1 << 0) -#define SHRINKER_NUMA_AWARE (1 << 1) -#define SHRINKER_MEMCG_AWARE (1 << 2) +/* Internal flags */ +#define SHRINKER_REGISTERED BIT(0) +#define SHRINKER_ALLOCATED BIT(1) + +/* Flags for users to use */ +#define SHRINKER_NUMA_AWARE BIT(2) +#define SHRINKER_MEMCG_AWARE BIT(3) /* * It just makes sense when the shrinker is also MEMCG_AWARE for now, * non-MEMCG_AWARE shrinker should not have this flag set. */ -#define SHRINKER_NONSLAB (1 << 3) +#define SHRINKER_NONSLAB BIT(4) -extern int __printf(2, 3) prealloc_shrinker(struct shrinker *shrinker, - const char *fmt, ...); -extern void register_shrinker_prepared(struct shrinker *shrinker); -extern int __printf(2, 3) register_shrinker(struct shrinker *shrinker, - const char *fmt, ...); -extern void unregister_shrinker(struct shrinker *shrinker); -extern void free_prealloced_shrinker(struct shrinker *shrinker); -extern void synchronize_shrinkers(void); +__printf(2, 3) +struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...); +void shrinker_register(struct shrinker *shrinker); +void shrinker_free(struct shrinker *shrinker); -#ifdef CONFIG_SHRINKER_DEBUG -extern int shrinker_debugfs_add(struct shrinker *shrinker); -extern struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, - int *debugfs_id); -extern void shrinker_debugfs_remove(struct dentry *debugfs_entry, - int debugfs_id); -extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker, - const char *fmt, ...); -#else /* CONFIG_SHRINKER_DEBUG */ -static inline int shrinker_debugfs_add(struct shrinker *shrinker) -{ - return 0; -} -static inline struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, - int *debugfs_id) +static inline bool shrinker_try_get(struct shrinker *shrinker) { - *debugfs_id = -1; - return NULL; + return refcount_inc_not_zero(&shrinker->refcount); } -static inline void shrinker_debugfs_remove(struct dentry *debugfs_entry, - int debugfs_id) + +static inline void shrinker_put(struct shrinker *shrinker) { + if (refcount_dec_and_test(&shrinker->refcount)) + complete(&shrinker->done); } + +#ifdef CONFIG_SHRINKER_DEBUG +extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker, + const char *fmt, ...); +#else /* CONFIG_SHRINKER_DEBUG */ static inline __printf(2, 3) int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...) { diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 91ed66952580..27998f73183e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -32,7 +32,6 @@ #include <linux/if_packet.h> #include <linux/llist.h> #include <net/flow.h> -#include <net/page_pool.h> #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <linux/netfilter/nf_conntrack_common.h> #endif @@ -441,8 +440,6 @@ static inline bool skb_frag_must_loop(struct page *p) copied += p_len, p++, p_off = 0, \ p_len = min_t(u32, f_len - copied, PAGE_SIZE)) \ -#define HAVE_HW_TIME_STAMP - /** * struct skb_shared_hwtstamps - hardware time stamps * @hwtstamp: hardware time stamp transformed into duration @@ -944,7 +941,7 @@ struct sk_buff { __u8 __mono_tc_offset[0]; /* public: */ __u8 mono_delivery_time:1; /* See SKB_MONO_DELIVERY_TIME_MASK */ -#ifdef CONFIG_NET_CLS_ACT +#ifdef CONFIG_NET_XGRESS __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */ __u8 tc_skip_classify:1; #endif @@ -993,7 +990,7 @@ struct sk_buff { __u8 csum_not_inet:1; #endif -#ifdef CONFIG_NET_SCHED +#if defined(CONFIG_NET_SCHED) || defined(CONFIG_NET_XGRESS) __u16 tc_index; /* traffic control index */ #endif @@ -1312,7 +1309,7 @@ struct sk_buff_fclones { * * Returns true if skb is a fast clone, and its clone is not freed. * Some drivers call skb_orphan() in their ndo_start_xmit(), - * so we also check that this didnt happen. + * so we also check that didn't happen. */ static inline bool skb_fclone_busy(const struct sock *sk, const struct sk_buff *skb) @@ -2019,7 +2016,7 @@ static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri) * Copy shared buffers into a new sk_buff. We effectively do COW on * packets to handle cases where we have a local reader and forward * and a couple of other messy ones. The normal one is tcpdumping - * a packet thats being forwarded. + * a packet that's being forwarded. */ /** @@ -3152,22 +3149,38 @@ static inline int skb_orphan_frags_rx(struct sk_buff *skb, gfp_t gfp_mask) } /** - * __skb_queue_purge - empty a list + * __skb_queue_purge_reason - empty a list * @list: list to empty + * @reason: drop reason * * Delete all buffers on an &sk_buff list. Each buffer is removed from * the list and one reference dropped. This function does not take the * list lock and the caller must hold the relevant locks to use it. */ -static inline void __skb_queue_purge(struct sk_buff_head *list) +static inline void __skb_queue_purge_reason(struct sk_buff_head *list, + enum skb_drop_reason reason) { struct sk_buff *skb; + while ((skb = __skb_dequeue(list)) != NULL) - kfree_skb(skb); + kfree_skb_reason(skb, reason); +} + +static inline void __skb_queue_purge(struct sk_buff_head *list) +{ + __skb_queue_purge_reason(list, SKB_DROP_REASON_QUEUE_PURGE); +} + +void skb_queue_purge_reason(struct sk_buff_head *list, + enum skb_drop_reason reason); + +static inline void skb_queue_purge(struct sk_buff_head *list) +{ + skb_queue_purge_reason(list, SKB_DROP_REASON_QUEUE_PURGE); } -void skb_queue_purge(struct sk_buff_head *list); unsigned int skb_rbtree_purge(struct rb_root *root); +void skb_errqueue_purge(struct sk_buff_head *list); void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask); @@ -3423,13 +3436,15 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f) __skb_frag_ref(&skb_shinfo(skb)->frags[f]); } +bool napi_pp_put_page(struct page *page, bool napi_safe); + static inline void napi_frag_unref(skb_frag_t *frag, bool recycle, bool napi_safe) { struct page *page = skb_frag_page(frag); #ifdef CONFIG_PAGE_POOL - if (recycle && page_pool_return_skb_page(page, napi_safe)) + if (recycle && napi_pp_put_page(page, napi_safe)) return; #endif put_page(page); @@ -3664,6 +3679,9 @@ static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int l return __skb_put_padto(skb, len, true); } +bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i) + __must_check; + static inline int skb_add_data(struct sk_buff *skb, struct iov_iter *from, int copy) { @@ -4023,7 +4041,7 @@ __skb_header_pointer(const struct sk_buff *skb, int offset, int len, if (likely(hlen - offset >= len)) return (void *)data + offset; - if (!skb || !buffer || unlikely(skb_copy_bits(skb, offset, buffer, len) < 0)) + if (!skb || unlikely(skb_copy_bits(skb, offset, buffer, len) < 0)) return NULL; return buffer; @@ -4036,6 +4054,14 @@ skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) skb_headlen(skb), buffer); } +static inline void * __must_check +skb_pointer_if_linear(const struct sk_buff *skb, int offset, int len) +{ + if (likely(skb_headlen(skb) - offset >= len)) + return skb->data + offset; + return NULL; +} + /** * skb_needs_linearize - check if we need to linearize a given skb * depending on the given device features. diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index c1637515a8a4..c953b8c0d2f4 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -106,6 +106,7 @@ struct sk_psock { struct mutex work_mutex; struct sk_psock_work_state work_state; struct delayed_work work; + struct sock *sk_pair; struct rcu_work rwork; }; diff --git a/include/linux/slab.h b/include/linux/slab.h index 848c7c82ad5a..d6d6ffeeb9a2 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -19,6 +19,7 @@ #include <linux/workqueue.h> #include <linux/percpu-refcount.h> #include <linux/cleanup.h> +#include <linux/hash.h> /* @@ -244,8 +245,9 @@ DEFINE_FREE(kfree, void *, if (_T) kfree(_T)) size_t ksize(const void *objp); #ifdef CONFIG_PRINTK -bool kmem_valid_obj(void *object); -void kmem_dump_obj(void *object); +bool kmem_dump_obj(void *object); +#else +static inline bool kmem_dump_obj(void *object) { return false; } #endif /* @@ -345,6 +347,12 @@ static inline unsigned int arch_slab_minalign(void) #define SLAB_OBJ_MIN_SIZE (KMALLOC_MIN_SIZE < 16 ? \ (KMALLOC_MIN_SIZE) : 16) +#ifdef CONFIG_RANDOM_KMALLOC_CACHES +#define RANDOM_KMALLOC_CACHES_NR 15 // # of cache copies +#else +#define RANDOM_KMALLOC_CACHES_NR 0 +#endif + /* * Whenever changing this, take care of that kmalloc_type() and * create_kmalloc_caches() still work as intended. @@ -361,6 +369,8 @@ enum kmalloc_cache_type { #ifndef CONFIG_MEMCG_KMEM KMALLOC_CGROUP = KMALLOC_NORMAL, #endif + KMALLOC_RANDOM_START = KMALLOC_NORMAL, + KMALLOC_RANDOM_END = KMALLOC_RANDOM_START + RANDOM_KMALLOC_CACHES_NR, #ifdef CONFIG_SLUB_TINY KMALLOC_RECLAIM = KMALLOC_NORMAL, #else @@ -386,14 +396,22 @@ kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1]; (IS_ENABLED(CONFIG_ZONE_DMA) ? __GFP_DMA : 0) | \ (IS_ENABLED(CONFIG_MEMCG_KMEM) ? __GFP_ACCOUNT : 0)) -static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags) +extern unsigned long random_kmalloc_seed; + +static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags, unsigned long caller) { /* * The most common case is KMALLOC_NORMAL, so test for it * with a single branch for all the relevant flags. */ if (likely((flags & KMALLOC_NOT_NORMAL_BITS) == 0)) +#ifdef CONFIG_RANDOM_KMALLOC_CACHES + /* RANDOM_KMALLOC_CACHES_NR (=15) copies + the KMALLOC_NORMAL */ + return KMALLOC_RANDOM_START + hash_64(caller ^ random_kmalloc_seed, + ilog2(RANDOM_KMALLOC_CACHES_NR + 1)); +#else return KMALLOC_NORMAL; +#endif /* * At least one of the flags has to be set. Their priorities in @@ -580,7 +598,7 @@ static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags) index = kmalloc_index(size); return kmalloc_trace( - kmalloc_caches[kmalloc_type(flags)][index], + kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index], flags, size); } return __kmalloc(size, flags); @@ -596,7 +614,7 @@ static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t fla index = kmalloc_index(size); return kmalloc_node_trace( - kmalloc_caches[kmalloc_type(flags)][index], + kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index], flags, node, size); } return __kmalloc_node(size, flags, node); @@ -746,6 +764,8 @@ static inline __alloc_size(1, 2) void *kvcalloc(size_t n, size_t size, gfp_t fla extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags) __realloc_size(3); extern void kvfree(const void *addr); +DEFINE_FREE(kvfree, void *, if (_T) kvfree(_T)) + extern void kvfree_sensitive(const void *addr, size_t len); unsigned int kmem_cache_size(struct kmem_cache *s); diff --git a/include/linux/smp.h b/include/linux/smp.h index 91ea4a67f8ca..e87520dc2959 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -53,7 +53,7 @@ int smp_call_function_single(int cpuid, smp_call_func_t func, void *info, void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func, void *info, bool wait, const struct cpumask *mask); -int smp_call_function_single_async(int cpu, struct __call_single_data *csd); +int smp_call_function_single_async(int cpu, call_single_data_t *csd); /* * Cpus stopping functions in panic. All have default weak definitions. diff --git a/include/linux/smscphy.h b/include/linux/smscphy.h index e1c88627755a..1a6a851d2cf8 100644 --- a/include/linux/smscphy.h +++ b/include/linux/smscphy.h @@ -38,4 +38,38 @@ int smsc_phy_set_tunable(struct phy_device *phydev, struct ethtool_tunable *tuna, const void *data); int smsc_phy_probe(struct phy_device *phydev); +#define MII_LAN874X_PHY_MMD_WOL_WUCSR 0x8010 +#define MII_LAN874X_PHY_MMD_WOL_WUF_CFGA 0x8011 +#define MII_LAN874X_PHY_MMD_WOL_WUF_CFGB 0x8012 +#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK0 0x8021 +#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK1 0x8022 +#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK2 0x8023 +#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK3 0x8024 +#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK4 0x8025 +#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK5 0x8026 +#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK6 0x8027 +#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK7 0x8028 +#define MII_LAN874X_PHY_MMD_WOL_RX_ADDRA 0x8061 +#define MII_LAN874X_PHY_MMD_WOL_RX_ADDRB 0x8062 +#define MII_LAN874X_PHY_MMD_WOL_RX_ADDRC 0x8063 +#define MII_LAN874X_PHY_MMD_MCFGR 0x8064 + +#define MII_LAN874X_PHY_PME1_SET (2 << 13) +#define MII_LAN874X_PHY_PME2_SET (2 << 11) +#define MII_LAN874X_PHY_PME_SELF_CLEAR BIT(9) +#define MII_LAN874X_PHY_WOL_PFDA_FR BIT(7) +#define MII_LAN874X_PHY_WOL_WUFR BIT(6) +#define MII_LAN874X_PHY_WOL_MPR BIT(5) +#define MII_LAN874X_PHY_WOL_BCAST_FR BIT(4) +#define MII_LAN874X_PHY_WOL_PFDAEN BIT(3) +#define MII_LAN874X_PHY_WOL_WUEN BIT(2) +#define MII_LAN874X_PHY_WOL_MPEN BIT(1) +#define MII_LAN874X_PHY_WOL_BCSTEN BIT(0) + +#define MII_LAN874X_PHY_WOL_FILTER_EN BIT(15) +#define MII_LAN874X_PHY_WOL_FILTER_MCASTTEN BIT(9) +#define MII_LAN874X_PHY_WOL_FILTER_BCSTEN BIT(8) + +#define MII_LAN874X_PHY_PME_SELF_CLEAR_DELAY 0x1000 /* 81 milliseconds */ + #endif /* __LINUX_SMSCPHY_H__ */ diff --git a/include/linux/soc/mediatek/infracfg.h b/include/linux/soc/mediatek/infracfg.h index 07f67b3d8e97..6c6cccc848f4 100644 --- a/include/linux/soc/mediatek/infracfg.h +++ b/include/linux/soc/mediatek/infracfg.h @@ -2,6 +2,47 @@ #ifndef __SOC_MEDIATEK_INFRACFG_H #define __SOC_MEDIATEK_INFRACFG_H +#define MT8365_INFRA_TOPAXI_PROTECTEN_STA1 0x228 +#define MT8365_INFRA_TOPAXI_PROTECTEN_SET 0x2a0 +#define MT8365_INFRA_TOPAXI_PROTECTEN_CLR 0x2a4 +#define MT8365_INFRA_TOPAXI_PROTECTEN_MM_M0 BIT(1) +#define MT8365_INFRA_TOPAXI_PROTECTEN_MDMCU_M1 BIT(2) +#define MT8365_INFRA_TOPAXI_PROTECTEN_MMAPB_S BIT(6) +#define MT8365_INFRA_TOPAXI_PROTECTEN_MM2INFRA_AXI_GALS_SLV_0 BIT(10) +#define MT8365_INFRA_TOPAXI_PROTECTEN_MM2INFRA_AXI_GALS_SLV_1 BIT(11) +#define MT8365_INFRA_TOPAXI_PROTECTEN_AP2CONN_AHB BIT(13) +#define MT8365_INFRA_TOPAXI_PROTECTEN_CONN2INFRA_AHB BIT(14) +#define MT8365_INFRA_TOPAXI_PROTECTEN_MFG_M0 BIT(21) +#define MT8365_INFRA_TOPAXI_PROTECTEN_INFRA2MFG BIT(22) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_STA1 0x258 +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_SET 0x2a8 +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_CLR 0x2ac +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_APU2AP BIT(2) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_MM2INFRA_AXI_GALS_MST_0 BIT(16) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_MM2INFRA_AXI_GALS_MST_1 BIT(17) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_CONN2INFRA_AXI_GALS_MST BIT(18) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_CAM2MM_AXI_GALS_MST BIT(19) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_APU_CBIP_GALS_MST BIT(20) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_INFRA2CONN_AHB_GALS_SLV BIT(21) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_PWRDNREQ_INFRA_GALS_ADB BIT(24) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_PWRDNREQ_MP1_L2C_AFIFO BIT(27) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_AUDIO_BUS_AUDIO_M BIT(28) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_AUDIO_BUS_DSP_M BIT(30) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_AUDIO_BUS_DSP_S BIT(31) + +#define MT8365_INFRA_NAO_TOPAXI_SI0_STA 0x0 +#define MT8365_INFRA_NAO_TOPAXI_SI0_CTRL_UPDATED BIT(24) +#define MT8365_INFRA_NAO_TOPAXI_SI2_STA 0x28 +#define MT8365_INFRA_NAO_TOPAXI_SI2_CTRL_UPDATED BIT(14) +#define MT8365_INFRA_TOPAXI_SI0_CTL 0x200 +#define MT8365_INFRA_TOPAXI_SI0_WAY_EN_MMAPB_S BIT(6) +#define MT8365_INFRA_TOPAXI_SI2_CTL 0x234 +#define MT8365_INFRA_TOPAXI_SI2_WAY_EN_PERI_M1 BIT(5) + +#define MT8365_SMI_COMMON_CLAMP_EN 0x3c0 +#define MT8365_SMI_COMMON_CLAMP_EN_SET 0x3c4 +#define MT8365_SMI_COMMON_CLAMP_EN_CLR 0x3c8 + #define MT8195_TOP_AXI_PROT_EN_STA1 0x228 #define MT8195_TOP_AXI_PROT_EN_1_STA1 0x258 #define MT8195_TOP_AXI_PROT_EN_SET 0x2a0 diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index b2b28180dff7..a476648858a6 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -10,6 +10,7 @@ #define MTK_WED_TX_QUEUES 2 #define MTK_WED_RX_QUEUES 2 +#define MTK_WED_RX_PAGE_QUEUES 3 #define WED_WO_STA_REC 0x6 @@ -45,7 +46,7 @@ enum mtk_wed_wo_cmd { MTK_WED_WO_CMD_WED_END }; -struct mtk_rxbm_desc { +struct mtk_wed_bm_desc { __le32 buf0; __le32 token; } __packed __aligned(4); @@ -76,6 +77,11 @@ struct mtk_wed_wo_rx_stats { __le32 rx_drop_cnt; }; +struct mtk_wed_buf { + void *p; + dma_addr_t phy_addr; +}; + struct mtk_wed_device { #ifdef CONFIG_NET_MEDIATEK_SOC_WED const struct mtk_wed_ops *ops; @@ -94,17 +100,20 @@ struct mtk_wed_device { struct mtk_wed_ring txfree_ring; struct mtk_wed_ring tx_wdma[MTK_WED_TX_QUEUES]; struct mtk_wed_ring rx_wdma[MTK_WED_RX_QUEUES]; + struct mtk_wed_ring rx_rro_ring[MTK_WED_RX_QUEUES]; + struct mtk_wed_ring rx_page_ring[MTK_WED_RX_PAGE_QUEUES]; + struct mtk_wed_ring ind_cmd_ring; struct { int size; - void **pages; + struct mtk_wed_buf *pages; struct mtk_wdma_desc *desc; dma_addr_t desc_phys; } tx_buf_ring; struct { int size; - struct mtk_rxbm_desc *desc; + struct mtk_wed_bm_desc *desc; dma_addr_t desc_phys; } rx_buf_ring; @@ -114,6 +123,13 @@ struct mtk_wed_device { dma_addr_t fdbk_phys; } rro; + struct { + int size; + struct mtk_wed_buf *pages; + struct mtk_wed_bm_desc *desc; + dma_addr_t desc_phys; + } hw_rro; + /* filled by driver: */ struct { union { @@ -123,6 +139,7 @@ struct mtk_wed_device { enum mtk_wed_bus_tye bus_type; void __iomem *base; u32 phy_base; + u32 id; u32 wpdma_phys; u32 wpdma_int; @@ -131,18 +148,35 @@ struct mtk_wed_device { u32 wpdma_txfree; u32 wpdma_rx_glo; u32 wpdma_rx; + u32 wpdma_rx_rro[MTK_WED_RX_QUEUES]; + u32 wpdma_rx_pg; bool wcid_512; + bool hw_rro; + bool msi; u16 token_start; unsigned int nbuf; unsigned int rx_nbuf; unsigned int rx_npkt; unsigned int rx_size; + unsigned int amsdu_max_len; u8 tx_tbit[MTK_WED_TX_QUEUES]; u8 rx_tbit[MTK_WED_RX_QUEUES]; + u8 rro_rx_tbit[MTK_WED_RX_QUEUES]; + u8 rx_pg_tbit[MTK_WED_RX_PAGE_QUEUES]; u8 txfree_tbit; + u8 amsdu_max_subframes; + + struct { + u8 se_group_nums; + u16 win_size; + u16 particular_sid; + u32 ack_sn_addr; + dma_addr_t particular_se_phys; + dma_addr_t addr_elem_phys[1024]; + } ind_cmd; u32 (*init_buf)(void *ptr, dma_addr_t phys, int token_id); int (*offload_enable)(struct mtk_wed_device *wed); @@ -182,6 +216,14 @@ struct mtk_wed_ops { void (*irq_set_mask)(struct mtk_wed_device *dev, u32 mask); int (*setup_tc)(struct mtk_wed_device *wed, struct net_device *dev, enum tc_setup_type type, void *type_data); + void (*start_hw_rro)(struct mtk_wed_device *dev, u32 irq_mask, + bool reset); + void (*rro_rx_ring_setup)(struct mtk_wed_device *dev, int ring, + void __iomem *regs); + void (*msdu_pg_rx_ring_setup)(struct mtk_wed_device *dev, int ring, + void __iomem *regs); + int (*ind_rx_ring_setup)(struct mtk_wed_device *dev, + void __iomem *regs); }; extern const struct mtk_wed_ops __rcu *mtk_soc_wed_ops; @@ -206,16 +248,27 @@ mtk_wed_device_attach(struct mtk_wed_device *dev) return ret; } -static inline bool -mtk_wed_get_rx_capa(struct mtk_wed_device *dev) +static inline bool mtk_wed_get_rx_capa(struct mtk_wed_device *dev) { #ifdef CONFIG_NET_MEDIATEK_SOC_WED + if (dev->version == 3) + return dev->wlan.hw_rro; + return dev->version != 1; #else return false; #endif } +static inline bool mtk_wed_is_amsdu_supported(struct mtk_wed_device *dev) +{ +#ifdef CONFIG_NET_MEDIATEK_SOC_WED + return dev->version == 3; +#else + return false; +#endif +} + #ifdef CONFIG_NET_MEDIATEK_SOC_WED #define mtk_wed_device_active(_dev) !!(_dev)->ops #define mtk_wed_device_detach(_dev) (_dev)->ops->detach(_dev) @@ -242,6 +295,15 @@ mtk_wed_get_rx_capa(struct mtk_wed_device *dev) #define mtk_wed_device_dma_reset(_dev) (_dev)->ops->reset_dma(_dev) #define mtk_wed_device_setup_tc(_dev, _netdev, _type, _type_data) \ (_dev)->ops->setup_tc(_dev, _netdev, _type, _type_data) +#define mtk_wed_device_start_hw_rro(_dev, _mask, _reset) \ + (_dev)->ops->start_hw_rro(_dev, _mask, _reset) +#define mtk_wed_device_rro_rx_ring_setup(_dev, _ring, _regs) \ + (_dev)->ops->rro_rx_ring_setup(_dev, _ring, _regs) +#define mtk_wed_device_msdu_pg_rx_ring_setup(_dev, _ring, _regs) \ + (_dev)->ops->msdu_pg_rx_ring_setup(_dev, _ring, _regs) +#define mtk_wed_device_ind_rx_ring_setup(_dev, _regs) \ + (_dev)->ops->ind_rx_ring_setup(_dev, _regs) + #else static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) { @@ -261,6 +323,10 @@ static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) #define mtk_wed_device_stop(_dev) do {} while (0) #define mtk_wed_device_dma_reset(_dev) do {} while (0) #define mtk_wed_device_setup_tc(_dev, _netdev, _type, _type_data) -EOPNOTSUPP +#define mtk_wed_device_start_hw_rro(_dev, _mask, _reset) do {} while (0) +#define mtk_wed_device_rro_rx_ring_setup(_dev, _ring, _regs) -ENODEV +#define mtk_wed_device_msdu_pg_rx_ring_setup(_dev, _ring, _regs) -ENODEV +#define mtk_wed_device_ind_rx_ring_setup(_dev, _regs) -ENODEV #endif #endif diff --git a/include/linux/soc/qcom/geni-se.h b/include/linux/soc/qcom/geni-se.h index 821a19135bb6..29e06905bc1f 100644 --- a/include/linux/soc/qcom/geni-se.h +++ b/include/linux/soc/qcom/geni-se.h @@ -35,6 +35,7 @@ enum geni_se_protocol_type { GENI_SE_UART, GENI_SE_I2C, GENI_SE_I3C, + GENI_SE_SPI_SLAVE, }; struct geni_wrapper; @@ -73,12 +74,14 @@ struct geni_se { /* Common SE registers */ #define GENI_FORCE_DEFAULT_REG 0x20 +#define GENI_OUTPUT_CTRL 0x24 #define SE_GENI_STATUS 0x40 #define GENI_SER_M_CLK_CFG 0x48 #define GENI_SER_S_CLK_CFG 0x4c #define GENI_IF_DISABLE_RO 0x64 #define GENI_FW_REVISION_RO 0x68 #define SE_GENI_CLK_SEL 0x7c +#define SE_GENI_CFG_SEQ_START 0x84 #define SE_GENI_DMA_MODE_EN 0x258 #define SE_GENI_M_CMD0 0x600 #define SE_GENI_M_CMD_CTRL_REG 0x604 @@ -111,6 +114,9 @@ struct geni_se { /* GENI_FORCE_DEFAULT_REG fields */ #define FORCE_DEFAULT BIT(0) +/* GENI_OUTPUT_CTRL fields */ +#define GENI_IO_MUX_0_EN BIT(0) + /* GENI_STATUS fields */ #define M_GENI_CMD_ACTIVE BIT(0) #define S_GENI_CMD_ACTIVE BIT(12) @@ -130,6 +136,9 @@ struct geni_se { /* GENI_CLK_SEL fields */ #define CLK_SEL_MSK GENMASK(2, 0) +/* SE_GENI_CFG_SEQ_START fields */ +#define START_TRIGGER BIT(0) + /* SE_GENI_DMA_MODE_EN */ #define GENI_DMA_MODE_EN BIT(0) diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index 93417ba1ead4..1a886666bbb6 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -30,7 +30,7 @@ #define LLCC_NPU 23 #define LLCC_WLHW 24 #define LLCC_PIMEM 25 -#define LLCC_DRE 26 +#define LLCC_ECC 26 #define LLCC_CVP 28 #define LLCC_MODPE 29 #define LLCC_APTCM 30 diff --git a/include/linux/soc/qcom/qcom_aoss.h b/include/linux/soc/qcom/qcom_aoss.h index 3c2a82e606f8..7361ca028752 100644 --- a/include/linux/soc/qcom/qcom_aoss.h +++ b/include/linux/soc/qcom/qcom_aoss.h @@ -13,13 +13,13 @@ struct qmp; #if IS_ENABLED(CONFIG_QCOM_AOSS_QMP) -int qmp_send(struct qmp *qmp, const void *data, size_t len); +int qmp_send(struct qmp *qmp, const char *fmt, ...); struct qmp *qmp_get(struct device *dev); void qmp_put(struct qmp *qmp); #else -static inline int qmp_send(struct qmp *qmp, const void *data, size_t len) +static inline int qmp_send(struct qmp *qmp, const char *fmt, ...) { return -ENODEV; } diff --git a/include/linux/soc/qcom/smd-rpm.h b/include/linux/soc/qcom/smd-rpm.h index 2990f425fdef..8190878645f9 100644 --- a/include/linux/soc/qcom/smd-rpm.h +++ b/include/linux/soc/qcom/smd-rpm.h @@ -2,10 +2,13 @@ #ifndef __QCOM_SMD_RPM_H__ #define __QCOM_SMD_RPM_H__ +#include <linux/types.h> + struct qcom_smd_rpm; -#define QCOM_SMD_RPM_ACTIVE_STATE 0 -#define QCOM_SMD_RPM_SLEEP_STATE 1 +#define QCOM_SMD_RPM_ACTIVE_STATE 0 +#define QCOM_SMD_RPM_SLEEP_STATE 1 +#define QCOM_SMD_RPM_STATE_NUM 2 /* * Constants used for addressing resources in the RPM. @@ -44,6 +47,19 @@ struct qcom_smd_rpm; #define QCOM_SMD_RPM_PKA_CLK 0x616b70 #define QCOM_SMD_RPM_MCFG_CLK 0x6766636d +#define QCOM_RPM_KEY_SOFTWARE_ENABLE 0x6e657773 +#define QCOM_RPM_KEY_PIN_CTRL_CLK_BUFFER_ENABLE_KEY 0x62636370 +#define QCOM_RPM_SMD_KEY_RATE 0x007a484b +#define QCOM_RPM_SMD_KEY_ENABLE 0x62616e45 +#define QCOM_RPM_SMD_KEY_STATE 0x54415453 +#define QCOM_RPM_SCALING_ENABLE_ID 0x2 + +struct clk_smd_rpm_req { + __le32 key; + __le32 nbytes; + __le32 value; +}; + int qcom_rpm_smd_write(struct qcom_smd_rpm *rpm, int state, u32 resource_type, u32 resource_id, diff --git a/include/linux/soc/qcom/smem.h b/include/linux/soc/qcom/smem.h index 223db6a9c733..a36a3b9d4929 100644 --- a/include/linux/soc/qcom/smem.h +++ b/include/linux/soc/qcom/smem.h @@ -4,6 +4,7 @@ #define QCOM_SMEM_HOST_ANY -1 +bool qcom_smem_is_available(void); int qcom_smem_alloc(unsigned host, unsigned item, size_t size); void *qcom_smem_get(unsigned host, unsigned item, size_t *size); diff --git a/include/linux/socket.h b/include/linux/socket.h index 39b74d83c7c4..cfcb7e2c3813 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -383,6 +383,7 @@ struct ucred { #define SOL_MPTCP 284 #define SOL_MCTP 285 #define SOL_SMC 286 +#define SOL_VSOCK 287 /* IPX options */ #define IPX_TYPE 1 diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h index bae5e2369b4f..307961b41541 100644 --- a/include/linux/sockptr.h +++ b/include/linux/sockptr.h @@ -55,6 +55,29 @@ static inline int copy_from_sockptr(void *dst, sockptr_t src, size_t size) return copy_from_sockptr_offset(dst, src, 0, size); } +static inline int copy_struct_from_sockptr(void *dst, size_t ksize, + sockptr_t src, size_t usize) +{ + size_t size = min(ksize, usize); + size_t rest = max(ksize, usize) - size; + + if (!sockptr_is_kernel(src)) + return copy_struct_from_user(dst, ksize, src.user, size); + + if (usize < ksize) { + memset(dst + size, 0, rest); + } else if (usize > ksize) { + char *p = src.kernel; + + while (rest--) { + if (*p++) + return -E2BIG; + } + } + memcpy(dst, src.kernel, size); + return 0; +} + static inline int copy_to_sockptr_offset(sockptr_t dst, size_t offset, const void *src, size_t size) { diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index f523ceabd059..4f3d14bb1538 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -6,6 +6,8 @@ #include <linux/bug.h> #include <linux/lockdep_types.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> #include <linux/mod_devicetable.h> #include <linux/bitfield.h> @@ -370,6 +372,7 @@ struct sdw_dpn_prop { * @clock_reg_supported: the Peripheral implements the clock base and scale * registers introduced with the SoundWire 1.2 specification. SDCA devices * do not need to set this boolean property as the registers are required. + * @use_domain_irq: call actual IRQ handler on slave, as well as callback */ struct sdw_slave_prop { u32 mipi_revision; @@ -394,6 +397,7 @@ struct sdw_slave_prop { u8 scp_int1_mask; u32 quirks; bool clock_reg_supported; + bool use_domain_irq; }; #define SDW_SLAVE_QUIRKS_INVALID_INITIAL_PARITY BIT(0) @@ -482,6 +486,11 @@ struct sdw_slave_id { __u8 sdw_version:4; }; +struct sdw_extended_slave_id { + int link_id; + struct sdw_slave_id id; +}; + /* * Helper macros to extract the MIPI-defined IDs * @@ -641,6 +650,7 @@ struct sdw_slave_ops { * struct sdw_slave - SoundWire Slave * @id: MIPI device ID * @dev: Linux device + * @irq: IRQ number * @status: Status reported by the Slave * @bus: Bus handle * @prop: Slave properties @@ -670,6 +680,7 @@ struct sdw_slave_ops { struct sdw_slave { struct sdw_slave_id id; struct device dev; + int irq; enum sdw_slave_status status; struct sdw_bus *bus; struct sdw_slave_prop prop; @@ -847,6 +858,8 @@ struct sdw_defer { * @post_bank_switch: Callback for post bank switch * @read_ping_status: Read status from PING frames, reported with two bits per Device. * Bits 31:24 are reserved. + * @get_device_num: Callback for vendor-specific device_number allocation + * @put_device_num: Callback for vendor-specific device_number release * @new_peripheral_assigned: Callback to handle enumeration of new peripheral. */ struct sdw_master_ops { @@ -862,7 +875,11 @@ struct sdw_master_ops { int (*pre_bank_switch)(struct sdw_bus *bus); int (*post_bank_switch)(struct sdw_bus *bus); u32 (*read_ping_status)(struct sdw_bus *bus); - void (*new_peripheral_assigned)(struct sdw_bus *bus, int dev_num); + int (*get_device_num)(struct sdw_bus *bus, struct sdw_slave *slave); + void (*put_device_num)(struct sdw_bus *bus, struct sdw_slave *slave); + void (*new_peripheral_assigned)(struct sdw_bus *bus, + struct sdw_slave *slave, + int dev_num); }; /** @@ -885,6 +902,7 @@ struct sdw_master_ops { * is used to compute and program bus bandwidth, clock, frame shape, * transport and port parameters * @debugfs: Bus debugfs + * @domain: IRQ domain * @defer_msg: Defer message * @clk_stop_timeout: Clock stop timeout computed * @bank_switch_timeout: Bank switch timeout computed @@ -896,9 +914,6 @@ struct sdw_master_ops { * meaningful if multi_link is set. If set to 1, hardware-based * synchronization will be used even if a stream only uses a single * SoundWire segment. - * @dev_num_ida_min: if set, defines the minimum values for the IDA - * used to allocate system-unique device numbers. This value needs to be - * identical across all SoundWire bus in the system. */ struct sdw_bus { struct device *dev; @@ -920,12 +935,13 @@ struct sdw_bus { #ifdef CONFIG_DEBUG_FS struct dentry *debugfs; #endif + struct irq_chip irq_chip; + struct irq_domain *domain; struct sdw_defer defer_msg; unsigned int clk_stop_timeout; u32 bank_switch_timeout; bool multi_link; int hw_sync_min_links; - int dev_num_ida_min; }; int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent, diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 11fc88fb0d78..00bb22d96ae5 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -264,11 +264,6 @@ struct sdw_intel_link_dev; */ #define SDW_INTEL_CLK_STOP_BUS_RESET BIT(3) -struct sdw_intel_slave_id { - int link_id; - struct sdw_slave_id id; -}; - struct hdac_bus; /** @@ -298,7 +293,7 @@ struct sdw_intel_ctx { int num_slaves; acpi_handle handle; struct sdw_intel_link_dev **ldev; - struct sdw_intel_slave_id *ids; + struct sdw_extended_slave_id *ids; struct list_head link_list; struct mutex shim_lock; /* lock for access to shared SHIM registers */ u32 shim_mask; @@ -433,4 +428,11 @@ struct sdw_intel_hw_ops { extern const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops; extern const struct sdw_intel_hw_ops sdw_intel_lnl_hw_ops; +/* + * IDA min selected to allow for 5 unconstrained devices per link, + * and 6 system-unique Device Numbers for wake-capable devices. + */ + +#define SDW_INTEL_DEV_NUM_IDA_MIN 6 + #endif diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h index 4658e7801b42..0916cb9bcb0a 100644 --- a/include/linux/spi/pxa2xx_spi.h +++ b/include/linux/spi/pxa2xx_spi.h @@ -19,7 +19,7 @@ struct pxa2xx_spi_controller { u16 num_chipselect; u8 enable_dma; u8 dma_burst_size; - bool is_slave; + bool is_target; /* DMA engine specific config */ bool (*dma_filter)(struct dma_chan *chan, void *param); @@ -31,7 +31,7 @@ struct pxa2xx_spi_controller { }; /* - * The controller specific data for SPI slave devices + * The controller specific data for SPI target devices * (resides in spi_board_info.controller_data), * copied to spi_device.platform_data ... mostly for * DMA tuning. diff --git a/include/linux/spi/sh_msiof.h b/include/linux/spi/sh_msiof.h index dc2a0cbd210d..f950d280461b 100644 --- a/include/linux/spi/sh_msiof.h +++ b/include/linux/spi/sh_msiof.h @@ -3,8 +3,8 @@ #define __SPI_SH_MSIOF_H__ enum { - MSIOF_SPI_MASTER, - MSIOF_SPI_SLAVE, + MSIOF_SPI_HOST, + MSIOF_SPI_TARGET, }; struct sh_msiof_spi_info { diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 32c94eae8926..255a0562aea5 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -6,18 +6,19 @@ #ifndef __LINUX_SPI_H #define __LINUX_SPI_H +#include <linux/acpi.h> #include <linux/bits.h> +#include <linux/completion.h> #include <linux/device.h> -#include <linux/mod_devicetable.h> -#include <linux/slab.h> +#include <linux/gpio/consumer.h> #include <linux/kthread.h> -#include <linux/completion.h> +#include <linux/mod_devicetable.h> +#include <linux/overflow.h> #include <linux/scatterlist.h> -#include <linux/gpio/consumer.h> +#include <linux/slab.h> +#include <linux/u64_stats_sync.h> #include <uapi/linux/spi/spi.h> -#include <linux/acpi.h> -#include <linux/u64_stats_sync.h> struct dma_chan; struct software_node; @@ -36,7 +37,7 @@ extern struct bus_type spi_bus_type; /** * struct spi_statistics - statistics for spi transfers - * @syncp: seqcount to protect members in this struct for per-cpu udate + * @syncp: seqcount to protect members in this struct for per-cpu update * on 32-bit systems * * @messages: number of spi-messages handled @@ -55,7 +56,7 @@ extern struct bus_type spi_bus_type; * @bytes_rx: number of bytes received from device * * @transfer_bytes_histo: - * transfer bytes histogramm + * transfer bytes histogram * * @transfers_split_maxsize: * number of transfers that have been split because of @@ -156,7 +157,7 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, * the device will bind to the named driver and only the named driver. * Do not set directly, because core frees it; use driver_set_override() to * set or clear it. - * @cs_gpiod: gpio descriptor of the chipselect line (optional, NULL when + * @cs_gpiod: GPIO descriptor of the chipselect line (optional, NULL when * not using a GPIO line) * @word_delay: delay to be inserted between consecutive * words of a transfer @@ -212,7 +213,7 @@ struct spi_device { void *controller_data; char modalias[SPI_NAME_SIZE]; const char *driver_override; - struct gpio_desc *cs_gpiod; /* Chip select gpio desc */ + struct gpio_desc *cs_gpiod; /* Chip select GPIO descriptor */ struct spi_delay word_delay; /* Inter-word delay */ /* CS delays */ struct spi_delay cs_setup; @@ -223,7 +224,7 @@ struct spi_device { struct spi_statistics __percpu *pcpu_statistics; /* - * likely need more hooks for more protocol options affecting how + * Likely need more hooks for more protocol options affecting how * the controller talks to each chip, like: * - memory packing (12 bit samples into low bits, others zeroed) * - priority @@ -299,11 +300,11 @@ static inline void spi_set_csgpiod(struct spi_device *spi, u8 idx, struct gpio_d /** * struct spi_driver - Host side "protocol" driver * @id_table: List of SPI devices supported by this driver - * @probe: Binds this driver to the spi device. Drivers can verify + * @probe: Binds this driver to the SPI device. Drivers can verify * that the device is actually present, and may need to configure * characteristics (such as bits_per_word) which weren't needed for * the initial configuration done during system setup. - * @remove: Unbinds this driver from the spi device + * @remove: Unbinds this driver from the SPI device * @shutdown: Standard shutdown callback used during system state * transitions such as powerdown/halt and kexec * @driver: SPI device drivers should initialize the name and owner @@ -415,7 +416,7 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @queued: whether this controller is providing an internal message queue * @kworker: pointer to thread struct for message pump * @pump_messages: work struct for scheduling work to the message pump - * @queue_lock: spinlock to syncronise access to message queue + * @queue_lock: spinlock to synchronise access to message queue * @queue: message queue * @cur_msg: the currently in-flight message * @cur_msg_completion: a completion for the current in-flight message @@ -473,7 +474,7 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @unprepare_message: undo any work done by prepare_message(). * @slave_abort: abort the ongoing transfer request on an SPI slave controller * @target_abort: abort the ongoing transfer request on an SPI target controller - * @cs_gpiods: Array of GPIO descs to use as chip select lines; one per CS + * @cs_gpiods: Array of GPIO descriptors to use as chip select lines; one per CS * number. Any individual value may be NULL for CS lines that * are not GPIOs (driven by the SPI controller itself). * @use_gpio_descriptors: Turns on the code in the SPI core to parse and grab @@ -500,7 +501,7 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * If the driver does not set this, the SPI core takes the snapshot as * close to the driver hand-over as possible. * @irq_flags: Interrupt enable state during PTP system timestamping - * @fallback: fallback to pio if dma transfer return failure with + * @fallback: fallback to PIO if DMA transfer return failure with * SPI_TRANS_FAIL_NO_START. * @queue_empty: signal green light for opportunistically skipping the queue * for spi_sync transfers. @@ -522,15 +523,17 @@ struct spi_controller { struct list_head list; - /* Other than negative (== assign one dynamically), bus_num is fully - * board-specific. usually that simplifies to being SOC-specific. - * example: one SOC has three SPI controllers, numbered 0..2, - * and one board's schematics might show it using SPI-2. software + /* + * Other than negative (== assign one dynamically), bus_num is fully + * board-specific. Usually that simplifies to being SoC-specific. + * example: one SoC has three SPI controllers, numbered 0..2, + * and one board's schematics might show it using SPI-2. Software * would normally use bus_num=2 for that controller. */ s16 bus_num; - /* chipselects will be integral to many controllers; some others + /* + * Chipselects will be integral to many controllers; some others * might use board-specific GPIOs. */ u16 num_chipselect; @@ -562,8 +565,8 @@ struct spi_controller { #define SPI_CONTROLLER_NO_TX BIT(2) /* Can't do buffer write */ #define SPI_CONTROLLER_MUST_RX BIT(3) /* Requires rx */ #define SPI_CONTROLLER_MUST_TX BIT(4) /* Requires tx */ - -#define SPI_MASTER_GPIO_SS BIT(5) /* GPIO CS must select slave */ +#define SPI_CONTROLLER_GPIO_SS BIT(5) /* GPIO CS must select slave */ +#define SPI_CONTROLLER_SUSPENDED BIT(6) /* Currently suspended */ /* Flag indicating if the allocation of this struct is devres-managed */ bool devm_allocated; @@ -576,8 +579,8 @@ struct spi_controller { }; /* - * on some hardware transfer / message size may be constrained - * the limit may depend on device transfer settings + * On some hardware transfer / message size may be constrained + * the limit may depend on device transfer settings. */ size_t (*max_transfer_size)(struct spi_device *spi); size_t (*max_message_size)(struct spi_device *spi); @@ -595,7 +598,8 @@ struct spi_controller { /* Flag indicating that the SPI bus is locked for exclusive use */ bool bus_lock_flag; - /* Setup mode and clock, etc (spi driver may call many times). + /* + * Setup mode and clock, etc (SPI driver may call many times). * * IMPORTANT: this may be called when transfers to another * device are active. DO NOT UPDATE SHARED REGISTERS in ways @@ -613,18 +617,19 @@ struct spi_controller { */ int (*set_cs_timing)(struct spi_device *spi); - /* Bidirectional bulk transfers + /* + * Bidirectional bulk transfers * * + The transfer() method may not sleep; its main role is * just to add the message to the queue. * + For now there's no remove-from-queue operation, or * any other request management - * + To a given spi_device, message queueing is pure fifo + * + To a given spi_device, message queueing is pure FIFO * * + The controller's main job is to process its message queue, * selecting a chip (for masters), then transferring data * + If there are multiple spi_device children, the i/o queue - * arbitration algorithm is unspecified (round robin, fifo, + * arbitration algorithm is unspecified (round robin, FIFO, * priority, reservations, preemption, etc) * * + Chipselect stays active during the entire message @@ -705,7 +710,7 @@ struct spi_controller { const struct spi_controller_mem_ops *mem_ops; const struct spi_controller_mem_caps *mem_caps; - /* gpio chip select */ + /* GPIO chip select */ struct gpio_desc **cs_gpiods; bool use_gpio_descriptors; s8 unused_native_cs; @@ -789,7 +794,7 @@ void spi_take_timestamp_post(struct spi_controller *ctlr, struct spi_transfer *xfer, size_t progress, bool irqs_off); -/* The spi driver core manages memory for the spi_controller classdev */ +/* The SPI driver core manages memory for the spi_controller classdev */ extern struct spi_controller *__spi_alloc_controller(struct device *host, unsigned int size, bool slave); @@ -863,6 +868,7 @@ extern int devm_spi_register_controller(struct device *dev, extern void spi_unregister_controller(struct spi_controller *ctlr); #if IS_ENABLED(CONFIG_ACPI) +extern struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev); extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, struct acpi_device *adev, int index); @@ -878,13 +884,13 @@ typedef void (*spi_res_release_t)(struct spi_controller *ctlr, void *res); /** - * struct spi_res - spi resource management structure + * struct spi_res - SPI resource management structure * @entry: list entry * @release: release code called prior to freeing this resource * @data: extra data allocated for the specific use-case * - * this is based on ideas from devres, but focused on life-cycle - * management during spi_message processing + * This is based on ideas from devres, but focused on life-cycle + * management during spi_message processing. */ struct spi_res { struct list_head entry; @@ -902,7 +908,7 @@ struct spi_res { * * The spi_messages themselves consist of a series of read+write transfer * segments. Those segments always read the same number of bits as they - * write; but one or the other is easily ignored by passing a null buffer + * write; but one or the other is easily ignored by passing a NULL buffer * pointer. (This is unlike most types of I/O API, because SPI hardware * is full duplex.) * @@ -913,8 +919,8 @@ struct spi_res { /** * struct spi_transfer - a read/write buffer pair - * @tx_buf: data to be written (dma-safe memory), or NULL - * @rx_buf: data to be read (dma-safe memory), or NULL + * @tx_buf: data to be written (DMA-safe memory), or NULL + * @rx_buf: data to be read (DMA-safe memory), or NULL * @tx_dma: DMA address of tx_buf, if @spi_message.is_dma_mapped * @rx_dma: DMA address of rx_buf, if @spi_message.is_dma_mapped * @tx_nbits: number of bits used for writing. If 0 the default @@ -937,7 +943,7 @@ struct spi_res { * @word_delay: inter word delay to be introduced after each word size * (set by bits_per_word) transmission. * @effective_speed_hz: the effective SCK-speed that was used to - * transfer this transfer. Set to 0 if the spi bus driver does + * transfer this transfer. Set to 0 if the SPI bus driver does * not support it. * @transfer_list: transfers are sequenced through @spi_message.transfers * @tx_sg: Scatterlist for transmit, currently not for client use @@ -966,16 +972,16 @@ struct spi_res { * transmitting the "pre" word, and the "post" timestamp after receiving * transmit confirmation from the controller for the "post" word. * @timestamped: true if the transfer has been timestamped - * @error: Error status logged by spi controller driver. + * @error: Error status logged by SPI controller driver. * * SPI transfers always write the same number of bytes as they read. * Protocol drivers should always provide @rx_buf and/or @tx_buf. * In some cases, they may also want to provide DMA addresses for * the data being transferred; that may reduce overhead, when the - * underlying driver uses dma. + * underlying driver uses DMA. * - * If the transmit buffer is null, zeroes will be shifted out - * while filling @rx_buf. If the receive buffer is null, the data + * If the transmit buffer is NULL, zeroes will be shifted out + * while filling @rx_buf. If the receive buffer is NULL, the data * shifted in will be discarded. Only "len" bytes shift out (or in). * It's an error to try to shift out a partial word. (For example, by * shifting out three bytes with word size of sixteen or twenty bits; @@ -1009,7 +1015,7 @@ struct spi_res { * Some devices need protocol transactions to be built from a series of * spi_message submissions, where the content of one message is determined * by the results of previous messages and where the whole transaction - * ends when the chipselect goes intactive. + * ends when the chipselect goes inactive. * * When SPI can transfer in 1x,2x or 4x. It can get this transfer information * from device through @tx_nbits and @rx_nbits. In Bi-direction, these @@ -1023,10 +1029,11 @@ struct spi_res { * and its transfers, ignore them until its completion callback. */ struct spi_transfer { - /* It's ok if tx_buf == rx_buf (right?) - * for MicroWire, one buffer must be null - * buffers must work with dma_*map_single() calls, unless - * spi_message.is_dma_mapped reports a pre-existing mapping + /* + * It's okay if tx_buf == rx_buf (right?). + * For MicroWire, one buffer must be NULL. + * Buffers must work with dma_*map_single() calls, unless + * spi_message.is_dma_mapped reports a pre-existing mapping. */ const void *tx_buf; void *rx_buf; @@ -1046,9 +1053,9 @@ struct spi_transfer { unsigned tx_nbits:3; unsigned rx_nbits:3; unsigned timestamped:1; -#define SPI_NBITS_SINGLE 0x01 /* 1bit transfer */ -#define SPI_NBITS_DUAL 0x02 /* 2bits transfer */ -#define SPI_NBITS_QUAD 0x04 /* 4bits transfer */ +#define SPI_NBITS_SINGLE 0x01 /* 1-bit transfer */ +#define SPI_NBITS_DUAL 0x02 /* 2-bit transfer */ +#define SPI_NBITS_QUAD 0x04 /* 4-bit transfer */ u8 bits_per_word; struct spi_delay delay; struct spi_delay cs_change_delay; @@ -1069,7 +1076,7 @@ struct spi_transfer { * struct spi_message - one multi-segment SPI transaction * @transfers: list of transfer segments in this transaction * @spi: SPI device to which the transaction is queued - * @is_dma_mapped: if true, the caller provided both dma and cpu virtual + * @is_dma_mapped: if true, the caller provided both DMA and CPU virtual * addresses for each transfer buffer * @complete: called to report transaction completions * @context: the argument to complete() when it's called @@ -1079,7 +1086,7 @@ struct spi_transfer { * @status: zero for success, else negative errno * @queue: for use by whichever driver currently owns the message * @state: for use by whichever driver currently owns the message - * @resources: for resource management when the spi message is processed + * @resources: for resource management when the SPI message is processed * @prepared: spi_prepare_message was called for the this message * * A @spi_message is used to execute an atomic sequence of data transfers, @@ -1106,7 +1113,8 @@ struct spi_message { /* spi_prepare_message() was called for this message */ bool prepared; - /* REVISIT: we might want a flag affecting the behavior of the + /* + * REVISIT: we might want a flag affecting the behavior of the * last transfer ... allowing things like "read 16 bit length L" * immediately followed by "read L bytes". Basically imposing * a specific message scheduling algorithm. @@ -1124,14 +1132,15 @@ struct spi_message { unsigned frame_length; unsigned actual_length; - /* For optional use by whatever driver currently owns the + /* + * For optional use by whatever driver currently owns the * spi_message ... between calls to spi_async and then later * complete(), that's the spi_controller controller driver. */ struct list_head queue; void *state; - /* List of spi_res reources when the spi message is processed */ + /* List of spi_res resources when the SPI message is processed */ struct list_head resources; }; @@ -1168,7 +1177,7 @@ spi_transfer_delay_exec(struct spi_transfer *t) /** * spi_message_init_with_transfers - Initialize spi_message and append transfers * @m: spi_message to be initialized - * @xfers: An array of spi transfers + * @xfers: An array of SPI transfers * @num_xfers: Number of items in the xfer array * * This function initializes the given spi_message and adds each spi_transfer in @@ -1185,26 +1194,27 @@ struct spi_transfer *xfers, unsigned int num_xfers) spi_message_add_tail(&xfers[i], m); } -/* It's fine to embed message and transaction structures in other data +/* + * It's fine to embed message and transaction structures in other data * structures so long as you don't free them while they're in use. */ - static inline struct spi_message *spi_message_alloc(unsigned ntrans, gfp_t flags) { - struct spi_message *m; - - m = kzalloc(sizeof(struct spi_message) - + ntrans * sizeof(struct spi_transfer), - flags); - if (m) { - unsigned i; - struct spi_transfer *t = (struct spi_transfer *)(m + 1); - - spi_message_init_no_memset(m); - for (i = 0; i < ntrans; i++, t++) - spi_message_add_tail(t, m); - } - return m; + struct spi_message_with_transfers { + struct spi_message m; + struct spi_transfer t[]; + } *mwt; + unsigned i; + + mwt = kzalloc(struct_size(mwt, t, ntrans), flags); + if (!mwt) + return NULL; + + spi_message_init_no_memset(&mwt->m); + for (i = 0; i < ntrans; i++) + spi_message_add_tail(&mwt->t[i], &mwt->m); + + return &mwt->m; } static inline void spi_message_free(struct spi_message *m) @@ -1291,7 +1301,7 @@ typedef void (*spi_replaced_release_t)(struct spi_controller *ctlr, * replacements that have occurred * so that they can get reverted * @release: some extra release code to get executed prior to - * relasing this structure + * releasing this structure * @extradata: pointer to some extra data if requested or NULL * @replaced_transfers: transfers that have been replaced and which need * to get restored @@ -1301,9 +1311,9 @@ typedef void (*spi_replaced_release_t)(struct spi_controller *ctlr, * @inserted_transfers: array of spi_transfers of array-size @inserted, * that have been replacing replaced_transfers * - * note: that @extradata will point to @inserted_transfers[@inserted] + * Note: that @extradata will point to @inserted_transfers[@inserted] * if some extra allocation is requested, so alignment will be the same - * as for spi_transfers + * as for spi_transfers. */ struct spi_replaced_transfers { spi_replaced_release_t release; @@ -1329,7 +1339,8 @@ extern int spi_split_transfers_maxwords(struct spi_controller *ctlr, /*---------------------------------------------------------------------------*/ -/* All these synchronous SPI transfer routines are utilities layered +/* + * All these synchronous SPI transfer routines are utilities layered * over the core async transfer primitive. Here, "synchronous" means * they will sleep uninterruptibly until the async transfer completes. */ @@ -1472,7 +1483,7 @@ static inline ssize_t spi_w8r16(struct spi_device *spi, u8 cmd) * * Callable only from contexts that can sleep. * - * Return: the (unsigned) sixteen bit number returned by the device in cpu + * Return: the (unsigned) sixteen bit number returned by the device in CPU * endianness, or else a negative error code. */ static inline ssize_t spi_w8r16be(struct spi_device *spi, u8 cmd) @@ -1500,7 +1511,7 @@ static inline ssize_t spi_w8r16be(struct spi_device *spi, u8 cmd) * As a rule, SPI devices can't be probed. Instead, board init code * provides a table listing the devices which are present, with enough * information to bind and set up the device's driver. There's basic - * support for nonstatic configurations too; enough to handle adding + * support for non-static configurations too; enough to handle adding * parport adapters, or microcontrollers acting as USB-to-SPI bridges. */ @@ -1537,12 +1548,13 @@ static inline ssize_t spi_w8r16be(struct spi_device *spi, u8 cmd) * are active in some dynamic board configuration models. */ struct spi_board_info { - /* The device name and module name are coupled, like platform_bus; + /* + * The device name and module name are coupled, like platform_bus; * "modalias" is normally the driver name. * * platform_data goes to spi_device.dev.platform_data, * controller_data goes to spi_device.controller_data, - * irq is copied too + * IRQ is copied too. */ char modalias[SPI_NAME_SIZE]; const void *platform_data; @@ -1554,7 +1566,8 @@ struct spi_board_info { u32 max_speed_hz; - /* bus_num is board specific and matches the bus_num of some + /* + * bus_num is board specific and matches the bus_num of some * spi_controller that will probably be registered later. * * chip_select reflects how this chip is wired to that master; @@ -1563,12 +1576,14 @@ struct spi_board_info { u16 bus_num; u16 chip_select; - /* mode becomes spi_device.mode, and is essential for chips + /* + * mode becomes spi_device.mode, and is essential for chips * where the default of SPI_CS_HIGH = 0 is wrong. */ u32 mode; - /* ... may need additional spi_device chip config data here. + /* + * ... may need additional spi_device chip config data here. * avoid stuff protocol drivers can set; but include stuff * needed to behave without being bound to a driver: * - quirks like clock rate mattering when not selected @@ -1585,7 +1600,8 @@ spi_register_board_info(struct spi_board_info const *info, unsigned n) { return 0; } #endif -/* If you're hotplugging an adapter with devices (parport, usb, etc) +/* + * If you're hotplugging an adapter with devices (parport, USB, etc) * use spi_new_device() to describe each device. You can also call * spi_unregister_device() to start making that device vanish, but * normally that would be handled by spi_unregister_controller(). @@ -1623,10 +1639,6 @@ spi_transfer_is_last(struct spi_controller *ctlr, struct spi_transfer *xfer) #define spi_master spi_controller #define SPI_MASTER_HALF_DUPLEX SPI_CONTROLLER_HALF_DUPLEX -#define SPI_MASTER_NO_RX SPI_CONTROLLER_NO_RX -#define SPI_MASTER_NO_TX SPI_CONTROLLER_NO_TX -#define SPI_MASTER_MUST_RX SPI_CONTROLLER_MUST_RX -#define SPI_MASTER_MUST_TX SPI_CONTROLLER_MUST_TX #define spi_master_get_devdata(_ctlr) spi_controller_get_devdata(_ctlr) #define spi_master_set_devdata(_ctlr, _data) \ diff --git a/include/linux/spmi.h b/include/linux/spmi.h index eac1956a8727..2a4ce4144f9f 100644 --- a/include/linux/spmi.h +++ b/include/linux/spmi.h @@ -166,7 +166,7 @@ static inline void spmi_driver_unregister(struct spmi_driver *sdrv) struct device_node; -struct spmi_device *spmi_device_from_of(struct device_node *np); +struct spmi_device *spmi_find_device_by_of_node(struct device_node *np); int spmi_register_read(struct spmi_device *sdev, u8 addr, u8 *buf); int spmi_ext_register_read(struct spmi_device *sdev, u8 addr, u8 *buf, size_t len); diff --git a/include/linux/sprintf.h b/include/linux/sprintf.h new file mode 100644 index 000000000000..33dcbec71925 --- /dev/null +++ b/include/linux/sprintf.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_KERNEL_SPRINTF_H_ +#define _LINUX_KERNEL_SPRINTF_H_ + +#include <linux/compiler_attributes.h> +#include <linux/types.h> + +int num_to_str(char *buf, int size, unsigned long long num, unsigned int width); + +__printf(2, 3) int sprintf(char *buf, const char * fmt, ...); +__printf(2, 0) int vsprintf(char *buf, const char *, va_list); +__printf(3, 4) int snprintf(char *buf, size_t size, const char *fmt, ...); +__printf(3, 0) int vsnprintf(char *buf, size_t size, const char *fmt, va_list args); +__printf(3, 4) int scnprintf(char *buf, size_t size, const char *fmt, ...); +__printf(3, 0) int vscnprintf(char *buf, size_t size, const char *fmt, va_list args); +__printf(2, 3) __malloc char *kasprintf(gfp_t gfp, const char *fmt, ...); +__printf(2, 0) __malloc char *kvasprintf(gfp_t gfp, const char *fmt, va_list args); +__printf(2, 0) const char *kvasprintf_const(gfp_t gfp, const char *fmt, va_list args); + +__scanf(2, 3) int sscanf(const char *, const char *, ...); +__scanf(2, 0) int vsscanf(const char *, const char *, va_list); + +/* These are for specific cases, do not use without real need */ +extern bool no_hash_pointers; +int no_hash_pointers_enable(char *str); + +#endif /* _LINUX_KERNEL_SPRINTF_H */ diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index ebd72491af99..447133171d95 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -48,6 +48,10 @@ void srcu_drive_gp(struct work_struct *wp); #define DEFINE_STATIC_SRCU(name) \ static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name, name) +// Dummy structure for srcu_notifier_head. +struct srcu_usage { }; +#define __SRCU_USAGE_INIT(name) { } + void synchronize_srcu(struct srcu_struct *ssp); /* diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h index c36e7a3b45e7..3be2cb564710 100644 --- a/include/linux/stackleak.h +++ b/include/linux/stackleak.h @@ -14,6 +14,7 @@ #ifdef CONFIG_GCC_PLUGIN_STACKLEAK #include <asm/stacktrace.h> +#include <linux/linkage.h> /* * The lowest address on tsk's stack which we can plausibly erase. @@ -76,6 +77,11 @@ static inline void stackleak_task_init(struct task_struct *t) # endif } +asmlinkage void noinstr stackleak_erase(void); +asmlinkage void noinstr stackleak_erase_on_task_stack(void); +asmlinkage void noinstr stackleak_erase_off_task_stack(void); +void __no_caller_saved_registers noinstr stackleak_track_stack(void); + #else /* !CONFIG_GCC_PLUGIN_STACKLEAK */ static inline void stackleak_task_init(struct task_struct *t) { } #endif diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 06090538fe2d..dee5ad6e48c5 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -76,6 +76,8 @@ | DMA_AXI_BLEN_32 | DMA_AXI_BLEN_64 \ | DMA_AXI_BLEN_128 | DMA_AXI_BLEN_256) +struct stmmac_priv; + /* Platfrom data for platform device structure's platform_data field */ struct stmmac_mdio_bus_data { @@ -137,6 +139,7 @@ struct stmmac_rxq_cfg { struct stmmac_txq_cfg { u32 weight; + bool coe_unsupported; u8 mode_to_use; /* Credit Base Shaper parameters */ u32 send_slope; @@ -172,6 +175,7 @@ struct stmmac_fpe_cfg { bool hs_enable; /* FPE handshake enable */ enum stmmac_fpe_state lp_fpe_state; /* Link Partner FPE state */ enum stmmac_fpe_state lo_fpe_state; /* Local station FPE state */ + u32 fpe_csr; /* MAC_FPE_CTRL_STS reg cache */ }; struct stmmac_safety_feature_cfg { @@ -204,14 +208,41 @@ struct dwmac4_addrs { u32 mtl_low_cred_offset; }; +#define STMMAC_FLAG_HAS_INTEGRATED_PCS BIT(0) +#define STMMAC_FLAG_SPH_DISABLE BIT(1) +#define STMMAC_FLAG_USE_PHY_WOL BIT(2) +#define STMMAC_FLAG_HAS_SUN8I BIT(3) +#define STMMAC_FLAG_TSO_EN BIT(4) +#define STMMAC_FLAG_SERDES_UP_AFTER_PHY_LINKUP BIT(5) +#define STMMAC_FLAG_VLAN_FAIL_Q_EN BIT(6) +#define STMMAC_FLAG_MULTI_MSI_EN BIT(7) +#define STMMAC_FLAG_EXT_SNAPSHOT_EN BIT(8) +#define STMMAC_FLAG_INT_SNAPSHOT_EN BIT(9) +#define STMMAC_FLAG_RX_CLK_RUNS_IN_LPI BIT(10) +#define STMMAC_FLAG_EN_TX_LPI_CLOCKGATING BIT(11) +#define STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY BIT(12) + struct plat_stmmacenet_data { int bus_id; int phy_addr; - int interface; + /* MAC ----- optional PCS ----- SerDes ----- optional PHY ----- Media + * ^ ^ + * mac_interface phy_interface + * + * mac_interface is the MAC-side interface, which may be the same + * as phy_interface if there is no intervening PCS. If there is a + * PCS, then mac_interface describes the interface mode between the + * MAC and PCS, and phy_interface describes the interface mode + * between the PCS and PHY. + */ + phy_interface_t mac_interface; + /* phy_interface is the PHY-side interface - the interface used by + * an attached PHY. + */ phy_interface_t phy_interface; struct stmmac_mdio_bus_data *mdio_bus_data; struct device_node *phy_node; - struct device_node *phylink_node; + struct fwnode_handle *port_node; struct device_node *mdio_node; struct stmmac_dma_cfg *dma_cfg; struct stmmac_est *est; @@ -240,12 +271,12 @@ struct plat_stmmacenet_data { u8 tx_sched_algorithm; struct stmmac_rxq_cfg rx_queues_cfg[MTL_MAX_RX_QUEUES]; struct stmmac_txq_cfg tx_queues_cfg[MTL_MAX_TX_QUEUES]; - void (*fix_mac_speed)(void *priv, unsigned int speed); + void (*fix_mac_speed)(void *priv, unsigned int speed, unsigned int mode); int (*fix_soc_reset)(void *priv, void __iomem *ioaddr); int (*serdes_powerup)(struct net_device *ndev, void *priv); void (*serdes_powerdown)(struct net_device *ndev, void *priv); void (*speed_mode_2500)(struct net_device *ndev, void *priv); - void (*ptp_clk_freq_config)(void *priv); + void (*ptp_clk_freq_config)(struct stmmac_priv *priv); int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); struct mac_device_info *(*setup)(void *priv); @@ -266,22 +297,13 @@ struct plat_stmmacenet_data { struct reset_control *stmmac_ahb_rst; struct stmmac_axi *axi; int has_gmac4; - bool has_sun8i; - bool tso_en; int rss_en; int mac_port_sel_speed; - bool en_tx_lpi_clockgating; - bool rx_clk_runs_in_lpi; int has_xgmac; - bool vlan_fail_q_en; u8 vlan_fail_q; unsigned int eee_usecs_rate; struct pci_dev *pdev; int int_snapshot_num; - int ext_snapshot_num; - bool int_snapshot_en; - bool ext_snapshot_en; - bool multi_msi_en; int msi_mac_vec; int msi_wol_vec; int msi_lpi_vec; @@ -289,10 +311,7 @@ struct plat_stmmacenet_data { int msi_sfty_ue_vec; int msi_rx_base_vec; int msi_tx_base_vec; - bool use_phy_wol; - bool sph_disable; - bool serdes_up_after_phy_linkup; const struct dwmac4_addrs *dwmac4_addrs; - bool has_integrated_pcs; + unsigned int flags; }; #endif diff --git a/include/linux/string.h b/include/linux/string.h index dbfc66400050..ce137830a0b9 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -2,10 +2,13 @@ #ifndef _LINUX_STRING_H_ #define _LINUX_STRING_H_ +#include <linux/array_size.h> #include <linux/compiler.h> /* for inline */ #include <linux/types.h> /* for size_t */ #include <linux/stddef.h> /* for NULL */ +#include <linux/err.h> /* for ERR_PTR() */ #include <linux/errno.h> /* for E2BIG */ +#include <linux/overflow.h> /* for check_mul_overflow() */ #include <linux/stdarg.h> #include <uapi/linux/string.h> @@ -14,6 +17,44 @@ extern void *memdup_user(const void __user *, size_t); extern void *vmemdup_user(const void __user *, size_t); extern void *memdup_user_nul(const void __user *, size_t); +/** + * memdup_array_user - duplicate array from user space + * @src: source address in user space + * @n: number of array members to copy + * @size: size of one array member + * + * Return: an ERR_PTR() on failure. Result is physically + * contiguous, to be freed by kfree(). + */ +static inline void *memdup_array_user(const void __user *src, size_t n, size_t size) +{ + size_t nbytes; + + if (check_mul_overflow(n, size, &nbytes)) + return ERR_PTR(-EOVERFLOW); + + return memdup_user(src, nbytes); +} + +/** + * vmemdup_array_user - duplicate array from user space + * @src: source address in user space + * @n: number of array members to copy + * @size: size of one array member + * + * Return: an ERR_PTR() on failure. Result may be not + * physically contiguous. Use kvfree() to free. + */ +static inline void *vmemdup_array_user(const void __user *src, size_t n, size_t size) +{ + size_t nbytes; + + if (check_mul_overflow(n, size, &nbytes)) + return ERR_PTR(-EOVERFLOW); + + return vmemdup_user(src, nbytes); +} + /* * Include machine specific inline routines */ @@ -277,10 +318,12 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, */ #define strtomem_pad(dest, src, pad) do { \ const size_t _dest_len = __builtin_object_size(dest, 1); \ + const size_t _src_len = __builtin_object_size(src, 1); \ \ BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \ _dest_len == (size_t)-1); \ - memcpy_and_pad(dest, _dest_len, src, strnlen(src, _dest_len), pad); \ + memcpy_and_pad(dest, _dest_len, src, \ + strnlen(src, min(_src_len, _dest_len)), pad); \ } while (0) /** @@ -298,10 +341,11 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, */ #define strtomem(dest, src) do { \ const size_t _dest_len = __builtin_object_size(dest, 1); \ + const size_t _src_len = __builtin_object_size(src, 1); \ \ BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \ _dest_len == (size_t)-1); \ - memcpy(dest, src, min(_dest_len, strnlen(src, _dest_len))); \ + memcpy(dest, src, strnlen(src, min(_src_len, _dest_len))); \ } while (0) /** diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h index 48120222b9b2..3c1091941eb8 100644 --- a/include/linux/string_choices.h +++ b/include/linux/string_choices.h @@ -30,6 +30,7 @@ static inline const char *str_read_write(bool v) { return v ? "read" : "write"; } +#define str_write_read(v) str_read_write(!(v)) static inline const char *str_on_off(bool v) { diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index 789ab30045da..58fb1f90eda5 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -24,8 +24,8 @@ enum string_size_units { STRING_UNITS_2, /* use binary powers of 2^10 */ }; -void string_get_size(u64 size, u64 blk_size, enum string_size_units units, - char *buf, int len); +int string_get_size(u64 size, u64 blk_size, enum string_size_units units, + char *buf, int len); int parse_int_array_user(const char __user *from, size_t count, int **array); @@ -109,6 +109,8 @@ char *kstrdup_quotable(const char *src, gfp_t gfp); char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp); char *kstrdup_quotable_file(struct file *file, gfp_t gfp); +char *kstrdup_and_replace(const char *src, char old, char new, gfp_t gfp); + char **kasprintf_strarray(gfp_t gfp, const char *prefix, size_t n); void kfree_strarray(char **array, size_t n); diff --git a/include/linux/stringify.h b/include/linux/stringify.h index 841cec8ed525..0e84cbe65270 100644 --- a/include/linux/stringify.h +++ b/include/linux/stringify.h @@ -9,4 +9,6 @@ #define __stringify_1(x...) #x #define __stringify(x...) __stringify_1(x) +#define FILE_LINE __FILE__ ":" __stringify(__LINE__) + #endif /* !__LINUX_STRINGIFY_H */ diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 518bd28f5ab8..35766963dd14 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -56,10 +56,14 @@ struct cache_head { struct kref ref; unsigned long flags; }; -#define CACHE_VALID 0 /* Entry contains valid data */ -#define CACHE_NEGATIVE 1 /* Negative entry - there is no match for the key */ -#define CACHE_PENDING 2 /* An upcall has been sent but no reply received yet*/ -#define CACHE_CLEANED 3 /* Entry has been cleaned from cache */ + +/* cache_head.flags */ +enum { + CACHE_VALID, /* Entry contains valid data */ + CACHE_NEGATIVE, /* Negative entry - there is no match for the key */ + CACHE_PENDING, /* An upcall has been sent but no reply received yet*/ + CACHE_CLEANED, /* Entry has been cleaned from cache */ +}; #define CACHE_NEW_EXPIRY 120 /* keep new things pending confirmation for 120 seconds */ diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 4f41d839face..e9d4377d03c6 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -92,6 +92,7 @@ struct rpc_clnt { }; const struct cred *cl_cred; unsigned int cl_max_connect; /* max number of transports not to the same IP */ + struct super_block *pipefs_sb; }; /* @@ -148,6 +149,8 @@ struct rpc_create_args { const struct cred *cred; unsigned int max_connect; struct xprtsec_parms xprtsec; + unsigned long connect_timeout; + unsigned long reconnect_timeout; }; struct rpc_add_xprt_test { diff --git a/include/linux/sunrpc/stats.h b/include/linux/sunrpc/stats.h index d94d4f410507..3ce1550d1beb 100644 --- a/include/linux/sunrpc/stats.h +++ b/include/linux/sunrpc/stats.h @@ -43,22 +43,6 @@ struct net; #ifdef CONFIG_PROC_FS int rpc_proc_init(struct net *); void rpc_proc_exit(struct net *); -#else -static inline int rpc_proc_init(struct net *net) -{ - return 0; -} - -static inline void rpc_proc_exit(struct net *net) -{ -} -#endif - -#ifdef MODULE -void rpc_modcount(struct inode *, int); -#endif - -#ifdef CONFIG_PROC_FS struct proc_dir_entry * rpc_proc_register(struct net *,struct rpc_stat *); void rpc_proc_unregister(struct net *,const char *); void rpc_proc_zero(const struct rpc_program *); @@ -69,7 +53,14 @@ void svc_proc_unregister(struct net *, const char *); void svc_seq_show(struct seq_file *, const struct svc_stat *); #else +static inline int rpc_proc_init(struct net *net) +{ + return 0; +} +static inline void rpc_proc_exit(struct net *net) +{ +} static inline struct proc_dir_entry *rpc_proc_register(struct net *net, struct rpc_stat *s) { return NULL; } static inline void rpc_proc_unregister(struct net *net, const char *p) {} static inline void rpc_proc_zero(const struct rpc_program *p) {} diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index f8751118c122..b10f987509cc 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -17,6 +17,7 @@ #include <linux/sunrpc/xdr.h> #include <linux/sunrpc/auth.h> #include <linux/sunrpc/svcauth.h> +#include <linux/lwq.h> #include <linux/wait.h> #include <linux/mm.h> #include <linux/pagevec.h> @@ -33,22 +34,27 @@ */ struct svc_pool { unsigned int sp_id; /* pool id; also node id on NUMA */ - spinlock_t sp_lock; /* protects all fields */ - struct list_head sp_sockets; /* pending sockets */ - unsigned int sp_nrthreads; /* # of threads in pool */ + struct lwq sp_xprts; /* pending transports */ + atomic_t sp_nrthreads; /* # of threads in pool */ struct list_head sp_all_threads; /* all server threads */ + struct llist_head sp_idle_threads; /* idle server threads */ /* statistics on pool operation */ + struct percpu_counter sp_messages_arrived; struct percpu_counter sp_sockets_queued; struct percpu_counter sp_threads_woken; - struct percpu_counter sp_threads_timedout; -#define SP_TASK_PENDING (0) /* still work to do even if no - * xprt is queued. */ -#define SP_CONGESTED (1) unsigned long sp_flags; } ____cacheline_aligned_in_smp; +/* bits for sp_flags */ +enum { + SP_TASK_PENDING, /* still work to do even if no xprt is queued */ + SP_NEED_VICTIM, /* One thread needs to agree to exit */ + SP_VICTIM_REMAINS, /* One thread needs to actually exit */ +}; + + /* * RPC service. * @@ -84,12 +90,9 @@ struct svc_serv { int (*sv_threadfn)(void *data); #if defined(CONFIG_SUNRPC_BACKCHANNEL) - struct list_head sv_cb_list; /* queue for callback requests + struct lwq sv_cb_list; /* queue for callback requests * that arrive over the same * connection */ - spinlock_t sv_cb_lock; /* protects the svc_cb_list */ - wait_queue_head_t sv_cb_waitq; /* sleep here if there are no - * entries in the svc_cb_list */ bool sv_bc_enabled; /* service uses backchannel */ #endif /* CONFIG_SUNRPC_BACKCHANNEL */ }; @@ -120,19 +123,6 @@ static inline void svc_put(struct svc_serv *serv) kref_put(&serv->sv_refcnt, svc_destroy); } -/** - * svc_put_not_last - decrement non-final reference count on SUNRPC serv - * @serv: the svc_serv to have count decremented - * - * Returns: %true is refcount was decremented. - * - * If the refcount is 1, it is not decremented and instead failure is reported. - */ -static inline bool svc_put_not_last(struct svc_serv *serv) -{ - return refcount_dec_not_one(&serv->sv_refcnt.refcount); -} - /* * Maximum payload size supported by a kernel RPC server. * This is use to determine the max number of pages nfsd is @@ -195,6 +185,7 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp); */ struct svc_rqst { struct list_head rq_all; /* all threads list */ + struct llist_node rq_idle; /* On the idle list */ struct rcu_head rq_rcu_head; /* for RCU deferred kfree */ struct svc_xprt * rq_xprt; /* transport ptr */ @@ -232,16 +223,6 @@ struct svc_rqst { u32 rq_proc; /* procedure number */ u32 rq_prot; /* IP protocol */ int rq_cachetype; /* catering to nfsd */ -#define RQ_SECURE (0) /* secure port */ -#define RQ_LOCAL (1) /* local request */ -#define RQ_USEDEFERRAL (2) /* use deferral */ -#define RQ_DROPME (3) /* drop current reply */ -#define RQ_SPLICE_OK (4) /* turned off in gss privacy - * to prevent encrypting page - * cache pages */ -#define RQ_VICTIM (5) /* about to be shut down */ -#define RQ_BUSY (6) /* request is busy */ -#define RQ_DATA (7) /* request has data */ unsigned long rq_flags; /* flags field */ ktime_t rq_qtime; /* enqueue time */ @@ -265,12 +246,24 @@ struct svc_rqst { /* Catering to nfsd */ struct auth_domain * rq_client; /* RPC peer info */ struct auth_domain * rq_gssclient; /* "gss/"-style peer info */ - struct svc_cacherep * rq_cacherep; /* cache info */ struct task_struct *rq_task; /* service thread */ struct net *rq_bc_net; /* pointer to backchannel's * net namespace */ void ** rq_lease_breaker; /* The v4 client breaking a lease */ + unsigned int rq_status_counter; /* RPC processing counter */ +}; + +/* bits for rq_flags */ +enum { + RQ_SECURE, /* secure port */ + RQ_LOCAL, /* local request */ + RQ_USEDEFERRAL, /* use deferral */ + RQ_DROPME, /* drop current reply */ + RQ_SPLICE_OK, /* turned off in gss privacy to prevent + * encrypting page cache pages */ + RQ_VICTIM, /* Have agreed to shut down */ + RQ_DATA, /* request has data */ }; #define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net) @@ -308,6 +301,28 @@ static inline struct sockaddr *svc_daddr(const struct svc_rqst *rqst) return (struct sockaddr *) &rqst->rq_daddr; } +/** + * svc_thread_should_stop - check if this thread should stop + * @rqstp: the thread that might need to stop + * + * To stop an svc thread, the pool flags SP_NEED_VICTIM and SP_VICTIM_REMAINS + * are set. The first thread which sees SP_NEED_VICTIM clears it, becoming + * the victim using this function. It should then promptly call + * svc_exit_thread() to complete the process, clearing SP_VICTIM_REMAINS + * so the task waiting for a thread to exit can wake and continue. + * + * Return values: + * %true: caller should invoke svc_exit_thread() + * %false: caller should do nothing + */ +static inline bool svc_thread_should_stop(struct svc_rqst *rqstp) +{ + if (test_and_clear_bit(SP_NEED_VICTIM, &rqstp->rq_pool->sp_flags)) + set_bit(RQ_VICTIM, &rqstp->rq_flags); + + return test_bit(RQ_VICTIM, &rqstp->rq_flags); +} + struct svc_deferred_req { u32 prot; /* protocol (UDP or TCP) */ struct svc_xprt *xprt; @@ -344,7 +359,7 @@ struct svc_program { char * pg_name; /* service name */ char * pg_class; /* class name: services sharing authentication */ struct svc_stat * pg_stats; /* rpc statistics */ - int (*pg_authenticate)(struct svc_rqst *); + enum svc_auth_status (*pg_authenticate)(struct svc_rqst *rqstp); __be32 (*pg_init_request)(struct svc_rqst *, const struct svc_program *, struct svc_process_info *); @@ -420,13 +435,13 @@ struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); void svc_process(struct svc_rqst *rqstp); -int bc_svc_process(struct svc_serv *, struct rpc_rqst *, - struct svc_rqst *); +void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp); int svc_register(const struct svc_serv *, struct net *, const int, const unsigned short, const unsigned short); void svc_wake_up(struct svc_serv *); void svc_reserve(struct svc_rqst *rqstp, int space); +void svc_pool_wake_idle_thread(struct svc_pool *pool); struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv); char * svc_print_addr(struct svc_rqst *, char *, size_t); const char * svc_proc_name(const struct svc_rqst *rqstp); diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index a6b12631db21..8e20cd60e2e7 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -54,25 +54,8 @@ struct svc_xprt { const struct svc_xprt_ops *xpt_ops; struct kref xpt_ref; struct list_head xpt_list; - struct list_head xpt_ready; + struct lwq_node xpt_ready; unsigned long xpt_flags; -#define XPT_BUSY 0 /* enqueued/receiving */ -#define XPT_CONN 1 /* conn pending */ -#define XPT_CLOSE 2 /* dead or dying */ -#define XPT_DATA 3 /* data pending */ -#define XPT_TEMP 4 /* connected transport */ -#define XPT_DEAD 6 /* transport closed */ -#define XPT_CHNGBUF 7 /* need to change snd/rcv buf sizes */ -#define XPT_DEFERRED 8 /* deferred request pending */ -#define XPT_OLD 9 /* used for xprt aging mark+sweep */ -#define XPT_LISTENER 10 /* listening endpoint */ -#define XPT_CACHE_AUTH 11 /* cache auth info */ -#define XPT_LOCAL 12 /* connection from loopback interface */ -#define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */ -#define XPT_CONG_CTRL 14 /* has congestion control */ -#define XPT_HANDSHAKE 15 /* xprt requests a handshake */ -#define XPT_TLS_SESSION 16 /* transport-layer security established */ -#define XPT_PEER_AUTH 17 /* peer has been authenticated */ struct svc_serv *xpt_server; /* service for transport */ atomic_t xpt_reserved; /* space on outq that is rsvd */ @@ -97,6 +80,27 @@ struct svc_xprt { struct rpc_xprt_switch *xpt_bc_xps; /* NFSv4.1 backchannel */ }; +/* flag bits for xpt_flags */ +enum { + XPT_BUSY, /* enqueued/receiving */ + XPT_CONN, /* conn pending */ + XPT_CLOSE, /* dead or dying */ + XPT_DATA, /* data pending */ + XPT_TEMP, /* connected transport */ + XPT_DEAD, /* transport closed */ + XPT_CHNGBUF, /* need to change snd/rcv buf sizes */ + XPT_DEFERRED, /* deferred request pending */ + XPT_OLD, /* used for xprt aging mark+sweep */ + XPT_LISTENER, /* listening endpoint */ + XPT_CACHE_AUTH, /* cache auth info */ + XPT_LOCAL, /* connection from loopback interface */ + XPT_KILL_TEMP, /* call xpo_kill_temp_xprt before closing */ + XPT_CONG_CTRL, /* has congestion control */ + XPT_HANDSHAKE, /* xprt requests a handshake */ + XPT_TLS_SESSION, /* transport-layer security established */ + XPT_PEER_AUTH, /* peer has been authenticated */ +}; + static inline void unregister_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u) { spin_lock(&xpt->xpt_lock); diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 6d9cc9080aca..6f90203edbf8 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -83,6 +83,19 @@ struct auth_domain { struct rcu_head rcu_head; }; +enum svc_auth_status { + SVC_GARBAGE = 1, + SVC_SYSERR, + SVC_VALID, + SVC_NEGATIVE, + SVC_OK, + SVC_DROP, + SVC_CLOSE, + SVC_DENIED, + SVC_PENDING, + SVC_COMPLETE, +}; + /* * Each authentication flavour registers an auth_ops * structure. @@ -98,6 +111,8 @@ struct auth_domain { * is (probably) already in place. Certainly space is * reserved for it. * DROP - simply drop the request. It may have been deferred + * CLOSE - like SVC_DROP, but request is definitely lost. + * If there is a tcp connection, it should be closed. * GARBAGE - rpc garbage_args error * SYSERR - rpc system_err error * DENIED - authp holds reason for denial. @@ -111,14 +126,10 @@ struct auth_domain { * * release() is given a request after the procedure has been run. * It should sign/encrypt the results if needed - * It should return: - * OK - the resbuf is ready to be sent - * DROP - the reply should be quitely dropped - * DENIED - authp holds a reason for MSG_DENIED - * SYSERR - rpc system_err * * domain_release() * This call releases a domain. + * * set_client() * Givens a pending request (struct svc_rqst), finds and assigns * an appropriate 'auth_domain' as the client. @@ -127,44 +138,28 @@ struct auth_ops { char * name; struct module *owner; int flavour; - int (*accept)(struct svc_rqst *rq); - int (*release)(struct svc_rqst *rq); - void (*domain_release)(struct auth_domain *); - int (*set_client)(struct svc_rqst *rq); -}; -#define SVC_GARBAGE 1 -#define SVC_SYSERR 2 -#define SVC_VALID 3 -#define SVC_NEGATIVE 4 -#define SVC_OK 5 -#define SVC_DROP 6 -#define SVC_CLOSE 7 /* Like SVC_DROP, but request is definitely - * lost so if there is a tcp connection, it - * should be closed - */ -#define SVC_DENIED 8 -#define SVC_PENDING 9 -#define SVC_COMPLETE 10 + enum svc_auth_status (*accept)(struct svc_rqst *rqstp); + int (*release)(struct svc_rqst *rqstp); + void (*domain_release)(struct auth_domain *dom); + enum svc_auth_status (*set_client)(struct svc_rqst *rqstp); +}; struct svc_xprt; -extern int svc_authenticate(struct svc_rqst *rqstp); +extern enum svc_auth_status svc_authenticate(struct svc_rqst *rqstp); extern int svc_authorise(struct svc_rqst *rqstp); -extern int svc_set_client(struct svc_rqst *rqstp); +extern enum svc_auth_status svc_set_client(struct svc_rqst *rqstp); extern int svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops); extern void svc_auth_unregister(rpc_authflavor_t flavor); extern struct auth_domain *unix_domain_find(char *name); extern void auth_domain_put(struct auth_domain *item); -extern int auth_unix_add_addr(struct net *net, struct in6_addr *addr, struct auth_domain *dom); extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *new); extern struct auth_domain *auth_domain_find(char *name); -extern struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr); -extern int auth_unix_forget_old(struct auth_domain *dom); extern void svcauth_unix_purge(struct net *net); extern void svcauth_unix_info_release(struct svc_xprt *xpt); -extern int svcauth_unix_set_client(struct svc_rqst *rqstp); +extern enum svc_auth_status svcauth_unix_set_client(struct svc_rqst *rqstp); extern int unix_gid_cache_create(struct net *net); extern void unix_gid_cache_destroy(struct net *net); diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index a7116048a4d4..7c78ec6356b9 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -35,8 +35,8 @@ struct svc_sock { /* Total length of the data (not including fragment headers) * received so far in the fragments making up this rpc: */ u32 sk_datalen; - /* Number of queued send requests */ - atomic_t sk_sendqlen; + + struct page_frag_cache sk_frag_cache; struct completion sk_handshake_done; @@ -56,8 +56,7 @@ static inline u32 svc_sock_final_rec(struct svc_sock *svsk) /* * Function prototypes. */ -void svc_close_net(struct svc_serv *, struct net *); -int svc_recv(struct svc_rqst *, long); +void svc_recv(struct svc_rqst *rqstp); void svc_send(struct svc_rqst *rqstp); void svc_drop(struct svc_rqst *); void svc_sock_update_bufs(struct svc_serv *serv); @@ -66,8 +65,6 @@ int svc_addsock(struct svc_serv *serv, struct net *net, const struct cred *cred); void svc_init_xprt_sock(void); void svc_cleanup_xprt_sock(void); -struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot); -void svc_sock_destroy(struct svc_xprt *); /* * svc_makesock socket characteristics diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index f89ec4b5ea16..2f8dc47f1eb0 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -139,6 +139,8 @@ void xdr_terminate_string(const struct xdr_buf *, const u32); size_t xdr_buf_pagecount(const struct xdr_buf *buf); int xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp); void xdr_free_bvec(struct xdr_buf *buf); +unsigned int xdr_buf_to_bvec(struct bio_vec *bvec, unsigned int bvec_size, + const struct xdr_buf *xdr); static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int len) { @@ -224,6 +226,7 @@ struct xdr_stream { struct kvec *iov; /* pointer to the current kvec */ struct kvec scratch; /* Scratch buffer */ struct page **page_ptr; /* pointer to the current page */ + void *page_kaddr; /* kmapped address of the current page */ unsigned int nwords; /* Remaining decode buffer length */ struct rpc_rqst *rqst; /* For debugging */ @@ -255,6 +258,7 @@ extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct rpc_rqst *rqst); extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, struct page **pages, unsigned int len); +extern void xdr_finish_decode(struct xdr_stream *xdr); extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); @@ -775,7 +779,7 @@ xdr_stream_decode_uint32_array(struct xdr_stream *xdr, if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) return -EBADMSG; - if (len > SIZE_MAX / sizeof(*p)) + if (U32_MAX >= SIZE_MAX / sizeof(*p) && len > SIZE_MAX / sizeof(*p)) return -EBADMSG; p = xdr_inline_decode(xdr, len * sizeof(*p)); if (unlikely(!p)) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index b52411bcfe4e..f85d3a0daca2 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -57,6 +57,7 @@ struct xprt_class; struct seq_file; struct svc_serv; struct net; +#include <linux/lwq.h> /* * This describes a complete RPC request @@ -121,7 +122,7 @@ struct rpc_rqst { int rq_ntrans; #if defined(CONFIG_SUNRPC_BACKCHANNEL) - struct list_head rq_bc_list; /* Callback service list */ + struct lwq_node rq_bc_list; /* Callback service list */ unsigned long rq_bc_pa_state; /* Backchannel prealloc state */ struct list_head rq_bc_pa_list; /* Backchannel prealloc list */ #endif /* CONFIG_SUNRPC_BACKCHANEL */ @@ -351,6 +352,8 @@ struct xprt_create { struct rpc_xprt_switch *bc_xps; unsigned int flags; struct xprtsec_parms xprtsec; + unsigned long connect_timeout; + unsigned long reconnect_timeout; }; struct xprt_class { diff --git a/include/linux/superhyway.h b/include/linux/superhyway.h deleted file mode 100644 index 8d3376775813..000000000000 --- a/include/linux/superhyway.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * include/linux/superhyway.h - * - * SuperHyway Bus definitions - * - * Copyright (C) 2004, 2005 Paul Mundt <lethal@linux-sh.org> - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ -#ifndef __LINUX_SUPERHYWAY_H -#define __LINUX_SUPERHYWAY_H - -#include <linux/device.h> - -/* - * SuperHyway IDs - */ -#define SUPERHYWAY_DEVICE_ID_SH5_DMAC 0x0183 - -struct superhyway_vcr_info { - u8 perr_flags; /* P-port Error flags */ - u8 merr_flags; /* Module Error flags */ - u16 mod_vers; /* Module Version */ - u16 mod_id; /* Module ID */ - u8 bot_mb; /* Bottom Memory block */ - u8 top_mb; /* Top Memory block */ -}; - -struct superhyway_ops { - int (*read_vcr)(unsigned long base, struct superhyway_vcr_info *vcr); - int (*write_vcr)(unsigned long base, struct superhyway_vcr_info vcr); -}; - -struct superhyway_bus { - struct superhyway_ops *ops; -}; - -extern struct superhyway_bus superhyway_channels[]; - -struct superhyway_device_id { - unsigned int id; - unsigned long driver_data; -}; - -struct superhyway_device; -extern struct bus_type superhyway_bus_type; - -struct superhyway_driver { - char *name; - - const struct superhyway_device_id *id_table; - struct device_driver drv; - - int (*probe)(struct superhyway_device *dev, const struct superhyway_device_id *id); - void (*remove)(struct superhyway_device *dev); -}; - -#define to_superhyway_driver(d) container_of((d), struct superhyway_driver, drv) - -struct superhyway_device { - char name[32]; - - struct device dev; - - struct superhyway_device_id id; - struct superhyway_driver *drv; - struct superhyway_bus *bus; - - int num_resources; - struct resource *resource; - struct superhyway_vcr_info vcr; -}; - -#define to_superhyway_device(d) container_of((d), struct superhyway_device, dev) - -#define superhyway_get_drvdata(d) dev_get_drvdata(&(d)->dev) -#define superhyway_set_drvdata(d,p) dev_set_drvdata(&(d)->dev, (p)) - -static inline int -superhyway_read_vcr(struct superhyway_device *dev, unsigned long base, - struct superhyway_vcr_info *vcr) -{ - return dev->bus->ops->read_vcr(base, vcr); -} - -static inline int -superhyway_write_vcr(struct superhyway_device *dev, unsigned long base, - struct superhyway_vcr_info vcr) -{ - return dev->bus->ops->write_vcr(base, vcr); -} - -extern int superhyway_scan_bus(struct superhyway_bus *); - -/* drivers/sh/superhyway/superhyway.c */ -int superhyway_register_driver(struct superhyway_driver *); -void superhyway_unregister_driver(struct superhyway_driver *); -int superhyway_add_device(unsigned long base, struct superhyway_device *, struct superhyway_bus *); -int superhyway_add_devices(struct superhyway_bus *bus, struct superhyway_device **devices, int nr_devices); - -/* drivers/sh/superhyway/superhyway-sysfs.c */ -extern const struct attribute_group *superhyway_dev_groups[]; - -#endif /* __LINUX_SUPERHYWAY_H */ - diff --git a/include/linux/surface_aggregator/controller.h b/include/linux/surface_aggregator/controller.h index cb7980805920..5b67f0f47d80 100644 --- a/include/linux/surface_aggregator/controller.h +++ b/include/linux/surface_aggregator/controller.h @@ -44,7 +44,7 @@ struct ssam_event { u8 command_id; u8 instance_id; u16 length; - u8 data[]; + u8 data[] __counted_by(length); }; /** diff --git a/include/linux/swait.h b/include/linux/swait.h index 6a8c22b8c2a5..d324419482a0 100644 --- a/include/linux/swait.h +++ b/include/linux/swait.h @@ -146,7 +146,7 @@ static inline bool swq_has_sleeper(struct swait_queue_head *wq) extern void swake_up_one(struct swait_queue_head *q); extern void swake_up_all(struct swait_queue_head *q); -extern void swake_up_locked(struct swait_queue_head *q); +extern void swake_up_locked(struct swait_queue_head *q, int wake_flags); extern void prepare_to_swait_exclusive(struct swait_queue_head *q, struct swait_queue *wait, int state); extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state); diff --git a/include/linux/swap.h b/include/linux/swap.h index 456546443f1f..f6dd6575b905 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -298,14 +298,11 @@ struct swap_info_struct { unsigned int __percpu *cluster_next_cpu; /*percpu index for next allocation */ struct percpu_cluster __percpu *percpu_cluster; /* per cpu's swap location */ struct rb_root swap_extent_root;/* root of the swap extent rbtree */ + struct bdev_handle *bdev_handle;/* open handle of the bdev */ struct block_device *bdev; /* swap device or bdev of swap file */ struct file *swap_file; /* seldom referenced */ unsigned int old_block_size; /* seldom referenced */ struct completion comp; /* seldom referenced */ -#ifdef CONFIG_FRONTSWAP - unsigned long *frontswap_map; /* frontswap in-use, one bit per page */ - atomic_t frontswap_pages; /* frontswap pages in-use counter */ -#endif spinlock_t lock; /* * protect map scan related fields like * swap_map, lowest_bit, highest_bit, @@ -337,15 +334,13 @@ struct swap_info_struct { */ }; -static inline swp_entry_t folio_swap_entry(struct folio *folio) +static inline swp_entry_t page_swap_entry(struct page *page) { - swp_entry_t entry = { .val = page_private(&folio->page) }; - return entry; -} + struct folio *folio = page_folio(page); + swp_entry_t entry = folio->swap; -static inline void folio_set_swap_entry(struct folio *folio, swp_entry_t entry) -{ - folio->private = (void *)entry.val; + entry.val += folio_page_idx(folio, page); + return entry; } /* linux/mm/workingset.c */ @@ -630,11 +625,6 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) } #endif -#ifdef CONFIG_ZSWAP -extern u64 zswap_pool_total_size; -extern atomic_t zswap_stored_pages; -#endif - #if defined(CONFIG_SWAP) && defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) void __folio_throttle_swaprate(struct folio *folio, gfp_t gfp); static inline void folio_throttle_swaprate(struct folio *folio, gfp_t gfp) diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h index 7ed529a77c5b..99e3ed469e88 100644 --- a/include/linux/swapfile.h +++ b/include/linux/swapfile.h @@ -2,11 +2,6 @@ #ifndef _LINUX_SWAPFILE_H #define _LINUX_SWAPFILE_H -/* - * these were static in swapfile.c but frontswap.c needs them and we don't - * want to expose them to the dozens of source files that include swap.h - */ -extern struct swap_info_struct *swap_info[]; extern unsigned long generic_max_swapfile_size(void); unsigned long arch_max_swapfile_size(void); diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 4c932cb45e0b..bff1e8d97de0 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -393,7 +393,12 @@ static inline bool is_migration_entry_dirty(swp_entry_t entry) typedef unsigned long pte_marker; #define PTE_MARKER_UFFD_WP BIT(0) -#define PTE_MARKER_SWAPIN_ERROR BIT(1) +/* + * "Poisoned" here is meant in the very general sense of "future accesses are + * invalid", instead of referring very specifically to hardware memory errors. + * This marker is meant to represent any of various different causes of this. + */ +#define PTE_MARKER_POISONED BIT(1) #define PTE_MARKER_MASK (BIT(2) - 1) static inline swp_entry_t make_pte_marker_entry(pte_marker marker) @@ -421,15 +426,15 @@ static inline pte_t make_pte_marker(pte_marker marker) return swp_entry_to_pte(make_pte_marker_entry(marker)); } -static inline swp_entry_t make_swapin_error_entry(void) +static inline swp_entry_t make_poisoned_swp_entry(void) { - return make_pte_marker_entry(PTE_MARKER_SWAPIN_ERROR); + return make_pte_marker_entry(PTE_MARKER_POISONED); } -static inline int is_swapin_error_entry(swp_entry_t entry) +static inline int is_poisoned_swp_entry(swp_entry_t entry) { return is_pte_marker_entry(entry) && - (pte_marker_get(entry) & PTE_MARKER_SWAPIN_ERROR); + (pte_marker_get(entry) & PTE_MARKER_POISONED); } /* diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 4e52cd5e0bdc..ecde0312dd52 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -8,6 +8,7 @@ #include <linux/types.h> #include <linux/limits.h> #include <linux/spinlock.h> +#include <linux/workqueue.h> struct device; struct page; @@ -62,8 +63,7 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, #ifdef CONFIG_SWIOTLB /** - * struct io_tlb_mem - IO TLB Memory Pool Descriptor - * + * struct io_tlb_pool - IO TLB memory pool descriptor * @start: The start address of the swiotlb memory pool. Used to do a quick * range check to see if the memory was in fact allocated by this * API. @@ -73,19 +73,48 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, * @vaddr: The vaddr of the swiotlb memory pool. The swiotlb memory pool * may be remapped in the memory encrypted case and store virtual * address for bounce buffer operation. - * @nslabs: The number of IO TLB blocks (in groups of 64) between @start and - * @end. For default swiotlb, this is command line adjustable via - * setup_io_tlb_npages. - * @list: The free list describing the number of free entries available - * from each index. - * @orig_addr: The original address corresponding to a mapped entry. - * @alloc_size: Size of the allocated buffer. + * @nslabs: The number of IO TLB slots between @start and @end. For the + * default swiotlb, this can be adjusted with a boot parameter, + * see setup_io_tlb_npages(). + * @late_alloc: %true if allocated using the page allocator. + * @nareas: Number of areas in the pool. + * @area_nslabs: Number of slots in each area. + * @areas: Array of memory area descriptors. + * @slots: Array of slot descriptors. + * @node: Member of the IO TLB memory pool list. + * @rcu: RCU head for swiotlb_dyn_free(). + * @transient: %true if transient memory pool. + */ +struct io_tlb_pool { + phys_addr_t start; + phys_addr_t end; + void *vaddr; + unsigned long nslabs; + bool late_alloc; + unsigned int nareas; + unsigned int area_nslabs; + struct io_tlb_area *areas; + struct io_tlb_slot *slots; +#ifdef CONFIG_SWIOTLB_DYNAMIC + struct list_head node; + struct rcu_head rcu; + bool transient; +#endif +}; + +/** + * struct io_tlb_mem - Software IO TLB allocator + * @defpool: Default (initial) IO TLB memory pool descriptor. + * @pool: IO TLB memory pool descriptor (if not dynamic). + * @nslabs: Total number of IO TLB slabs in all pools. * @debugfs: The dentry to debugfs. - * @late_alloc: %true if allocated using the page allocator * @force_bounce: %true if swiotlb bouncing is forced * @for_alloc: %true if the pool is used for memory allocation - * @nareas: The area number in the pool. - * @area_nslabs: The slot number in the area. + * @can_grow: %true if more pools can be allocated dynamically. + * @phys_limit: Maximum allowed physical address. + * @lock: Lock to synchronize changes to the list. + * @pools: List of IO TLB memory pool descriptors (if dynamic). + * @dyn_alloc: Dynamic IO TLB pool allocation work. * @total_used: The total number of slots in the pool that are currently used * across all areas. Used only for calculating used_hiwater in * debugfs. @@ -93,30 +122,73 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, * in debugfs. */ struct io_tlb_mem { - phys_addr_t start; - phys_addr_t end; - void *vaddr; + struct io_tlb_pool defpool; unsigned long nslabs; struct dentry *debugfs; - bool late_alloc; bool force_bounce; bool for_alloc; - unsigned int nareas; - unsigned int area_nslabs; - struct io_tlb_area *areas; - struct io_tlb_slot *slots; +#ifdef CONFIG_SWIOTLB_DYNAMIC + bool can_grow; + u64 phys_limit; + spinlock_t lock; + struct list_head pools; + struct work_struct dyn_alloc; +#endif #ifdef CONFIG_DEBUG_FS atomic_long_t total_used; atomic_long_t used_hiwater; #endif }; -extern struct io_tlb_mem io_tlb_default_mem; +#ifdef CONFIG_SWIOTLB_DYNAMIC + +struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr); + +#else + +static inline struct io_tlb_pool *swiotlb_find_pool(struct device *dev, + phys_addr_t paddr) +{ + return &dev->dma_io_tlb_mem->defpool; +} + +#endif + +/** + * is_swiotlb_buffer() - check if a physical address belongs to a swiotlb + * @dev: Device which has mapped the buffer. + * @paddr: Physical address within the DMA buffer. + * + * Check if @paddr points into a bounce buffer. + * + * Return: + * * %true if @paddr points into a bounce buffer + * * %false otherwise + */ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; - return mem && paddr >= mem->start && paddr < mem->end; + if (!mem) + return false; + +#ifdef CONFIG_SWIOTLB_DYNAMIC + /* + * All SWIOTLB buffer addresses must have been returned by + * swiotlb_tbl_map_single() and passed to a device driver. + * If a SWIOTLB address is checked on another CPU, then it was + * presumably loaded by the device driver from an unspecified private + * data structure. Make sure that this load is ordered before reading + * dev->dma_uses_io_tlb here and mem->pools in swiotlb_find_pool(). + * + * This barrier pairs with smp_mb() in swiotlb_find_slots(). + */ + smp_rmb(); + return READ_ONCE(dev->dma_uses_io_tlb) && + swiotlb_find_pool(dev, paddr); +#else + return paddr >= mem->defpool.start && paddr < mem->defpool.end; +#endif } static inline bool is_swiotlb_force_bounce(struct device *dev) @@ -128,13 +200,22 @@ static inline bool is_swiotlb_force_bounce(struct device *dev) void swiotlb_init(bool addressing_limited, unsigned int flags); void __init swiotlb_exit(void); +void swiotlb_dev_init(struct device *dev); size_t swiotlb_max_mapping_size(struct device *dev); +bool is_swiotlb_allocated(void); bool is_swiotlb_active(struct device *dev); void __init swiotlb_adjust_size(unsigned long size); +phys_addr_t default_swiotlb_base(void); +phys_addr_t default_swiotlb_limit(void); #else static inline void swiotlb_init(bool addressing_limited, unsigned int flags) { } + +static inline void swiotlb_dev_init(struct device *dev) +{ +} + static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) { return false; @@ -151,6 +232,11 @@ static inline size_t swiotlb_max_mapping_size(struct device *dev) return SIZE_MAX; } +static inline bool is_swiotlb_allocated(void) +{ + return false; +} + static inline bool is_swiotlb_active(struct device *dev) { return false; @@ -159,6 +245,16 @@ static inline bool is_swiotlb_active(struct device *dev) static inline void swiotlb_adjust_size(unsigned long size) { } + +static inline phys_addr_t default_swiotlb_base(void) +{ + return 0; +} + +static inline phys_addr_t default_swiotlb_limit(void) +{ + return 0; +} #endif /* CONFIG_SWIOTLB */ extern void swiotlb_print_info(void); diff --git a/include/linux/switchtec.h b/include/linux/switchtec.h index 48fabe36509e..8d8fac1626bd 100644 --- a/include/linux/switchtec.h +++ b/include/linux/switchtec.h @@ -41,6 +41,7 @@ enum { enum switchtec_gen { SWITCHTEC_GEN3, SWITCHTEC_GEN4, + SWITCHTEC_GEN5, }; struct mrpc_regs { diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 03e3d0121d5e..fd9d12de7e92 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -284,22 +284,6 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) #endif /* - * Called before coming back to user-mode. Returning to user-mode with an - * address limit different than USER_DS can allow to overwrite kernel memory. - */ -static inline void addr_limit_user_check(void) -{ -#ifdef TIF_FSCHECK - if (!test_thread_flag(TIF_FSCHECK)) - return; -#endif - -#ifdef TIF_FSCHECK - clear_thread_flag(TIF_FSCHECK); -#endif -} - -/* * These syscall function prototypes are kept in the same order as * include/uapi/asm-generic/unistd.h. Architecture specific entries go below, * followed by deprecated or obsolete system calls. @@ -371,7 +355,6 @@ asmlinkage long sys_lremovexattr(const char __user *path, const char __user *name); asmlinkage long sys_fremovexattr(int fd, const char __user *name); asmlinkage long sys_getcwd(char __user *buf, unsigned long size); -asmlinkage long sys_lookup_dcookie(u64 cookie64, char __user *buf, size_t len); asmlinkage long sys_eventfd2(unsigned int count, int flags); asmlinkage long sys_epoll_create1(int flags); asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, @@ -438,8 +421,10 @@ asmlinkage long sys_chdir(const char __user *filename); asmlinkage long sys_fchdir(unsigned int fd); asmlinkage long sys_chroot(const char __user *filename); asmlinkage long sys_fchmod(unsigned int fd, umode_t mode); -asmlinkage long sys_fchmodat(int dfd, const char __user * filename, +asmlinkage long sys_fchmodat(int dfd, const char __user *filename, umode_t mode); +asmlinkage long sys_fchmodat2(int dfd, const char __user *filename, + umode_t mode, unsigned int flags); asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, int flag); asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group); @@ -563,6 +548,16 @@ asmlinkage long sys_set_robust_list(struct robust_list_head __user *head, asmlinkage long sys_futex_waitv(struct futex_waitv *waiters, unsigned int nr_futexes, unsigned int flags, struct __kernel_timespec __user *timeout, clockid_t clockid); + +asmlinkage long sys_futex_wake(void __user *uaddr, unsigned long mask, int nr, unsigned int flags); + +asmlinkage long sys_futex_wait(void __user *uaddr, unsigned long val, unsigned long mask, + unsigned int flags, struct __kernel_timespec __user *timespec, + clockid_t clockid); + +asmlinkage long sys_futex_requeue(struct futex_waitv __user *waiters, + unsigned int flags, int nr_wake, int nr_requeue); + asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp, struct __kernel_timespec __user *rmtp); asmlinkage long sys_nanosleep_time32(struct old_timespec32 __user *rqtp, @@ -953,6 +948,7 @@ asmlinkage long sys_set_mempolicy_home_node(unsigned long start, unsigned long l asmlinkage long sys_cachestat(unsigned int fd, struct cachestat_range __user *cstat_range, struct cachestat __user *cstat, unsigned int flags); +asmlinkage long sys_map_shadow_stack(unsigned long addr, unsigned long size, unsigned int flags); /* * Architecture-specific system calls diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 59d451f455bf..61b40ea81f4d 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -159,12 +159,22 @@ struct ctl_node { struct ctl_table_header *header; }; -/* struct ctl_table_header is used to maintain dynamic lists of - struct ctl_table trees. */ +/** + * struct ctl_table_header - maintains dynamic lists of struct ctl_table trees + * @ctl_table: pointer to the first element in ctl_table array + * @ctl_table_size: number of elements pointed by @ctl_table + * @used: The entry will never be touched when equal to 0. + * @count: Upped every time something is added to @inodes and downed every time + * something is removed from inodes + * @nreg: When nreg drops to 0 the ctl_table_header will be unregistered. + * @rcu: Delays the freeing of the inode. Introduced with "unfuck proc_sysctl ->d_compare()" + * + */ struct ctl_table_header { union { struct { struct ctl_table *ctl_table; + int ctl_table_size; int used; int count; int nreg; @@ -205,6 +215,9 @@ struct ctl_path { const char *procname; }; +#define register_sysctl(path, table) \ + register_sysctl_sz(path, table, ARRAY_SIZE(table)) + #ifdef CONFIG_SYSCTL void proc_sys_poll_notify(struct ctl_table_poll *poll); @@ -216,17 +229,20 @@ extern void retire_sysctl_set(struct ctl_table_set *set); struct ctl_table_header *__register_sysctl_table( struct ctl_table_set *set, - const char *path, struct ctl_table *table); -struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table); + const char *path, struct ctl_table *table, size_t table_size); +struct ctl_table_header *register_sysctl_sz(const char *path, struct ctl_table *table, + size_t table_size); void unregister_sysctl_table(struct ctl_table_header * table); extern int sysctl_init_bases(void); extern void __register_sysctl_init(const char *path, struct ctl_table *table, - const char *table_name); -#define register_sysctl_init(path, table) __register_sysctl_init(path, table, #table) + const char *table_name, size_t table_size); +#define register_sysctl_init(path, table) \ + __register_sysctl_init(path, table, #table, ARRAY_SIZE(table)) extern struct ctl_table_header *register_sysctl_mount_point(const char *path); void do_sysctl_args(void); +bool sysctl_is_alias(char *param); int do_proc_douintvec(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos, int (*conv)(unsigned long *lvalp, @@ -252,7 +268,9 @@ static inline struct ctl_table_header *register_sysctl_mount_point(const char *p return NULL; } -static inline struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table) +static inline struct ctl_table_header *register_sysctl_sz(const char *path, + struct ctl_table *table, + size_t table_size) { return NULL; } @@ -270,6 +288,11 @@ static inline void setup_sysctl_set(struct ctl_table_set *p, static inline void do_sysctl_args(void) { } + +static inline bool sysctl_is_alias(char *param) +{ + return false; +} #endif /* CONFIG_SYSCTL */ int sysctl_max_threads(struct ctl_table *table, int write, void *buffer, diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h index c1ef5fc60a3c..19cb803dd5ec 100644 --- a/include/linux/sysfb.h +++ b/include/linux/sysfb.h @@ -9,7 +9,8 @@ #include <linux/kernel.h> #include <linux/platform_data/simplefb.h> -#include <linux/screen_info.h> + +struct screen_info; enum { M_I17, /* 17-Inch iMac */ diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index fd3fe5c8c17f..b717a70219f6 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -181,6 +181,8 @@ struct bin_attribute { char *, loff_t, size_t); ssize_t (*write)(struct file *, struct kobject *, struct bin_attribute *, char *, loff_t, size_t); + loff_t (*llseek)(struct file *, struct kobject *, struct bin_attribute *, + loff_t, int); int (*mmap)(struct file *, struct kobject *, struct bin_attribute *attr, struct vm_area_struct *vma); }; diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h index 3a582ec7a2f1..bdca467ebb77 100644 --- a/include/linux/sysrq.h +++ b/include/linux/sysrq.h @@ -30,7 +30,7 @@ #define SYSRQ_ENABLE_RTNICE 0x0100 struct sysrq_key_op { - void (* const handler)(int); + void (* const handler)(u8); const char * const help_msg; const char * const action_msg; const int enable_mask; @@ -43,10 +43,10 @@ struct sysrq_key_op { * are available -- else NULL's). */ -void handle_sysrq(int key); -void __handle_sysrq(int key, bool check_mask); -int register_sysrq_key(int key, const struct sysrq_key_op *op); -int unregister_sysrq_key(int key, const struct sysrq_key_op *op); +void handle_sysrq(u8 key); +void __handle_sysrq(u8 key, bool check_mask); +int register_sysrq_key(u8 key, const struct sysrq_key_op *op); +int unregister_sysrq_key(u8 key, const struct sysrq_key_op *op); extern const struct sysrq_key_op *__sysrq_reboot_op; int sysrq_toggle_support(int enable_mask); @@ -54,20 +54,20 @@ int sysrq_mask(void); #else -static inline void handle_sysrq(int key) +static inline void handle_sysrq(u8 key) { } -static inline void __handle_sysrq(int key, bool check_mask) +static inline void __handle_sysrq(u8 key, bool check_mask) { } -static inline int register_sysrq_key(int key, const struct sysrq_key_op *op) +static inline int register_sysrq_key(u8 key, const struct sysrq_key_op *op) { return -EINVAL; } -static inline int unregister_sysrq_key(int key, const struct sysrq_key_op *op) +static inline int unregister_sysrq_key(u8 key, const struct sysrq_key_op *op) { return -EINVAL; } diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 91a37c99ba66..b646b574b060 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -152,6 +152,7 @@ struct tcp_request_sock { u64 snt_synack; /* first SYNACK sent time */ bool tfo_listener; bool is_mptcp; + bool req_usec_ts; #if IS_ENABLED(CONFIG_MPTCP) bool drop_req; #endif @@ -165,6 +166,11 @@ struct tcp_request_sock { * after data-in-SYN. */ u8 syn_tos; +#ifdef CONFIG_TCP_AO + u8 ao_keyid; + u8 ao_rcv_next; + bool used_tcp_ao; +#endif }; static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) @@ -172,6 +178,17 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) return (struct tcp_request_sock *)req; } +static inline bool tcp_rsk_used_ao(const struct request_sock *req) +{ +#ifndef CONFIG_TCP_AO + return false; +#else + return tcp_rsk(req)->used_tcp_ao; +#endif +} + +#define TCP_RMEM_TO_WIN_SCALE 8 + struct tcp_sock { /* inet_connection_sock has to be the first member of tcp_sock */ struct inet_connection_sock inet_conn; @@ -238,7 +255,7 @@ struct tcp_sock { u32 window_clamp; /* Maximal window to advertise */ u32 rcv_ssthresh; /* Current window clamp */ - + u8 scaling_ratio; /* see tcp_win_from_space() */ /* Information of the most recently (s)acked skb */ struct tcp_rack { u64 mstamp; /* (Re)sent time of the skb */ @@ -255,7 +272,8 @@ struct tcp_sock { u8 compressed_ack; u8 dup_ack_counter:2, tlp_retrans:1, /* TLP is a retransmission */ - unused:5; + tcp_usec_ts:1, /* TSval values in usec */ + unused:4; u32 chrono_start; /* Start time in jiffies of a TCP chrono */ u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ u8 chrono_type:2, /* current chronograph type */ @@ -375,6 +393,14 @@ struct tcp_sock { * Total data bytes retransmitted */ u32 total_retrans; /* Total retransmits for entire connection */ + u32 rto_stamp; /* Start time (ms) of last CA_Loss recovery */ + u16 total_rto; /* Total number of RTO timeouts, including + * SYN/SYN-ACK and recurring timeouts. + */ + u16 total_rto_recoveries; /* Total number of RTO recoveries, + * including any unfinished recovery. + */ + u32 total_rto_time; /* ms spent in (completed) RTO recoveries. */ u32 urg_seq; /* Seq of received urgent pointer */ unsigned int keepalive_time; /* time before keep alive takes place */ @@ -435,13 +461,18 @@ struct tcp_sock { bool syn_smc; /* SYN includes SMC */ #endif -#ifdef CONFIG_TCP_MD5SIG -/* TCP AF-Specific parts; only used by MD5 Signature support so far */ +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) +/* TCP AF-Specific parts; only used by TCP-AO/MD5 Signature support so far */ const struct tcp_sock_af_ops *af_specific; +#ifdef CONFIG_TCP_MD5SIG /* TCP MD5 Signature Option information */ struct tcp_md5sig_info __rcu *md5sig_info; #endif +#ifdef CONFIG_TCP_AO + struct tcp_ao_info __rcu *ao_info; +#endif +#endif /* TCP fastopen related information */ struct tcp_fastopen_request *fastopen_req; @@ -461,15 +492,17 @@ enum tsq_enum { TCP_MTU_REDUCED_DEFERRED, /* tcp_v{4|6}_err() could not call * tcp_v{4|6}_mtu_reduced() */ + TCP_ACK_DEFERRED, /* TX pure ack is deferred */ }; enum tsq_flags { - TSQF_THROTTLED = (1UL << TSQ_THROTTLED), - TSQF_QUEUED = (1UL << TSQ_QUEUED), - TCPF_TSQ_DEFERRED = (1UL << TCP_TSQ_DEFERRED), - TCPF_WRITE_TIMER_DEFERRED = (1UL << TCP_WRITE_TIMER_DEFERRED), - TCPF_DELACK_TIMER_DEFERRED = (1UL << TCP_DELACK_TIMER_DEFERRED), - TCPF_MTU_REDUCED_DEFERRED = (1UL << TCP_MTU_REDUCED_DEFERRED), + TSQF_THROTTLED = BIT(TSQ_THROTTLED), + TSQF_QUEUED = BIT(TSQ_QUEUED), + TCPF_TSQ_DEFERRED = BIT(TCP_TSQ_DEFERRED), + TCPF_WRITE_TIMER_DEFERRED = BIT(TCP_WRITE_TIMER_DEFERRED), + TCPF_DELACK_TIMER_DEFERRED = BIT(TCP_DELACK_TIMER_DEFERRED), + TCPF_MTU_REDUCED_DEFERRED = BIT(TCP_MTU_REDUCED_DEFERRED), + TCPF_ACK_DEFERRED = BIT(TCP_ACK_DEFERRED), }; #define tcp_sk(ptr) container_of_const(ptr, struct tcp_sock, inet_conn.icsk_inet.sk) @@ -495,6 +528,9 @@ struct tcp_timewait_sock { #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *tw_md5_key; #endif +#ifdef CONFIG_TCP_AO + struct tcp_ao_info __rcu *ao_info; +#endif }; static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) @@ -562,6 +598,11 @@ void __tcp_sock_set_nodelay(struct sock *sk, bool on); void tcp_sock_set_nodelay(struct sock *sk); void tcp_sock_set_quickack(struct sock *sk, int val); int tcp_sock_set_syncnt(struct sock *sk, int val); -void tcp_sock_set_user_timeout(struct sock *sk, u32 val); +int tcp_sock_set_user_timeout(struct sock *sk, int val); + +static inline bool dst_tcp_usec_ts(const struct dst_entry *dst) +{ + return dst_feature(dst, RTAX_FEATURE_TCP_USEC_TS); +} #endif /* _LINUX_TCP_H */ diff --git a/include/linux/thermal.h b/include/linux/thermal.h index dee66ade89a0..cee814d5d1ac 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -53,6 +53,20 @@ enum thermal_notify_event { THERMAL_EVENT_KEEP_ALIVE, /* Request for user space handler to respond */ }; +/** + * struct thermal_trip - representation of a point in temperature domain + * @temperature: temperature value in miliCelsius + * @hysteresis: relative hysteresis in miliCelsius + * @type: trip point type + * @priv: pointer to driver data associated with this trip + */ +struct thermal_trip { + int temperature; + int hysteresis; + enum thermal_trip_type type; + void *priv; +}; + struct thermal_zone_device_ops { int (*bind) (struct thermal_zone_device *, struct thermal_cooling_device *); @@ -62,32 +76,16 @@ struct thermal_zone_device_ops { int (*set_trips) (struct thermal_zone_device *, int, int); int (*change_mode) (struct thermal_zone_device *, enum thermal_device_mode); - int (*get_trip_type) (struct thermal_zone_device *, int, - enum thermal_trip_type *); - int (*get_trip_temp) (struct thermal_zone_device *, int, int *); int (*set_trip_temp) (struct thermal_zone_device *, int, int); - int (*get_trip_hyst) (struct thermal_zone_device *, int, int *); int (*set_trip_hyst) (struct thermal_zone_device *, int, int); int (*get_crit_temp) (struct thermal_zone_device *, int *); int (*set_emul_temp) (struct thermal_zone_device *, int); - int (*get_trend) (struct thermal_zone_device *, int, - enum thermal_trend *); + int (*get_trend) (struct thermal_zone_device *, + const struct thermal_trip *, enum thermal_trend *); void (*hot)(struct thermal_zone_device *); void (*critical)(struct thermal_zone_device *); }; -/** - * struct thermal_trip - representation of a point in temperature domain - * @temperature: temperature value in miliCelsius - * @hysteresis: relative hysteresis in miliCelsius - * @type: trip point type - */ -struct thermal_trip { - int temperature; - int hysteresis; - enum thermal_trip_type type; -}; - struct thermal_cooling_device_ops { int (*get_max_state) (struct thermal_cooling_device *, unsigned long *); int (*get_cur_state) (struct thermal_cooling_device *, unsigned long *); @@ -124,7 +122,6 @@ struct thermal_cooling_device { * @devdata: private pointer for device private data * @trips: an array of struct thermal_trip * @num_trips: number of trip points the thermal zone supports - * @trips_disabled; bitmap for disabled trips * @passive_delay_jiffies: number of jiffies to wait between polls when * performing passive cooling. * @polling_delay_jiffies: number of jiffies to wait between polls when @@ -165,7 +162,6 @@ struct thermal_zone_device { void *devdata; struct thermal_trip *trips; int num_trips; - unsigned long trips_disabled; /* bitmap for disabled trips */ unsigned long passive_delay_jiffies; unsigned long polling_delay_jiffies; int temperature; @@ -203,7 +199,8 @@ struct thermal_governor { char name[THERMAL_NAME_LENGTH]; int (*bind_to_tz)(struct thermal_zone_device *tz); void (*unbind_from_tz)(struct thermal_zone_device *tz); - int (*throttle)(struct thermal_zone_device *tz, int trip); + int (*throttle)(struct thermal_zone_device *tz, + const struct thermal_trip *trip); struct list_head governor_list; }; @@ -287,6 +284,12 @@ int thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id, int thermal_zone_set_trip(struct thermal_zone_device *tz, int trip_id, const struct thermal_trip *trip); +int for_each_thermal_trip(struct thermal_zone_device *tz, + int (*cb)(struct thermal_trip *, void *), + void *data); +int thermal_zone_for_each_trip(struct thermal_zone_device *tz, + int (*cb)(struct thermal_trip *, void *), + void *data); int thermal_zone_get_num_trips(struct thermal_zone_device *tz); int thermal_zone_get_crit_temp(struct thermal_zone_device *tz, int *temp); @@ -299,26 +302,40 @@ int thermal_acpi_critical_trip_temp(struct acpi_device *adev, int *ret_temp); #endif #ifdef CONFIG_THERMAL -struct thermal_zone_device *thermal_zone_device_register(const char *, int, int, - void *, struct thermal_zone_device_ops *, - const struct thermal_zone_params *, int, int); - -void thermal_zone_device_unregister(struct thermal_zone_device *); - -struct thermal_zone_device * -thermal_zone_device_register_with_trips(const char *, struct thermal_trip *, int, int, - void *, struct thermal_zone_device_ops *, - const struct thermal_zone_params *, int, int); +struct thermal_zone_device *thermal_zone_device_register_with_trips( + const char *type, + struct thermal_trip *trips, + int num_trips, int mask, + void *devdata, + struct thermal_zone_device_ops *ops, + const struct thermal_zone_params *tzp, + int passive_delay, int polling_delay); + +struct thermal_zone_device *thermal_tripless_zone_device_register( + const char *type, + void *devdata, + struct thermal_zone_device_ops *ops, + const struct thermal_zone_params *tzp); + +void thermal_zone_device_unregister(struct thermal_zone_device *tz); void *thermal_zone_device_priv(struct thermal_zone_device *tzd); const char *thermal_zone_device_type(struct thermal_zone_device *tzd); int thermal_zone_device_id(struct thermal_zone_device *tzd); struct device *thermal_zone_device(struct thermal_zone_device *tzd); +int thermal_bind_cdev_to_trip(struct thermal_zone_device *tz, + const struct thermal_trip *trip, + struct thermal_cooling_device *cdev, + unsigned long upper, unsigned long lower, + unsigned int weight); int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *, unsigned long, unsigned long, unsigned int); +int thermal_unbind_cdev_from_trip(struct thermal_zone_device *tz, + const struct thermal_trip *trip, + struct thermal_cooling_device *cdev); int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *); void thermal_zone_device_update(struct thermal_zone_device *, @@ -345,15 +362,26 @@ int thermal_zone_device_enable(struct thermal_zone_device *tz); int thermal_zone_device_disable(struct thermal_zone_device *tz); void thermal_zone_device_critical(struct thermal_zone_device *tz); #else -static inline struct thermal_zone_device *thermal_zone_device_register( - const char *type, int trips, int mask, void *devdata, - struct thermal_zone_device_ops *ops, - const struct thermal_zone_params *tzp, - int passive_delay, int polling_delay) +static inline struct thermal_zone_device *thermal_zone_device_register_with_trips( + const char *type, + struct thermal_trip *trips, + int num_trips, int mask, + void *devdata, + struct thermal_zone_device_ops *ops, + const struct thermal_zone_params *tzp, + int passive_delay, int polling_delay) { return ERR_PTR(-ENODEV); } -static inline void thermal_zone_device_unregister( - struct thermal_zone_device *tz) + +static inline struct thermal_zone_device *thermal_tripless_zone_device_register( + const char *type, + void *devdata, + struct thermal_zone_device_ops *ops, + const struct thermal_zone_params *tzp) +{ return ERR_PTR(-ENODEV); } + +static inline void thermal_zone_device_unregister(struct thermal_zone_device *tz) { } + static inline struct thermal_cooling_device * thermal_cooling_device_register(const char *type, void *devdata, const struct thermal_cooling_device_ops *ops) diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 02333f47c994..6151c210d987 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -175,7 +175,7 @@ void tb_unregister_property_dir(const char *key, struct tb_property_dir *dir); * enum tb_link_width - Thunderbolt/USB4 link width * @TB_LINK_WIDTH_SINGLE: Single lane link * @TB_LINK_WIDTH_DUAL: Dual lane symmetric link - * @TB_LINK_WIDTH_ASYM_TX: Dual lane asymmetric Gen 4 link with 3 trasmitters + * @TB_LINK_WIDTH_ASYM_TX: Dual lane asymmetric Gen 4 link with 3 transmitters * @TB_LINK_WIDTH_ASYM_RX: Dual lane asymmetric Gen 4 link with 3 receivers */ enum tb_link_width { diff --git a/include/linux/ti_wilink_st.h b/include/linux/ti_wilink_st.h index 44a7f9169ac6..10642d4844f0 100644 --- a/include/linux/ti_wilink_st.h +++ b/include/linux/ti_wilink_st.h @@ -271,7 +271,7 @@ long st_kim_stop(void *); void st_kim_complete(void *); void kim_st_list_protocols(struct st_data_s *, void *); -void st_kim_recv(void *, const unsigned char *, long); +void st_kim_recv(void *disc_data, const u8 *data, size_t count); /* diff --git a/include/linux/tick.h b/include/linux/tick.h index 9459fef5b857..716d17f31c45 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -140,14 +140,6 @@ extern unsigned long tick_nohz_get_idle_calls(void); extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); - -static inline void tick_nohz_idle_stop_tick_protected(void) -{ - local_irq_disable(); - tick_nohz_idle_stop_tick(); - local_irq_enable(); -} - #else /* !CONFIG_NO_HZ_COMMON */ #define tick_nohz_enabled (0) static inline int tick_nohz_tick_stopped(void) { return 0; } @@ -170,8 +162,6 @@ static inline ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next) } static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } - -static inline void tick_nohz_idle_stop_tick_protected(void) { } #endif /* !CONFIG_NO_HZ_COMMON */ #ifdef CONFIG_NO_HZ_FULL diff --git a/include/linux/timer.h b/include/linux/timer.h index 9162f275819a..26a545bb0153 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -77,8 +77,7 @@ struct timer_list { .entry = { .next = TIMER_ENTRY_STATIC }, \ .function = (_function), \ .flags = (_flags), \ - __TIMER_LOCKDEP_MAP_INITIALIZER( \ - __FILE__ ":" __stringify(__LINE__)) \ + __TIMER_LOCKDEP_MAP_INITIALIZER(FILE_LINE) \ } #define DEFINE_TIMER(_name, _function) \ diff --git a/include/linux/topology.h b/include/linux/topology.h index fea32377f7c7..52f5850730b3 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -251,7 +251,7 @@ extern const struct cpumask *sched_numa_hop_mask(unsigned int node, unsigned int #else static __always_inline int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node) { - return cpumask_nth(cpu, cpus); + return cpumask_nth_and(cpu, cpus, cpu_online_mask); } static inline const struct cpumask * diff --git a/include/linux/torture.h b/include/linux/torture.h index 7038104463e4..c98d0c83d117 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -81,7 +81,8 @@ static inline void torture_random_init(struct torture_random_state *trsp) } /* Definitions for high-resolution-timer sleeps. */ -int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, struct torture_random_state *trsp); +int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, const enum hrtimer_mode mode, + struct torture_random_state *trsp); int torture_hrtimeout_us(u32 baset_us, u32 fuzzt_ns, struct torture_random_state *trsp); int torture_hrtimeout_ms(u32 baset_ms, u32 fuzzt_us, struct torture_random_state *trsp); int torture_hrtimeout_jiffies(u32 baset_j, struct torture_random_state *trsp); @@ -108,19 +109,27 @@ bool torture_must_stop(void); bool torture_must_stop_irq(void); void torture_kthread_stopping(char *title); int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m, - char *f, struct task_struct **tp); + char *f, struct task_struct **tp, void (*cbf)(struct task_struct *tp)); void _torture_stop_kthread(char *m, struct task_struct **tp); #define torture_create_kthread(n, arg, tp) \ _torture_create_kthread(n, (arg), #n, "Creating " #n " task", \ - "Failed to create " #n, &(tp)) + "Failed to create " #n, &(tp), NULL) +#define torture_create_kthread_cb(n, arg, tp, cbf) \ + _torture_create_kthread(n, (arg), #n, "Creating " #n " task", \ + "Failed to create " #n, &(tp), cbf) #define torture_stop_kthread(n, tp) \ _torture_stop_kthread("Stopping " #n " task", &(tp)) +/* Scheduler-related definitions. */ #ifdef CONFIG_PREEMPTION #define torture_preempt_schedule() __preempt_schedule() #else #define torture_preempt_schedule() do { } while (0) #endif +#if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST) || IS_ENABLED(CONFIG_LOCK_TORTURE_TEST) || IS_MODULE(CONFIG_LOCK_TORTURE_TEST) +long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask); +#endif + #endif /* __LINUX_TORTURE_H */ diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 1e8bbdb8da90..d68ff9b1247f 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -62,13 +62,13 @@ void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...); /* Used to find the offset and length of dynamic fields in trace events */ struct trace_dynamic_info { #ifdef CONFIG_CPU_BIG_ENDIAN - u16 offset; u16 len; + u16 offset; #else - u16 len; u16 offset; + u16 len; #endif -}; +} __packed; /* * The trace entry - the most basic unit of tracing. This is what @@ -492,6 +492,7 @@ enum { EVENT_FILE_FL_TRIGGER_COND_BIT, EVENT_FILE_FL_PID_FILTER_BIT, EVENT_FILE_FL_WAS_ENABLED_BIT, + EVENT_FILE_FL_FREED_BIT, }; extern struct trace_event_file *trace_get_event_file(const char *instance, @@ -630,6 +631,7 @@ extern int __kprobe_event_add_fields(struct dynevent_cmd *cmd, ...); * TRIGGER_COND - When set, one or more triggers has an associated filter * PID_FILTER - When set, the event is filtered based on pid * WAS_ENABLED - Set when enabled to know to clear trace on module removal + * FREED - File descriptor is freed, all fields should be considered invalid */ enum { EVENT_FILE_FL_ENABLED = (1 << EVENT_FILE_FL_ENABLED_BIT), @@ -643,13 +645,14 @@ enum { EVENT_FILE_FL_TRIGGER_COND = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT), EVENT_FILE_FL_PID_FILTER = (1 << EVENT_FILE_FL_PID_FILTER_BIT), EVENT_FILE_FL_WAS_ENABLED = (1 << EVENT_FILE_FL_WAS_ENABLED_BIT), + EVENT_FILE_FL_FREED = (1 << EVENT_FILE_FL_FREED_BIT), }; struct trace_event_file { struct list_head list; struct trace_event_call *event_call; struct event_filter __rcu *filter; - struct dentry *dir; + struct eventfs_inode *ei; struct trace_array *tr; struct trace_subsystem_dir *system; struct list_head triggers; @@ -671,6 +674,7 @@ struct trace_event_file { * caching and such. Which is mostly OK ;-) */ unsigned long flags; + atomic_t ref; /* ref count for opened files */ atomic_t sm_ref; /* soft-mode reference counter */ atomic_t tm_ref; /* trigger-mode reference counter */ }; @@ -761,8 +765,10 @@ struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name); void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp); int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, - u64 *probe_offset, u64 *probe_addr); + u64 *probe_offset, u64 *probe_addr, + unsigned long *missed); int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); +int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); #else static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) { @@ -800,7 +806,7 @@ static inline void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp) static inline int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, u64 *probe_offset, - u64 *probe_addr) + u64 *probe_addr, unsigned long *missed) { return -EOPNOTSUPP; } @@ -809,6 +815,11 @@ bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { return -EOPNOTSUPP; } +static inline int +bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) +{ + return -EOPNOTSUPP; +} #endif enum { @@ -818,6 +829,7 @@ enum { FILTER_RDYN_STRING, FILTER_PTR_STRING, FILTER_TRACE_FN, + FILTER_CPUMASK, FILTER_COMM, FILTER_CPU, FILTER_STACKTRACE, @@ -870,6 +882,7 @@ extern void perf_kprobe_destroy(struct perf_event *event); extern int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, const char **symbol, u64 *probe_offset, u64 *probe_addr, + unsigned long *missed, bool perf_type_tracepoint); #endif #ifdef CONFIG_UPROBE_EVENTS @@ -878,7 +891,8 @@ extern int perf_uprobe_init(struct perf_event *event, extern void perf_uprobe_destroy(struct perf_event *event); extern int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type, const char **filename, - u64 *probe_offset, bool perf_type_tracepoint); + u64 *probe_offset, u64 *probe_addr, + bool perf_type_tracepoint); #endif extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, char *filter_str); diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 6be92bf559fe..3691e0e76a1a 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -14,6 +14,7 @@ struct trace_seq { char buffer[PAGE_SIZE]; struct seq_buf seq; + size_t readpos; int full; }; @@ -22,6 +23,7 @@ trace_seq_init(struct trace_seq *s) { seq_buf_init(&s->seq, s->buffer, PAGE_SIZE); s->full = 0; + s->readpos = 0; } /** diff --git a/include/linux/tracefs.h b/include/linux/tracefs.h index 99912445974c..7a5fe17b6bf9 100644 --- a/include/linux/tracefs.h +++ b/include/linux/tracefs.h @@ -21,6 +21,72 @@ struct file_operations; #ifdef CONFIG_TRACING +struct eventfs_file; + +/** + * eventfs_callback - A callback function to create dynamic files in eventfs + * @name: The name of the file that is to be created + * @mode: return the file mode for the file (RW access, etc) + * @data: data to pass to the created file ops + * @fops: the file operations of the created file + * + * The evetnfs files are dynamically created. The struct eventfs_entry array + * is passed to eventfs_create_dir() or eventfs_create_events_dir() that will + * be used to create the files within those directories. When a lookup + * or access to a file within the directory is made, the struct eventfs_entry + * array is used to find a callback() with the matching name that is being + * referenced (for lookups, the entire array is iterated and each callback + * will be called). + * + * The callback will be called with @name for the name of the file to create. + * The callback can return less than 1 to indicate that no file should be + * created. + * + * If a file is to be created, then @mode should be populated with the file + * mode (permissions) for which the file is created for. This would be + * used to set the created inode i_mode field. + * + * The @data should be set to the data passed to the other file operations + * (read, write, etc). Note, @data will also point to the data passed in + * to eventfs_create_dir() or eventfs_create_events_dir(), but the callback + * can replace the data if it chooses to. Otherwise, the original data + * will be used for the file operation functions. + * + * The @fops should be set to the file operations that will be used to create + * the inode. + * + * NB. This callback is called while holding internal locks of the eventfs + * system. The callback must not call any code that might also call into + * the tracefs or eventfs system or it will risk creating a deadlock. + */ +typedef int (*eventfs_callback)(const char *name, umode_t *mode, void **data, + const struct file_operations **fops); + +/** + * struct eventfs_entry - dynamically created eventfs file call back handler + * @name: Then name of the dynamic file in an eventfs directory + * @callback: The callback to get the fops of the file when it is created + * + * See evenfs_callback() typedef for how to set up @callback. + */ +struct eventfs_entry { + const char *name; + eventfs_callback callback; +}; + +struct eventfs_inode; + +struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent, + const struct eventfs_entry *entries, + int size, void *data); + +struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent, + const struct eventfs_entry *entries, + int size, void *data); + +void eventfs_remove_events_dir(struct eventfs_inode *ei); +void eventfs_remove_dir(struct eventfs_inode *ei); + struct dentry *tracefs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops); diff --git a/include/linux/tsm.h b/include/linux/tsm.h new file mode 100644 index 000000000000..de8324a2223c --- /dev/null +++ b/include/linux/tsm.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TSM_H +#define __TSM_H + +#include <linux/sizes.h> +#include <linux/types.h> + +#define TSM_INBLOB_MAX 64 +#define TSM_OUTBLOB_MAX SZ_32K + +/* + * Privilege level is a nested permission concept to allow confidential + * guests to partition address space, 4-levels are supported. + */ +#define TSM_PRIVLEVEL_MAX 3 + +/** + * struct tsm_desc - option descriptor for generating tsm report blobs + * @privlevel: optional privilege level to associate with @outblob + * @inblob_len: sizeof @inblob + * @inblob: arbitrary input data + */ +struct tsm_desc { + unsigned int privlevel; + size_t inblob_len; + u8 inblob[TSM_INBLOB_MAX]; +}; + +/** + * struct tsm_report - track state of report generation relative to options + * @desc: input parameters to @report_new() + * @outblob_len: sizeof(@outblob) + * @outblob: generated evidence to provider to the attestation agent + * @auxblob_len: sizeof(@auxblob) + * @auxblob: (optional) auxiliary data to the report (e.g. certificate data) + */ +struct tsm_report { + struct tsm_desc desc; + size_t outblob_len; + u8 *outblob; + size_t auxblob_len; + u8 *auxblob; +}; + +/** + * struct tsm_ops - attributes and operations for tsm instances + * @name: tsm id reflected in /sys/kernel/config/tsm/report/$report/provider + * @privlevel_floor: convey base privlevel for nested scenarios + * @report_new: Populate @report with the report blob and auxblob + * (optional), return 0 on successful population, or -errno otherwise + * + * Implementation specific ops, only one is expected to be registered at + * a time i.e. only one of "sev-guest", "tdx-guest", etc. + */ +struct tsm_ops { + const char *name; + const unsigned int privlevel_floor; + int (*report_new)(struct tsm_report *report, void *data); +}; + +extern const struct config_item_type tsm_report_default_type; + +/* publish @privlevel, @privlevel_floor, and @auxblob attributes */ +extern const struct config_item_type tsm_report_extra_type; + +int tsm_register(const struct tsm_ops *ops, void *priv, + const struct config_item_type *type); +int tsm_unregister(const struct tsm_ops *ops); +#endif /* __TSM_H */ diff --git a/include/linux/tty.h b/include/linux/tty.h index e8d5d9997aca..4b6340ac2af2 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -192,13 +192,14 @@ struct tty_operations; */ struct tty_struct { struct kref kref; + int index; struct device *dev; struct tty_driver *driver; + struct tty_port *port; const struct tty_operations *ops; - int index; - struct ld_semaphore ldisc_sem; struct tty_ldisc *ldisc; + struct ld_semaphore ldisc_sem; struct mutex atomic_write_lock; struct mutex legacy_mutex; @@ -209,6 +210,7 @@ struct tty_struct { char name[64]; unsigned long flags; int count; + unsigned int receive_room; struct winsize winsize; struct { @@ -219,16 +221,16 @@ struct tty_struct { } __aligned(sizeof(unsigned long)) flow; struct { - spinlock_t lock; struct pid *pgrp; struct pid *session; + spinlock_t lock; unsigned char pktstatus; bool packet; unsigned long unused[0]; } __aligned(sizeof(unsigned long)) ctrl; bool hw_stopped; - unsigned int receive_room; + bool closing; int flow_change; struct tty_struct *link; @@ -239,15 +241,13 @@ struct tty_struct { void *disc_data; void *driver_data; spinlock_t files_lock; + int write_cnt; + unsigned char *write_buf; + struct list_head tty_files; #define N_TTY_BUF_SIZE 4096 - - int closing; - unsigned char *write_buf; - int write_cnt; struct work_struct SAK_work; - struct tty_port *port; } __randomize_layout; /* Each of a tty's open files has private_data pointing to tty_file_private */ @@ -390,14 +390,12 @@ int vcs_init(void); extern const struct class tty_class; /** - * tty_kref_get - get a tty reference - * @tty: tty device + * tty_kref_get - get a tty reference + * @tty: tty device * - * Return a new reference to a tty object. The caller must hold - * sufficient locks/counts to ensure that their existing reference cannot - * go away + * Returns: a new reference to a tty object. The caller must hold sufficient + * locks/counts to ensure that their existing reference cannot go away */ - static inline struct tty_struct *tty_kref_get(struct tty_struct *tty) { if (tty) @@ -416,8 +414,8 @@ unsigned int tty_chars_in_buffer(struct tty_struct *tty); unsigned int tty_write_room(struct tty_struct *tty); void tty_driver_flush_buffer(struct tty_struct *tty); void tty_unthrottle(struct tty_struct *tty); -int tty_throttle_safe(struct tty_struct *tty); -int tty_unthrottle_safe(struct tty_struct *tty); +bool tty_throttle_safe(struct tty_struct *tty); +bool tty_unthrottle_safe(struct tty_struct *tty); int tty_do_resize(struct tty_struct *tty, struct winsize *ws); int tty_get_icount(struct tty_struct *tty, struct serial_icounter_struct *icount); @@ -435,14 +433,13 @@ void tty_encode_baud_rate(struct tty_struct *tty, speed_t ibaud, speed_t obaud); /** - * tty_get_baud_rate - get tty bit rates - * @tty: tty to query + * tty_get_baud_rate - get tty bit rates + * @tty: tty to query * - * Returns the baud rate as an integer for this terminal. The - * termios lock must be held by the caller and the terminal bit - * flags may be updated. + * Returns: the baud rate as an integer for this terminal. The termios lock + * must be held by the caller and the terminal bit flags may be updated. * - * Locking: none + * Locking: none */ static inline speed_t tty_get_baud_rate(struct tty_struct *tty) { diff --git a/include/linux/tty_buffer.h b/include/linux/tty_buffer.h index 6ceb2789e6c8..31125e3be3c5 100644 --- a/include/linux/tty_buffer.h +++ b/include/linux/tty_buffer.h @@ -12,24 +12,24 @@ struct tty_buffer { struct tty_buffer *next; struct llist_node free; }; - int used; - int size; - int commit; - int lookahead; /* Lazy update on recv, can become less than "read" */ - int read; + unsigned int used; + unsigned int size; + unsigned int commit; + unsigned int lookahead; /* Lazy update on recv, can become less than "read" */ + unsigned int read; bool flags; /* Data points here */ - unsigned long data[]; + u8 data[] __aligned(sizeof(unsigned long)); }; -static inline unsigned char *char_buf_ptr(struct tty_buffer *b, int ofs) +static inline u8 *char_buf_ptr(struct tty_buffer *b, unsigned int ofs) { - return ((unsigned char *)b->data) + ofs; + return b->data + ofs; } -static inline char *flag_buf_ptr(struct tty_buffer *b, int ofs) +static inline u8 *flag_buf_ptr(struct tty_buffer *b, unsigned int ofs) { - return (char *)char_buf_ptr(b, ofs) + b->size; + return char_buf_ptr(b, ofs) + b->size; } struct tty_bufhead { diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index e00034118c7b..18beff0cec1a 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -72,8 +72,8 @@ struct serial_struct; * is closed for the last time freeing up the resources. This is * actually the second part of shutdown for routines that might sleep. * - * @write: ``int ()(struct tty_struct *tty, const unsigned char *buf, - * int count)`` + * @write: ``ssize_t ()(struct tty_struct *tty, const unsigned char *buf, + * size_t count)`` * * This routine is called by the kernel to write a series (@count) of * characters (@buf) to the @tty device. The characters may come from @@ -356,9 +356,8 @@ struct tty_operations { void (*close)(struct tty_struct * tty, struct file * filp); void (*shutdown)(struct tty_struct *tty); void (*cleanup)(struct tty_struct *tty); - int (*write)(struct tty_struct * tty, - const unsigned char *buf, int count); - int (*put_char)(struct tty_struct *tty, unsigned char ch); + ssize_t (*write)(struct tty_struct *tty, const u8 *buf, size_t count); + int (*put_char)(struct tty_struct *tty, u8 ch); void (*flush_chars)(struct tty_struct *tty); unsigned int (*write_room)(struct tty_struct *tty); unsigned int (*chars_in_buffer)(struct tty_struct *tty); diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h index bfaaeee61a05..af4fce98f64e 100644 --- a/include/linux/tty_flip.h +++ b/include/linux/tty_flip.h @@ -10,17 +10,59 @@ struct tty_ldisc; int tty_buffer_set_limit(struct tty_port *port, int limit); unsigned int tty_buffer_space_avail(struct tty_port *port); int tty_buffer_request_room(struct tty_port *port, size_t size); -int tty_insert_flip_string_flags(struct tty_port *port, - const unsigned char *chars, const char *flags, size_t size); -int tty_insert_flip_string_fixed_flag(struct tty_port *port, - const unsigned char *chars, char flag, size_t size); -int tty_prepare_flip_string(struct tty_port *port, unsigned char **chars, - size_t size); +size_t __tty_insert_flip_string_flags(struct tty_port *port, const u8 *chars, + const u8 *flags, bool mutable_flags, + size_t size); +size_t tty_prepare_flip_string(struct tty_port *port, u8 **chars, size_t size); void tty_flip_buffer_push(struct tty_port *port); -int __tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag); -static inline int tty_insert_flip_char(struct tty_port *port, - unsigned char ch, char flag) +/** + * tty_insert_flip_string_fixed_flag - add characters to the tty buffer + * @port: tty port + * @chars: characters + * @flag: flag value for each character + * @size: size + * + * Queue a series of bytes to the tty buffering. All the characters passed are + * marked with the supplied flag. + * + * Returns: the number added. + */ +static inline size_t tty_insert_flip_string_fixed_flag(struct tty_port *port, + const u8 *chars, u8 flag, + size_t size) +{ + return __tty_insert_flip_string_flags(port, chars, &flag, false, size); +} + +/** + * tty_insert_flip_string_flags - add characters to the tty buffer + * @port: tty port + * @chars: characters + * @flags: flag bytes + * @size: size + * + * Queue a series of bytes to the tty buffering. For each character the flags + * array indicates the status of the character. + * + * Returns: the number added. + */ +static inline size_t tty_insert_flip_string_flags(struct tty_port *port, + const u8 *chars, + const u8 *flags, size_t size) +{ + return __tty_insert_flip_string_flags(port, chars, flags, true, size); +} + +/** + * tty_insert_flip_char - add one character to the tty buffer + * @port: tty port + * @ch: character + * @flag: flag byte + * + * Queue a single byte @ch to the tty buffering, with an optional flag. + */ +static inline size_t tty_insert_flip_char(struct tty_port *port, u8 ch, u8 flag) { struct tty_buffer *tb = port->buf.tail; int change; @@ -32,17 +74,17 @@ static inline int tty_insert_flip_char(struct tty_port *port, *char_buf_ptr(tb, tb->used++) = ch; return 1; } - return __tty_insert_flip_char(port, ch, flag); + return __tty_insert_flip_string_flags(port, &ch, &flag, false, 1); } -static inline int tty_insert_flip_string(struct tty_port *port, - const unsigned char *chars, size_t size) +static inline size_t tty_insert_flip_string(struct tty_port *port, + const u8 *chars, size_t size) { return tty_insert_flip_string_fixed_flag(port, chars, TTY_NORMAL, size); } -int tty_ldisc_receive_buf(struct tty_ldisc *ld, const unsigned char *p, - const char *f, int count); +size_t tty_ldisc_receive_buf(struct tty_ldisc *ld, const u8 *p, const u8 *f, + size_t count); void tty_buffer_lock_exclusive(struct tty_port *port); void tty_buffer_unlock_exclusive(struct tty_port *port); diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index 49dc172dedc7..af01e89074b2 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -71,7 +71,7 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * call to @receive_buf(). Returning an error will prevent the ldisc from * being attached. * - * Can sleep. + * Optional. Can sleep. * * @close: [TTY] ``void ()(struct tty_struct *tty)`` * @@ -80,7 +80,7 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * changed to use a new line discipline. At the point of execution no * further users will enter the ldisc code for this tty. * - * Can sleep. + * Optional. Can sleep. * * @flush_buffer: [TTY] ``void ()(struct tty_struct *tty)`` * @@ -88,8 +88,10 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * input characters it may have queued to be delivered to the user mode * process. It may be called at any point between open and close. * - * @read: [TTY] ``ssize_t ()(struct tty_struct *tty, struct file *file, - * unsigned char *buf, size_t nr)`` + * Optional. + * + * @read: [TTY] ``ssize_t ()(struct tty_struct *tty, struct file *file, u8 *buf, + * size_t nr)`` * * This function is called when the user requests to read from the @tty. * The line discipline will return whatever characters it has buffered up @@ -97,10 +99,10 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * an %EIO error. Multiple read calls may occur in parallel and the ldisc * must deal with serialization issues. * - * Can sleep. + * Optional: %EIO unless provided. Can sleep. * * @write: [TTY] ``ssize_t ()(struct tty_struct *tty, struct file *file, - * const unsigned char *buf, size_t nr)`` + * const u8 *buf, size_t nr)`` * * This function is called when the user requests to write to the @tty. * The line discipline will deliver the characters to the low-level tty @@ -108,7 +110,7 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * characters first. If this function is not defined, the user will * receive an %EIO error. * - * Can sleep. + * Optional: %EIO unless provided. Can sleep. * * @ioctl: [TTY] ``int ()(struct tty_struct *tty, unsigned int cmd, * unsigned long arg)`` @@ -120,6 +122,8 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * discpline. So a low-level driver can "grab" an ioctl request before * the line discpline has a chance to see it. * + * Optional. + * * @compat_ioctl: [TTY] ``int ()(struct tty_struct *tty, unsigned int cmd, * unsigned long arg)`` * @@ -130,11 +134,15 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * a pointer to wordsize-sensitive structure belongs here, but most of * ldiscs will happily leave it %NULL. * + * Optional. + * * @set_termios: [TTY] ``void ()(struct tty_struct *tty, const struct ktermios *old)`` * * This function notifies the line discpline that a change has been made * to the termios structure. * + * Optional. + * * @poll: [TTY] ``int ()(struct tty_struct *tty, struct file *file, * struct poll_table_struct *wait)`` * @@ -142,6 +150,8 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * device. It is solely the responsibility of the line discipline to * handle poll requests. * + * Optional. + * * @hangup: [TTY] ``void ()(struct tty_struct *tty)`` * * Called on a hangup. Tells the discipline that it should cease I/O to @@ -149,10 +159,10 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * but should wait until any pending driver I/O is completed. No further * calls into the ldisc code will occur. * - * Can sleep. + * Optional. Can sleep. * - * @receive_buf: [DRV] ``void ()(struct tty_struct *tty, - * const unsigned char *cp, const char *fp, int count)`` + * @receive_buf: [DRV] ``void ()(struct tty_struct *tty, const u8 *cp, + * const u8 *fp, size_t count)`` * * This function is called by the low-level tty driver to send characters * received by the hardware to the line discpline for processing. @cp is @@ -161,6 +171,8 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * character was received with a parity error, etc. @fp may be %NULL to * indicate all data received is %TTY_NORMAL. * + * Optional. + * * @write_wakeup: [DRV] ``void ()(struct tty_struct *tty)`` * * This function is called by the low-level tty driver to signal that line @@ -170,13 +182,17 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * send, please arise a tasklet or workqueue to do the real data transfer. * Do not send data in this hook, it may lead to a deadlock. * + * Optional. + * * @dcd_change: [DRV] ``void ()(struct tty_struct *tty, bool active)`` * * Tells the discipline that the DCD pin has changed its status. Used * exclusively by the %N_PPS (Pulse-Per-Second) line discipline. * - * @receive_buf2: [DRV] ``int ()(struct tty_struct *tty, - * const unsigned char *cp, const char *fp, int count)`` + * Optional. + * + * @receive_buf2: [DRV] ``ssize_t ()(struct tty_struct *tty, const u8 *cp, + * const u8 *fp, size_t count)`` * * This function is called by the low-level tty driver to send characters * received by the hardware to the line discpline for processing. @cp is a @@ -186,8 +202,10 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * indicate all data received is %TTY_NORMAL. If assigned, prefer this * function for automatic flow control. * - * @lookahead_buf: [DRV] ``void ()(struct tty_struct *tty, - * const unsigned char *cp, const char *fp, int count)`` + * Optional. + * + * @lookahead_buf: [DRV] ``void ()(struct tty_struct *tty, const u8 *cp, + * const u8 *fp, size_t count)`` * * This function is called by the low-level tty driver for characters * not eaten by ->receive_buf() or ->receive_buf2(). It is useful for @@ -198,6 +216,8 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * same characters (e.g. by skipping the actions for high-priority * characters already handled by ->lookahead_buf()). * + * Optional. + * * @owner: module containting this ldisc (for reference counting) * * This structure defines the interface between the tty line discipline @@ -218,11 +238,10 @@ struct tty_ldisc_ops { int (*open)(struct tty_struct *tty); void (*close)(struct tty_struct *tty); void (*flush_buffer)(struct tty_struct *tty); - ssize_t (*read)(struct tty_struct *tty, struct file *file, - unsigned char *buf, size_t nr, - void **cookie, unsigned long offset); + ssize_t (*read)(struct tty_struct *tty, struct file *file, u8 *buf, + size_t nr, void **cookie, unsigned long offset); ssize_t (*write)(struct tty_struct *tty, struct file *file, - const unsigned char *buf, size_t nr); + const u8 *buf, size_t nr); int (*ioctl)(struct tty_struct *tty, unsigned int cmd, unsigned long arg); int (*compat_ioctl)(struct tty_struct *tty, unsigned int cmd, @@ -235,14 +254,14 @@ struct tty_ldisc_ops { /* * The following routines are called from below. */ - void (*receive_buf)(struct tty_struct *tty, const unsigned char *cp, - const char *fp, int count); + void (*receive_buf)(struct tty_struct *tty, const u8 *cp, + const u8 *fp, size_t count); void (*write_wakeup)(struct tty_struct *tty); void (*dcd_change)(struct tty_struct *tty, bool active); - int (*receive_buf2)(struct tty_struct *tty, const unsigned char *cp, - const char *fp, int count); - void (*lookahead_buf)(struct tty_struct *tty, const unsigned char *cp, - const unsigned char *fp, unsigned int count); + size_t (*receive_buf2)(struct tty_struct *tty, const u8 *cp, + const u8 *fp, size_t count); + void (*lookahead_buf)(struct tty_struct *tty, const u8 *cp, + const u8 *fp, size_t count); struct module *owner; }; diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h index edf685a24f7c..6b367eb17979 100644 --- a/include/linux/tty_port.h +++ b/include/linux/tty_port.h @@ -39,9 +39,10 @@ struct tty_port_operations { }; struct tty_port_client_operations { - int (*receive_buf)(struct tty_port *port, const unsigned char *, const unsigned char *, size_t); - void (*lookahead_buf)(struct tty_port *port, const unsigned char *cp, - const unsigned char *fp, unsigned int count); + size_t (*receive_buf)(struct tty_port *port, const u8 *cp, const u8 *fp, + size_t count); + void (*lookahead_buf)(struct tty_port *port, const u8 *cp, + const u8 *fp, size_t count); void (*write_wakeup)(struct tty_port *port); }; diff --git a/include/linux/ucs2_string.h b/include/linux/ucs2_string.h index cf3ada3e820e..c499ae809c7d 100644 --- a/include/linux/ucs2_string.h +++ b/include/linux/ucs2_string.h @@ -10,6 +10,7 @@ typedef u16 ucs2_char_t; unsigned long ucs2_strnlen(const ucs2_char_t *s, size_t maxlength); unsigned long ucs2_strlen(const ucs2_char_t *s); unsigned long ucs2_strsize(const ucs2_char_t *data, unsigned long maxlength); +ssize_t ucs2_strscpy(ucs2_char_t *dst, const ucs2_char_t *src, size_t count); int ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len); unsigned long ucs2_utf8size(const ucs2_char_t *src); diff --git a/include/linux/udp.h b/include/linux/udp.h index 43c1fb2d2c21..d04188714dca 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -32,25 +32,30 @@ static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask) return (num + net_hash_mix(net)) & mask; } +enum { + UDP_FLAGS_CORK, /* Cork is required */ + UDP_FLAGS_NO_CHECK6_TX, /* Send zero UDP6 checksums on TX? */ + UDP_FLAGS_NO_CHECK6_RX, /* Allow zero UDP6 checksums on RX? */ + UDP_FLAGS_GRO_ENABLED, /* Request GRO aggregation */ + UDP_FLAGS_ACCEPT_FRAGLIST, + UDP_FLAGS_ACCEPT_L4, + UDP_FLAGS_ENCAP_ENABLED, /* This socket enabled encap */ + UDP_FLAGS_UDPLITE_SEND_CC, /* set via udplite setsockopt */ + UDP_FLAGS_UDPLITE_RECV_CC, /* set via udplite setsockopt */ +}; + struct udp_sock { /* inet_sock has to be the first member */ struct inet_sock inet; #define udp_port_hash inet.sk.__sk_common.skc_u16hashes[0] #define udp_portaddr_hash inet.sk.__sk_common.skc_u16hashes[1] #define udp_portaddr_node inet.sk.__sk_common.skc_portaddr_node + + unsigned long udp_flags; + int pending; /* Any pending frames ? */ - unsigned int corkflag; /* Cork is required */ __u8 encap_type; /* Is this an Encapsulation socket? */ - unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */ - no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */ - encap_enabled:1, /* This socket enabled encap - * processing; UDP tunnels and - * different encapsulation layer set - * this - */ - gro_enabled:1, /* Request GRO aggregation */ - accept_udp_l4:1, - accept_udp_fraglist:1; + /* * Following member retains the information to create a UDP header * when the socket is uncorked. @@ -62,12 +67,6 @@ struct udp_sock { */ __u16 pcslen; __u16 pcrlen; -/* indicator bits used by pcflag: */ -#define UDPLITE_BIT 0x1 /* set by udplite proto init function */ -#define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ -#define UDPLITE_RECV_CC 0x4 /* set via udplite setsocktopt */ - __u8 pcflag; /* marks socket as UDP-Lite if > 0 */ - __u8 unused[3]; /* * For encapsulation sockets. */ @@ -95,28 +94,39 @@ struct udp_sock { int forward_threshold; }; +#define udp_test_bit(nr, sk) \ + test_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_set_bit(nr, sk) \ + set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_test_and_set_bit(nr, sk) \ + test_and_set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_clear_bit(nr, sk) \ + clear_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_assign_bit(nr, sk, val) \ + assign_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags, val) + #define UDP_MAX_SEGMENTS (1 << 6UL) #define udp_sk(ptr) container_of_const(ptr, struct udp_sock, inet.sk) static inline void udp_set_no_check6_tx(struct sock *sk, bool val) { - udp_sk(sk)->no_check6_tx = val; + udp_assign_bit(NO_CHECK6_TX, sk, val); } static inline void udp_set_no_check6_rx(struct sock *sk, bool val) { - udp_sk(sk)->no_check6_rx = val; + udp_assign_bit(NO_CHECK6_RX, sk, val); } -static inline bool udp_get_no_check6_tx(struct sock *sk) +static inline bool udp_get_no_check6_tx(const struct sock *sk) { - return udp_sk(sk)->no_check6_tx; + return udp_test_bit(NO_CHECK6_TX, sk); } -static inline bool udp_get_no_check6_rx(struct sock *sk) +static inline bool udp_get_no_check6_rx(const struct sock *sk) { - return udp_sk(sk)->no_check6_rx; + return udp_test_bit(NO_CHECK6_RX, sk); } static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, @@ -135,10 +145,12 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) if (!skb_is_gso(skb)) return false; - if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && !udp_sk(sk)->accept_udp_l4) + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && + !udp_test_bit(ACCEPT_L4, sk)) return true; - if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && !udp_sk(sk)->accept_udp_fraglist) + if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && + !udp_test_bit(ACCEPT_FRAGLIST, sk)) return true; return false; @@ -146,8 +158,8 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) static inline void udp_allow_gso(struct sock *sk) { - udp_sk(sk)->accept_udp_l4 = 1; - udp_sk(sk)->accept_udp_fraglist = 1; + udp_set_bit(ACCEPT_L4, sk); + udp_set_bit(ACCEPT_FRAGLIST, sk); } #define udp_portaddr_for_each_entry(__sk, list) \ diff --git a/include/linux/uio.h b/include/linux/uio.h index ff81e5ccaef2..b6214cbf2a43 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -21,12 +21,12 @@ struct kvec { enum iter_type { /* iter types */ + ITER_UBUF, ITER_IOVEC, - ITER_KVEC, ITER_BVEC, + ITER_KVEC, ITER_XARRAY, ITER_DISCARD, - ITER_UBUF, }; #define ITER_SOURCE 1 // == WRITE @@ -43,11 +43,7 @@ struct iov_iter { bool copy_mc; bool nofault; bool data_source; - bool user_backed; - union { - size_t iov_offset; - int last_offset; - }; + size_t iov_offset; /* * Hack alert: overlay ubuf_iovec with iovec + count, so * that the members resolve correctly regardless of the type @@ -143,7 +139,7 @@ static inline unsigned char iov_iter_rw(const struct iov_iter *i) static inline bool user_backed_iter(const struct iov_iter *i) { - return i->user_backed; + return iter_is_ubuf(i) || iter_is_iovec(i); } /* @@ -163,7 +159,7 @@ static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs) return ret; } -size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, +size_t copy_page_from_iter_atomic(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); void iov_iter_advance(struct iov_iter *i, size_t bytes); void iov_iter_revert(struct iov_iter *i, size_t bytes); @@ -184,6 +180,13 @@ static inline size_t copy_folio_to_iter(struct folio *folio, size_t offset, { return copy_page_to_iter(&folio->page, offset, bytes, i); } + +static inline size_t copy_folio_from_iter_atomic(struct folio *folio, + size_t offset, size_t bytes, struct iov_iter *i) +{ + return copy_page_from_iter_atomic(&folio->page, offset, bytes, i); +} + size_t copy_page_to_iter_nofault(struct page *page, unsigned offset, size_t bytes, struct iov_iter *i); @@ -335,27 +338,6 @@ iov_iter_npages_cap(struct iov_iter *i, int maxpages, size_t max_bytes) return npages; } -struct csum_state { - __wsum csum; - size_t off; -}; - -size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csstate, struct iov_iter *i); -size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); - -static __always_inline __must_check -bool csum_and_copy_from_iter_full(void *addr, size_t bytes, - __wsum *csum, struct iov_iter *i) -{ - size_t copied = csum_and_copy_from_iter(addr, bytes, csum, i); - if (likely(copied == bytes)) - return true; - iov_iter_revert(i, copied); - return false; -} -size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, - struct iov_iter *i); - struct iovec *iovec_from_user(const struct iovec __user *uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_iov, bool compat); @@ -376,7 +358,6 @@ static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction, *i = (struct iov_iter) { .iter_type = ITER_UBUF, .copy_mc = false, - .user_backed = true, .data_source = direction, .ubuf = buf, .count = count, diff --git a/include/linux/units.h b/include/linux/units.h index 2793a41e73a2..45110daaf8d3 100644 --- a/include/linux/units.h +++ b/include/linux/units.h @@ -2,6 +2,7 @@ #ifndef _LINUX_UNITS_H #define _LINUX_UNITS_H +#include <linux/bits.h> #include <linux/math.h> /* Metric prefixes in accordance with Système international (d'unités) */ @@ -31,6 +32,10 @@ #define MICROWATT_PER_MILLIWATT 1000UL #define MICROWATT_PER_WATT 1000000UL +#define BYTES_PER_KBIT (KILO / BITS_PER_BYTE) +#define BYTES_PER_MBIT (MEGA / BITS_PER_BYTE) +#define BYTES_PER_GBIT (GIGA / BITS_PER_BYTE) + #define ABSOLUTE_ZERO_MILLICELSIUS -273150 static inline long milli_kelvin_to_millicelsius(long t) diff --git a/include/linux/usb.h b/include/linux/usb.h index 25f8e62a30ec..8c61643acd49 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -25,7 +25,6 @@ struct usb_device; struct usb_driver; -struct wusb_dev; /*-------------------------------------------------------------------------*/ @@ -425,7 +424,6 @@ struct usb_host_config { struct usb_host_bos { struct usb_bos_descriptor *desc; - /* wireless cap descriptor is handled by wusb */ struct usb_ext_cap_descriptor *ext_cap; struct usb_ss_cap_descriptor *ss_cap; struct usb_ssp_cap_descriptor *ssp_cap; @@ -612,7 +610,6 @@ struct usb3_lpm_parameters { * WUSB devices are not, until we authorize them from user space. * FIXME -- complete doc * @authenticated: Crypto authentication passed - * @wusb: device is Wireless USB * @lpm_capable: device supports LPM * @lpm_devinit_allow: Allow USB3 device initiated LPM, exit latency is in range * @usb2_hw_lpm_capable: device can perform USB2 hardware LPM @@ -634,8 +631,6 @@ struct usb3_lpm_parameters { * @do_remote_wakeup: remote wakeup should be enabled * @reset_resume: needs reset instead of resume * @port_is_suspended: the upstream port is suspended (L2 or U3) - * @wusb_dev: if this is a Wireless USB device, link to the WUSB - * specific data for the device. * @slot_id: Slot ID assigned by xHCI * @removable: Device can be physically removed from this port * @l1_params: best effor service latency for USB2 L1 LPM state, and L1 timeout. @@ -696,7 +691,6 @@ struct usb_device { unsigned have_langid:1; unsigned authorized:1; unsigned authenticated:1; - unsigned wusb:1; unsigned lpm_capable:1; unsigned lpm_devinit_allow:1; unsigned usb2_hw_lpm_capable:1; @@ -727,7 +721,6 @@ struct usb_device { unsigned reset_resume:1; unsigned port_is_suspended:1; - struct wusb_dev *wusb_dev; int slot_id; struct usb2_lpm_parameters l1_params; struct usb3_lpm_parameters u1_params; @@ -1742,11 +1735,6 @@ static inline void usb_fill_bulk_urb(struct urb *urb, * encoding of the endpoint interval, and express polling intervals in * microframes (eight per millisecond) rather than in frames (one per * millisecond). - * - * Wireless USB also uses the logarithmic encoding, but specifies it in units of - * 128us instead of 125us. For Wireless USB devices, the interval is passed - * through to the host controller, rather than being translated into microframe - * units. */ static inline void usb_fill_int_urb(struct urb *urb, struct usb_device *dev, @@ -1835,22 +1823,6 @@ void *usb_alloc_coherent(struct usb_device *dev, size_t size, void usb_free_coherent(struct usb_device *dev, size_t size, void *addr, dma_addr_t dma); -#if 0 -struct urb *usb_buffer_map(struct urb *urb); -void usb_buffer_dmasync(struct urb *urb); -void usb_buffer_unmap(struct urb *urb); -#endif - -struct scatterlist; -int usb_buffer_map_sg(const struct usb_device *dev, int is_in, - struct scatterlist *sg, int nents); -#if 0 -void usb_buffer_dmasync_sg(const struct usb_device *dev, int is_in, - struct scatterlist *sg, int n_hw_ents); -#endif -void usb_buffer_unmap_sg(const struct usb_device *dev, int is_in, - struct scatterlist *sg, int n_hw_ents); - /*-------------------------------------------------------------------* * SYNCHRONOUS CALL SUPPORT * *-------------------------------------------------------------------*/ diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h index 969e7dba6358..c93b410b314a 100644 --- a/include/linux/usb/ch9.h +++ b/include/linux/usb/ch9.h @@ -3,7 +3,7 @@ * This file holds USB constants and structures that are needed for * USB device APIs. These are used by the USB device model, which is * defined in chapter 9 of the USB 2.0 specification and in the - * Wireless USB 1.0 (spread around). Linux has several APIs in C that + * Wireless USB 1.0 spec (now defunct). Linux has several APIs in C that * need these: * * - the host side Linux-USB kernel driver API; @@ -14,9 +14,6 @@ * act either as a USB host or as a USB device. That means the host and * device side APIs benefit from working well together. * - * There's also "Wireless USB", using low power short range radios for - * peripheral interconnection but otherwise building on the USB framework. - * * Note all descriptors are declared '__attribute__((packed))' so that: * * [a] they never get padded, either internally (USB spec writers diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h index ee38835ed77c..5a7f96684ea2 100644 --- a/include/linux/usb/chipidea.h +++ b/include/linux/usb/chipidea.h @@ -63,6 +63,8 @@ struct ci_hdrc_platform_data { #define CI_HDRC_IMX_IS_HSIC BIT(14) #define CI_HDRC_PMQOS BIT(15) #define CI_HDRC_PHY_VBUS_CONTROL BIT(16) +#define CI_HDRC_HAS_PORTSC_PEC_MISSED BIT(17) +#define CI_HDRC_FORCE_VBUS_ACTIVE_ALWAYS BIT(18) enum usb_dr_mode dr_mode; #define CI_HDRC_CONTROLLER_RESET_EVENT 0 #define CI_HDRC_CONTROLLER_STOPPED_EVENT 1 diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 07531c4f4350..af3cd2aae4bc 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -35,6 +35,14 @@ * are ready. The control transfer will then be kept from completing till * all the function drivers that requested for USB_GADGET_DELAYED_STAUS * invoke usb_composite_setup_continue(). + * + * NOTE: USB_GADGET_DELAYED_STATUS must not be used in UDC drivers: they + * must delay completing the status stage for 0-length control transfers + * regardless of the whether USB_GADGET_DELAYED_STATUS is returned from + * the gadget driver's setup() callback. + * Currently, a number of UDC drivers rely on USB_GADGET_DELAYED_STATUS, + * which is a bug. These drivers must be fixed and USB_GADGET_DELAYED_STATUS + * must be contained within the composite framework. */ #define USB_GADGET_DELAYED_STATUS 0x7fff /* Impossibly large value */ @@ -450,29 +458,6 @@ static inline struct usb_composite_driver *to_cdriver( * * One of these devices is allocated and initialized before the * associated device driver's bind() is called. - * - * OPEN ISSUE: it appears that some WUSB devices will need to be - * built by combining a normal (wired) gadget with a wireless one. - * This revision of the gadget framework should probably try to make - * sure doing that won't hurt too much. - * - * One notion for how to handle Wireless USB devices involves: - * - * (a) a second gadget here, discovery mechanism TBD, but likely - * needing separate "register/unregister WUSB gadget" calls; - * (b) updates to usb_gadget to include flags "is it wireless", - * "is it wired", plus (presumably in a wrapper structure) - * bandgroup and PHY info; - * (c) presumably a wireless_ep wrapping a usb_ep, and reporting - * wireless-specific parameters like maxburst and maxsequence; - * (d) configurations that are specific to wireless links; - * (e) function drivers that understand wireless configs and will - * support wireless for (additional) function instances; - * (f) a function to support association setup (like CBAF), not - * necessarily requiring a wireless adapter; - * (g) composite device setup that can create one or more wireless - * configs, including appropriate association setup support; - * (h) more, TBD. */ struct usb_composite_dev { struct usb_gadget *gadget; diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 75bda0783395..6532beb587b1 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -711,6 +711,15 @@ static inline int usb_gadget_check_config(struct usb_gadget *gadget) * get_interface. Setting a configuration (or interface) is where * endpoints should be activated or (config 0) shut down. * + * The gadget driver's setup() callback does not have to queue a response to + * ep0 within the setup() call, the driver can do it after setup() returns. + * The UDC driver must wait until such a response is queued before proceeding + * with the data/status stages of the control transfer. + * + * NOTE: Currently, a number of UDC drivers rely on USB_GADGET_DELAYED_STATUS + * being returned from the setup() callback, which is a bug. See the comment + * next to USB_GADGET_DELAYED_STATUS for details. + * * (Note that only the default control endpoint is supported. Neither * hosts nor devices generally support control traffic except to ep0.) * diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 4e9623e8492b..00724b4f6e12 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -154,7 +154,6 @@ struct usb_hcd { /* The next flag is a stopgap, to be removed when all the HCDs * support the new root-hub polling mechanism. */ unsigned uses_new_polling:1; - unsigned wireless:1; /* Wireless USB HCD */ unsigned has_tt:1; /* Integrated TT in root hub */ unsigned amd_resume_bug:1; /* AMD remote wakeup quirk */ unsigned can_do_streams:1; /* HC supports streams */ @@ -249,7 +248,6 @@ struct hc_driver { #define HCD_SHARED 0x0004 /* Two (or more) usb_hcds share HW */ #define HCD_USB11 0x0010 /* USB 1.1 */ #define HCD_USB2 0x0020 /* USB 2.0 */ -#define HCD_USB25 0x0030 /* Wireless USB 1.0 (USB 2.5)*/ #define HCD_USB3 0x0040 /* USB 3.0 */ #define HCD_USB31 0x0050 /* USB 3.1 */ #define HCD_USB32 0x0060 /* USB 3.2 */ @@ -486,8 +484,25 @@ extern int usb_hcd_pci_probe(struct pci_dev *dev, extern void usb_hcd_pci_remove(struct pci_dev *dev); extern void usb_hcd_pci_shutdown(struct pci_dev *dev); +#ifdef CONFIG_USB_PCI_AMD extern int usb_hcd_amd_remote_wakeup_quirk(struct pci_dev *dev); +static inline bool usb_hcd_amd_resume_bug(struct pci_dev *dev, + const struct hc_driver *driver) +{ + if (!usb_hcd_amd_remote_wakeup_quirk(dev)) + return false; + if (driver->flags & (HCD_USB11 | HCD_USB3)) + return true; + return false; +} +#else /* CONFIG_USB_PCI_AMD */ +static inline bool usb_hcd_amd_resume_bug(struct pci_dev *dev, + const struct hc_driver *driver) +{ + return false; +} +#endif extern const struct dev_pm_ops usb_hcd_pci_pm_ops; #endif /* CONFIG_USB_PCI */ diff --git a/include/linux/usb/ljca.h b/include/linux/usb/ljca.h new file mode 100644 index 000000000000..47661feda96c --- /dev/null +++ b/include/linux/usb/ljca.h @@ -0,0 +1,145 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2023, Intel Corporation. All rights reserved. + */ +#ifndef _LINUX_USB_LJCA_H_ +#define _LINUX_USB_LJCA_H_ + +#include <linux/auxiliary_bus.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/types.h> + +#define LJCA_MAX_GPIO_NUM 64 + +#define auxiliary_dev_to_ljca_client(auxiliary_dev) \ + container_of(auxiliary_dev, struct ljca_client, auxdev) + +struct ljca_adapter; + +/** + * typedef ljca_event_cb_t - event callback function signature + * + * @context: the execution context of who registered this callback + * @cmd: the command from device for this event + * @evt_data: the event data payload + * @len: the event data payload length + * + * The callback function is called in interrupt context and the data payload is + * only valid during the call. If the user needs later access of the data, it + * must copy it. + */ +typedef void (*ljca_event_cb_t)(void *context, u8 cmd, const void *evt_data, int len); + +/** + * struct ljca_client - represent a ljca client device + * + * @type: ljca client type + * @id: ljca client id within same client type + * @link: ljca client on the same ljca adapter + * @auxdev: auxiliary device object + * @adapter: ljca adapter the ljca client sit on + * @context: the execution context of the event callback + * @event_cb: ljca client driver register this callback to get + * firmware asynchronous rx buffer pending notifications + * @event_cb_lock: spinlock to protect event callback + */ +struct ljca_client { + u8 type; + u8 id; + struct list_head link; + struct auxiliary_device auxdev; + struct ljca_adapter *adapter; + + void *context; + ljca_event_cb_t event_cb; + /* lock to protect event_cb */ + spinlock_t event_cb_lock; +}; + +/** + * struct ljca_gpio_info - ljca gpio client device info + * + * @num: ljca gpio client device pin number + * @valid_pin_map: ljca gpio client device valid pin mapping + */ +struct ljca_gpio_info { + unsigned int num; + DECLARE_BITMAP(valid_pin_map, LJCA_MAX_GPIO_NUM); +}; + +/** + * struct ljca_i2c_info - ljca i2c client device info + * + * @id: ljca i2c client device identification number + * @capacity: ljca i2c client device capacity + * @intr_pin: ljca i2c client device interrupt pin number if exists + */ +struct ljca_i2c_info { + u8 id; + u8 capacity; + u8 intr_pin; +}; + +/** + * struct ljca_spi_info - ljca spi client device info + * + * @id: ljca spi client device identification number + * @capacity: ljca spi client device capacity + */ +struct ljca_spi_info { + u8 id; + u8 capacity; +}; + +/** + * ljca_register_event_cb - register a callback function to receive events + * + * @client: ljca client device + * @event_cb: callback function + * @context: execution context of event callback + * + * Return: 0 in case of success, negative value in case of error + */ +int ljca_register_event_cb(struct ljca_client *client, ljca_event_cb_t event_cb, void *context); + +/** + * ljca_unregister_event_cb - unregister the callback function for an event + * + * @client: ljca client device + */ +void ljca_unregister_event_cb(struct ljca_client *client); + +/** + * ljca_transfer - issue a LJCA command and wait for a response + * + * @client: ljca client device + * @cmd: the command to be sent to the device + * @obuf: the buffer to be sent to the device; it can be NULL if the user + * doesn't need to transmit data with this command + * @obuf_len: the size of the buffer to be sent to the device; it should + * be 0 when obuf is NULL + * @ibuf: any data associated with the response will be copied here; it can be + * NULL if the user doesn't need the response data + * @ibuf_len: must be initialized to the input buffer size + * + * Return: the actual length of response data for success, negative value for errors + */ +int ljca_transfer(struct ljca_client *client, u8 cmd, const u8 *obuf, + u8 obuf_len, u8 *ibuf, u8 ibuf_len); + +/** + * ljca_transfer_noack - issue a LJCA command without a response + * + * @client: ljca client device + * @cmd: the command to be sent to the device + * @obuf: the buffer to be sent to the device; it can be NULL if the user + * doesn't need to transmit data with this command + * @obuf_len: the size of the buffer to be sent to the device + * + * Return: 0 for success, negative value for errors + */ +int ljca_transfer_noack(struct ljca_client *client, u8 cmd, const u8 *obuf, + u8 obuf_len); + +#endif diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h index c59fb79a42e8..eb626af0e4e7 100644 --- a/include/linux/usb/pd.h +++ b/include/linux/usb/pd.h @@ -228,6 +228,7 @@ enum pd_pdo_type { #define PDO_FIXED_UNCHUNK_EXT BIT(24) /* Unchunked Extended Message supported (Source) */ #define PDO_FIXED_FRS_CURR_MASK (BIT(24) | BIT(23)) /* FR_Swap Current (Sink) */ #define PDO_FIXED_FRS_CURR_SHIFT 23 +#define PDO_FIXED_PEAK_CURR_SHIFT 20 #define PDO_FIXED_VOLT_SHIFT 10 /* 50mV units */ #define PDO_FIXED_CURR_SHIFT 0 /* 10mA units */ diff --git a/include/linux/usb/pd_vdo.h b/include/linux/usb/pd_vdo.h index b057250704e8..3a747938cdab 100644 --- a/include/linux/usb/pd_vdo.h +++ b/include/linux/usb/pd_vdo.h @@ -376,6 +376,7 @@ | ((vbm) & 0x3) << 9 | (sbu) << 8 | (sbut) << 7 | ((cur) & 0x3) << 5 \ | (vbt) << 4 | (sopp) << 3 | ((spd) & 0x7)) +#define VDO_TYPEC_CABLE_SPEED(vdo) ((vdo) & 0x7) #define VDO_TYPEC_CABLE_TYPE(vdo) (((vdo) >> 18) & 0x3) /* diff --git a/include/linux/usb/r8152.h b/include/linux/usb/r8152.h index 20d88b1defc3..33a4c146dc19 100644 --- a/include/linux/usb/r8152.h +++ b/include/linux/usb/r8152.h @@ -29,6 +29,8 @@ #define VENDOR_ID_LINKSYS 0x13b1 #define VENDOR_ID_NVIDIA 0x0955 #define VENDOR_ID_TPLINK 0x2357 +#define VENDOR_ID_DLINK 0x2001 +#define VENDOR_ID_ASUS 0x0b05 #if IS_REACHABLE(CONFIG_USB_RTL8152) extern u8 rtl8152_get_version(struct usb_interface *intf); diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index d418c55523a7..372898d9eeb0 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -5,16 +5,6 @@ * Copyright (C) 2011 Renesas Solutions Corp. * Copyright (C) 2019 Renesas Electronics Corporation * Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * */ #ifndef RENESAS_USB_H #define RENESAS_USB_H diff --git a/include/linux/usb/tcpci.h b/include/linux/usb/tcpci.h index 85e95a3251d3..83376473ac76 100644 --- a/include/linux/usb/tcpci.h +++ b/include/linux/usb/tcpci.h @@ -103,6 +103,7 @@ #define TCPC_POWER_STATUS_SINKING_VBUS BIT(0) #define TCPC_FAULT_STATUS 0x1f +#define TCPC_FAULT_STATUS_ALL_REG_RST_TO_DEFAULT BIT(7) #define TCPC_ALERT_EXTENDED 0x21 diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index 8fa781207970..a05d6f6f2536 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -202,6 +202,8 @@ struct typec_cable_desc { * @accessory: Audio, Debug or none. * @identity: Discover Identity command data * @pd_revision: USB Power Delivery Specification Revision if supported + * @attach: Notification about attached USB device + * @deattach: Notification about removed USB device * * Details about a partner that is attached to USB Type-C port. If @identity * member exists when partner is registered, a directory named "identity" is @@ -217,6 +219,9 @@ struct typec_partner_desc { enum typec_accessory accessory; struct usb_pd_identity *identity; u16 pd_revision; /* 0300H = "3.0" */ + + void (*attach)(struct typec_partner *partner, struct device *dev); + void (*deattach)(struct typec_partner *partner, struct device *dev); }; /** @@ -335,4 +340,36 @@ int typec_port_set_usb_power_delivery(struct typec_port *port, struct usb_power_ int typec_partner_set_usb_power_delivery(struct typec_partner *partner, struct usb_power_delivery *pd); +/** + * struct typec_connector - Representation of Type-C port for external drivers + * @attach: notification about device removal + * @deattach: notification about device removal + * + * Drivers that control the USB and other ports (DisplayPorts, etc.), that are + * connected to the Type-C connectors, can use these callbacks to inform the + * Type-C connector class about connections and disconnections. That information + * can then be used by the typec-port drivers to power on or off parts that are + * needed or not needed - as an example, in USB mode if USB2 device is + * enumerated, USB3 components (retimers, phys, and what have you) do not need + * to be powered on. + * + * The attached (enumerated) devices will be liked with the typec-partner device. + */ +struct typec_connector { + void (*attach)(struct typec_connector *con, struct device *dev); + void (*deattach)(struct typec_connector *con, struct device *dev); +}; + +static inline void typec_attach(struct typec_connector *con, struct device *dev) +{ + if (con && con->attach) + con->attach(con, dev); +} + +static inline void typec_deattach(struct typec_connector *con, struct device *dev) +{ + if (con && con->deattach) + con->deattach(con, dev); +} + #endif /* __LINUX_USB_TYPEC_H */ diff --git a/include/linux/usb/typec_altmode.h b/include/linux/usb/typec_altmode.h index 350d49012659..28aeef8f9e7b 100644 --- a/include/linux/usb/typec_altmode.h +++ b/include/linux/usb/typec_altmode.h @@ -67,7 +67,7 @@ struct typec_altmode_ops { int typec_altmode_enter(struct typec_altmode *altmode, u32 *vdo); int typec_altmode_exit(struct typec_altmode *altmode); -void typec_altmode_attention(struct typec_altmode *altmode, u32 vdo); +int typec_altmode_attention(struct typec_altmode *altmode, u32 vdo); int typec_altmode_vdm(struct typec_altmode *altmode, const u32 header, const u32 *vdo, int count); int typec_altmode_notify(struct typec_altmode *altmode, unsigned long conf, diff --git a/include/linux/usb/typec_dp.h b/include/linux/usb/typec_dp.h index 8d09c2f0a9b8..1f358098522d 100644 --- a/include/linux/usb/typec_dp.h +++ b/include/linux/usb/typec_dp.h @@ -67,8 +67,10 @@ enum { #define DP_CAP_UFP_D 1 #define DP_CAP_DFP_D 2 #define DP_CAP_DFP_D_AND_UFP_D 3 -#define DP_CAP_DP_SIGNALING BIT(2) /* Always set */ -#define DP_CAP_GEN2 BIT(3) /* Reserved after v1.0b */ +#define DP_CAP_DP_SIGNALLING(_cap_) (((_cap_) & GENMASK(5, 2)) >> 2) +#define DP_CAP_SIGNALLING_HBR3 1 +#define DP_CAP_SIGNALLING_UHBR10 2 +#define DP_CAP_SIGNALLING_UHBR20 3 #define DP_CAP_RECEPTACLE BIT(6) #define DP_CAP_USB BIT(7) #define DP_CAP_DFP_D_PIN_ASSIGN(_cap_) (((_cap_) & GENMASK(15, 8)) >> 8) @@ -78,6 +80,13 @@ enum { DP_CAP_UFP_D_PIN_ASSIGN(_cap_) : DP_CAP_DFP_D_PIN_ASSIGN(_cap_)) #define DP_CAP_PIN_ASSIGN_DFP_D(_cap_) ((_cap_ & DP_CAP_RECEPTACLE) ? \ DP_CAP_DFP_D_PIN_ASSIGN(_cap_) : DP_CAP_UFP_D_PIN_ASSIGN(_cap_)) +#define DP_CAP_UHBR_13_5_SUPPORT BIT(26) +#define DP_CAP_CABLE_TYPE(_cap_) (((_cap_) & GENMASK(29, 28)) >> 28) +#define DP_CAP_CABLE_TYPE_PASSIVE 0 +#define DP_CAP_CABLE_TYPE_RE_TIMER 1 +#define DP_CAP_CABLE_TYPE_RE_DRIVER 2 +#define DP_CAP_CABLE_TYPE_OPTICAL 3 +#define DP_CAP_DPAM_VERSION BIT(30) /* DisplayPort Status Update VDO bits */ #define DP_STATUS_CONNECTION(_status_) ((_status_) & 3) @@ -97,13 +106,24 @@ enum { #define DP_CONF_CURRENTLY(_conf_) ((_conf_) & 3) #define DP_CONF_UFP_U_AS_DFP_D BIT(0) #define DP_CONF_UFP_U_AS_UFP_D BIT(1) -#define DP_CONF_SIGNALING_DP BIT(2) -#define DP_CONF_SIGNALING_GEN_2 BIT(3) /* Reserved after v1.0b */ +#define DP_CONF_SIGNALLING_MASK GENMASK(5, 2) +#define DP_CONF_SIGNALLING_SHIFT 2 +#define DP_CONF_SIGNALLING_HBR3 1 +#define DP_CONF_SIGNALLING_UHBR10 2 +#define DP_CONF_SIGNALLING_UHBR20 3 #define DP_CONF_PIN_ASSIGNEMENT_SHIFT 8 #define DP_CONF_PIN_ASSIGNEMENT_MASK GENMASK(15, 8) /* Helper for setting/getting the pin assignment value to the configuration */ #define DP_CONF_SET_PIN_ASSIGN(_a_) ((_a_) << 8) #define DP_CONF_GET_PIN_ASSIGN(_conf_) (((_conf_) & GENMASK(15, 8)) >> 8) +#define DP_CONF_UHBR13_5_SUPPORT BIT(26) +#define DP_CONF_CABLE_TYPE_MASK GENMASK(29, 28) +#define DP_CONF_CABLE_TYPE_SHIFT 28 +#define DP_CONF_CABLE_TYPE_PASSIVE 0 +#define DP_CONF_CABLE_TYPE_RE_TIMER 1 +#define DP_CONF_CABLE_TYPE_RE_DRIVER 2 +#define DP_CONF_CABLE_TYPE_OPTICAL 3 +#define DP_CONF_DPAM_VERSION BIT(30) #endif /* __USB_TYPEC_DP_H */ diff --git a/include/linux/usb/typec_tbt.h b/include/linux/usb/typec_tbt.h index 63dd44b72e0c..c7a2153bd6f5 100644 --- a/include/linux/usb/typec_tbt.h +++ b/include/linux/usb/typec_tbt.h @@ -46,6 +46,7 @@ struct typec_thunderbolt_data { #define TBT_CABLE_OPTICAL BIT(21) #define TBT_CABLE_RETIMER BIT(22) #define TBT_CABLE_LINK_TRAINING BIT(23) +#define TBT_CABLE_ACTIVE_PASSIVE BIT(25) #define TBT_SET_CABLE_SPEED(_s_) (((_s_) & GENMASK(2, 0)) << 16) #define TBT_SET_CABLE_ROUNDED(_g_) (((_g_) & GENMASK(1, 0)) << 19) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 45f09bec02c4..6030a8235617 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -65,6 +65,10 @@ enum rlimit_type { UCOUNT_RLIMIT_COUNTS, }; +#if IS_ENABLED(CONFIG_BINFMT_MISC) +struct binfmt_misc; +#endif + struct user_namespace { struct uid_gid_map uid_map; struct uid_gid_map gid_map; @@ -102,6 +106,10 @@ struct user_namespace { struct ucounts *ucounts; long ucount_max[UCOUNT_COUNTS]; long rlimit_max[UCOUNT_RLIMIT_COUNTS]; + +#if IS_ENABLED(CONFIG_BINFMT_MISC) + struct binfmt_misc *binfmt_misc; +#endif } __randomize_layout; struct ucounts { diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index ac7b0c96d351..f2dc19f40d05 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -46,6 +46,7 @@ enum mfill_atomic_mode { MFILL_ATOMIC_COPY, MFILL_ATOMIC_ZEROPAGE, MFILL_ATOMIC_CONTINUE, + MFILL_ATOMIC_POISON, NR_MFILL_ATOMIC_MODES, }; @@ -83,6 +84,9 @@ extern ssize_t mfill_atomic_zeropage(struct mm_struct *dst_mm, extern ssize_t mfill_atomic_continue(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long len, atomic_t *mmap_changing, uffd_flags_t flags); +extern ssize_t mfill_atomic_poison(struct mm_struct *dst_mm, unsigned long start, + unsigned long len, atomic_t *mmap_changing, + uffd_flags_t flags); extern int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start, unsigned long len, bool enable_wp, atomic_t *mmap_changing); @@ -157,11 +161,22 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma) } static inline bool vma_can_userfault(struct vm_area_struct *vma, - unsigned long vm_flags) + unsigned long vm_flags, + bool wp_async) { + vm_flags &= __VM_UFFD_FLAGS; + if ((vm_flags & VM_UFFD_MINOR) && (!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma))) return false; + + /* + * If wp async enabled, and WP is the only mode enabled, allow any + * memory type. + */ + if (wp_async && (vm_flags == VM_UFFD_WP)) + return true; + #ifndef CONFIG_PTE_MARKER_UFFD_WP /* * If user requested uffd-wp but not enabled pte markers for @@ -171,6 +186,8 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma, if ((vm_flags & VM_UFFD_WP) && !vma_is_anonymous(vma)) return false; #endif + + /* By default, allow any of anon|shmem|hugetlb */ return vma_is_anonymous(vma) || is_vm_hugetlb_page(vma) || vma_is_shmem(vma); } @@ -193,6 +210,7 @@ extern int userfaultfd_unmap_prep(struct vm_area_struct *vma, extern void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf); extern bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma); +extern bool userfaultfd_wp_async(struct vm_area_struct *vma); #else /* CONFIG_USERFAULTFD */ @@ -203,6 +221,13 @@ static inline vm_fault_t handle_userfault(struct vm_fault *vmf, return VM_FAULT_SIGBUS; } +static inline long uffd_wp_range(struct vm_area_struct *vma, + unsigned long start, unsigned long len, + bool enable_wp) +{ + return false; +} + static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, struct vm_userfaultfd_ctx vm_ctx) { @@ -293,6 +318,11 @@ static inline bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma) return false; } +static inline bool userfaultfd_wp_async(struct vm_area_struct *vma) +{ + return false; +} + #endif /* CONFIG_USERFAULTFD */ static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma) diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index db1b0eaef4eb..db15ac07f8a6 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -204,10 +204,23 @@ struct vdpa_map_file { * @vdev: vdpa device * @idx: virtqueue index * Returns u32: group id for this virtqueue + * @get_vq_desc_group: Get the group id for the descriptor table of + * a specific virtqueue (optional) + * @vdev: vdpa device + * @idx: virtqueue index + * Returns u32: group id for the descriptor table + * portion of this virtqueue. Could be different + * than the one from @get_vq_group, in which case + * the access to the descriptor table can be + * confined to a separate asid, isolating from + * the virtqueue's buffer address access. * @get_device_features: Get virtio features supported by the device * @vdev: vdpa device * Returns the virtio features support by the * device + * @get_backend_features: Get parent-specific backend features (optional) + * Returns the vdpa features supported by the + * device. * @set_driver_features: Set virtio features supported by the driver * @vdev: vdpa device * @features: feature support by the driver @@ -239,6 +252,17 @@ struct vdpa_map_file { * @reset: Reset device * @vdev: vdpa device * Returns integer: success (0) or error (< 0) + * @compat_reset: Reset device with compatibility quirks to + * accommodate older userspace. Only needed by + * parent driver which used to have bogus reset + * behaviour, and has to maintain such behaviour + * for compatibility with older userspace. + * Historically compliant driver only has to + * implement .reset, Historically non-compliant + * driver should implement both. + * @vdev: vdpa device + * @flags: compatibility quirks for reset + * Returns integer: success (0) or error (< 0) * @suspend: Suspend the device (optional) * @vdev: vdpa device * Returns integer: success (0) or error (< 0) @@ -314,6 +338,15 @@ struct vdpa_map_file { * @iova: iova to be unmapped * @size: size of the area * Returns integer: success (0) or error (< 0) + * @reset_map: Reset device memory mapping to the default + * state (optional) + * Needed for devices that are using device + * specific DMA translation and prefer mapping + * to be decoupled from the virtio life cycle, + * i.e. device .reset op does not reset mapping + * @vdev: vdpa device + * @asid: address space identifier + * Returns integer: success (0) or error (< 0) * @get_vq_dma_dev: Get the dma device for a specific * virtqueue (optional) * @vdev: vdpa device @@ -357,7 +390,9 @@ struct vdpa_config_ops { /* Device ops */ u32 (*get_vq_align)(struct vdpa_device *vdev); u32 (*get_vq_group)(struct vdpa_device *vdev, u16 idx); + u32 (*get_vq_desc_group)(struct vdpa_device *vdev, u16 idx); u64 (*get_device_features)(struct vdpa_device *vdev); + u64 (*get_backend_features)(const struct vdpa_device *vdev); int (*set_driver_features)(struct vdpa_device *vdev, u64 features); u64 (*get_driver_features)(struct vdpa_device *vdev); void (*set_config_cb)(struct vdpa_device *vdev, @@ -369,6 +404,8 @@ struct vdpa_config_ops { u8 (*get_status)(struct vdpa_device *vdev); void (*set_status)(struct vdpa_device *vdev, u8 status); int (*reset)(struct vdpa_device *vdev); + int (*compat_reset)(struct vdpa_device *vdev, u32 flags); +#define VDPA_RESET_F_CLEAN_MAP 1 int (*suspend)(struct vdpa_device *vdev); int (*resume)(struct vdpa_device *vdev); size_t (*get_config_size)(struct vdpa_device *vdev); @@ -390,6 +427,7 @@ struct vdpa_config_ops { u64 iova, u64 size, u64 pa, u32 perm, void *opaque); int (*dma_unmap)(struct vdpa_device *vdev, unsigned int asid, u64 iova, u64 size); + int (*reset_map)(struct vdpa_device *vdev, unsigned int asid); int (*set_group_asid)(struct vdpa_device *vdev, unsigned int group, unsigned int asid); struct device *(*get_vq_dma_dev)(struct vdpa_device *vdev, u16 idx); @@ -481,14 +519,17 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev) return vdev->dma_dev; } -static inline int vdpa_reset(struct vdpa_device *vdev) +static inline int vdpa_reset(struct vdpa_device *vdev, u32 flags) { const struct vdpa_config_ops *ops = vdev->config; int ret; down_write(&vdev->cf_lock); vdev->features_valid = false; - ret = ops->reset(vdev); + if (ops->compat_reset && flags) + ret = ops->compat_reset(vdev, flags); + else + ret = ops->reset(vdev); up_write(&vdev->cf_lock); return ret; } diff --git a/include/linux/verification.h b/include/linux/verification.h index f34e50ebcf60..cb2d47f28091 100644 --- a/include/linux/verification.h +++ b/include/linux/verification.h @@ -8,6 +8,7 @@ #ifndef _LINUX_VERIFICATION_H #define _LINUX_VERIFICATION_H +#include <linux/errno.h> #include <linux/types.h> /* diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 2c137ea94a3e..a65b2513f8cd 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -13,6 +13,7 @@ #include <linux/mm.h> #include <linux/workqueue.h> #include <linux/poll.h> +#include <linux/cdev.h> #include <uapi/linux/vfio.h> #include <linux/iova_bitmap.h> @@ -42,7 +43,11 @@ struct vfio_device { */ const struct vfio_migration_ops *mig_ops; const struct vfio_log_ops *log_ops; +#if IS_ENABLED(CONFIG_VFIO_GROUP) struct vfio_group *group; + struct list_head group_next; + struct list_head iommu_entry; +#endif struct vfio_device_set *dev_set; struct list_head dev_set_list; unsigned int migration_flags; @@ -51,17 +56,19 @@ struct vfio_device { /* Members below here are private, not for driver use */ unsigned int index; struct device device; /* device.kref covers object life circle */ +#if IS_ENABLED(CONFIG_VFIO_DEVICE_CDEV) + struct cdev cdev; +#endif refcount_t refcount; /* user count on registered device*/ unsigned int open_count; struct completion comp; - struct list_head group_next; - struct list_head iommu_entry; struct iommufd_access *iommufd_access; void (*put_kvm)(struct kvm *kvm); #if IS_ENABLED(CONFIG_IOMMUFD) struct iommufd_device *iommufd_device; - bool iommufd_attached; + u8 iommufd_attached:1; #endif + u8 cdev_opened:1; }; /** @@ -73,7 +80,9 @@ struct vfio_device { * @bind_iommufd: Called when binding the device to an iommufd * @unbind_iommufd: Opposite of bind_iommufd * @attach_ioas: Called when attaching device to an IOAS/HWPT managed by the - * bound iommufd. Undo in unbind_iommufd. + * bound iommufd. Undo in unbind_iommufd if @detach_ioas is not + * called. + * @detach_ioas: Opposite of attach_ioas * @open_device: Called when the first file descriptor is opened for this device * @close_device: Opposite of open_device * @read: Perform read(2) on device file descriptor @@ -97,6 +106,7 @@ struct vfio_device_ops { struct iommufd_ctx *ictx, u32 *out_device_id); void (*unbind_iommufd)(struct vfio_device *vdev); int (*attach_ioas)(struct vfio_device *vdev, u32 *pt_id); + void (*detach_ioas)(struct vfio_device *vdev); int (*open_device)(struct vfio_device *vdev); void (*close_device)(struct vfio_device *vdev); ssize_t (*read)(struct vfio_device *vdev, char __user *buf, @@ -114,15 +124,31 @@ struct vfio_device_ops { }; #if IS_ENABLED(CONFIG_IOMMUFD) +struct iommufd_ctx *vfio_iommufd_device_ictx(struct vfio_device *vdev); +int vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx); int vfio_iommufd_physical_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx, u32 *out_device_id); void vfio_iommufd_physical_unbind(struct vfio_device *vdev); int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id); +void vfio_iommufd_physical_detach_ioas(struct vfio_device *vdev); int vfio_iommufd_emulated_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx, u32 *out_device_id); void vfio_iommufd_emulated_unbind(struct vfio_device *vdev); int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id); +void vfio_iommufd_emulated_detach_ioas(struct vfio_device *vdev); #else +static inline struct iommufd_ctx * +vfio_iommufd_device_ictx(struct vfio_device *vdev) +{ + return NULL; +} + +static inline int +vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx) +{ + return VFIO_PCI_DEVID_NOT_OWNED; +} + #define vfio_iommufd_physical_bind \ ((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx, \ u32 *out_device_id)) NULL) @@ -130,6 +156,8 @@ int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id); ((void (*)(struct vfio_device *vdev)) NULL) #define vfio_iommufd_physical_attach_ioas \ ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL) +#define vfio_iommufd_physical_detach_ioas \ + ((void (*)(struct vfio_device *vdev)) NULL) #define vfio_iommufd_emulated_bind \ ((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx, \ u32 *out_device_id)) NULL) @@ -137,8 +165,15 @@ int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id); ((void (*)(struct vfio_device *vdev)) NULL) #define vfio_iommufd_emulated_attach_ioas \ ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL) +#define vfio_iommufd_emulated_detach_ioas \ + ((void (*)(struct vfio_device *vdev)) NULL) #endif +static inline bool vfio_device_cdev_opened(struct vfio_device *device) +{ + return device->cdev_opened; +} + /** * struct vfio_migration_ops - VFIO bus device driver migration callbacks * @@ -239,20 +274,40 @@ void vfio_unregister_group_dev(struct vfio_device *device); int vfio_assign_device_set(struct vfio_device *device, void *set_id); unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set); +struct vfio_device * +vfio_find_device_in_devset(struct vfio_device_set *dev_set, + struct device *dev); int vfio_mig_get_next_state(struct vfio_device *device, enum vfio_device_mig_state cur_fsm, enum vfio_device_mig_state new_fsm, enum vfio_device_mig_state *next_fsm); +void vfio_combine_iova_ranges(struct rb_root_cached *root, u32 cur_nodes, + u32 req_nodes); + /* * External user API */ struct iommu_group *vfio_file_iommu_group(struct file *file); + +#if IS_ENABLED(CONFIG_VFIO_GROUP) bool vfio_file_is_group(struct file *file); +bool vfio_file_has_dev(struct file *file, struct vfio_device *device); +#else +static inline bool vfio_file_is_group(struct file *file) +{ + return false; +} + +static inline bool vfio_file_has_dev(struct file *file, struct vfio_device *device) +{ + return false; +} +#endif +bool vfio_file_is_valid(struct file *file); bool vfio_file_enforced_coherent(struct file *file); void vfio_file_set_kvm(struct file *file, struct kvm *kvm); -bool vfio_file_has_dev(struct file *file, struct vfio_device *device); #define VFIO_PIN_PAGES_MAX_ENTRIES (PAGE_SIZE/sizeof(unsigned long)) diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h index b4b9137f9792..97129a1bbb7d 100644 --- a/include/linux/vgaarb.h +++ b/include/linux/vgaarb.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: MIT */ + /* * The VGA aribiter manages VGA space routing and VGA resource decode to * allow multiple VGA devices to be used in a system in a safe way. @@ -5,27 +7,6 @@ * (C) Copyright 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org> * (C) Copyright 2007 Paulo R. Zanoni <przanoni@gmail.com> * (C) Copyright 2007, 2009 Tiago Vignatti <vignatti@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS - * IN THE SOFTWARE. - * */ #ifndef LINUX_VGA_H @@ -96,7 +77,7 @@ static inline int vga_client_register(struct pci_dev *pdev, static inline int vga_get_interruptible(struct pci_dev *pdev, unsigned int rsrc) { - return vga_get(pdev, rsrc, 1); + return vga_get(pdev, rsrc, 1); } /** @@ -111,7 +92,7 @@ static inline int vga_get_interruptible(struct pci_dev *pdev, static inline int vga_get_uninterruptible(struct pci_dev *pdev, unsigned int rsrc) { - return vga_get(pdev, rsrc, 0); + return vga_get(pdev, rsrc, 0); } static inline void vga_client_unregister(struct pci_dev *pdev) diff --git a/include/linux/virtio.h b/include/linux/virtio.h index de6041deee37..4cc614a38376 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -9,6 +9,7 @@ #include <linux/device.h> #include <linux/mod_devicetable.h> #include <linux/gfp.h> +#include <linux/dma-mapping.h> /** * struct virtqueue - a queue to register buffers for sending or receiving. @@ -61,6 +62,8 @@ int virtqueue_add_sgs(struct virtqueue *vq, void *data, gfp_t gfp); +struct device *virtqueue_dma_dev(struct virtqueue *vq); + bool virtqueue_kick(struct virtqueue *vq); bool virtqueue_kick_prepare(struct virtqueue *vq); @@ -78,6 +81,8 @@ bool virtqueue_enable_cb(struct virtqueue *vq); unsigned virtqueue_enable_cb_prepare(struct virtqueue *vq); +int virtqueue_set_dma_premapped(struct virtqueue *_vq); + bool virtqueue_poll(struct virtqueue *vq, unsigned); bool virtqueue_enable_cb_delayed(struct virtqueue *vq); @@ -95,6 +100,8 @@ dma_addr_t virtqueue_get_used_addr(const struct virtqueue *vq); int virtqueue_resize(struct virtqueue *vq, u32 num, void (*recycle)(struct virtqueue *vq, void *buf)); +int virtqueue_reset(struct virtqueue *vq, + void (*recycle)(struct virtqueue *vq, void *buf)); /** * struct virtio_device - representation of a device using virtio @@ -184,10 +191,8 @@ struct virtio_driver { void (*scan)(struct virtio_device *dev); void (*remove)(struct virtio_device *dev); void (*config_changed)(struct virtio_device *dev); -#ifdef CONFIG_PM int (*freeze)(struct virtio_device *dev); int (*restore)(struct virtio_device *dev); -#endif }; static inline struct virtio_driver *drv_to_virtio(struct device_driver *drv) @@ -206,4 +211,19 @@ void unregister_virtio_driver(struct virtio_driver *drv); #define module_virtio_driver(__virtio_driver) \ module_driver(__virtio_driver, register_virtio_driver, \ unregister_virtio_driver) + +dma_addr_t virtqueue_dma_map_single_attrs(struct virtqueue *_vq, void *ptr, size_t size, + enum dma_data_direction dir, unsigned long attrs); +void virtqueue_dma_unmap_single_attrs(struct virtqueue *_vq, dma_addr_t addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs); +int virtqueue_dma_mapping_error(struct virtqueue *_vq, dma_addr_t addr); + +bool virtqueue_dma_need_sync(struct virtqueue *_vq, dma_addr_t addr); +void virtqueue_dma_sync_single_range_for_cpu(struct virtqueue *_vq, dma_addr_t addr, + unsigned long offset, size_t size, + enum dma_data_direction dir); +void virtqueue_dma_sync_single_range_for_device(struct virtqueue *_vq, dma_addr_t addr, + unsigned long offset, size_t size, + enum dma_data_direction dir); #endif /* _LINUX_VIRTIO_H */ diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 7b4dd69555e4..27cc1d464321 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -3,8 +3,8 @@ #define _LINUX_VIRTIO_NET_H #include <linux/if_vlan.h> +#include <linux/udp.h> #include <uapi/linux/tcp.h> -#include <uapi/linux/udp.h> #include <uapi/linux/virtio_net.h> static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type) @@ -151,9 +151,22 @@ retry: unsigned int nh_off = p_off; struct skb_shared_info *shinfo = skb_shinfo(skb); - /* UFO may not include transport header in gso_size. */ - if (gso_type & SKB_GSO_UDP) + switch (gso_type & ~SKB_GSO_TCP_ECN) { + case SKB_GSO_UDP: + /* UFO may not include transport header in gso_size. */ nh_off -= thlen; + break; + case SKB_GSO_UDP_L4: + if (!(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) + return -EINVAL; + if (skb->csum_offset != offsetof(struct udphdr, check)) + return -EINVAL; + if (skb->len - p_off > gso_size * UDP_MAX_SEGMENTS) + return -EINVAL; + if (gso_type != SKB_GSO_UDP_L4) + return -EINVAL; + break; + } /* Kernel has a special handling for GSO_BY_FRAGS. */ if (gso_size == GSO_BY_FRAGS) diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h index 067ac1d789bc..a09e13a577a9 100644 --- a/include/linux/virtio_pci_modern.h +++ b/include/linux/virtio_pci_modern.h @@ -5,44 +5,48 @@ #include <linux/pci.h> #include <linux/virtio_pci.h> -struct virtio_pci_modern_common_cfg { - struct virtio_pci_common_cfg cfg; - - __le16 queue_notify_data; /* read-write */ - __le16 queue_reset; /* read-write */ -}; - +/** + * struct virtio_pci_modern_device - info for modern PCI virtio + * @pci_dev: Ptr to the PCI device struct + * @common: Position of the common capability in the PCI config + * @device: Device-specific data (non-legacy mode) + * @notify_base: Base of vq notifications (non-legacy mode) + * @notify_pa: Physical base of vq notifications + * @isr: Where to read and clear interrupt + * @notify_len: So we can sanity-check accesses + * @device_len: So we can sanity-check accesses + * @notify_map_cap: Capability for when we need to map notifications per-vq + * @notify_offset_multiplier: Multiply queue_notify_off by this value + * (non-legacy mode). + * @modern_bars: Bitmask of BARs + * @id: Device and vendor id + * @device_id_check: Callback defined before vp_modern_probe() to be used to + * verify the PCI device is a vendor's expected device rather + * than the standard virtio PCI device + * Returns the found device id or ERRNO + * @dma_mask: Optional mask instead of the traditional DMA_BIT_MASK(64), + * for vendor devices with DMA space address limitations + */ struct virtio_pci_modern_device { struct pci_dev *pci_dev; struct virtio_pci_common_cfg __iomem *common; - /* Device-specific data (non-legacy mode) */ void __iomem *device; - /* Base of vq notifications (non-legacy mode). */ void __iomem *notify_base; - /* Physical base of vq notifications */ resource_size_t notify_pa; - /* Where to read and clear interrupt */ u8 __iomem *isr; - /* So we can sanity-check accesses. */ size_t notify_len; size_t device_len; + size_t common_len; - /* Capability for when we need to map notifications per-vq. */ int notify_map_cap; - /* Multiply queue_notify_off by this value. (non-legacy mode). */ u32 notify_offset_multiplier; - int modern_bars; - struct virtio_device_id id; - /* optional check for vendor virtio device, returns dev_id or -ERRNO */ int (*device_id_check)(struct pci_dev *pdev); - - /* optional mask for devices with limited DMA space */ u64 dma_mask; }; diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index c58453699ee9..ebb3ce63d64d 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -12,6 +12,7 @@ struct virtio_vsock_skb_cb { bool reply; bool tap_delivered; + u32 offset; }; #define VIRTIO_VSOCK_SKB_CB(skb) ((struct virtio_vsock_skb_cb *)((skb)->cb)) @@ -159,6 +160,15 @@ struct virtio_transport { /* Takes ownership of the packet */ int (*send_pkt)(struct sk_buff *skb); + + /* Used in MSG_ZEROCOPY mode. Checks, that provided data + * (number of buffers) could be transmitted with zerocopy + * mode. If this callback is not implemented for the current + * transport - this means that this transport doesn't need + * extra checks and can perform zerocopy transmission by + * default. + */ + bool (*can_msgzerocopy)(int bufs_num); }; ssize_t diff --git a/include/linux/vlynq.h b/include/linux/vlynq.h deleted file mode 100644 index e9c0cd36c48a..000000000000 --- a/include/linux/vlynq.h +++ /dev/null @@ -1,149 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2006, 2007 Eugene Konev <ejka@openwrt.org> - */ - -#ifndef __VLYNQ_H__ -#define __VLYNQ_H__ - -#include <linux/device.h> -#include <linux/types.h> - -struct module; - -#define VLYNQ_NUM_IRQS 32 - -struct vlynq_mapping { - u32 size; - u32 offset; -}; - -enum vlynq_divisor { - vlynq_div_auto = 0, - vlynq_ldiv1, - vlynq_ldiv2, - vlynq_ldiv3, - vlynq_ldiv4, - vlynq_ldiv5, - vlynq_ldiv6, - vlynq_ldiv7, - vlynq_ldiv8, - vlynq_rdiv1, - vlynq_rdiv2, - vlynq_rdiv3, - vlynq_rdiv4, - vlynq_rdiv5, - vlynq_rdiv6, - vlynq_rdiv7, - vlynq_rdiv8, - vlynq_div_external -}; - -struct vlynq_device_id { - u32 id; - enum vlynq_divisor divisor; - unsigned long driver_data; -}; - -struct vlynq_regs; -struct vlynq_device { - u32 id, dev_id; - int local_irq; - int remote_irq; - enum vlynq_divisor divisor; - u32 regs_start, regs_end; - u32 mem_start, mem_end; - u32 irq_start, irq_end; - int irq; - int enabled; - struct vlynq_regs *local; - struct vlynq_regs *remote; - struct device dev; -}; - -struct vlynq_driver { - char *name; - struct vlynq_device_id *id_table; - int (*probe)(struct vlynq_device *dev, struct vlynq_device_id *id); - void (*remove)(struct vlynq_device *dev); - struct device_driver driver; -}; - -struct plat_vlynq_ops { - int (*on)(struct vlynq_device *dev); - void (*off)(struct vlynq_device *dev); -}; - -static inline struct vlynq_driver *to_vlynq_driver(struct device_driver *drv) -{ - return container_of(drv, struct vlynq_driver, driver); -} - -static inline struct vlynq_device *to_vlynq_device(struct device *device) -{ - return container_of(device, struct vlynq_device, dev); -} - -extern struct bus_type vlynq_bus_type; - -extern int __vlynq_register_driver(struct vlynq_driver *driver, - struct module *owner); - -static inline int vlynq_register_driver(struct vlynq_driver *driver) -{ - return __vlynq_register_driver(driver, THIS_MODULE); -} - -static inline void *vlynq_get_drvdata(struct vlynq_device *dev) -{ - return dev_get_drvdata(&dev->dev); -} - -static inline void vlynq_set_drvdata(struct vlynq_device *dev, void *data) -{ - dev_set_drvdata(&dev->dev, data); -} - -static inline u32 vlynq_mem_start(struct vlynq_device *dev) -{ - return dev->mem_start; -} - -static inline u32 vlynq_mem_end(struct vlynq_device *dev) -{ - return dev->mem_end; -} - -static inline u32 vlynq_mem_len(struct vlynq_device *dev) -{ - return dev->mem_end - dev->mem_start + 1; -} - -static inline int vlynq_virq_to_irq(struct vlynq_device *dev, int virq) -{ - int irq = dev->irq_start + virq; - if ((irq < dev->irq_start) || (irq > dev->irq_end)) - return -EINVAL; - - return irq; -} - -static inline int vlynq_irq_to_virq(struct vlynq_device *dev, int irq) -{ - if ((irq < dev->irq_start) || (irq > dev->irq_end)) - return -EINVAL; - - return irq - dev->irq_start; -} - -extern void vlynq_unregister_driver(struct vlynq_driver *driver); -extern int vlynq_enable_device(struct vlynq_device *dev); -extern void vlynq_disable_device(struct vlynq_device *dev); -extern int vlynq_set_local_mapping(struct vlynq_device *dev, u32 tx_offset, - struct vlynq_mapping *mapping); -extern int vlynq_set_remote_mapping(struct vlynq_device *dev, u32 tx_offset, - struct vlynq_mapping *mapping); -extern int vlynq_set_local_irq(struct vlynq_device *dev, int virq); -extern int vlynq_set_remote_irq(struct vlynq_device *dev, int virq); - -#endif /* __VLYNQ_H__ */ diff --git a/include/linux/wait.h b/include/linux/wait.h index a0307b516b09..3473b663176f 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -19,10 +19,9 @@ int default_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int /* wait_queue_entry::flags */ #define WQ_FLAG_EXCLUSIVE 0x01 #define WQ_FLAG_WOKEN 0x02 -#define WQ_FLAG_BOOKMARK 0x04 -#define WQ_FLAG_CUSTOM 0x08 -#define WQ_FLAG_DONE 0x10 -#define WQ_FLAG_PRIORITY 0x20 +#define WQ_FLAG_CUSTOM 0x04 +#define WQ_FLAG_DONE 0x08 +#define WQ_FLAG_PRIORITY 0x10 /* * A single wait-queue entry structure: @@ -210,9 +209,8 @@ __remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq } int __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key); +void __wake_up_on_current_cpu(struct wait_queue_head *wq_head, unsigned int mode, void *key); void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); -void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head, - unsigned int mode, void *key, wait_queue_entry_t *bookmark); void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); void __wake_up_locked_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr); @@ -237,6 +235,8 @@ void __wake_up_pollfree(struct wait_queue_head *wq_head); #define key_to_poll(m) ((__force __poll_t)(uintptr_t)(void *)(m)) #define wake_up_poll(x, m) \ __wake_up(x, TASK_NORMAL, 1, poll_to_key(m)) +#define wake_up_poll_on_current_cpu(x, m) \ + __wake_up_on_current_cpu(x, TASK_NORMAL, poll_to_key(m)) #define wake_up_locked_poll(x, m) \ __wake_up_locked_key((x), TASK_NORMAL, poll_to_key(m)) #define wake_up_interruptible_poll(x, m) \ diff --git a/include/linux/watch_queue.h b/include/linux/watch_queue.h index 45cd42f55d49..429c7b6afead 100644 --- a/include/linux/watch_queue.h +++ b/include/linux/watch_queue.h @@ -32,7 +32,7 @@ struct watch_filter { DECLARE_BITMAP(type_filter, WATCH_TYPE__NR); }; u32 nr_filters; /* Number of filters */ - struct watch_type_filter filters[]; + struct watch_type_filter filters[] __counted_by(nr_filters); }; struct watch_queue { diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 683efe29fa69..24b1e5070f4d 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -125,6 +125,17 @@ struct rcu_work { struct workqueue_struct *wq; }; +enum wq_affn_scope { + WQ_AFFN_DFL, /* use system default */ + WQ_AFFN_CPU, /* one pod per CPU */ + WQ_AFFN_SMT, /* one pod poer SMT */ + WQ_AFFN_CACHE, /* one pod per LLC */ + WQ_AFFN_NUMA, /* one pod per NUMA node */ + WQ_AFFN_SYSTEM, /* one pod across the whole system */ + + WQ_AFFN_NR_TYPES, +}; + /** * struct workqueue_attrs - A struct for workqueue attributes. * @@ -138,17 +149,58 @@ struct workqueue_attrs { /** * @cpumask: allowed CPUs + * + * Work items in this workqueue are affine to these CPUs and not allowed + * to execute on other CPUs. A pool serving a workqueue must have the + * same @cpumask. */ cpumask_var_t cpumask; /** - * @no_numa: disable NUMA affinity + * @__pod_cpumask: internal attribute used to create per-pod pools + * + * Internal use only. + * + * Per-pod unbound worker pools are used to improve locality. Always a + * subset of ->cpumask. A workqueue can be associated with multiple + * worker pools with disjoint @__pod_cpumask's. Whether the enforcement + * of a pool's @__pod_cpumask is strict depends on @affn_strict. + */ + cpumask_var_t __pod_cpumask; + + /** + * @affn_strict: affinity scope is strict + * + * If clear, workqueue will make a best-effort attempt at starting the + * worker inside @__pod_cpumask but the scheduler is free to migrate it + * outside. + * + * If set, workers are only allowed to run inside @__pod_cpumask. + */ + bool affn_strict; + + /* + * Below fields aren't properties of a worker_pool. They only modify how + * :c:func:`apply_workqueue_attrs` select pools and thus don't + * participate in pool hash calculations or equality comparisons. + */ + + /** + * @affn_scope: unbound CPU affinity scope * - * Unlike other fields, ``no_numa`` isn't a property of a worker_pool. It - * only modifies how :c:func:`apply_workqueue_attrs` select pools and thus - * doesn't participate in pool hash calculations or equality comparisons. + * CPU pods are used to improve execution locality of unbound work + * items. There are multiple pod types, one for each wq_affn_scope, and + * every CPU in the system belongs to one pod in every pod type. CPUs + * that belong to the same pod share the worker pool. For example, + * selecting %WQ_AFFN_NUMA makes the workqueue use a separate worker + * pool for each NUMA node. */ - bool no_numa; + enum wq_affn_scope affn_scope; + + /** + * @ordered: work items must be executed one by one in queueing order + */ + bool ordered; }; static inline struct delayed_work *to_delayed_work(struct work_struct *work) @@ -222,18 +274,16 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } * to generate better code. */ #ifdef CONFIG_LOCKDEP -#define __INIT_WORK(_work, _func, _onstack) \ +#define __INIT_WORK_KEY(_work, _func, _onstack, _key) \ do { \ - static struct lock_class_key __key; \ - \ __init_work((_work), _onstack); \ (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ - lockdep_init_map(&(_work)->lockdep_map, "(work_completion)"#_work, &__key, 0); \ + lockdep_init_map(&(_work)->lockdep_map, "(work_completion)"#_work, (_key), 0); \ INIT_LIST_HEAD(&(_work)->entry); \ (_work)->func = (_func); \ } while (0) #else -#define __INIT_WORK(_work, _func, _onstack) \ +#define __INIT_WORK_KEY(_work, _func, _onstack, _key) \ do { \ __init_work((_work), _onstack); \ (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ @@ -242,12 +292,22 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } } while (0) #endif +#define __INIT_WORK(_work, _func, _onstack) \ + do { \ + static __maybe_unused struct lock_class_key __key; \ + \ + __INIT_WORK_KEY(_work, _func, _onstack, &__key); \ + } while (0) + #define INIT_WORK(_work, _func) \ __INIT_WORK((_work), (_func), 0) #define INIT_WORK_ONSTACK(_work, _func) \ __INIT_WORK((_work), (_func), 1) +#define INIT_WORK_ONSTACK_KEY(_work, _func, _key) \ + __INIT_WORK_KEY((_work), (_func), 1, _key) + #define __INIT_DELAYED_WORK(_work, _func, _tflags) \ do { \ INIT_WORK(&(_work)->work, (_func)); \ @@ -343,14 +403,10 @@ enum { __WQ_ORDERED_EXPLICIT = 1 << 19, /* internal: alloc_ordered_workqueue() */ WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ - WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */ + WQ_UNBOUND_MAX_ACTIVE = WQ_MAX_ACTIVE, WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2, }; -/* unbound wq's aren't per-cpu, scale max_active according to #cpus */ -#define WQ_UNBOUND_MAX_ACTIVE \ - max_t(int, WQ_MAX_ACTIVE, num_possible_cpus() * WQ_MAX_UNBOUND_PER_CPU) - /* * System-wide workqueues which are always present. * @@ -391,7 +447,7 @@ extern struct workqueue_struct *system_freezable_power_efficient_wq; * alloc_workqueue - allocate a workqueue * @fmt: printf format for the name of the workqueue * @flags: WQ_* flags - * @max_active: max in-flight work items, 0 for default + * @max_active: max in-flight work items per CPU, 0 for default * remaining args: args for @fmt * * Allocate a workqueue with the specified parameters. For detailed @@ -569,6 +625,7 @@ static inline bool schedule_work(struct work_struct *work) /* * Detect attempt to flush system-wide workqueues at compile time when possible. + * Warn attempt to flush system-wide workqueues at runtime. * * See https://lkml.kernel.org/r/49925af7-78a8-a3dd-bce6-cfc02e1a9236@I-love.SAKURA.ne.jp * for reasons and steps for converting system-wide workqueues into local workqueues. @@ -576,52 +633,13 @@ static inline bool schedule_work(struct work_struct *work) extern void __warn_flushing_systemwide_wq(void) __compiletime_warning("Please avoid flushing system-wide workqueues."); -/** - * flush_scheduled_work - ensure that any scheduled work has run to completion. - * - * Forces execution of the kernel-global workqueue and blocks until its - * completion. - * - * It's very easy to get into trouble if you don't take great care. - * Either of the following situations will lead to deadlock: - * - * One of the work items currently on the workqueue needs to acquire - * a lock held by your code or its caller. - * - * Your code is running in the context of a work routine. - * - * They will be detected by lockdep when they occur, but the first might not - * occur very often. It depends on what work items are on the workqueue and - * what locks they need, which you have no control over. - * - * In most situations flushing the entire workqueue is overkill; you merely - * need to know that a particular work item isn't queued and isn't running. - * In such cases you should use cancel_delayed_work_sync() or - * cancel_work_sync() instead. - * - * Please stop calling this function! A conversion to stop flushing system-wide - * workqueues is in progress. This function will be removed after all in-tree - * users stopped calling this function. - */ -/* - * The background of commit 771c035372a036f8 ("deprecate the - * '__deprecated' attribute warnings entirely and for good") is that, - * since Linus builds all modules between every single pull he does, - * the standard kernel build needs to be _clean_ in order to be able to - * notice when new problems happen. Therefore, don't emit warning while - * there are in-tree users. - */ +/* Please stop using this function, for this function will be removed in near future. */ #define flush_scheduled_work() \ ({ \ - if (0) \ - __warn_flushing_systemwide_wq(); \ + __warn_flushing_systemwide_wq(); \ __flush_workqueue(system_wq); \ }) -/* - * Although there is no longer in-tree caller, for now just emit warning - * in order to give out-of-tree callers time to update. - */ #define flush_workqueue(wq) \ ({ \ struct workqueue_struct *_wq = (wq); \ @@ -683,8 +701,32 @@ static inline long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg) return fn(arg); } #else -long work_on_cpu(int cpu, long (*fn)(void *), void *arg); -long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg); +long work_on_cpu_key(int cpu, long (*fn)(void *), + void *arg, struct lock_class_key *key); +/* + * A new key is defined for each caller to make sure the work + * associated with the function doesn't share its locking class. + */ +#define work_on_cpu(_cpu, _fn, _arg) \ +({ \ + static struct lock_class_key __key; \ + \ + work_on_cpu_key(_cpu, _fn, _arg, &__key); \ +}) + +long work_on_cpu_safe_key(int cpu, long (*fn)(void *), + void *arg, struct lock_class_key *key); + +/* + * A new key is defined for each caller to make sure the work + * associated with the function doesn't share its locking class. + */ +#define work_on_cpu_safe(_cpu, _fn, _arg) \ +({ \ + static struct lock_class_key __key; \ + \ + work_on_cpu_safe_key(_cpu, _fn, _arg, &__key); \ +}) #endif /* CONFIG_SMP */ #ifdef CONFIG_FREEZER @@ -714,5 +756,6 @@ int workqueue_offline_cpu(unsigned int cpu); void __init workqueue_init_early(void); void __init workqueue_init(void); +void __init workqueue_init_topology(void); #endif diff --git a/include/linux/writeback.h b/include/linux/writeback.h index fba937999fbf..083387c00f0c 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -375,11 +375,6 @@ void tag_pages_for_writeback(struct address_space *mapping, pgoff_t start, pgoff_t end); bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio); -void folio_account_redirty(struct folio *folio); -static inline void account_page_redirty(struct page *page) -{ - folio_account_redirty(page_folio(page)); -} bool folio_redirty_for_writepage(struct writeback_control *, struct folio *); bool redirty_page_for_writepage(struct writeback_control *, struct page *); diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 741703b45f61..cb571dfcf4b1 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -856,6 +856,9 @@ static inline int __must_check xa_insert_irq(struct xarray *xa, * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * + * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set + * in xa_init_flags(). + * * Context: Any context. Takes and releases the xa_lock. May sleep if * the @gfp flags permit. * Return: 0 on success, -ENOMEM if memory could not be allocated or @@ -886,6 +889,9 @@ static inline __must_check int xa_alloc(struct xarray *xa, u32 *id, * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * + * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set + * in xa_init_flags(). + * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. May sleep if the @gfp flags permit. * Return: 0 on success, -ENOMEM if memory could not be allocated or @@ -916,6 +922,9 @@ static inline int __must_check xa_alloc_bh(struct xarray *xa, u32 *id, * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * + * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set + * in xa_init_flags(). + * * Context: Process context. Takes and releases the xa_lock while * disabling interrupts. May sleep if the @gfp flags permit. * Return: 0 on success, -ENOMEM if memory could not be allocated or @@ -949,6 +958,9 @@ static inline int __must_check xa_alloc_irq(struct xarray *xa, u32 *id, * The search for an empty entry will start at @next and will wrap * around if necessary. * + * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set + * in xa_init_flags(). + * * Context: Any context. Takes and releases the xa_lock. May sleep if * the @gfp flags permit. * Return: 0 if the allocation succeeded without wrapping. 1 if the @@ -983,6 +995,9 @@ static inline int xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, * The search for an empty entry will start at @next and will wrap * around if necessary. * + * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set + * in xa_init_flags(). + * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. May sleep if the @gfp flags permit. * Return: 0 if the allocation succeeded without wrapping. 1 if the @@ -1017,6 +1032,9 @@ static inline int xa_alloc_cyclic_bh(struct xarray *xa, u32 *id, void *entry, * The search for an empty entry will start at @next and will wrap * around if necessary. * + * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set + * in xa_init_flags(). + * * Context: Process context. Takes and releases the xa_lock while * disabling interrupts. May sleep if the @gfp flags permit. * Return: 0 if the allocation succeeded without wrapping. 1 if the diff --git a/include/linux/xattr.h b/include/linux/xattr.h index d591ef59aa98..d20051865800 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -114,13 +114,15 @@ struct simple_xattr { }; void simple_xattrs_init(struct simple_xattrs *xattrs); -void simple_xattrs_free(struct simple_xattrs *xattrs); +void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space); +size_t simple_xattr_space(const char *name, size_t size); struct simple_xattr *simple_xattr_alloc(const void *value, size_t size); +void simple_xattr_free(struct simple_xattr *xattr); int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, void *buffer, size_t size); -int simple_xattr_set(struct simple_xattrs *xattrs, const char *name, - const void *value, size_t size, int flags, - ssize_t *removed_size); +struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs, + const char *name, const void *value, + size_t size, int flags); ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, char *buffer, size_t size); void simple_xattr_add(struct simple_xattrs *xattrs, diff --git a/include/linux/zswap.h b/include/linux/zswap.h new file mode 100644 index 000000000000..2a60ce39cfde --- /dev/null +++ b/include/linux/zswap.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_ZSWAP_H +#define _LINUX_ZSWAP_H + +#include <linux/types.h> +#include <linux/mm_types.h> + +extern u64 zswap_pool_total_size; +extern atomic_t zswap_stored_pages; + +#ifdef CONFIG_ZSWAP + +bool zswap_store(struct folio *folio); +bool zswap_load(struct folio *folio); +void zswap_invalidate(int type, pgoff_t offset); +void zswap_swapon(int type); +void zswap_swapoff(int type); + +#else + +static inline bool zswap_store(struct folio *folio) +{ + return false; +} + +static inline bool zswap_load(struct folio *folio) +{ + return false; +} + +static inline void zswap_invalidate(int type, pgoff_t offset) {} +static inline void zswap_swapon(int type) {} +static inline void zswap_swapoff(int type) {} + +#endif + +#endif /* _LINUX_ZSWAP_H */ |