diff options
1014 files changed, 15759 insertions, 8337 deletions
@@ -553,6 +553,7 @@ Senthilkumar N L <quic_snlakshm@quicinc.com> <snlakshm@codeaurora.org> Serge Hallyn <sergeh@kernel.org> <serge.hallyn@canonical.com> Serge Hallyn <sergeh@kernel.org> <serue@us.ibm.com> Seth Forshee <sforshee@kernel.org> <seth.forshee@canonical.com> +Shakeel Butt <shakeel.butt@linux.dev> <shakeelb@google.com> Shannon Nelson <shannon.nelson@amd.com> <snelson@pensando.io> Shannon Nelson <shannon.nelson@amd.com> <shannon.nelson@intel.com> Shannon Nelson <shannon.nelson@amd.com> <shannon.nelson@oracle.com> diff --git a/Documentation/arch/x86/mds.rst b/Documentation/arch/x86/mds.rst index e73fdff62c0a..c58c72362911 100644 --- a/Documentation/arch/x86/mds.rst +++ b/Documentation/arch/x86/mds.rst @@ -95,6 +95,9 @@ The kernel provides a function to invoke the buffer clearing: mds_clear_cpu_buffers() +Also macro CLEAR_CPU_BUFFERS can be used in ASM late in exit-to-user path. +Other than CFLAGS.ZF, this macro doesn't clobber any registers. + The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state (idle) transitions. @@ -138,17 +141,30 @@ Mitigation points When transitioning from kernel to user space the CPU buffers are flushed on affected CPUs when the mitigation is not disabled on the kernel - command line. The migitation is enabled through the static key - mds_user_clear. - - The mitigation is invoked in prepare_exit_to_usermode() which covers - all but one of the kernel to user space transitions. The exception - is when we return from a Non Maskable Interrupt (NMI), which is - handled directly in do_nmi(). - - (The reason that NMI is special is that prepare_exit_to_usermode() can - enable IRQs. In NMI context, NMIs are blocked, and we don't want to - enable IRQs with NMIs blocked.) + command line. The mitigation is enabled through the feature flag + X86_FEATURE_CLEAR_CPU_BUF. + + The mitigation is invoked just before transitioning to userspace after + user registers are restored. This is done to minimize the window in + which kernel data could be accessed after VERW e.g. via an NMI after + VERW. + + **Corner case not handled** + Interrupts returning to kernel don't clear CPUs buffers since the + exit-to-user path is expected to do that anyways. But, there could be + a case when an NMI is generated in kernel after the exit-to-user path + has cleared the buffers. This case is not handled and NMI returning to + kernel don't clear CPU buffers because: + + 1. It is rare to get an NMI after VERW, but before returning to userspace. + 2. For an unprivileged user, there is no known way to make that NMI + less rare or target it. + 3. It would take a large number of these precisely-timed NMIs to mount + an actual attack. There's presumably not enough bandwidth. + 4. The NMI in question occurs after a VERW, i.e. when user state is + restored and most interesting data is already scrubbed. Whats left + is only the data that NMI touches, and that may or may not be of + any interest. 2. C-State transition diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst index 7985c6615f3c..a8f5782bd833 100644 --- a/Documentation/bpf/kfuncs.rst +++ b/Documentation/bpf/kfuncs.rst @@ -177,10 +177,10 @@ In addition to kfuncs' arguments, verifier may need more information about the type of kfunc(s) being registered with the BPF subsystem. To do so, we define flags on a set of kfuncs as follows:: - BTF_SET8_START(bpf_task_set) + BTF_KFUNCS_START(bpf_task_set) BTF_ID_FLAGS(func, bpf_get_task_pid, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_put_pid, KF_RELEASE) - BTF_SET8_END(bpf_task_set) + BTF_KFUNCS_END(bpf_task_set) This set encodes the BTF ID of each kfunc listed above, and encodes the flags along with it. Ofcourse, it is also allowed to specify no flags. @@ -347,10 +347,10 @@ Once the kfunc is prepared for use, the final step to making it visible is registering it with the BPF subsystem. Registration is done per BPF program type. An example is shown below:: - BTF_SET8_START(bpf_task_set) + BTF_KFUNCS_START(bpf_task_set) BTF_ID_FLAGS(func, bpf_get_task_pid, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_put_pid, KF_RELEASE) - BTF_SET8_END(bpf_task_set) + BTF_KFUNCS_END(bpf_task_set) static const struct btf_kfunc_id_set bpf_task_kfunc_set = { .owner = THIS_MODULE, diff --git a/Documentation/bpf/map_lpm_trie.rst b/Documentation/bpf/map_lpm_trie.rst index 74d64a30f500..f9cd579496c9 100644 --- a/Documentation/bpf/map_lpm_trie.rst +++ b/Documentation/bpf/map_lpm_trie.rst @@ -17,7 +17,7 @@ significant byte. LPM tries may be created with a maximum prefix length that is a multiple of 8, in the range from 8 to 2048. The key used for lookup and update -operations is a ``struct bpf_lpm_trie_key``, extended by +operations is a ``struct bpf_lpm_trie_key_u8``, extended by ``max_prefixlen/8`` bytes. - For IPv4 addresses the data length is 4 bytes diff --git a/Documentation/bpf/standardization/instruction-set.rst b/Documentation/bpf/standardization/instruction-set.rst index af43227b6ee4..f3269d6dd5e0 100644 --- a/Documentation/bpf/standardization/instruction-set.rst +++ b/Documentation/bpf/standardization/instruction-set.rst @@ -1,11 +1,11 @@ .. contents:: .. sectnum:: -======================================= -BPF Instruction Set Specification, v1.0 -======================================= +====================================== +BPF Instruction Set Architecture (ISA) +====================================== -This document specifies version 1.0 of the BPF instruction set. +This document specifies the BPF instruction set architecture (ISA). Documentation conventions ========================= @@ -102,7 +102,7 @@ Conformance groups An implementation does not need to support all instructions specified in this document (e.g., deprecated instructions). Instead, a number of conformance -groups are specified. An implementation must support the "basic" conformance +groups are specified. An implementation must support the base32 conformance group and may support additional conformance groups, where supporting a conformance group means it must support all instructions in that conformance group. @@ -112,12 +112,22 @@ that executes instructions, and tools as such compilers that generate instructions for the runtime. Thus, capability discovery in terms of conformance groups might be done manually by users or automatically by tools. -Each conformance group has a short ASCII label (e.g., "basic") that +Each conformance group has a short ASCII label (e.g., "base32") that corresponds to a set of instructions that are mandatory. That is, each instruction has one or more conformance groups of which it is a member. -The "basic" conformance group includes all instructions defined in this -specification unless otherwise noted. +This document defines the following conformance groups: + +* base32: includes all instructions defined in this + specification unless otherwise noted. +* base64: includes base32, plus instructions explicitly noted + as being in the base64 conformance group. +* atomic32: includes 32-bit atomic operation instructions (see `Atomic operations`_). +* atomic64: includes atomic32, plus 64-bit atomic operation instructions. +* divmul32: includes 32-bit division, multiplication, and modulo instructions. +* divmul64: includes divmul32, plus 64-bit division, multiplication, + and modulo instructions. +* legacy: deprecated packet access instructions. Instruction encoding ==================== @@ -166,10 +176,10 @@ Note that most instructions do not use all of the fields. Unused fields shall be cleared to zero. As discussed below in `64-bit immediate instructions`_, a 64-bit immediate -instruction uses a 64-bit immediate value that is constructed as follows. +instruction uses two 32-bit immediate values that are constructed as follows. The 64 bits following the basic instruction contain a pseudo instruction -using the same format but with opcode, dst_reg, src_reg, and offset all set to zero, -and imm containing the high 32 bits of the immediate value. +using the same format but with 'opcode', 'dst_reg', 'src_reg', and 'offset' all +set to zero, and imm containing the high 32 bits of the immediate value. This is depicted in the following figure:: @@ -181,13 +191,8 @@ This is depicted in the following figure:: '--------------' pseudo instruction -Thus the 64-bit immediate value is constructed as follows: - - imm64 = (next_imm << 32) | imm - -where 'next_imm' refers to the imm value of the pseudo instruction -following the basic instruction. The unused bytes in the pseudo -instruction are reserved and shall be cleared to zero. +Here, the imm value of the pseudo instruction is called 'next_imm'. The unused +bytes in the pseudo instruction are reserved and shall be cleared to zero. Instruction classes ------------------- @@ -239,7 +244,8 @@ Arithmetic instructions ----------------------- ``BPF_ALU`` uses 32-bit wide operands while ``BPF_ALU64`` uses 64-bit wide operands for -otherwise identical operations. +otherwise identical operations. ``BPF_ALU64`` instructions belong to the +base64 conformance group unless noted otherwise. The 'code' field encodes the operation as below, where 'src' and 'dst' refer to the values of the source and destination registers, respectively. @@ -284,15 +290,19 @@ where '(u32)' indicates that the upper 32 bits are zeroed. ``BPF_XOR | BPF_K | BPF_ALU`` means:: - dst = (u32) dst ^ (u32) imm32 + dst = (u32) dst ^ (u32) imm ``BPF_XOR | BPF_K | BPF_ALU64`` means:: - dst = dst ^ imm32 + dst = dst ^ imm Note that most instructions have instruction offset of 0. Only three instructions (``BPF_SDIV``, ``BPF_SMOD``, ``BPF_MOVSX``) have a non-zero offset. +Division, multiplication, and modulo operations for ``BPF_ALU`` are part +of the "divmul32" conformance group, and division, multiplication, and +modulo operations for ``BPF_ALU64`` are part of the "divmul64" conformance +group. The division and modulo operations support both unsigned and signed flavors. For unsigned operations (``BPF_DIV`` and ``BPF_MOD``), for ``BPF_ALU``, @@ -349,7 +359,9 @@ BPF_ALU64 Reserved 0x00 do byte swap unconditionally ========= ========= ===== ================================================= The 'imm' field encodes the width of the swap operations. The following widths -are supported: 16, 32 and 64. +are supported: 16, 32 and 64. Width 64 operations belong to the base64 +conformance group and other swap operations belong to the base32 +conformance group. Examples: @@ -374,31 +386,33 @@ Examples: Jump instructions ----------------- -``BPF_JMP32`` uses 32-bit wide operands while ``BPF_JMP`` uses 64-bit wide operands for -otherwise identical operations. +``BPF_JMP32`` uses 32-bit wide operands and indicates the base32 +conformance group, while ``BPF_JMP`` uses 64-bit wide operands for +otherwise identical operations, and indicates the base64 conformance +group unless otherwise specified. The 'code' field encodes the operation as below: -======== ===== === =============================== ============================================= -code value src description notes -======== ===== === =============================== ============================================= -BPF_JA 0x0 0x0 PC += offset BPF_JMP | BPF_K only -BPF_JA 0x0 0x0 PC += imm BPF_JMP32 | BPF_K only -BPF_JEQ 0x1 any PC += offset if dst == src -BPF_JGT 0x2 any PC += offset if dst > src unsigned -BPF_JGE 0x3 any PC += offset if dst >= src unsigned -BPF_JSET 0x4 any PC += offset if dst & src -BPF_JNE 0x5 any PC += offset if dst != src -BPF_JSGT 0x6 any PC += offset if dst > src signed -BPF_JSGE 0x7 any PC += offset if dst >= src signed -BPF_CALL 0x8 0x0 call helper function by address BPF_JMP | BPF_K only, see `Helper functions`_ -BPF_CALL 0x8 0x1 call PC += imm BPF_JMP | BPF_K only, see `Program-local functions`_ -BPF_CALL 0x8 0x2 call helper function by BTF ID BPF_JMP | BPF_K only, see `Helper functions`_ -BPF_EXIT 0x9 0x0 return BPF_JMP | BPF_K only -BPF_JLT 0xa any PC += offset if dst < src unsigned -BPF_JLE 0xb any PC += offset if dst <= src unsigned -BPF_JSLT 0xc any PC += offset if dst < src signed -BPF_JSLE 0xd any PC += offset if dst <= src signed -======== ===== === =============================== ============================================= +======== ===== ======= =============================== ============================================= +code value src_reg description notes +======== ===== ======= =============================== ============================================= +BPF_JA 0x0 0x0 PC += offset BPF_JMP | BPF_K only +BPF_JA 0x0 0x0 PC += imm BPF_JMP32 | BPF_K only +BPF_JEQ 0x1 any PC += offset if dst == src +BPF_JGT 0x2 any PC += offset if dst > src unsigned +BPF_JGE 0x3 any PC += offset if dst >= src unsigned +BPF_JSET 0x4 any PC += offset if dst & src +BPF_JNE 0x5 any PC += offset if dst != src +BPF_JSGT 0x6 any PC += offset if dst > src signed +BPF_JSGE 0x7 any PC += offset if dst >= src signed +BPF_CALL 0x8 0x0 call helper function by address BPF_JMP | BPF_K only, see `Helper functions`_ +BPF_CALL 0x8 0x1 call PC += imm BPF_JMP | BPF_K only, see `Program-local functions`_ +BPF_CALL 0x8 0x2 call helper function by BTF ID BPF_JMP | BPF_K only, see `Helper functions`_ +BPF_EXIT 0x9 0x0 return BPF_JMP | BPF_K only +BPF_JLT 0xa any PC += offset if dst < src unsigned +BPF_JLE 0xb any PC += offset if dst <= src unsigned +BPF_JSLT 0xc any PC += offset if dst < src signed +BPF_JSLE 0xd any PC += offset if dst <= src signed +======== ===== ======= =============================== ============================================= The BPF program needs to store the return value into register R0 before doing a ``BPF_EXIT``. @@ -424,6 +438,9 @@ specified by the 'imm' field. A > 16-bit conditional jump may be converted to a < 16-bit conditional jump plus a 32-bit unconditional jump. +All ``BPF_CALL`` and ``BPF_JA`` instructions belong to the +base32 conformance group. + Helper functions ~~~~~~~~~~~~~~~~ @@ -481,6 +498,8 @@ The size modifier is one of: BPF_DW 0x18 double word (8 bytes) ============= ===== ===================== +Instructions using ``BPF_DW`` belong to the base64 conformance group. + Regular load and store operations --------------------------------- @@ -493,7 +512,7 @@ instructions that transfer data between a register and memory. ``BPF_MEM | <size> | BPF_ST`` means:: - *(size *) (dst + offset) = imm32 + *(size *) (dst + offset) = imm ``BPF_MEM | <size> | BPF_LDX`` means:: @@ -525,8 +544,10 @@ by other BPF programs or means outside of this specification. All atomic operations supported by BPF are encoded as store operations that use the ``BPF_ATOMIC`` mode modifier as follows: -* ``BPF_ATOMIC | BPF_W | BPF_STX`` for 32-bit operations -* ``BPF_ATOMIC | BPF_DW | BPF_STX`` for 64-bit operations +* ``BPF_ATOMIC | BPF_W | BPF_STX`` for 32-bit operations, which are + part of the "atomic32" conformance group. +* ``BPF_ATOMIC | BPF_DW | BPF_STX`` for 64-bit operations, which are + part of the "atomic64" conformance group. * 8-bit and 16-bit wide atomic operations are not supported. The 'imm' field is used to encode the actual atomic operation. @@ -547,7 +568,7 @@ BPF_XOR 0xa0 atomic xor *(u32 *)(dst + offset) += src -``BPF_ATOMIC | BPF_DW | BPF_STX`` with 'imm' = BPF ADD means:: +``BPF_ATOMIC | BPF_DW | BPF_STX`` with 'imm' = BPF_ADD means:: *(u64 *)(dst + offset) += src @@ -580,24 +601,24 @@ and loaded back to ``R0``. ----------------------------- Instructions with the ``BPF_IMM`` 'mode' modifier use the wide instruction -encoding defined in `Instruction encoding`_, and use the 'src' field of the +encoding defined in `Instruction encoding`_, and use the 'src_reg' field of the basic instruction to hold an opcode subtype. The following table defines a set of ``BPF_IMM | BPF_DW | BPF_LD`` instructions -with opcode subtypes in the 'src' field, using new terms such as "map" +with opcode subtypes in the 'src_reg' field, using new terms such as "map" defined further below: -========================= ====== === ========================================= =========== ============== -opcode construction opcode src pseudocode imm type dst type -========================= ====== === ========================================= =========== ============== -BPF_IMM | BPF_DW | BPF_LD 0x18 0x0 dst = imm64 integer integer -BPF_IMM | BPF_DW | BPF_LD 0x18 0x1 dst = map_by_fd(imm) map fd map -BPF_IMM | BPF_DW | BPF_LD 0x18 0x2 dst = map_val(map_by_fd(imm)) + next_imm map fd data pointer -BPF_IMM | BPF_DW | BPF_LD 0x18 0x3 dst = var_addr(imm) variable id data pointer -BPF_IMM | BPF_DW | BPF_LD 0x18 0x4 dst = code_addr(imm) integer code pointer -BPF_IMM | BPF_DW | BPF_LD 0x18 0x5 dst = map_by_idx(imm) map index map -BPF_IMM | BPF_DW | BPF_LD 0x18 0x6 dst = map_val(map_by_idx(imm)) + next_imm map index data pointer -========================= ====== === ========================================= =========== ============== +========================= ====== ======= ========================================= =========== ============== +opcode construction opcode src_reg pseudocode imm type dst type +========================= ====== ======= ========================================= =========== ============== +BPF_IMM | BPF_DW | BPF_LD 0x18 0x0 dst = (next_imm << 32) | imm integer integer +BPF_IMM | BPF_DW | BPF_LD 0x18 0x1 dst = map_by_fd(imm) map fd map +BPF_IMM | BPF_DW | BPF_LD 0x18 0x2 dst = map_val(map_by_fd(imm)) + next_imm map fd data pointer +BPF_IMM | BPF_DW | BPF_LD 0x18 0x3 dst = var_addr(imm) variable id data pointer +BPF_IMM | BPF_DW | BPF_LD 0x18 0x4 dst = code_addr(imm) integer code pointer +BPF_IMM | BPF_DW | BPF_LD 0x18 0x5 dst = map_by_idx(imm) map index map +BPF_IMM | BPF_DW | BPF_LD 0x18 0x6 dst = map_val(map_by_idx(imm)) + next_imm map index data pointer +========================= ====== ======= ========================================= =========== ============== where @@ -635,7 +656,9 @@ Legacy BPF Packet access instructions ------------------------------------- BPF previously introduced special instructions for access to packet data that were -carried over from classic BPF. However, these instructions are -deprecated and should no longer be used. All legacy packet access -instructions belong to the "legacy" conformance group instead of the "basic" -conformance group. +carried over from classic BPF. These instructions used an instruction +class of BPF_LD, a size modifier of BPF_W, BPF_H, or BPF_B, and a +mode modifier of BPF_ABS or BPF_IND. The 'dst_reg' and 'offset' fields were +set to zero, and 'src_reg' was set to zero for BPF_ABS. However, these +instructions are deprecated and should no longer be used. All legacy packet +access instructions belong to the "legacy" conformance group. diff --git a/Documentation/conf.py b/Documentation/conf.py index 5830b01c5642..da64c9fb7e07 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -388,6 +388,12 @@ latex_elements = { verbatimhintsturnover=false, ''', + # + # Some of our authors are fond of deep nesting; tell latex to + # cope. + # + 'maxlistdepth': '10', + # For CJK One-half spacing, need to be in front of hyperref 'extrapackages': r'\usepackage{setspace}', diff --git a/Documentation/devicetree/bindings/clock/google,gs101-clock.yaml b/Documentation/devicetree/bindings/clock/google,gs101-clock.yaml index 3eebc03a309b..ca7fdada3ff2 100644 --- a/Documentation/devicetree/bindings/clock/google,gs101-clock.yaml +++ b/Documentation/devicetree/bindings/clock/google,gs101-clock.yaml @@ -85,8 +85,8 @@ allOf: clock-names: items: - - const: dout_cmu_misc_bus - - const: dout_cmu_misc_sss + - const: bus + - const: sss additionalProperties: false diff --git a/Documentation/devicetree/bindings/leds/leds-pwm-multicolor.yaml b/Documentation/devicetree/bindings/leds/leds-pwm-multicolor.yaml index 5edfbe347341..a31a202afe5c 100644 --- a/Documentation/devicetree/bindings/leds/leds-pwm-multicolor.yaml +++ b/Documentation/devicetree/bindings/leds/leds-pwm-multicolor.yaml @@ -41,6 +41,8 @@ properties: pwm-names: true + active-low: true + color: true required: diff --git a/Documentation/devicetree/bindings/net/brcm,asp-v2.0.yaml b/Documentation/devicetree/bindings/net/brcm,asp-v2.0.yaml index 75d8138298fb..660e2ca42daf 100644 --- a/Documentation/devicetree/bindings/net/brcm,asp-v2.0.yaml +++ b/Documentation/devicetree/bindings/net/brcm,asp-v2.0.yaml @@ -17,6 +17,10 @@ properties: oneOf: - items: - enum: + - brcm,bcm74165b0-asp + - const: brcm,asp-v2.2 + - items: + - enum: - brcm,bcm74165-asp - const: brcm,asp-v2.1 - items: diff --git a/Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml b/Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml index 6684810fcbf0..23dfe0838dca 100644 --- a/Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml +++ b/Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml @@ -24,6 +24,7 @@ properties: - brcm,genet-mdio-v5 - brcm,asp-v2.0-mdio - brcm,asp-v2.1-mdio + - brcm,asp-v2.2-mdio - brcm,unimac-mdio reg: diff --git a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml index 890f7858d0dc..de7ba7f345a9 100644 --- a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml +++ b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml @@ -46,6 +46,7 @@ properties: - enum: - renesas,etheravb-r8a779a0 # R-Car V3U - renesas,etheravb-r8a779g0 # R-Car V4H + - renesas,etheravb-r8a779h0 # R-Car V4M - const: renesas,etheravb-rcar-gen4 # R-Car Gen4 - items: diff --git a/Documentation/devicetree/bindings/net/renesas,ethertsn.yaml b/Documentation/devicetree/bindings/net/renesas,ethertsn.yaml index 475aff7714d6..ea35d19be829 100644 --- a/Documentation/devicetree/bindings/net/renesas,ethertsn.yaml +++ b/Documentation/devicetree/bindings/net/renesas,ethertsn.yaml @@ -65,9 +65,11 @@ properties: rx-internal-delay-ps: enum: [0, 1800] + default: 0 tx-internal-delay-ps: enum: [0, 2000] + default: 0 '#address-cells': const: 1 diff --git a/Documentation/devicetree/bindings/net/ti,dp83822.yaml b/Documentation/devicetree/bindings/net/ti,dp83822.yaml index 8f23254c0458..784866ea392b 100644 --- a/Documentation/devicetree/bindings/net/ti,dp83822.yaml +++ b/Documentation/devicetree/bindings/net/ti,dp83822.yaml @@ -84,10 +84,10 @@ properties: description: | If present, select the RMII operation mode. Two modes are available: - - RMII master, where the PHY operates from a 25MHz clock reference, - provided by a crystal or a CMOS-level oscillator - - RMII slave, where the PHY operates from a 50MHz clock reference, - provided by a CMOS-level oscillator + - RMII master, where the PHY outputs a 50MHz reference clock which can + be connected to the MAC. + - RMII slave, where the PHY expects a 50MHz reference clock input + shared with the MAC. The RMII operation mode can also be configured by its straps. If the strap pin is not set correctly or not set at all, then this can be used to configure it. diff --git a/Documentation/driver-api/dpll.rst b/Documentation/driver-api/dpll.rst index e3d593841aa7..ea8d16600e16 100644 --- a/Documentation/driver-api/dpll.rst +++ b/Documentation/driver-api/dpll.rst @@ -545,7 +545,7 @@ In such scenario, dpll device input signal shall be also configurable to drive dpll with signal recovered from the PHY netdevice. This is done by exposing a pin to the netdevice - attaching pin to the netdevice itself with -``netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin)``. +``dpll_netdev_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin)``. Exposed pin id handle ``DPLL_A_PIN_ID`` is then identifiable by the user as it is attached to rtnetlink respond to get ``RTM_NEWLINK`` command in nested attribute ``IFLA_DPLL_PIN``. diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml index c58f7153fcf8..3ebd50d78820 100644 --- a/Documentation/netlink/genetlink-c.yaml +++ b/Documentation/netlink/genetlink-c.yaml @@ -126,8 +126,9 @@ properties: Prefix for the C enum name of the attributes. Default family[name]-set[name]-a- type: string enum-name: - description: Name for the enum type of the attribute. - type: string + description: | + Name for the enum type of the attribute, if empty no name will be used. + type: [ string, "null" ] doc: description: Documentation of the space. type: string @@ -261,14 +262,16 @@ properties: the prefix with the upper case name of the command, with dashes replaced by underscores. type: string enum-name: - description: Name for the enum type with commands. - type: string + description: | + Name for the enum type with commands, if empty no name will be used. + type: [ string, "null" ] async-prefix: description: Same as name-prefix but used to render notifications and events to separate enum. type: string async-enum: - description: Name for the enum type with notifications/events. - type: string + description: | + Name for the enum type with commands, if empty no name will be used. + type: [ string, "null" ] list: description: List of commands type: array diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml index 938703088306..1d3fe3637707 100644 --- a/Documentation/netlink/genetlink-legacy.yaml +++ b/Documentation/netlink/genetlink-legacy.yaml @@ -168,8 +168,9 @@ properties: Prefix for the C enum name of the attributes. Default family[name]-set[name]-a- type: string enum-name: - description: Name for the enum type of the attribute. - type: string + description: | + Name for the enum type of the attribute, if empty no name will be used. + type: [ string, "null" ] doc: description: Documentation of the space. type: string @@ -304,14 +305,16 @@ properties: the prefix with the upper case name of the command, with dashes replaced by underscores. type: string enum-name: - description: Name for the enum type with commands. - type: string + description: | + Name for the enum type with commands, if empty no name will be used. + type: [ string, "null" ] async-prefix: description: Same as name-prefix but used to render notifications and events to separate enum. type: string async-enum: - description: Name for the enum type with notifications/events. - type: string + description: | + Name for the enum type with commands, if empty no name will be used. + type: [ string, "null" ] # Start genetlink-legacy fixed-header: &fixed-header description: | diff --git a/Documentation/netlink/netlink-raw.yaml b/Documentation/netlink/netlink-raw.yaml index ac4e05415f2f..40fc8ab1ee44 100644 --- a/Documentation/netlink/netlink-raw.yaml +++ b/Documentation/netlink/netlink-raw.yaml @@ -189,8 +189,9 @@ properties: Prefix for the C enum name of the attributes. Default family[name]-set[name]-a- type: string enum-name: - description: Name for the enum type of the attribute. - type: string + description: | + Name for the enum type of the attribute, if empty no name will be used. + type: [ string, "null" ] doc: description: Documentation of the space. type: string @@ -371,14 +372,16 @@ properties: the prefix with the upper case name of the command, with dashes replaced by underscores. type: string enum-name: - description: Name for the enum type with commands. - type: string + description: | + Name for the enum type with commands, if empty no name will be used. + type: [ string, "null" ] async-prefix: description: Same as name-prefix but used to render notifications and events to separate enum. type: string async-enum: - description: Name for the enum type with notifications/events. - type: string + description: | + Name for the enum type with commands, if empty no name will be used. + type: [ string, "null" ] # Start genetlink-legacy fixed-header: &fixed-header description: | diff --git a/Documentation/netlink/specs/dpll.yaml b/Documentation/netlink/specs/dpll.yaml index 8dc1df5cfae7..95b0eb1486bf 100644 --- a/Documentation/netlink/specs/dpll.yaml +++ b/Documentation/netlink/specs/dpll.yaml @@ -312,6 +312,7 @@ attribute-sets: - name: capabilities type: u32 + enum: pin-capabilities - name: parent-device type: nest diff --git a/Documentation/netlink/specs/mptcp_pm.yaml b/Documentation/netlink/specs/mptcp_pm.yaml index 49f90cfb4698..af525ed29792 100644 --- a/Documentation/netlink/specs/mptcp_pm.yaml +++ b/Documentation/netlink/specs/mptcp_pm.yaml @@ -292,13 +292,14 @@ operations: - name: get-addr doc: Get endpoint information - attribute-set: endpoint + attribute-set: attr dont-validate: [ strict ] flags: [ uns-admin-perm ] do: &get-addr-attrs request: attributes: - addr + - token reply: attributes: - addr diff --git a/Documentation/netlink/specs/nlctrl.yaml b/Documentation/netlink/specs/nlctrl.yaml new file mode 100644 index 000000000000..b1632b95f725 --- /dev/null +++ b/Documentation/netlink/specs/nlctrl.yaml @@ -0,0 +1,206 @@ +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) + +name: nlctrl +protocol: genetlink-legacy +uapi-header: linux/genetlink.h + +doc: | + genetlink meta-family that exposes information about all genetlink + families registered in the kernel (including itself). + +definitions: + - + name: op-flags + type: flags + enum-name: + entries: + - admin-perm + - cmd-cap-do + - cmd-cap-dump + - cmd-cap-haspol + - uns-admin-perm + - + name: attr-type + enum-name: netlink-attribute-type + type: enum + entries: + - invalid + - flag + - u8 + - u16 + - u32 + - u64 + - s8 + - s16 + - s32 + - s64 + - binary + - string + - nul-string + - nested + - nested-array + - bitfield32 + - sint + - uint + +attribute-sets: + - + name: ctrl-attrs + name-prefix: ctrl-attr- + attributes: + - + name: family-id + type: u16 + - + name: family-name + type: string + - + name: version + type: u32 + - + name: hdrsize + type: u32 + - + name: maxattr + type: u32 + - + name: ops + type: array-nest + nested-attributes: op-attrs + - + name: mcast-groups + type: array-nest + nested-attributes: mcast-group-attrs + - + name: policy + type: nest-type-value + type-value: [ policy-id, attr-id ] + nested-attributes: policy-attrs + - + name: op-policy + type: nest-type-value + type-value: [ op-id ] + nested-attributes: op-policy-attrs + - + name: op + type: u32 + - + name: mcast-group-attrs + name-prefix: ctrl-attr-mcast-grp- + enum-name: + attributes: + - + name: name + type: string + - + name: id + type: u32 + - + name: op-attrs + name-prefix: ctrl-attr-op- + enum-name: + attributes: + - + name: id + type: u32 + - + name: flags + type: u32 + enum: op-flags + enum-as-flags: true + - + name: policy-attrs + name-prefix: nl-policy-type-attr- + enum-name: + attributes: + - + name: type + type: u32 + enum: attr-type + - + name: min-value-s + type: s64 + - + name: max-value-s + type: s64 + - + name: min-value-u + type: u64 + - + name: max-value-u + type: u64 + - + name: min-length + type: u32 + - + name: max-length + type: u32 + - + name: policy-idx + type: u32 + - + name: policy-maxtype + type: u32 + - + name: bitfield32-mask + type: u32 + - + name: mask + type: u64 + - + name: pad + type: pad + - + name: op-policy-attrs + name-prefix: ctrl-attr-policy- + enum-name: + attributes: + - + name: do + type: u32 + - + name: dump + type: u32 + +operations: + enum-model: directional + name-prefix: ctrl-cmd- + list: + - + name: getfamily + doc: Get / dump genetlink families + attribute-set: ctrl-attrs + do: + request: + value: 3 + attributes: + - family-name + reply: &all-attrs + value: 1 + attributes: + - family-id + - family-name + - hdrsize + - maxattr + - mcast-groups + - ops + - version + dump: + reply: *all-attrs + - + name: getpolicy + doc: Get / dump genetlink policies + attribute-set: ctrl-attrs + dump: + request: + value: 10 + attributes: + - family-name + - family-id + - op + reply: + value: 10 + attributes: + - family-id + - op-policy + - policy diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst index dceeb0d763aa..72da7057e4cf 100644 --- a/Documentation/networking/af_xdp.rst +++ b/Documentation/networking/af_xdp.rst @@ -329,23 +329,24 @@ XDP_SHARED_UMEM option and provide the initial socket's fd in the sxdp_shared_umem_fd field as you registered the UMEM on that socket. These two sockets will now share one and the same UMEM. -There is no need to supply an XDP program like the one in the previous -case where sockets were bound to the same queue id and -device. Instead, use the NIC's packet steering capabilities to steer -the packets to the right queue. In the previous example, there is only -one queue shared among sockets, so the NIC cannot do this steering. It -can only steer between queues. - -In libbpf, you need to use the xsk_socket__create_shared() API as it -takes a reference to a FILL ring and a COMPLETION ring that will be -created for you and bound to the shared UMEM. You can use this -function for all the sockets you create, or you can use it for the -second and following ones and use xsk_socket__create() for the first -one. Both methods yield the same result. +In this case, it is possible to use the NIC's packet steering +capabilities to steer the packets to the right queue. This is not +possible in the previous example as there is only one queue shared +among sockets, so the NIC cannot do this steering as it can only steer +between queues. + +In libxdp (or libbpf prior to version 1.0), you need to use the +xsk_socket__create_shared() API as it takes a reference to a FILL ring +and a COMPLETION ring that will be created for you and bound to the +shared UMEM. You can use this function for all the sockets you create, +or you can use it for the second and following ones and use +xsk_socket__create() for the first one. Both methods yield the same +result. Note that a UMEM can be shared between sockets on the same queue id and device, as well as between queues on the same device and between -devices at the same time. +devices at the same time. It is also possible to redirect to any +socket as long as it is bound to the same umem with XDP_SHARED_UMEM. XDP_USE_NEED_WAKEUP bind flag ----------------------------- @@ -822,6 +823,10 @@ A: The short answer is no, that is not supported at the moment. The switch, or other distribution mechanism, in your NIC to direct traffic to the correct queue id and socket. + Note that if you are using the XDP_SHARED_UMEM option, it is + possible to switch traffic between any socket bound to the same + umem. + Q: My packets are sometimes corrupted. What is wrong? A: Care has to be taken not to feed the same buffer in the UMEM into diff --git a/Documentation/networking/net_cachelines/inet_sock.rst b/Documentation/networking/net_cachelines/inet_sock.rst index a2babd0d7954..595d7ef5fc8b 100644 --- a/Documentation/networking/net_cachelines/inet_sock.rst +++ b/Documentation/networking/net_cachelines/inet_sock.rst @@ -1,9 +1,9 @@ .. SPDX-License-Identifier: GPL-2.0 .. Copyright (C) 2023 Google LLC -===================================================== -inet_connection_sock struct fast path usage breakdown -===================================================== +========================================== +inet_sock struct fast path usage breakdown +========================================== Type Name fastpath_tx_access fastpath_rx_access comment ..struct ..inet_sock diff --git a/Documentation/networking/sfp-phylink.rst b/Documentation/networking/sfp-phylink.rst index 8054d33f449f..5bf285d73e8a 100644 --- a/Documentation/networking/sfp-phylink.rst +++ b/Documentation/networking/sfp-phylink.rst @@ -231,16 +231,136 @@ this documentation. For further information on these methods, please see the inline documentation in :c:type:`struct phylink_mac_ops <phylink_mac_ops>`. -9. Remove calls to of_parse_phandle() for the PHY, - of_phy_register_fixed_link() for fixed links etc. from the probe - function, and replace with: +9. Fill-in the :c:type:`struct phylink_config <phylink_config>` fields with + a reference to the :c:type:`struct device <device>` associated to your + :c:type:`struct net_device <net_device>`: .. code-block:: c - struct phylink *phylink; priv->phylink_config.dev = &dev.dev; priv->phylink_config.type = PHYLINK_NETDEV; + Fill-in the various speeds, pause and duplex modes your MAC can handle: + + .. code-block:: c + + priv->phylink_config.mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 | MAC_1000FD; + +10. Some Ethernet controllers work in pair with a PCS (Physical Coding Sublayer) + block, that can handle among other things the encoding/decoding, link + establishment detection and autonegotiation. While some MACs have internal + PCS whose operation is transparent, some other require dedicated PCS + configuration for the link to become functional. In that case, phylink + provides a PCS abstraction through :c:type:`struct phylink_pcs <phylink_pcs>`. + + Identify if your driver has one or more internal PCS blocks, and/or if + your controller can use an external PCS block that might be internally + connected to your controller. + + If your controller doesn't have any internal PCS, you can go to step 11. + + If your Ethernet controller contains one or several PCS blocks, create + one :c:type:`struct phylink_pcs <phylink_pcs>` instance per PCS block within + your driver's private data structure: + + .. code-block:: c + + struct phylink_pcs pcs; + + Populate the relevant :c:type:`struct phylink_pcs_ops <phylink_pcs_ops>` to + configure your PCS. Create a :c:func:`pcs_get_state` function that reports + the inband link state, a :c:func:`pcs_config` function to configure your + PCS according to phylink-provided parameters, and a :c:func:`pcs_validate` + function that report to phylink all accepted configuration parameters for + your PCS: + + .. code-block:: c + + struct phylink_pcs_ops foo_pcs_ops = { + .pcs_validate = foo_pcs_validate, + .pcs_get_state = foo_pcs_get_state, + .pcs_config = foo_pcs_config, + }; + + Arrange for PCS link state interrupts to be forwarded into + phylink, via: + + .. code-block:: c + + phylink_pcs_change(pcs, link_is_up); + + where ``link_is_up`` is true if the link is currently up or false + otherwise. If a PCS is unable to provide these interrupts, then + it should set ``pcs->pcs_poll = true;`` when creating the PCS. + +11. If your controller relies on, or accepts the presence of an external PCS + controlled through its own driver, add a pointer to a phylink_pcs instance + in your driver private data structure: + + .. code-block:: c + + struct phylink_pcs *pcs; + + The way of getting an instance of the actual PCS depends on the platform, + some PCS sit on an MDIO bus and are grabbed by passing a pointer to the + corresponding :c:type:`struct mii_bus <mii_bus>` and the PCS's address on + that bus. In this example, we assume the controller attaches to a Lynx PCS + instance: + + .. code-block:: c + + priv->pcs = lynx_pcs_create_mdiodev(bus, 0); + + Some PCS can be recovered based on firmware information: + + .. code-block:: c + + priv->pcs = lynx_pcs_create_fwnode(of_fwnode_handle(node)); + +12. Populate the :c:func:`mac_select_pcs` callback and add it to your + :c:type:`struct phylink_mac_ops <phylink_mac_ops>` set of ops. This function + must return a pointer to the relevant :c:type:`struct phylink_pcs <phylink_pcs>` + that will be used for the requested link configuration: + + .. code-block:: c + + static struct phylink_pcs *foo_select_pcs(struct phylink_config *config, + phy_interface_t interface) + { + struct foo_priv *priv = container_of(config, struct foo_priv, + phylink_config); + + if ( /* 'interface' needs a PCS to function */ ) + return priv->pcs; + + return NULL; + } + + See :c:func:`mvpp2_select_pcs` for an example of a driver that has multiple + internal PCS. + +13. Fill-in all the :c:type:`phy_interface_t <phy_interface_t>` (i.e. all MAC to + PHY link modes) that your MAC can output. The following example shows a + configuration for a MAC that can handle all RGMII modes, SGMII and 1000BaseX. + You must adjust these according to what your MAC and all PCS associated + with this MAC are capable of, and not just the interface you wish to use: + + .. code-block:: c + + phy_interface_set_rgmii(priv->phylink_config.supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_SGMII, + priv->phylink_config.supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_1000BASEX, + priv->phylink_config.supported_interfaces); + +14. Remove calls to of_parse_phandle() for the PHY, + of_phy_register_fixed_link() for fixed links etc. from the probe + function, and replace with: + + .. code-block:: c + + struct phylink *phylink; + phylink = phylink_create(&priv->phylink_config, node, phy_mode, &phylink_ops); if (IS_ERR(phylink)) { err = PTR_ERR(phylink); @@ -249,14 +369,14 @@ this documentation. priv->phylink = phylink; - and arrange to destroy the phylink in the probe failure path as - appropriate and the removal path too by calling: + and arrange to destroy the phylink in the probe failure path as + appropriate and the removal path too by calling: - .. code-block:: c + .. code-block:: c phylink_destroy(priv->phylink); -10. Arrange for MAC link state interrupts to be forwarded into +15. Arrange for MAC link state interrupts to be forwarded into phylink, via: .. code-block:: c @@ -264,17 +384,16 @@ this documentation. phylink_mac_change(priv->phylink, link_is_up); where ``link_is_up`` is true if the link is currently up or false - otherwise. If a MAC is unable to provide these interrupts, then - it should set ``priv->phylink_config.pcs_poll = true;`` in step 9. + otherwise. -11. Verify that the driver does not call:: +16. Verify that the driver does not call:: netif_carrier_on() netif_carrier_off() - as these will interfere with phylink's tracking of the link state, - and cause phylink to omit calls via the :c:func:`mac_link_up` and - :c:func:`mac_link_down` methods. + as these will interfere with phylink's tracking of the link state, + and cause phylink to omit calls via the :c:func:`mac_link_up` and + :c:func:`mac_link_down` methods. Network drivers should call phylink_stop() and phylink_start() via their suspend/resume paths, which ensures that the appropriate diff --git a/Documentation/sphinx/translations.py b/Documentation/sphinx/translations.py index 47161e6eba99..32c2b32b2b5e 100644 --- a/Documentation/sphinx/translations.py +++ b/Documentation/sphinx/translations.py @@ -29,10 +29,7 @@ all_languages = { } class LanguagesNode(nodes.Element): - def __init__(self, current_language, *args, **kwargs): - super().__init__(*args, **kwargs) - - self.current_language = current_language + pass class TranslationsTransform(Transform): default_priority = 900 @@ -49,7 +46,8 @@ class TranslationsTransform(Transform): # normalize docname to be the untranslated one docname = os.path.join(*components[2:]) - new_nodes = LanguagesNode(all_languages[this_lang_code]) + new_nodes = LanguagesNode() + new_nodes['current_language'] = all_languages[this_lang_code] for lang_code, lang_name in all_languages.items(): if lang_code == this_lang_code: @@ -84,7 +82,7 @@ def process_languages(app, doctree, docname): html_content = app.builder.templates.render('translations.html', context={ - 'current_language': node.current_language, + 'current_language': node['current_language'], 'languages': languages, }) diff --git a/Documentation/virt/hyperv/index.rst b/Documentation/virt/hyperv/index.rst index 4a7a1b738bbe..de447e11b4a5 100644 --- a/Documentation/virt/hyperv/index.rst +++ b/Documentation/virt/hyperv/index.rst @@ -10,3 +10,4 @@ Hyper-V Enlightenments overview vmbus clocks + vpci diff --git a/Documentation/virt/hyperv/vpci.rst b/Documentation/virt/hyperv/vpci.rst new file mode 100644 index 000000000000..b65b2126ede3 --- /dev/null +++ b/Documentation/virt/hyperv/vpci.rst @@ -0,0 +1,316 @@ +.. SPDX-License-Identifier: GPL-2.0 + +PCI pass-thru devices +========================= +In a Hyper-V guest VM, PCI pass-thru devices (also called +virtual PCI devices, or vPCI devices) are physical PCI devices +that are mapped directly into the VM's physical address space. +Guest device drivers can interact directly with the hardware +without intermediation by the host hypervisor. This approach +provides higher bandwidth access to the device with lower +latency, compared with devices that are virtualized by the +hypervisor. The device should appear to the guest just as it +would when running on bare metal, so no changes are required +to the Linux device drivers for the device. + +Hyper-V terminology for vPCI devices is "Discrete Device +Assignment" (DDA). Public documentation for Hyper-V DDA is +available here: `DDA`_ + +.. _DDA: https://learn.microsoft.com/en-us/windows-server/virtualization/hyper-v/plan/plan-for-deploying-devices-using-discrete-device-assignment + +DDA is typically used for storage controllers, such as NVMe, +and for GPUs. A similar mechanism for NICs is called SR-IOV +and produces the same benefits by allowing a guest device +driver to interact directly with the hardware. See Hyper-V +public documentation here: `SR-IOV`_ + +.. _SR-IOV: https://learn.microsoft.com/en-us/windows-hardware/drivers/network/overview-of-single-root-i-o-virtualization--sr-iov- + +This discussion of vPCI devices includes DDA and SR-IOV +devices. + +Device Presentation +------------------- +Hyper-V provides full PCI functionality for a vPCI device when +it is operating, so the Linux device driver for the device can +be used unchanged, provided it uses the correct Linux kernel +APIs for accessing PCI config space and for other integration +with Linux. But the initial detection of the PCI device and +its integration with the Linux PCI subsystem must use Hyper-V +specific mechanisms. Consequently, vPCI devices on Hyper-V +have a dual identity. They are initially presented to Linux +guests as VMBus devices via the standard VMBus "offer" +mechanism, so they have a VMBus identity and appear under +/sys/bus/vmbus/devices. The VMBus vPCI driver in Linux at +drivers/pci/controller/pci-hyperv.c handles a newly introduced +vPCI device by fabricating a PCI bus topology and creating all +the normal PCI device data structures in Linux that would +exist if the PCI device were discovered via ACPI on a bare- +metal system. Once those data structures are set up, the +device also has a normal PCI identity in Linux, and the normal +Linux device driver for the vPCI device can function as if it +were running in Linux on bare-metal. Because vPCI devices are +presented dynamically through the VMBus offer mechanism, they +do not appear in the Linux guest's ACPI tables. vPCI devices +may be added to a VM or removed from a VM at any time during +the life of the VM, and not just during initial boot. + +With this approach, the vPCI device is a VMBus device and a +PCI device at the same time. In response to the VMBus offer +message, the hv_pci_probe() function runs and establishes a +VMBus connection to the vPCI VSP on the Hyper-V host. That +connection has a single VMBus channel. The channel is used to +exchange messages with the vPCI VSP for the purpose of setting +up and configuring the vPCI device in Linux. Once the device +is fully configured in Linux as a PCI device, the VMBus +channel is used only if Linux changes the vCPU to be interrupted +in the guest, or if the vPCI device is removed from +the VM while the VM is running. The ongoing operation of the +device happens directly between the Linux device driver for +the device and the hardware, with VMBus and the VMBus channel +playing no role. + +PCI Device Setup +---------------- +PCI device setup follows a sequence that Hyper-V originally +created for Windows guests, and that can be ill-suited for +Linux guests due to differences in the overall structure of +the Linux PCI subsystem compared with Windows. Nonetheless, +with a bit of hackery in the Hyper-V virtual PCI driver for +Linux, the virtual PCI device is setup in Linux so that +generic Linux PCI subsystem code and the Linux driver for the +device "just work". + +Each vPCI device is set up in Linux to be in its own PCI +domain with a host bridge. The PCI domainID is derived from +bytes 4 and 5 of the instance GUID assigned to the VMBus vPCI +device. The Hyper-V host does not guarantee that these bytes +are unique, so hv_pci_probe() has an algorithm to resolve +collisions. The collision resolution is intended to be stable +across reboots of the same VM so that the PCI domainIDs don't +change, as the domainID appears in the user space +configuration of some devices. + +hv_pci_probe() allocates a guest MMIO range to be used as PCI +config space for the device. This MMIO range is communicated +to the Hyper-V host over the VMBus channel as part of telling +the host that the device is ready to enter d0. See +hv_pci_enter_d0(). When the guest subsequently accesses this +MMIO range, the Hyper-V host intercepts the accesses and maps +them to the physical device PCI config space. + +hv_pci_probe() also gets BAR information for the device from +the Hyper-V host, and uses this information to allocate MMIO +space for the BARs. That MMIO space is then setup to be +associated with the host bridge so that it works when generic +PCI subsystem code in Linux processes the BARs. + +Finally, hv_pci_probe() creates the root PCI bus. At this +point the Hyper-V virtual PCI driver hackery is done, and the +normal Linux PCI machinery for scanning the root bus works to +detect the device, to perform driver matching, and to +initialize the driver and device. + +PCI Device Removal +------------------ +A Hyper-V host may initiate removal of a vPCI device from a +guest VM at any time during the life of the VM. The removal +is instigated by an admin action taken on the Hyper-V host and +is not under the control of the guest OS. + +A guest VM is notified of the removal by an unsolicited +"Eject" message sent from the host to the guest over the VMBus +channel associated with the vPCI device. Upon receipt of such +a message, the Hyper-V virtual PCI driver in Linux +asynchronously invokes Linux kernel PCI subsystem calls to +shutdown and remove the device. When those calls are +complete, an "Ejection Complete" message is sent back to +Hyper-V over the VMBus channel indicating that the device has +been removed. At this point, Hyper-V sends a VMBus rescind +message to the Linux guest, which the VMBus driver in Linux +processes by removing the VMBus identity for the device. Once +that processing is complete, all vestiges of the device having +been present are gone from the Linux kernel. The rescind +message also indicates to the guest that Hyper-V has stopped +providing support for the vPCI device in the guest. If the +guest were to attempt to access that device's MMIO space, it +would be an invalid reference. Hypercalls affecting the device +return errors, and any further messages sent in the VMBus +channel are ignored. + +After sending the Eject message, Hyper-V allows the guest VM +60 seconds to cleanly shutdown the device and respond with +Ejection Complete before sending the VMBus rescind +message. If for any reason the Eject steps don't complete +within the allowed 60 seconds, the Hyper-V host forcibly +performs the rescind steps, which will likely result in +cascading errors in the guest because the device is now no +longer present from the guest standpoint and accessing the +device MMIO space will fail. + +Because ejection is asynchronous and can happen at any point +during the guest VM lifecycle, proper synchronization in the +Hyper-V virtual PCI driver is very tricky. Ejection has been +observed even before a newly offered vPCI device has been +fully setup. The Hyper-V virtual PCI driver has been updated +several times over the years to fix race conditions when +ejections happen at inopportune times. Care must be taken when +modifying this code to prevent re-introducing such problems. +See comments in the code. + +Interrupt Assignment +-------------------- +The Hyper-V virtual PCI driver supports vPCI devices using +MSI, multi-MSI, or MSI-X. Assigning the guest vCPU that will +receive the interrupt for a particular MSI or MSI-X message is +complex because of the way the Linux setup of IRQs maps onto +the Hyper-V interfaces. For the single-MSI and MSI-X cases, +Linux calls hv_compse_msi_msg() twice, with the first call +containing a dummy vCPU and the second call containing the +real vCPU. Furthermore, hv_irq_unmask() is finally called +(on x86) or the GICD registers are set (on arm64) to specify +the real vCPU again. Each of these three calls interact +with Hyper-V, which must decide which physical CPU should +receive the interrupt before it is forwarded to the guest VM. +Unfortunately, the Hyper-V decision-making process is a bit +limited, and can result in concentrating the physical +interrupts on a single CPU, causing a performance bottleneck. +See details about how this is resolved in the extensive +comment above the function hv_compose_msi_req_get_cpu(). + +The Hyper-V virtual PCI driver implements the +irq_chip.irq_compose_msi_msg function as hv_compose_msi_msg(). +Unfortunately, on Hyper-V the implementation requires sending +a VMBus message to the Hyper-V host and awaiting an interrupt +indicating receipt of a reply message. Since +irq_chip.irq_compose_msi_msg can be called with IRQ locks +held, it doesn't work to do the normal sleep until awakened by +the interrupt. Instead hv_compose_msi_msg() must send the +VMBus message, and then poll for the completion message. As +further complexity, the vPCI device could be ejected/rescinded +while the polling is in progress, so this scenario must be +detected as well. See comments in the code regarding this +very tricky area. + +Most of the code in the Hyper-V virtual PCI driver (pci- +hyperv.c) applies to Hyper-V and Linux guests running on x86 +and on arm64 architectures. But there are differences in how +interrupt assignments are managed. On x86, the Hyper-V +virtual PCI driver in the guest must make a hypercall to tell +Hyper-V which guest vCPU should be interrupted by each +MSI/MSI-X interrupt, and the x86 interrupt vector number that +the x86_vector IRQ domain has picked for the interrupt. This +hypercall is made by hv_arch_irq_unmask(). On arm64, the +Hyper-V virtual PCI driver manages the allocation of an SPI +for each MSI/MSI-X interrupt. The Hyper-V virtual PCI driver +stores the allocated SPI in the architectural GICD registers, +which Hyper-V emulates, so no hypercall is necessary as with +x86. Hyper-V does not support using LPIs for vPCI devices in +arm64 guest VMs because it does not emulate a GICv3 ITS. + +The Hyper-V virtual PCI driver in Linux supports vPCI devices +whose drivers create managed or unmanaged Linux IRQs. If the +smp_affinity for an unmanaged IRQ is updated via the /proc/irq +interface, the Hyper-V virtual PCI driver is called to tell +the Hyper-V host to change the interrupt targeting and +everything works properly. However, on x86 if the x86_vector +IRQ domain needs to reassign an interrupt vector due to +running out of vectors on a CPU, there's no path to inform the +Hyper-V host of the change, and things break. Fortunately, +guest VMs operate in a constrained device environment where +using all the vectors on a CPU doesn't happen. Since such a +problem is only a theoretical concern rather than a practical +concern, it has been left unaddressed. + +DMA +--- +By default, Hyper-V pins all guest VM memory in the host +when the VM is created, and programs the physical IOMMU to +allow the VM to have DMA access to all its memory. Hence +it is safe to assign PCI devices to the VM, and allow the +guest operating system to program the DMA transfers. The +physical IOMMU prevents a malicious guest from initiating +DMA to memory belonging to the host or to other VMs on the +host. From the Linux guest standpoint, such DMA transfers +are in "direct" mode since Hyper-V does not provide a virtual +IOMMU in the guest. + +Hyper-V assumes that physical PCI devices always perform +cache-coherent DMA. When running on x86, this behavior is +required by the architecture. When running on arm64, the +architecture allows for both cache-coherent and +non-cache-coherent devices, with the behavior of each device +specified in the ACPI DSDT. But when a PCI device is assigned +to a guest VM, that device does not appear in the DSDT, so the +Hyper-V VMBus driver propagates cache-coherency information +from the VMBus node in the ACPI DSDT to all VMBus devices, +including vPCI devices (since they have a dual identity as a VMBus +device and as a PCI device). See vmbus_dma_configure(). +Current Hyper-V versions always indicate that the VMBus is +cache coherent, so vPCI devices on arm64 always get marked as +cache coherent and the CPU does not perform any sync +operations as part of dma_map/unmap_*() calls. + +vPCI protocol versions +---------------------- +As previously described, during vPCI device setup and teardown +messages are passed over a VMBus channel between the Hyper-V +host and the Hyper-v vPCI driver in the Linux guest. Some +messages have been revised in newer versions of Hyper-V, so +the guest and host must agree on the vPCI protocol version to +be used. The version is negotiated when communication over +the VMBus channel is first established. See +hv_pci_protocol_negotiation(). Newer versions of the protocol +extend support to VMs with more than 64 vCPUs, and provide +additional information about the vPCI device, such as the +guest virtual NUMA node to which it is most closely affined in +the underlying hardware. + +Guest NUMA node affinity +------------------------ +When the vPCI protocol version provides it, the guest NUMA +node affinity of the vPCI device is stored as part of the Linux +device information for subsequent use by the Linux driver. See +hv_pci_assign_numa_node(). If the negotiated protocol version +does not support the host providing NUMA affinity information, +the Linux guest defaults the device NUMA node to 0. But even +when the negotiated protocol version includes NUMA affinity +information, the ability of the host to provide such +information depends on certain host configuration options. If +the guest receives NUMA node value "0", it could mean NUMA +node 0, or it could mean "no information is available". +Unfortunately it is not possible to distinguish the two cases +from the guest side. + +PCI config space access in a CoCo VM +------------------------------------ +Linux PCI device drivers access PCI config space using a +standard set of functions provided by the Linux PCI subsystem. +In Hyper-V guests these standard functions map to functions +hv_pcifront_read_config() and hv_pcifront_write_config() +in the Hyper-V virtual PCI driver. In normal VMs, +these hv_pcifront_*() functions directly access the PCI config +space, and the accesses trap to Hyper-V to be handled. +But in CoCo VMs, memory encryption prevents Hyper-V +from reading the guest instruction stream to emulate the +access, so the hv_pcifront_*() functions must invoke +hypercalls with explicit arguments describing the access to be +made. + +Config Block back-channel +------------------------- +The Hyper-V host and Hyper-V virtual PCI driver in Linux +together implement a non-standard back-channel communication +path between the host and guest. The back-channel path uses +messages sent over the VMBus channel associated with the vPCI +device. The functions hyperv_read_cfg_blk() and +hyperv_write_cfg_blk() are the primary interfaces provided to +other parts of the Linux kernel. As of this writing, these +interfaces are used only by the Mellanox mlx5 driver to pass +diagnostic data to a Hyper-V host running in the Azure public +cloud. The functions hyperv_read_cfg_blk() and +hyperv_write_cfg_blk() are implemented in a separate module +(pci-hyperv-intf.c, under CONFIG_PCI_HYPERV_INTERFACE) that +effectively stubs them out when running in non-Hyper-V +environments. diff --git a/MAINTAINERS b/MAINTAINERS index cfe44a876d8a..90a58c73dbde 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1395,6 +1395,7 @@ F: drivers/hwmon/max31760.c ANALOGBITS PLL LIBRARIES M: Paul Walmsley <paul.walmsley@sifive.com> +M: Samuel Holland <samuel.holland@sifive.com> S: Supported F: drivers/clk/analogbits/* F: include/linux/clk/analogbits* @@ -2156,7 +2157,7 @@ M: Shawn Guo <shawnguo@kernel.org> M: Sascha Hauer <s.hauer@pengutronix.de> R: Pengutronix Kernel Team <kernel@pengutronix.de> R: Fabio Estevam <festevam@gmail.com> -R: NXP Linux Team <linux-imx@nxp.com> +L: imx@lists.linux.dev L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git @@ -5381,7 +5382,7 @@ CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG) M: Johannes Weiner <hannes@cmpxchg.org> M: Michal Hocko <mhocko@kernel.org> M: Roman Gushchin <roman.gushchin@linux.dev> -M: Shakeel Butt <shakeelb@google.com> +M: Shakeel Butt <shakeel.butt@linux.dev> R: Muchun Song <muchun.song@linux.dev> L: cgroups@vger.kernel.org L: linux-mm@kvack.org @@ -8505,7 +8506,7 @@ FREESCALE IMX / MXC FEC DRIVER M: Wei Fang <wei.fang@nxp.com> R: Shenwei Wang <shenwei.wang@nxp.com> R: Clark Wang <xiaoning.wang@nxp.com> -R: NXP Linux Team <linux-imx@nxp.com> +L: imx@lists.linux.dev L: netdev@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/net/fsl,fec.yaml @@ -8540,7 +8541,7 @@ F: drivers/i2c/busses/i2c-imx.c FREESCALE IMX LPI2C DRIVER M: Dong Aisheng <aisheng.dong@nxp.com> L: linux-i2c@vger.kernel.org -L: linux-imx@nxp.com +L: imx@lists.linux.dev S: Maintained F: Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml F: drivers/i2c/busses/i2c-imx-lpi2c.c @@ -14130,6 +14131,17 @@ F: mm/ F: tools/mm/ F: tools/testing/selftests/mm/ +MEMORY MAPPING +M: Andrew Morton <akpm@linux-foundation.org> +R: Liam R. Howlett <Liam.Howlett@oracle.com> +R: Vlastimil Babka <vbabka@suse.cz> +R: Lorenzo Stoakes <lstoakes@gmail.com> +L: linux-mm@kvack.org +S: Maintained +W: http://www.linux-mm.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm +F: mm/mmap.c + MEMORY TECHNOLOGY DEVICES (MTD) M: Miquel Raynal <miquel.raynal@bootlin.com> M: Richard Weinberger <richard@nod.at> @@ -14388,7 +14400,7 @@ MICROCHIP MCP16502 PMIC DRIVER M: Claudiu Beznea <claudiu.beznea@tuxon.dev> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Supported -F: Documentation/devicetree/bindings/regulator/mcp16502-regulator.txt +F: Documentation/devicetree/bindings/regulator/microchip,mcp16502.yaml F: drivers/regulator/mcp16502.c MICROCHIP MCP3564 ADC DRIVER @@ -15736,7 +15748,7 @@ F: drivers/iio/gyro/fxas21002c_spi.c NXP i.MX 7D/6SX/6UL/93 AND VF610 ADC DRIVER M: Haibo Chen <haibo.chen@nxp.com> L: linux-iio@vger.kernel.org -L: linux-imx@nxp.com +L: imx@lists.linux.dev S: Maintained F: Documentation/devicetree/bindings/iio/adc/fsl,imx7d-adc.yaml F: Documentation/devicetree/bindings/iio/adc/fsl,vf610-adc.yaml @@ -15773,7 +15785,7 @@ F: drivers/gpu/drm/imx/dcss/ NXP i.MX 8QXP ADC DRIVER M: Cai Huoqing <cai.huoqing@linux.dev> M: Haibo Chen <haibo.chen@nxp.com> -L: linux-imx@nxp.com +L: imx@lists.linux.dev L: linux-iio@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/iio/adc/nxp,imx8qxp-adc.yaml @@ -15781,7 +15793,7 @@ F: drivers/iio/adc/imx8qxp-adc.c NXP i.MX 8QXP/8QM JPEG V4L2 DRIVER M: Mirela Rabulea <mirela.rabulea@nxp.com> -R: NXP Linux Team <linux-imx@nxp.com> +L: imx@lists.linux.dev L: linux-media@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/media/nxp,imx8-jpeg.yaml @@ -15791,7 +15803,7 @@ NXP i.MX CLOCK DRIVERS M: Abel Vesa <abelvesa@kernel.org> R: Peng Fan <peng.fan@nxp.com> L: linux-clk@vger.kernel.org -L: linux-imx@nxp.com +L: imx@lists.linux.dev S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/abelvesa/linux.git clk/imx F: Documentation/devicetree/bindings/clock/imx* @@ -16752,6 +16764,7 @@ F: drivers/pci/controller/dwc/*layerscape* PCI DRIVER FOR FU740 M: Paul Walmsley <paul.walmsley@sifive.com> M: Greentime Hu <greentime.hu@sifive.com> +M: Samuel Holland <samuel.holland@sifive.com> L: linux-pci@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/pci/sifive,fu740-pcie.yaml @@ -18004,33 +18017,34 @@ F: drivers/media/tuners/qt1010* QUALCOMM ATH12K WIRELESS DRIVER M: Kalle Valo <kvalo@kernel.org> -M: Jeff Johnson <quic_jjohnson@quicinc.com> +M: Jeff Johnson <jjohnson@kernel.org> L: ath12k@lists.infradead.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/ath12k T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git F: drivers/net/wireless/ath/ath12k/ +N: ath12k QUALCOMM ATHEROS ATH10K WIRELESS DRIVER M: Kalle Valo <kvalo@kernel.org> -M: Jeff Johnson <quic_jjohnson@quicinc.com> +M: Jeff Johnson <jjohnson@kernel.org> L: ath10k@lists.infradead.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/ath10k T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git -F: Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml F: drivers/net/wireless/ath/ath10k/ +N: ath10k QUALCOMM ATHEROS ATH11K WIRELESS DRIVER M: Kalle Valo <kvalo@kernel.org> -M: Jeff Johnson <quic_jjohnson@quicinc.com> +M: Jeff Johnson <jjohnson@kernel.org> L: ath11k@lists.infradead.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/ath11k B: https://wireless.wiki.kernel.org/en/users/Drivers/ath11k/bugreport T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git -F: Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml F: drivers/net/wireless/ath/ath11k/ +N: ath11k QUALCOMM ATHEROS ATH9K WIRELESS DRIVER M: Toke Høiland-Jørgensen <toke@toke.dk> @@ -19668,7 +19682,7 @@ F: drivers/mmc/host/sdhci-of-at91.c SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) NXP i.MX DRIVER M: Haibo Chen <haibo.chen@nxp.com> -L: linux-imx@nxp.com +L: imx@lists.linux.dev L: linux-mmc@vger.kernel.org S: Maintained F: drivers/mmc/host/sdhci-esdhc-imx.c @@ -20003,36 +20017,15 @@ S: Maintained F: drivers/watchdog/simatic-ipc-wdt.c SIFIVE DRIVERS -M: Palmer Dabbelt <palmer@dabbelt.com> M: Paul Walmsley <paul.walmsley@sifive.com> +M: Samuel Holland <samuel.holland@sifive.com> L: linux-riscv@lists.infradead.org S: Supported +F: drivers/dma/sf-pdma/ N: sifive +K: fu[57]40 K: [^@]sifive -SIFIVE CACHE DRIVER -M: Conor Dooley <conor@kernel.org> -L: linux-riscv@lists.infradead.org -S: Maintained -F: Documentation/devicetree/bindings/cache/sifive,ccache0.yaml -F: drivers/cache/sifive_ccache.c - -SIFIVE FU540 SYSTEM-ON-CHIP -M: Paul Walmsley <paul.walmsley@sifive.com> -M: Palmer Dabbelt <palmer@dabbelt.com> -L: linux-riscv@lists.infradead.org -S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/pjw/sifive.git -N: fu540 -K: fu540 - -SIFIVE PDMA DRIVER -M: Green Wan <green.wan@sifive.com> -S: Maintained -F: Documentation/devicetree/bindings/dma/sifive,fu540-c000-pdma.yaml -F: drivers/dma/sf-pdma/ - - SILEAD TOUCHSCREEN DRIVER M: Hans de Goede <hdegoede@redhat.com> L: linux-input@vger.kernel.org @@ -20241,8 +20234,8 @@ F: Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml F: drivers/net/ethernet/socionext/sni_ave.c SOCIONEXT (SNI) NETSEC NETWORK DRIVER -M: Jassi Brar <jaswinder.singh@linaro.org> M: Ilias Apalodimas <ilias.apalodimas@linaro.org> +M: Masahisa Kojima <kojima.masahisa@socionext.com> L: netdev@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/net/socionext,synquacer-netsec.yaml @@ -22907,9 +22900,8 @@ S: Maintained F: drivers/usb/typec/mux/pi3usb30532.c USB TYPEC PORT CONTROLLER DRIVERS -M: Guenter Roeck <linux@roeck-us.net> L: linux-usb@vger.kernel.org -S: Maintained +S: Orphan F: drivers/usb/typec/tcpm/ USB UHCI DRIVER @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc7 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* diff --git a/arch/arm/boot/dts/amazon/alpine.dtsi b/arch/arm/boot/dts/amazon/alpine.dtsi index ff68dfb4eb78..90bd12feac01 100644 --- a/arch/arm/boot/dts/amazon/alpine.dtsi +++ b/arch/arm/boot/dts/amazon/alpine.dtsi @@ -167,7 +167,6 @@ msix: msix@fbe00000 { compatible = "al,alpine-msix"; reg = <0x0 0xfbe00000 0x0 0x100000>; - interrupt-controller; msi-controller; al,msi-base-spi = <96>; al,msi-num-spis = <64>; diff --git a/arch/arm/boot/dts/aspeed/aspeed-g4.dtsi b/arch/arm/boot/dts/aspeed/aspeed-g4.dtsi index 530491ae5eb2..857cb26ed6d7 100644 --- a/arch/arm/boot/dts/aspeed/aspeed-g4.dtsi +++ b/arch/arm/boot/dts/aspeed/aspeed-g4.dtsi @@ -466,7 +466,6 @@ i2c0: i2c-bus@40 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x40 0x40>; compatible = "aspeed,ast2400-i2c-bus"; @@ -482,7 +481,6 @@ i2c1: i2c-bus@80 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x80 0x40>; compatible = "aspeed,ast2400-i2c-bus"; @@ -498,7 +496,6 @@ i2c2: i2c-bus@c0 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0xc0 0x40>; compatible = "aspeed,ast2400-i2c-bus"; @@ -515,7 +512,6 @@ i2c3: i2c-bus@100 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x100 0x40>; compatible = "aspeed,ast2400-i2c-bus"; @@ -532,7 +528,6 @@ i2c4: i2c-bus@140 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x140 0x40>; compatible = "aspeed,ast2400-i2c-bus"; @@ -549,7 +544,6 @@ i2c5: i2c-bus@180 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x180 0x40>; compatible = "aspeed,ast2400-i2c-bus"; @@ -566,7 +560,6 @@ i2c6: i2c-bus@1c0 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x1c0 0x40>; compatible = "aspeed,ast2400-i2c-bus"; @@ -583,7 +576,6 @@ i2c7: i2c-bus@300 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x300 0x40>; compatible = "aspeed,ast2400-i2c-bus"; @@ -600,7 +592,6 @@ i2c8: i2c-bus@340 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x340 0x40>; compatible = "aspeed,ast2400-i2c-bus"; @@ -617,7 +608,6 @@ i2c9: i2c-bus@380 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x380 0x40>; compatible = "aspeed,ast2400-i2c-bus"; @@ -634,7 +624,6 @@ i2c10: i2c-bus@3c0 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x3c0 0x40>; compatible = "aspeed,ast2400-i2c-bus"; @@ -651,7 +640,6 @@ i2c11: i2c-bus@400 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x400 0x40>; compatible = "aspeed,ast2400-i2c-bus"; @@ -668,7 +656,6 @@ i2c12: i2c-bus@440 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x440 0x40>; compatible = "aspeed,ast2400-i2c-bus"; @@ -685,7 +672,6 @@ i2c13: i2c-bus@480 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x480 0x40>; compatible = "aspeed,ast2400-i2c-bus"; diff --git a/arch/arm/boot/dts/aspeed/aspeed-g5.dtsi b/arch/arm/boot/dts/aspeed/aspeed-g5.dtsi index 04f98d1dbb97..e6f3cf3c721e 100644 --- a/arch/arm/boot/dts/aspeed/aspeed-g5.dtsi +++ b/arch/arm/boot/dts/aspeed/aspeed-g5.dtsi @@ -363,6 +363,7 @@ interrupts = <40>; reg = <0x1e780200 0x0100>; clocks = <&syscon ASPEED_CLK_APB>; + #interrupt-cells = <2>; interrupt-controller; bus-frequency = <12000000>; pinctrl-names = "default"; @@ -594,7 +595,6 @@ i2c0: i2c-bus@40 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x40 0x40>; compatible = "aspeed,ast2500-i2c-bus"; @@ -610,7 +610,6 @@ i2c1: i2c-bus@80 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x80 0x40>; compatible = "aspeed,ast2500-i2c-bus"; @@ -626,7 +625,6 @@ i2c2: i2c-bus@c0 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0xc0 0x40>; compatible = "aspeed,ast2500-i2c-bus"; @@ -643,7 +641,6 @@ i2c3: i2c-bus@100 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x100 0x40>; compatible = "aspeed,ast2500-i2c-bus"; @@ -660,7 +657,6 @@ i2c4: i2c-bus@140 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x140 0x40>; compatible = "aspeed,ast2500-i2c-bus"; @@ -677,7 +673,6 @@ i2c5: i2c-bus@180 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x180 0x40>; compatible = "aspeed,ast2500-i2c-bus"; @@ -694,7 +689,6 @@ i2c6: i2c-bus@1c0 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x1c0 0x40>; compatible = "aspeed,ast2500-i2c-bus"; @@ -711,7 +705,6 @@ i2c7: i2c-bus@300 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x300 0x40>; compatible = "aspeed,ast2500-i2c-bus"; @@ -728,7 +721,6 @@ i2c8: i2c-bus@340 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x340 0x40>; compatible = "aspeed,ast2500-i2c-bus"; @@ -745,7 +737,6 @@ i2c9: i2c-bus@380 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x380 0x40>; compatible = "aspeed,ast2500-i2c-bus"; @@ -762,7 +753,6 @@ i2c10: i2c-bus@3c0 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x3c0 0x40>; compatible = "aspeed,ast2500-i2c-bus"; @@ -779,7 +769,6 @@ i2c11: i2c-bus@400 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x400 0x40>; compatible = "aspeed,ast2500-i2c-bus"; @@ -796,7 +785,6 @@ i2c12: i2c-bus@440 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x440 0x40>; compatible = "aspeed,ast2500-i2c-bus"; @@ -813,7 +801,6 @@ i2c13: i2c-bus@480 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x480 0x40>; compatible = "aspeed,ast2500-i2c-bus"; diff --git a/arch/arm/boot/dts/aspeed/aspeed-g6.dtsi b/arch/arm/boot/dts/aspeed/aspeed-g6.dtsi index c4d1faade8be..29f94696d8b1 100644 --- a/arch/arm/boot/dts/aspeed/aspeed-g6.dtsi +++ b/arch/arm/boot/dts/aspeed/aspeed-g6.dtsi @@ -474,6 +474,7 @@ reg = <0x1e780500 0x100>; interrupts = <GIC_SPI 51 IRQ_TYPE_LEVEL_HIGH>; clocks = <&syscon ASPEED_CLK_APB2>; + #interrupt-cells = <2>; interrupt-controller; bus-frequency = <12000000>; pinctrl-names = "default"; @@ -488,6 +489,7 @@ reg = <0x1e780600 0x100>; interrupts = <GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>; clocks = <&syscon ASPEED_CLK_APB2>; + #interrupt-cells = <2>; interrupt-controller; bus-frequency = <12000000>; pinctrl-names = "default"; @@ -902,7 +904,6 @@ i2c0: i2c-bus@80 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x80 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -917,7 +918,6 @@ i2c1: i2c-bus@100 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x100 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -932,7 +932,6 @@ i2c2: i2c-bus@180 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x180 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -947,7 +946,6 @@ i2c3: i2c-bus@200 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x200 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -962,7 +960,6 @@ i2c4: i2c-bus@280 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x280 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -977,7 +974,6 @@ i2c5: i2c-bus@300 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x300 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -992,7 +988,6 @@ i2c6: i2c-bus@380 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x380 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -1007,7 +1002,6 @@ i2c7: i2c-bus@400 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x400 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -1022,7 +1016,6 @@ i2c8: i2c-bus@480 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x480 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -1037,7 +1030,6 @@ i2c9: i2c-bus@500 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x500 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -1052,7 +1044,6 @@ i2c10: i2c-bus@580 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x580 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -1067,7 +1058,6 @@ i2c11: i2c-bus@600 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x600 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -1082,7 +1072,6 @@ i2c12: i2c-bus@680 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x680 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -1097,7 +1086,6 @@ i2c13: i2c-bus@700 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x700 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -1112,7 +1100,6 @@ i2c14: i2c-bus@780 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x780 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -1127,7 +1114,6 @@ i2c15: i2c-bus@800 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x800 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; diff --git a/arch/arm/boot/dts/broadcom/bcm-cygnus.dtsi b/arch/arm/boot/dts/broadcom/bcm-cygnus.dtsi index f9f79ed82518..07ca0d993c9f 100644 --- a/arch/arm/boot/dts/broadcom/bcm-cygnus.dtsi +++ b/arch/arm/boot/dts/broadcom/bcm-cygnus.dtsi @@ -167,6 +167,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&mailbox>; interrupts = <0>; }; @@ -247,6 +248,7 @@ gpio-controller; interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>; interrupt-controller; + #interrupt-cells = <2>; }; i2c1: i2c@1800b000 { @@ -518,6 +520,7 @@ gpio-controller; interrupt-controller; + #interrupt-cells = <2>; interrupts = <GIC_SPI 174 IRQ_TYPE_LEVEL_HIGH>; gpio-ranges = <&pinctrl 0 42 1>, <&pinctrl 1 44 3>, diff --git a/arch/arm/boot/dts/broadcom/bcm-hr2.dtsi b/arch/arm/boot/dts/broadcom/bcm-hr2.dtsi index 788a6806191a..75545b10ef2f 100644 --- a/arch/arm/boot/dts/broadcom/bcm-hr2.dtsi +++ b/arch/arm/boot/dts/broadcom/bcm-hr2.dtsi @@ -200,6 +200,7 @@ gpio-controller; ngpios = <4>; interrupt-controller; + #interrupt-cells = <2>; interrupts = <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>; }; diff --git a/arch/arm/boot/dts/broadcom/bcm-nsp.dtsi b/arch/arm/boot/dts/broadcom/bcm-nsp.dtsi index 9d20ba3b1ffb..6a4482c93167 100644 --- a/arch/arm/boot/dts/broadcom/bcm-nsp.dtsi +++ b/arch/arm/boot/dts/broadcom/bcm-nsp.dtsi @@ -180,6 +180,7 @@ gpio-controller; ngpios = <32>; interrupt-controller; + #interrupt-cells = <2>; interrupts = <GIC_SPI 85 IRQ_TYPE_LEVEL_HIGH>; gpio-ranges = <&pinctrl 0 0 32>; }; @@ -352,6 +353,7 @@ gpio-controller; ngpios = <4>; interrupt-controller; + #interrupt-cells = <2>; interrupts = <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>; }; diff --git a/arch/arm/boot/dts/intel/ixp/intel-ixp42x-gateway-7001.dts b/arch/arm/boot/dts/intel/ixp/intel-ixp42x-gateway-7001.dts index 4d70f6afd13a..6d5e69035f94 100644 --- a/arch/arm/boot/dts/intel/ixp/intel-ixp42x-gateway-7001.dts +++ b/arch/arm/boot/dts/intel/ixp/intel-ixp42x-gateway-7001.dts @@ -60,6 +60,8 @@ * We have slots (IDSEL) 1 and 2 with one assigned IRQ * each handling all IRQs. */ + #interrupt-cells = <1>; + interrupt-map-mask = <0xf800 0 0 7>; interrupt-map = /* IDSEL 1 */ <0x0800 0 0 1 &gpio0 11 IRQ_TYPE_LEVEL_LOW>, /* INT A on slot 1 is irq 11 */ diff --git a/arch/arm/boot/dts/intel/ixp/intel-ixp42x-goramo-multilink.dts b/arch/arm/boot/dts/intel/ixp/intel-ixp42x-goramo-multilink.dts index 9ec0169bacf8..5f4c849915db 100644 --- a/arch/arm/boot/dts/intel/ixp/intel-ixp42x-goramo-multilink.dts +++ b/arch/arm/boot/dts/intel/ixp/intel-ixp42x-goramo-multilink.dts @@ -89,6 +89,8 @@ * The slots have Ethernet, Ethernet, NEC and MPCI. * The IDSELs are 11, 12, 13, 14. */ + #interrupt-cells = <1>; + interrupt-map-mask = <0xf800 0 0 7>; interrupt-map = /* IDSEL 11 - Ethernet A */ <0x5800 0 0 1 &gpio0 4 IRQ_TYPE_LEVEL_LOW>, /* INT A on slot 11 is irq 4 */ diff --git a/arch/arm/boot/dts/marvell/kirkwood-l-50.dts b/arch/arm/boot/dts/marvell/kirkwood-l-50.dts index dffb9f84e67c..c841eb8e7fb1 100644 --- a/arch/arm/boot/dts/marvell/kirkwood-l-50.dts +++ b/arch/arm/boot/dts/marvell/kirkwood-l-50.dts @@ -65,6 +65,7 @@ gpio2: gpio-expander@20 { #gpio-cells = <2>; #interrupt-cells = <2>; + interrupt-controller; compatible = "semtech,sx1505q"; reg = <0x20>; @@ -79,6 +80,7 @@ gpio3: gpio-expander@21 { #gpio-cells = <2>; #interrupt-cells = <2>; + interrupt-controller; compatible = "semtech,sx1505q"; reg = <0x21>; diff --git a/arch/arm/boot/dts/nuvoton/nuvoton-wpcm450.dtsi b/arch/arm/boot/dts/nuvoton/nuvoton-wpcm450.dtsi index fd671c7a1e5d..6e1f0f164cb4 100644 --- a/arch/arm/boot/dts/nuvoton/nuvoton-wpcm450.dtsi +++ b/arch/arm/boot/dts/nuvoton/nuvoton-wpcm450.dtsi @@ -120,6 +120,7 @@ interrupts = <2 IRQ_TYPE_LEVEL_HIGH>, <3 IRQ_TYPE_LEVEL_HIGH>, <4 IRQ_TYPE_LEVEL_HIGH>; + #interrupt-cells = <2>; interrupt-controller; }; @@ -128,6 +129,7 @@ gpio-controller; #gpio-cells = <2>; interrupts = <5 IRQ_TYPE_LEVEL_HIGH>; + #interrupt-cells = <2>; interrupt-controller; }; diff --git a/arch/arm/boot/dts/nvidia/tegra30-apalis-v1.1.dtsi b/arch/arm/boot/dts/nvidia/tegra30-apalis-v1.1.dtsi index 1640763fd4af..ff0d684622f7 100644 --- a/arch/arm/boot/dts/nvidia/tegra30-apalis-v1.1.dtsi +++ b/arch/arm/boot/dts/nvidia/tegra30-apalis-v1.1.dtsi @@ -997,7 +997,6 @@ compatible = "st,stmpe811"; reg = <0x41>; irq-gpio = <&gpio TEGRA_GPIO(V, 0) GPIO_ACTIVE_LOW>; - interrupt-controller; id = <0>; blocks = <0x5>; irq-trigger = <0x1>; diff --git a/arch/arm/boot/dts/nvidia/tegra30-apalis.dtsi b/arch/arm/boot/dts/nvidia/tegra30-apalis.dtsi index 3b6fad273cab..d38f1dd38a90 100644 --- a/arch/arm/boot/dts/nvidia/tegra30-apalis.dtsi +++ b/arch/arm/boot/dts/nvidia/tegra30-apalis.dtsi @@ -980,7 +980,6 @@ compatible = "st,stmpe811"; reg = <0x41>; irq-gpio = <&gpio TEGRA_GPIO(V, 0) GPIO_ACTIVE_LOW>; - interrupt-controller; id = <0>; blocks = <0x5>; irq-trigger = <0x1>; diff --git a/arch/arm/boot/dts/nvidia/tegra30-colibri.dtsi b/arch/arm/boot/dts/nvidia/tegra30-colibri.dtsi index 4eb526fe9c55..81c8a5fd92cc 100644 --- a/arch/arm/boot/dts/nvidia/tegra30-colibri.dtsi +++ b/arch/arm/boot/dts/nvidia/tegra30-colibri.dtsi @@ -861,7 +861,6 @@ compatible = "st,stmpe811"; reg = <0x41>; irq-gpio = <&gpio TEGRA_GPIO(V, 0) GPIO_ACTIVE_LOW>; - interrupt-controller; id = <0>; blocks = <0x5>; irq-trigger = <0x1>; diff --git a/arch/arm/boot/dts/nxp/imx/imx6q-b850v3.dts b/arch/arm/boot/dts/nxp/imx/imx6q-b850v3.dts index db8c332df6a1..cad112e05475 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6q-b850v3.dts +++ b/arch/arm/boot/dts/nxp/imx/imx6q-b850v3.dts @@ -227,7 +227,6 @@ #address-cells = <3>; #size-cells = <2>; - #interrupt-cells = <1>; bridge@2,1 { compatible = "pci10b5,8605"; @@ -235,7 +234,6 @@ #address-cells = <3>; #size-cells = <2>; - #interrupt-cells = <1>; /* Intel Corporation I210 Gigabit Network Connection */ ethernet@3,0 { @@ -250,7 +248,6 @@ #address-cells = <3>; #size-cells = <2>; - #interrupt-cells = <1>; /* Intel Corporation I210 Gigabit Network Connection */ switch_nic: ethernet@4,0 { diff --git a/arch/arm/boot/dts/nxp/imx/imx6q-bx50v3.dtsi b/arch/arm/boot/dts/nxp/imx/imx6q-bx50v3.dtsi index 99f4f6ac71d4..c1ae7c47b442 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6q-bx50v3.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6q-bx50v3.dtsi @@ -245,6 +245,7 @@ reg = <0x74>; gpio-controller; #gpio-cells = <2>; + #interrupt-cells = <2>; interrupt-controller; interrupt-parent = <&gpio2>; interrupts = <3 IRQ_TYPE_LEVEL_LOW>; @@ -390,7 +391,6 @@ #address-cells = <3>; #size-cells = <2>; - #interrupt-cells = <1>; }; }; diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-apalis.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-apalis.dtsi index 2ae93f57fe5a..ea40623d12e5 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6qdl-apalis.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-apalis.dtsi @@ -626,7 +626,6 @@ blocks = <0x5>; id = <0>; interrupts = <10 IRQ_TYPE_LEVEL_LOW>; - interrupt-controller; interrupt-parent = <&gpio4>; irq-trigger = <0x1>; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-colibri.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-colibri.dtsi index 55c90f6393ad..d3a7a6eeb8e0 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6qdl-colibri.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-colibri.dtsi @@ -550,7 +550,6 @@ blocks = <0x5>; interrupts = <20 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&gpio6>; - interrupt-controller; id = <0>; irq-trigger = <0x1>; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-emcon.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-emcon.dtsi index a63e73adc1fc..42b2ba23aefc 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6qdl-emcon.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-emcon.dtsi @@ -225,7 +225,6 @@ pinctrl-0 = <&pinctrl_pmic>; interrupt-parent = <&gpio2>; interrupts = <8 IRQ_TYPE_LEVEL_LOW>; - interrupt-controller; onkey { compatible = "dlg,da9063-onkey"; diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-pfla02.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-pfla02.dtsi index 113974520d54..c0c47adc5866 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-pfla02.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-pfla02.dtsi @@ -124,6 +124,7 @@ reg = <0x58>; interrupt-parent = <&gpio2>; interrupts = <9 IRQ_TYPE_LEVEL_LOW>; /* active-low GPIO2_9 */ + #interrupt-cells = <2>; interrupt-controller; regulators { diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-phycore-som.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-phycore-som.dtsi index 86b4269e0e01..85e278eb2016 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-phycore-som.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-phycore-som.dtsi @@ -100,6 +100,7 @@ interrupt-parent = <&gpio1>; interrupts = <2 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; + #interrupt-cells = <2>; gpio-controller; #gpio-cells = <2>; diff --git a/arch/arm/boot/dts/nxp/imx/imx7d-pico-dwarf.dts b/arch/arm/boot/dts/nxp/imx/imx7d-pico-dwarf.dts index 12361fcbe24a..1b965652291b 100644 --- a/arch/arm/boot/dts/nxp/imx/imx7d-pico-dwarf.dts +++ b/arch/arm/boot/dts/nxp/imx/imx7d-pico-dwarf.dts @@ -63,6 +63,7 @@ gpio-controller; #gpio-cells = <2>; #interrupt-cells = <2>; + interrupt-controller; reg = <0x25>; }; diff --git a/arch/arm/boot/dts/nxp/imx/imx7s.dtsi b/arch/arm/boot/dts/nxp/imx/imx7s.dtsi index ebf7befcc11e..9c81c6baa2d3 100644 --- a/arch/arm/boot/dts/nxp/imx/imx7s.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx7s.dtsi @@ -834,16 +834,6 @@ <&clks IMX7D_LCDIF_PIXEL_ROOT_CLK>; clock-names = "pix", "axi"; status = "disabled"; - - port { - #address-cells = <1>; - #size-cells = <0>; - - lcdif_out_mipi_dsi: endpoint@0 { - reg = <0>; - remote-endpoint = <&mipi_dsi_in_lcdif>; - }; - }; }; mipi_csi: mipi-csi@30750000 { @@ -895,22 +885,6 @@ samsung,esc-clock-frequency = <20000000>; samsung,pll-clock-frequency = <24000000>; status = "disabled"; - - ports { - #address-cells = <1>; - #size-cells = <0>; - - port@0 { - reg = <0>; - #address-cells = <1>; - #size-cells = <0>; - - mipi_dsi_in_lcdif: endpoint@0 { - reg = <0>; - remote-endpoint = <&lcdif_out_mipi_dsi>; - }; - }; - }; }; }; diff --git a/arch/arm/boot/dts/nxp/vf/vf610-zii-dev-rev-b.dts b/arch/arm/boot/dts/nxp/vf/vf610-zii-dev-rev-b.dts index b0ed68af0546..029f49be40e3 100644 --- a/arch/arm/boot/dts/nxp/vf/vf610-zii-dev-rev-b.dts +++ b/arch/arm/boot/dts/nxp/vf/vf610-zii-dev-rev-b.dts @@ -338,6 +338,7 @@ reg = <0x22>; gpio-controller; #gpio-cells = <2>; + #interrupt-cells = <2>; interrupt-controller; interrupt-parent = <&gpio3>; interrupts = <2 IRQ_TYPE_LEVEL_LOW>; diff --git a/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi b/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi index 2045fc779f88..27429d0fedfb 100644 --- a/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi +++ b/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi @@ -340,10 +340,10 @@ "msi8"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0x7>; - interrupt-map = <0 0 0 1 &intc 0 0 0 141 IRQ_TYPE_LEVEL_HIGH>, /* int_a */ - <0 0 0 2 &intc 0 0 0 142 IRQ_TYPE_LEVEL_HIGH>, /* int_b */ - <0 0 0 3 &intc 0 0 0 143 IRQ_TYPE_LEVEL_HIGH>, /* int_c */ - <0 0 0 4 &intc 0 0 0 144 IRQ_TYPE_LEVEL_HIGH>; /* int_d */ + interrupt-map = <0 0 0 1 &intc 0 141 IRQ_TYPE_LEVEL_HIGH>, /* int_a */ + <0 0 0 2 &intc 0 142 IRQ_TYPE_LEVEL_HIGH>, /* int_b */ + <0 0 0 3 &intc 0 143 IRQ_TYPE_LEVEL_HIGH>, /* int_c */ + <0 0 0 4 &intc 0 144 IRQ_TYPE_LEVEL_HIGH>; /* int_d */ clocks = <&gcc GCC_PCIE_PIPE_CLK>, <&gcc GCC_PCIE_AUX_CLK>, diff --git a/arch/arm/boot/dts/renesas/r8a7790-lager.dts b/arch/arm/boot/dts/renesas/r8a7790-lager.dts index 2fba4d084001..8590981245a6 100644 --- a/arch/arm/boot/dts/renesas/r8a7790-lager.dts +++ b/arch/arm/boot/dts/renesas/r8a7790-lager.dts @@ -447,6 +447,7 @@ interrupt-parent = <&irqc0>; interrupts = <2 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; + #interrupt-cells = <2>; rtc { compatible = "dlg,da9063-rtc"; diff --git a/arch/arm/boot/dts/renesas/r8a7790-stout.dts b/arch/arm/boot/dts/renesas/r8a7790-stout.dts index f9bc5b4f019d..683f7395fab0 100644 --- a/arch/arm/boot/dts/renesas/r8a7790-stout.dts +++ b/arch/arm/boot/dts/renesas/r8a7790-stout.dts @@ -347,6 +347,7 @@ interrupt-parent = <&irqc0>; interrupts = <2 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; + #interrupt-cells = <2>; onkey { compatible = "dlg,da9063-onkey"; diff --git a/arch/arm/boot/dts/renesas/r8a7791-koelsch.dts b/arch/arm/boot/dts/renesas/r8a7791-koelsch.dts index e9c13bb03772..0efd9f98c75a 100644 --- a/arch/arm/boot/dts/renesas/r8a7791-koelsch.dts +++ b/arch/arm/boot/dts/renesas/r8a7791-koelsch.dts @@ -819,6 +819,7 @@ interrupt-parent = <&irqc0>; interrupts = <2 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; + #interrupt-cells = <2>; rtc { compatible = "dlg,da9063-rtc"; diff --git a/arch/arm/boot/dts/renesas/r8a7791-porter.dts b/arch/arm/boot/dts/renesas/r8a7791-porter.dts index 7e8bc06715f6..93c86e921645 100644 --- a/arch/arm/boot/dts/renesas/r8a7791-porter.dts +++ b/arch/arm/boot/dts/renesas/r8a7791-porter.dts @@ -413,6 +413,7 @@ interrupt-parent = <&irqc0>; interrupts = <2 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; + #interrupt-cells = <2>; watchdog { compatible = "dlg,da9063-watchdog"; diff --git a/arch/arm/boot/dts/renesas/r8a7792-blanche.dts b/arch/arm/boot/dts/renesas/r8a7792-blanche.dts index 4f9838cf97ee..540a9ad28f28 100644 --- a/arch/arm/boot/dts/renesas/r8a7792-blanche.dts +++ b/arch/arm/boot/dts/renesas/r8a7792-blanche.dts @@ -381,6 +381,7 @@ interrupt-parent = <&irqc>; interrupts = <2 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; + #interrupt-cells = <2>; rtc { compatible = "dlg,da9063-rtc"; diff --git a/arch/arm/boot/dts/renesas/r8a7793-gose.dts b/arch/arm/boot/dts/renesas/r8a7793-gose.dts index 1744fdbf9e0c..1ea6c757893b 100644 --- a/arch/arm/boot/dts/renesas/r8a7793-gose.dts +++ b/arch/arm/boot/dts/renesas/r8a7793-gose.dts @@ -759,6 +759,7 @@ interrupt-parent = <&irqc0>; interrupts = <2 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; + #interrupt-cells = <2>; rtc { compatible = "dlg,da9063-rtc"; diff --git a/arch/arm/boot/dts/renesas/r8a7794-alt.dts b/arch/arm/boot/dts/renesas/r8a7794-alt.dts index c0d067df22a0..b5ecafbb2e4d 100644 --- a/arch/arm/boot/dts/renesas/r8a7794-alt.dts +++ b/arch/arm/boot/dts/renesas/r8a7794-alt.dts @@ -453,6 +453,7 @@ interrupt-parent = <&gpio3>; interrupts = <31 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; + #interrupt-cells = <2>; rtc { compatible = "dlg,da9063-rtc"; diff --git a/arch/arm/boot/dts/renesas/r8a7794-silk.dts b/arch/arm/boot/dts/renesas/r8a7794-silk.dts index 43d480a7f3ea..595e074085eb 100644 --- a/arch/arm/boot/dts/renesas/r8a7794-silk.dts +++ b/arch/arm/boot/dts/renesas/r8a7794-silk.dts @@ -439,6 +439,7 @@ interrupt-parent = <&gpio3>; interrupts = <31 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; + #interrupt-cells = <2>; onkey { compatible = "dlg,da9063-onkey"; diff --git a/arch/arm/boot/dts/rockchip/rv1108.dtsi b/arch/arm/boot/dts/rockchip/rv1108.dtsi index abf3006f0a84..f3291f3bbc6f 100644 --- a/arch/arm/boot/dts/rockchip/rv1108.dtsi +++ b/arch/arm/boot/dts/rockchip/rv1108.dtsi @@ -196,7 +196,6 @@ pwm4: pwm@10280000 { compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm"; reg = <0x10280000 0x10>; - interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cru SCLK_PWM>, <&cru PCLK_PWM>; clock-names = "pwm", "pclk"; pinctrl-names = "default"; @@ -208,7 +207,6 @@ pwm5: pwm@10280010 { compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm"; reg = <0x10280010 0x10>; - interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cru SCLK_PWM>, <&cru PCLK_PWM>; clock-names = "pwm", "pclk"; pinctrl-names = "default"; @@ -220,7 +218,6 @@ pwm6: pwm@10280020 { compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm"; reg = <0x10280020 0x10>; - interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cru SCLK_PWM>, <&cru PCLK_PWM>; clock-names = "pwm", "pclk"; pinctrl-names = "default"; @@ -232,7 +229,6 @@ pwm7: pwm@10280030 { compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm"; reg = <0x10280030 0x10>; - interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cru SCLK_PWM>, <&cru PCLK_PWM>; clock-names = "pwm", "pclk"; pinctrl-names = "default"; @@ -386,7 +382,6 @@ pwm0: pwm@20040000 { compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm"; reg = <0x20040000 0x10>; - interrupts = <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cru SCLK_PWM0_PMU>, <&cru PCLK_PWM0_PMU>; clock-names = "pwm", "pclk"; pinctrl-names = "default"; @@ -398,7 +393,6 @@ pwm1: pwm@20040010 { compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm"; reg = <0x20040010 0x10>; - interrupts = <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cru SCLK_PWM0_PMU>, <&cru PCLK_PWM0_PMU>; clock-names = "pwm", "pclk"; pinctrl-names = "default"; @@ -410,7 +404,6 @@ pwm2: pwm@20040020 { compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm"; reg = <0x20040020 0x10>; - interrupts = <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cru SCLK_PWM0_PMU>, <&cru PCLK_PWM0_PMU>; clock-names = "pwm", "pclk"; pinctrl-names = "default"; @@ -422,7 +415,6 @@ pwm3: pwm@20040030 { compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm"; reg = <0x20040030 0x10>; - interrupts = <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cru SCLK_PWM0_PMU>, <&cru PCLK_PWM0_PMU>; clock-names = "pwm", "pclk"; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/st/stm32429i-eval.dts b/arch/arm/boot/dts/st/stm32429i-eval.dts index 576235ec3c51..afa417b34b25 100644 --- a/arch/arm/boot/dts/st/stm32429i-eval.dts +++ b/arch/arm/boot/dts/st/stm32429i-eval.dts @@ -222,7 +222,6 @@ reg = <0x42>; interrupts = <8 3>; interrupt-parent = <&gpioi>; - interrupt-controller; wakeup-source; stmpegpio: stmpe_gpio { diff --git a/arch/arm/boot/dts/st/stm32mp157c-dk2.dts b/arch/arm/boot/dts/st/stm32mp157c-dk2.dts index 510cca5acb79..7a701f7ef0c7 100644 --- a/arch/arm/boot/dts/st/stm32mp157c-dk2.dts +++ b/arch/arm/boot/dts/st/stm32mp157c-dk2.dts @@ -64,7 +64,6 @@ reg = <0x38>; interrupts = <2 2>; interrupt-parent = <&gpiof>; - interrupt-controller; touchscreen-size-x = <480>; touchscreen-size-y = <800>; status = "okay"; diff --git a/arch/arm/boot/dts/ti/omap/am5729-beagleboneai.dts b/arch/arm/boot/dts/ti/omap/am5729-beagleboneai.dts index c8e55642f9c6..3e834fc7e370 100644 --- a/arch/arm/boot/dts/ti/omap/am5729-beagleboneai.dts +++ b/arch/arm/boot/dts/ti/omap/am5729-beagleboneai.dts @@ -415,7 +415,6 @@ reg = <0x41>; interrupts = <30 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&gpio2>; - interrupt-controller; id = <0>; blocks = <0x5>; irq-trigger = <0x1>; diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index 0a90583f9f01..8f9dbe8d9029 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -297,6 +297,7 @@ CONFIG_FB_MODE_HELPERS=y CONFIG_LCD_CLASS_DEVICE=y CONFIG_LCD_L4F00242T03=y CONFIG_LCD_PLATFORM=y +CONFIG_BACKLIGHT_CLASS_DEVICE=y CONFIG_BACKLIGHT_PWM=y CONFIG_BACKLIGHT_GPIO=y CONFIG_FRAMEBUFFER_CONSOLE=y diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c index 71b113976420..8b1ec60a9a46 100644 --- a/arch/arm/mach-ep93xx/core.c +++ b/arch/arm/mach-ep93xx/core.c @@ -339,6 +339,7 @@ static struct gpiod_lookup_table ep93xx_i2c_gpiod_table = { GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), GPIO_LOOKUP_IDX("G", 0, NULL, 1, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), + { } }, }; diff --git a/arch/arm64/boot/dts/allwinner/Makefile b/arch/arm64/boot/dts/allwinner/Makefile index 91d505b385de..1f1f8d865d0e 100644 --- a/arch/arm64/boot/dts/allwinner/Makefile +++ b/arch/arm64/boot/dts/allwinner/Makefile @@ -42,5 +42,6 @@ dtb-$(CONFIG_ARCH_SUNXI) += sun50i-h616-bigtreetech-cb1-manta.dtb dtb-$(CONFIG_ARCH_SUNXI) += sun50i-h616-bigtreetech-pi.dtb dtb-$(CONFIG_ARCH_SUNXI) += sun50i-h616-orangepi-zero2.dtb dtb-$(CONFIG_ARCH_SUNXI) += sun50i-h616-x96-mate.dtb +dtb-$(CONFIG_ARCH_SUNXI) += sun50i-h618-orangepi-zero2w.dtb dtb-$(CONFIG_ARCH_SUNXI) += sun50i-h618-orangepi-zero3.dtb dtb-$(CONFIG_ARCH_SUNXI) += sun50i-h618-transpeed-8k618-t.dtb diff --git a/arch/arm64/boot/dts/amazon/alpine-v2.dtsi b/arch/arm64/boot/dts/amazon/alpine-v2.dtsi index dccbba6e7f98..dbf2dce8d1d6 100644 --- a/arch/arm64/boot/dts/amazon/alpine-v2.dtsi +++ b/arch/arm64/boot/dts/amazon/alpine-v2.dtsi @@ -145,7 +145,6 @@ msix: msix@fbe00000 { compatible = "al,alpine-msix"; reg = <0x0 0xfbe00000 0x0 0x100000>; - interrupt-controller; msi-controller; al,msi-base-spi = <160>; al,msi-num-spis = <160>; diff --git a/arch/arm64/boot/dts/amazon/alpine-v3.dtsi b/arch/arm64/boot/dts/amazon/alpine-v3.dtsi index 39481d7fd7d4..3ea178acdddf 100644 --- a/arch/arm64/boot/dts/amazon/alpine-v3.dtsi +++ b/arch/arm64/boot/dts/amazon/alpine-v3.dtsi @@ -355,7 +355,6 @@ msix: msix@fbe00000 { compatible = "al,alpine-msix"; reg = <0x0 0xfbe00000 0x0 0x100000>; - interrupt-controller; msi-controller; al,msi-base-spi = <336>; al,msi-num-spis = <959>; diff --git a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi index 9dcd25ec2c04..896d1f33b5b6 100644 --- a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi +++ b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi @@ -586,6 +586,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; interrupts = <GIC_SPI 400 IRQ_TYPE_LEVEL_HIGH>; }; diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi index f049687d6b96..d8516ec0dae7 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi +++ b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi @@ -450,6 +450,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; interrupts = <GIC_SPI 183 IRQ_TYPE_LEVEL_HIGH>; gpio-ranges = <&pinmux 0 0 16>, <&pinmux 16 71 2>, diff --git a/arch/arm64/boot/dts/freescale/Makefile b/arch/arm64/boot/dts/freescale/Makefile index 2e027675d7bb..2cb0212b63c6 100644 --- a/arch/arm64/boot/dts/freescale/Makefile +++ b/arch/arm64/boot/dts/freescale/Makefile @@ -20,23 +20,41 @@ dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1046a-frwy.dtb dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1046a-qds.dtb dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1046a-rdb.dtb dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1046a-tqmls1046a-mbls10xxa.dtb +DTC_FLAGS_fsl-ls1088a-qds := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1088a-qds.dtb +DTC_FLAGS_fsl-ls1088a-rdb := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1088a-rdb.dtb +DTC_FLAGS_fsl-ls1088a-ten64 := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1088a-ten64.dtb +DTC_FLAGS_fsl-ls1088a-tqmls1088a-mbls10xxa := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1088a-tqmls1088a-mbls10xxa.dtb +DTC_FLAGS_fsl-ls2080a-qds := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls2080a-qds.dtb +DTC_FLAGS_fsl-ls2080a-rdb := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls2080a-rdb.dtb +DTC_FLAGS_fsl-ls2081a-rdb := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls2081a-rdb.dtb +DTC_FLAGS_fsl-ls2080a-simu := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls2080a-simu.dtb +DTC_FLAGS_fsl-ls2088a-qds := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls2088a-qds.dtb +DTC_FLAGS_fsl-ls2088a-rdb := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls2088a-rdb.dtb +DTC_FLAGS_fsl-lx2160a-bluebox3 := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-bluebox3.dtb +DTC_FLAGS_fsl-lx2160a-bluebox3-rev-a := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-bluebox3-rev-a.dtb +DTC_FLAGS_fsl-lx2160a-clearfog-cx := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-clearfog-cx.dtb +DTC_FLAGS_fsl-lx2160a-honeycomb := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-honeycomb.dtb +DTC_FLAGS_fsl-lx2160a-qds := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-qds.dtb +DTC_FLAGS_fsl-lx2160a-rdb := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-rdb.dtb +DTC_FLAGS_fsl-lx2162a-clearfog := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2162a-clearfog.dtb +DTC_FLAGS_fsl-lx2162a-qds := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2162a-qds.dtb fsl-ls1028a-qds-13bb-dtbs := fsl-ls1028a-qds.dtb fsl-ls1028a-qds-13bb.dtbo @@ -53,6 +71,7 @@ dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1028a-qds-85bb.dtb dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1028a-qds-899b.dtb dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1028a-qds-9999.dtb +DTC_FLAGS_fsl-lx2160a-tqmlx2160a-mblx2160a := -Wno-interrupt_map fsl-lx2160a-tqmlx2160a-mblx2160a-12-11-x-dtbs := fsl-lx2160a-tqmlx2160a-mblx2160a.dtb \ fsl-lx2160a-tqmlx2160a-mblx2160a_12_x_x.dtbo \ fsl-lx2160a-tqmlx2160a-mblx2160a_x_11_x.dtbo diff --git a/arch/arm64/boot/dts/freescale/imx8mn-var-som-symphony.dts b/arch/arm64/boot/dts/freescale/imx8mn-var-som-symphony.dts index f38ee2266b25..a6b94d1957c9 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-var-som-symphony.dts +++ b/arch/arm64/boot/dts/freescale/imx8mn-var-som-symphony.dts @@ -128,14 +128,9 @@ pinctrl-0 = <&pinctrl_ptn5150>; status = "okay"; - connector { - compatible = "usb-c-connector"; - label = "USB-C"; - - port { - typec1_dr_sw: endpoint { - remote-endpoint = <&usb1_drd_sw>; - }; + port { + typec1_dr_sw: endpoint { + remote-endpoint = <&usb1_drd_sw>; }; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-data-modul-edm-sbc.dts b/arch/arm64/boot/dts/freescale/imx8mp-data-modul-edm-sbc.dts index d98a040860a4..5828c9d7821d 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-data-modul-edm-sbc.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-data-modul-edm-sbc.dts @@ -486,7 +486,7 @@ &uart4 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_uart4>; - status = "okay"; + status = "disabled"; }; &usb3_phy0 { diff --git a/arch/arm64/boot/dts/freescale/imx8mp-dhcom-pdk3.dts b/arch/arm64/boot/dts/freescale/imx8mp-dhcom-pdk3.dts index fea67a9282f0..b749e28e5ede 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-dhcom-pdk3.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-dhcom-pdk3.dts @@ -175,14 +175,10 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_ptn5150>; - connector { - compatible = "usb-c-connector"; - label = "USB-C"; - - port { - ptn5150_out_ep: endpoint { - remote-endpoint = <&dwc3_0_ep>; - }; + port { + + ptn5150_out_ep: endpoint { + remote-endpoint = <&dwc3_0_ep>; }; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi index 4ae4fdab461e..43f1d45ccc96 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi @@ -255,7 +255,7 @@ <&clk IMX8MP_AUDIO_PLL2_OUT>; assigned-clock-parents = <&clk IMX8MP_AUDIO_PLL2_OUT>; assigned-clock-rates = <13000000>, <13000000>, <156000000>; - reset-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>; + reset-gpios = <&gpio4 1 GPIO_ACTIVE_HIGH>; status = "disabled"; ports { diff --git a/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts b/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts index a2d5d19b2de0..86d3da36e4f3 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts @@ -184,6 +184,13 @@ enable-active-high; }; + reg_vcc_1v8: regulator-1v8 { + compatible = "regulator-fixed"; + regulator-name = "VCC_1V8"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + }; + reg_vcc_3v3: regulator-3v3 { compatible = "regulator-fixed"; regulator-name = "VCC_3V3"; @@ -480,7 +487,7 @@ clock-names = "mclk"; clocks = <&audio_blk_ctrl IMX8MP_CLK_AUDIOMIX_SAI3_MCLK1>; reset-gpios = <&gpio4 29 GPIO_ACTIVE_LOW>; - iov-supply = <®_vcc_3v3>; + iov-supply = <®_vcc_1v8>; ldoin-supply = <®_vcc_3v3>; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index 76c73daf546b..39a550c1cd26 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -1820,7 +1820,7 @@ compatible = "fsl,imx8mp-ldb"; reg = <0x5c 0x4>, <0x128 0x4>; reg-names = "ldb", "lvds"; - clocks = <&clk IMX8MP_CLK_MEDIA_LDB>; + clocks = <&clk IMX8MP_CLK_MEDIA_LDB_ROOT>; clock-names = "ldb"; assigned-clocks = <&clk IMX8MP_CLK_MEDIA_LDB>; assigned-clock-parents = <&clk IMX8MP_VIDEO_PLL1_OUT>; diff --git a/arch/arm64/boot/dts/lg/lg1312.dtsi b/arch/arm64/boot/dts/lg/lg1312.dtsi index 48ec4ebec0a8..b864ffa74ea8 100644 --- a/arch/arm64/boot/dts/lg/lg1312.dtsi +++ b/arch/arm64/boot/dts/lg/lg1312.dtsi @@ -126,7 +126,6 @@ amba { #address-cells = <2>; #size-cells = <1>; - #interrupt-cells = <3>; compatible = "simple-bus"; interrupt-parent = <&gic>; diff --git a/arch/arm64/boot/dts/lg/lg1313.dtsi b/arch/arm64/boot/dts/lg/lg1313.dtsi index 3869460aa5dc..996fb39bb50c 100644 --- a/arch/arm64/boot/dts/lg/lg1313.dtsi +++ b/arch/arm64/boot/dts/lg/lg1313.dtsi @@ -126,7 +126,6 @@ amba { #address-cells = <2>; #size-cells = <1>; - #interrupt-cells = <3>; compatible = "simple-bus"; interrupt-parent = <&gic>; diff --git a/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi b/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi index 2c920e22cec2..7ec7c789d87e 100644 --- a/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi @@ -138,7 +138,6 @@ odmi: odmi@300000 { compatible = "marvell,odmi-controller"; - interrupt-controller; msi-controller; marvell,odmi-frames = <4>; reg = <0x300000 0x4000>, diff --git a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts index 69c7f3954ae5..4127cb84eba4 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts +++ b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts @@ -128,6 +128,7 @@ compatible = "mediatek,mt6360"; reg = <0x34>; interrupt-controller; + #interrupt-cells = <1>; interrupts-extended = <&pio 101 IRQ_TYPE_EDGE_FALLING>; interrupt-names = "IRQB"; diff --git a/arch/arm64/boot/dts/nvidia/tegra234-p3737-0000+p3701-0000.dts b/arch/arm64/boot/dts/nvidia/tegra234-p3737-0000+p3701-0000.dts index ea13c4a7027c..81a82933e350 100644 --- a/arch/arm64/boot/dts/nvidia/tegra234-p3737-0000+p3701-0000.dts +++ b/arch/arm64/boot/dts/nvidia/tegra234-p3737-0000+p3701-0000.dts @@ -175,7 +175,7 @@ status = "okay"; phy-handle = <&mgbe0_phy>; - phy-mode = "usxgmii"; + phy-mode = "10gbase-r"; mdio { #address-cells = <1>; diff --git a/arch/arm64/boot/dts/nvidia/tegra234.dtsi b/arch/arm64/boot/dts/nvidia/tegra234.dtsi index 3f16595d099c..d1bd328892af 100644 --- a/arch/arm64/boot/dts/nvidia/tegra234.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra234.dtsi @@ -1459,7 +1459,7 @@ <&mc TEGRA234_MEMORY_CLIENT_MGBEAWR &emc>; interconnect-names = "dma-mem", "write"; iommus = <&smmu_niso0 TEGRA234_SID_MGBE>; - power-domains = <&bpmp TEGRA234_POWER_DOMAIN_MGBEA>; + power-domains = <&bpmp TEGRA234_POWER_DOMAIN_MGBEB>; status = "disabled"; }; @@ -1493,7 +1493,7 @@ <&mc TEGRA234_MEMORY_CLIENT_MGBEBWR &emc>; interconnect-names = "dma-mem", "write"; iommus = <&smmu_niso0 TEGRA234_SID_MGBE_VF1>; - power-domains = <&bpmp TEGRA234_POWER_DOMAIN_MGBEB>; + power-domains = <&bpmp TEGRA234_POWER_DOMAIN_MGBEC>; status = "disabled"; }; @@ -1527,7 +1527,7 @@ <&mc TEGRA234_MEMORY_CLIENT_MGBECWR &emc>; interconnect-names = "dma-mem", "write"; iommus = <&smmu_niso0 TEGRA234_SID_MGBE_VF2>; - power-domains = <&bpmp TEGRA234_POWER_DOMAIN_MGBEC>; + power-domains = <&bpmp TEGRA234_POWER_DOMAIN_MGBED>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/qcom/ipq6018.dtsi b/arch/arm64/boot/dts/qcom/ipq6018.dtsi index 5e1277fea725..61c8fd49c966 100644 --- a/arch/arm64/boot/dts/qcom/ipq6018.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq6018.dtsi @@ -830,10 +830,10 @@ #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0x7>; - interrupt-map = <0 0 0 1 &intc 0 75 IRQ_TYPE_LEVEL_HIGH>, /* int_a */ - <0 0 0 2 &intc 0 78 IRQ_TYPE_LEVEL_HIGH>, /* int_b */ - <0 0 0 3 &intc 0 79 IRQ_TYPE_LEVEL_HIGH>, /* int_c */ - <0 0 0 4 &intc 0 83 IRQ_TYPE_LEVEL_HIGH>; /* int_d */ + interrupt-map = <0 0 0 1 &intc 0 0 0 75 IRQ_TYPE_LEVEL_HIGH>, /* int_a */ + <0 0 0 2 &intc 0 0 0 78 IRQ_TYPE_LEVEL_HIGH>, /* int_b */ + <0 0 0 3 &intc 0 0 0 79 IRQ_TYPE_LEVEL_HIGH>, /* int_c */ + <0 0 0 4 &intc 0 0 0 83 IRQ_TYPE_LEVEL_HIGH>; /* int_d */ clocks = <&gcc GCC_SYS_NOC_PCIE0_AXI_CLK>, <&gcc GCC_PCIE0_AXI_M_CLK>, diff --git a/arch/arm64/boot/dts/qcom/ipq8074.dtsi b/arch/arm64/boot/dts/qcom/ipq8074.dtsi index cf295bed3299..26441447c866 100644 --- a/arch/arm64/boot/dts/qcom/ipq8074.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq8074.dtsi @@ -814,13 +814,13 @@ interrupt-names = "msi"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0x7>; - interrupt-map = <0 0 0 1 &intc 0 142 + interrupt-map = <0 0 0 1 &intc 0 0 142 IRQ_TYPE_LEVEL_HIGH>, /* int_a */ - <0 0 0 2 &intc 0 143 + <0 0 0 2 &intc 0 0 143 IRQ_TYPE_LEVEL_HIGH>, /* int_b */ - <0 0 0 3 &intc 0 144 + <0 0 0 3 &intc 0 0 144 IRQ_TYPE_LEVEL_HIGH>, /* int_c */ - <0 0 0 4 &intc 0 145 + <0 0 0 4 &intc 0 0 145 IRQ_TYPE_LEVEL_HIGH>; /* int_d */ clocks = <&gcc GCC_SYS_NOC_PCIE1_AXI_CLK>, @@ -876,13 +876,13 @@ interrupt-names = "msi"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0x7>; - interrupt-map = <0 0 0 1 &intc 0 75 + interrupt-map = <0 0 0 1 &intc 0 0 75 IRQ_TYPE_LEVEL_HIGH>, /* int_a */ - <0 0 0 2 &intc 0 78 + <0 0 0 2 &intc 0 0 78 IRQ_TYPE_LEVEL_HIGH>, /* int_b */ - <0 0 0 3 &intc 0 79 + <0 0 0 3 &intc 0 0 79 IRQ_TYPE_LEVEL_HIGH>, /* int_c */ - <0 0 0 4 &intc 0 83 + <0 0 0 4 &intc 0 0 83 IRQ_TYPE_LEVEL_HIGH>; /* int_d */ clocks = <&gcc GCC_SYS_NOC_PCIE0_AXI_CLK>, diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 8d41ed261adf..ee6f87c828ae 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -457,25 +457,6 @@ }; }; - mpm: interrupt-controller { - compatible = "qcom,mpm"; - qcom,rpm-msg-ram = <&apss_mpm>; - interrupts = <GIC_SPI 171 IRQ_TYPE_EDGE_RISING>; - mboxes = <&apcs_glb 1>; - interrupt-controller; - #interrupt-cells = <2>; - #power-domain-cells = <0>; - interrupt-parent = <&intc>; - qcom,mpm-pin-count = <96>; - qcom,mpm-pin-map = <2 184>, /* TSENS1 upper_lower_int */ - <52 243>, /* DWC3_PRI ss_phy_irq */ - <79 347>, /* DWC3_PRI hs_phy_irq */ - <80 352>, /* DWC3_SEC hs_phy_irq */ - <81 347>, /* QUSB2_PHY_PRI DP+DM */ - <82 352>, /* QUSB2_PHY_SEC DP+DM */ - <87 326>; /* SPMI */ - }; - psci { compatible = "arm,psci-1.0"; method = "smc"; @@ -765,15 +746,8 @@ }; rpm_msg_ram: sram@68000 { - compatible = "qcom,rpm-msg-ram", "mmio-sram"; + compatible = "qcom,rpm-msg-ram"; reg = <0x00068000 0x6000>; - #address-cells = <1>; - #size-cells = <1>; - ranges = <0 0x00068000 0x7000>; - - apss_mpm: sram@1b8 { - reg = <0x1b8 0x48>; - }; }; qfprom@74000 { @@ -856,8 +830,8 @@ reg = <0x004ad000 0x1000>, /* TM */ <0x004ac000 0x1000>; /* SROT */ #qcom,sensors = <8>; - interrupts-extended = <&mpm 2 IRQ_TYPE_LEVEL_HIGH>, - <&intc GIC_SPI 430 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 184 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 430 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "uplow", "critical"; #thermal-sensor-cells = <1>; }; @@ -1363,7 +1337,6 @@ interrupts = <GIC_SPI 208 IRQ_TYPE_LEVEL_HIGH>; gpio-controller; gpio-ranges = <&tlmm 0 0 150>; - wakeup-parent = <&mpm>; #gpio-cells = <2>; interrupt-controller; #interrupt-cells = <2>; @@ -1891,7 +1864,7 @@ <0x0400a000 0x002100>; reg-names = "core", "chnls", "obsrvr", "intr", "cnfg"; interrupt-names = "periph_irq"; - interrupts-extended = <&mpm 87 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 326 IRQ_TYPE_LEVEL_HIGH>; qcom,ee = <0>; qcom,channel = <0>; #address-cells = <2>; @@ -3052,8 +3025,8 @@ #size-cells = <1>; ranges; - interrupts-extended = <&mpm 79 IRQ_TYPE_LEVEL_HIGH>, - <&mpm 52 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 347 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 243 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "hs_phy_irq", "ss_phy_irq"; clocks = <&gcc GCC_SYS_NOC_USB3_AXI_CLK>, diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts b/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts index ffc4406422ae..41215567b3ae 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts @@ -563,6 +563,8 @@ }; &pcie4 { + max-link-speed = <2>; + perst-gpios = <&tlmm 141 GPIO_ACTIVE_LOW>; wake-gpios = <&tlmm 139 GPIO_ACTIVE_LOW>; diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts index def3976bd5bb..eb657e544961 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts @@ -722,6 +722,8 @@ }; &pcie4 { + max-link-speed = <2>; + perst-gpios = <&tlmm 141 GPIO_ACTIVE_LOW>; wake-gpios = <&tlmm 139 GPIO_ACTIVE_LOW>; diff --git a/arch/arm64/boot/dts/qcom/sm6115.dtsi b/arch/arm64/boot/dts/qcom/sm6115.dtsi index 160e098f1075..f9849b8befbf 100644 --- a/arch/arm64/boot/dts/qcom/sm6115.dtsi +++ b/arch/arm64/boot/dts/qcom/sm6115.dtsi @@ -1304,6 +1304,9 @@ &config_noc SLAVE_QUP_0 RPM_ALWAYS_TAG>, <&system_noc MASTER_QUP_0 RPM_ALWAYS_TAG &bimc SLAVE_EBI_CH0 RPM_ALWAYS_TAG>; + interconnect-names = "qup-core", + "qup-config", + "qup-memory"; #address-cells = <1>; #size-cells = <0>; status = "disabled"; diff --git a/arch/arm64/boot/dts/qcom/sm8650-mtp.dts b/arch/arm64/boot/dts/qcom/sm8650-mtp.dts index 9d916edb1c73..be133a3d5cbe 100644 --- a/arch/arm64/boot/dts/qcom/sm8650-mtp.dts +++ b/arch/arm64/boot/dts/qcom/sm8650-mtp.dts @@ -622,7 +622,7 @@ &tlmm { /* Reserved I/Os for NFC */ - gpio-reserved-ranges = <32 8>; + gpio-reserved-ranges = <32 8>, <74 1>; disp0_reset_n_active: disp0-reset-n-active-state { pins = "gpio133"; diff --git a/arch/arm64/boot/dts/qcom/sm8650-qrd.dts b/arch/arm64/boot/dts/qcom/sm8650-qrd.dts index 592a67a47c78..b9151c2ddf2e 100644 --- a/arch/arm64/boot/dts/qcom/sm8650-qrd.dts +++ b/arch/arm64/boot/dts/qcom/sm8650-qrd.dts @@ -659,7 +659,7 @@ &tlmm { /* Reserved I/Os for NFC */ - gpio-reserved-ranges = <32 8>; + gpio-reserved-ranges = <32 8>, <74 1>; bt_default: bt-default-state { bt-en-pins { diff --git a/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi b/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi index 3885ef3454ff..50de17e4fb3f 100644 --- a/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi +++ b/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi @@ -234,6 +234,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&gpio6>; interrupts = <8 IRQ_TYPE_EDGE_FALLING>; @@ -294,6 +295,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&gpio6>; interrupts = <4 IRQ_TYPE_EDGE_FALLING>; }; @@ -314,6 +316,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&gpio7>; interrupts = <3 IRQ_TYPE_EDGE_FALLING>; }; @@ -324,6 +327,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&gpio5>; interrupts = <9 IRQ_TYPE_EDGE_FALLING>; }; diff --git a/arch/arm64/boot/dts/rockchip/px30.dtsi b/arch/arm64/boot/dts/rockchip/px30.dtsi index d0905515399b..9137dd76e72c 100644 --- a/arch/arm64/boot/dts/rockchip/px30.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30.dtsi @@ -631,6 +631,7 @@ clock-names = "spiclk", "apb_pclk"; dmas = <&dmac 12>, <&dmac 13>; dma-names = "tx", "rx"; + num-cs = <2>; pinctrl-names = "default"; pinctrl-0 = <&spi0_clk &spi0_csn &spi0_miso &spi0_mosi>; #address-cells = <1>; @@ -646,6 +647,7 @@ clock-names = "spiclk", "apb_pclk"; dmas = <&dmac 14>, <&dmac 15>; dma-names = "tx", "rx"; + num-cs = <2>; pinctrl-names = "default"; pinctrl-0 = <&spi1_clk &spi1_csn0 &spi1_csn1 &spi1_miso &spi1_mosi>; #address-cells = <1>; diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index fb5dcf6e9327..7b4c15c4a9c3 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -488,7 +488,6 @@ pwm3: pwm@ff1b0030 { compatible = "rockchip,rk3328-pwm"; reg = <0x0 0xff1b0030 0x0 0x10>; - interrupts = <GIC_SPI 50 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cru SCLK_PWM>, <&cru PCLK_PWM>; clock-names = "pwm", "pclk"; pinctrl-names = "default"; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-evb.dts b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-evb.dts index d4c70835e0fe..a4946cdc3bb3 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-evb.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-evb.dts @@ -72,7 +72,7 @@ vin-supply = <&vcc3v3_sys>; }; - vcc5v0_usb30_host: vcc5v0-usb30-host-regulator { + vcc5v0_usb_host1: vcc5v0_usb_host2: vcc5v0-usb-host-regulator { compatible = "regulator-fixed"; regulator-name = "vcc5v0_host"; regulator-boot-on; @@ -114,6 +114,7 @@ status = "okay"; }; +/* Standard pcie */ &pcie3x2 { reset-gpios = <&gpio3 RK_PB0 GPIO_ACTIVE_HIGH>; vpcie3v3-supply = <&vcc3v3_sys>; @@ -122,6 +123,7 @@ /* M.2 M-Key ssd */ &pcie3x4 { + num-lanes = <2>; reset-gpios = <&gpio4 RK_PB6 GPIO_ACTIVE_HIGH>; vpcie3v3-supply = <&vcc3v3_sys>; status = "okay"; @@ -188,12 +190,12 @@ }; &u2phy2_host { - phy-supply = <&vcc5v0_usb30_host>; + phy-supply = <&vcc5v0_usb_host1>; status = "okay"; }; &u2phy3_host { - phy-supply = <&vcc5v0_usb30_host>; + phy-supply = <&vcc5v0_usb_host2>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi index 0b02f4d6e003..cce1c8e83587 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi @@ -16,8 +16,8 @@ aliases { mmc0 = &sdhci; - mmc1 = &sdio; - mmc2 = &sdmmc; + mmc1 = &sdmmc; + mmc2 = &sdio; serial2 = &uart2; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts index ac7c677b0fb9..de30c2632b8e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts @@ -448,6 +448,7 @@ <&rk806_dvs2_null>, <&rk806_dvs3_null>; pinctrl-names = "default"; spi-max-frequency = <1000000>; + system-power-controller; vcc1-supply = <&vcc5v0_sys>; vcc2-supply = <&vcc5v0_sys>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-jaguar.dts b/arch/arm64/boot/dts/rockchip/rk3588-jaguar.dts index 4ce70fb75a30..39d65002add1 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-jaguar.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-jaguar.dts @@ -62,7 +62,6 @@ compatible = "gpio-leds"; pinctrl-names = "default"; pinctrl-0 = <&led1_pin>; - status = "okay"; /* LED1 on PCB */ led-1 { diff --git a/arch/arm64/boot/dts/rockchip/rk3588-nanopc-t6.dts b/arch/arm64/boot/dts/rockchip/rk3588-nanopc-t6.dts index d7722772ecd8..997b516c2533 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-nanopc-t6.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-nanopc-t6.dts @@ -189,19 +189,19 @@ cpu-supply = <&vdd_cpu_lit_s0>; }; -&cpu_b0{ +&cpu_b0 { cpu-supply = <&vdd_cpu_big0_s0>; }; -&cpu_b1{ +&cpu_b1 { cpu-supply = <&vdd_cpu_big0_s0>; }; -&cpu_b2{ +&cpu_b2 { cpu-supply = <&vdd_cpu_big1_s0>; }; -&cpu_b3{ +&cpu_b3 { cpu-supply = <&vdd_cpu_big1_s0>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts index ef4f058c20ff..e037bf9db75a 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts @@ -19,8 +19,8 @@ aliases { mmc0 = &sdhci; - mmc1 = &sdio; - mmc2 = &sdmmc; + mmc1 = &sdmmc; + mmc2 = &sdio; }; analog-sound { diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts b/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts index dc677f29a9c7..3c2278886851 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts @@ -195,13 +195,13 @@ &gpio1 { gpio-line-names = /* GPIO1 A0-A7 */ - "HEADER_27_3v3", "HEADER_28_3v3", "", "", + "HEADER_27_3v3", "", "", "", "HEADER_29_1v8", "", "HEADER_7_1v8", "", /* GPIO1 B0-B7 */ "", "HEADER_31_1v8", "HEADER_33_1v8", "", "HEADER_11_1v8", "HEADER_13_1v8", "", "", /* GPIO1 C0-C7 */ - "", "", "", "", + "", "HEADER_28_3v3", "", "", "", "", "", "", /* GPIO1 D0-D7 */ "", "", "", "", @@ -225,11 +225,11 @@ &gpio4 { gpio-line-names = /* GPIO4 A0-A7 */ - "", "", "HEADER_37_3v3", "HEADER_32_3v3", - "HEADER_36_3v3", "", "HEADER_35_3v3", "HEADER_38_3v3", + "", "", "HEADER_37_3v3", "HEADER_8_3v3", + "HEADER_10_3v3", "", "HEADER_32_3v3", "HEADER_35_3v3", /* GPIO4 B0-B7 */ "", "", "", "HEADER_40_3v3", - "HEADER_8_3v3", "HEADER_10_3v3", "", "", + "HEADER_38_3v3", "HEADER_36_3v3", "", "", /* GPIO4 C0-C7 */ "", "", "", "", "", "", "", "", diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index bac4cabef607..467ac2f768ac 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -227,8 +227,19 @@ static int ctr_encrypt(struct skcipher_request *req) src += blocks * AES_BLOCK_SIZE; } if (nbytes && walk.nbytes == walk.total) { + u8 buf[AES_BLOCK_SIZE]; + u8 *d = dst; + + if (unlikely(nbytes < AES_BLOCK_SIZE)) + src = dst = memcpy(buf + sizeof(buf) - nbytes, + src, nbytes); + neon_aes_ctr_encrypt(dst, src, ctx->enc, ctx->key.rounds, nbytes, walk.iv); + + if (unlikely(nbytes < AES_BLOCK_SIZE)) + memcpy(d, dst, nbytes); + nbytes = 0; } kernel_neon_end(); diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 481d94416d69..b67b89c54e1c 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -386,6 +386,7 @@ extern void sme_alloc(struct task_struct *task, bool flush); extern unsigned int sme_get_vl(void); extern int sme_set_current_vl(unsigned long arg); extern int sme_get_current_vl(void); +extern void sme_suspend_exit(void); /* * Return how many bytes of memory are required to store the full SME @@ -421,6 +422,7 @@ static inline int sme_max_vl(void) { return 0; } static inline int sme_max_virtualisable_vl(void) { return 0; } static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; } static inline int sme_get_current_vl(void) { return -EINVAL; } +static inline void sme_suspend_exit(void) { } static inline size_t sme_state_size(struct task_struct const *task) { diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index b360c4c2b5e7..6aafbb789991 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -15,10 +15,6 @@ #define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE -/* - * Prefer the constraint "S" to support PIC with GCC. Clang before 19 does not - * support "S" on a symbol with a constant offset, so we use "i" as a fallback. - */ static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { @@ -27,9 +23,9 @@ static __always_inline bool arch_static_branch(struct static_key * const key, " .pushsection __jump_table, \"aw\" \n\t" " .align 3 \n\t" " .long 1b - ., %l[l_yes] - . \n\t" - " .quad (%[key] - .) + %[bit0] \n\t" + " .quad %c0 - . \n\t" " .popsection \n\t" - : : [key]"Si"(key), [bit0]"i"(branch) : : l_yes); + : : "i"(&((char *)key)[branch]) : : l_yes); return false; l_yes: @@ -44,9 +40,9 @@ static __always_inline bool arch_static_branch_jump(struct static_key * const ke " .pushsection __jump_table, \"aw\" \n\t" " .align 3 \n\t" " .long 1b - ., %l[l_yes] - . \n\t" - " .quad (%[key] - .) + %[bit0] \n\t" + " .quad %c0 - . \n\t" " .popsection \n\t" - : : [key]"Si"(key), [bit0]"i"(branch) : : l_yes); + : : "i"(&((char *)key)[branch]) : : l_yes); return false; l_yes: diff --git a/arch/arm64/include/asm/patching.h b/arch/arm64/include/asm/patching.h index 68908b82b168..587bdb91ab7a 100644 --- a/arch/arm64/include/asm/patching.h +++ b/arch/arm64/include/asm/patching.h @@ -8,6 +8,8 @@ int aarch64_insn_read(void *addr, u32 *insnp); int aarch64_insn_write(void *addr, u32 insn); int aarch64_insn_write_literal_u64(void *addr, u64 val); +void *aarch64_insn_set(void *dst, u32 insn, size_t len); +void *aarch64_insn_copy(void *dst, void *src, size_t len); int aarch64_insn_patch_text_nosync(void *addr, u32 insn); int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt); diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 25ceaee6b025..f27acca550d5 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1311,6 +1311,22 @@ void __init sme_setup(void) get_sme_default_vl()); } +void sme_suspend_exit(void) +{ + u64 smcr = 0; + + if (!system_supports_sme()) + return; + + if (system_supports_fa64()) + smcr |= SMCR_ELx_FA64; + if (system_supports_sme2()) + smcr |= SMCR_ELx_EZT0; + + write_sysreg_s(smcr, SYS_SMCR_EL1); + write_sysreg_s(0, SYS_SMPRI_EL1); +} + #endif /* CONFIG_ARM64_SME */ static void sve_init_regs(void) diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c index b4835f6d594b..255534930368 100644 --- a/arch/arm64/kernel/patching.c +++ b/arch/arm64/kernel/patching.c @@ -105,6 +105,81 @@ noinstr int aarch64_insn_write_literal_u64(void *addr, u64 val) return ret; } +typedef void text_poke_f(void *dst, void *src, size_t patched, size_t len); + +static void *__text_poke(text_poke_f func, void *addr, void *src, size_t len) +{ + unsigned long flags; + size_t patched = 0; + size_t size; + void *waddr; + void *ptr; + + raw_spin_lock_irqsave(&patch_lock, flags); + + while (patched < len) { + ptr = addr + patched; + size = min_t(size_t, PAGE_SIZE - offset_in_page(ptr), + len - patched); + + waddr = patch_map(ptr, FIX_TEXT_POKE0); + func(waddr, src, patched, size); + patch_unmap(FIX_TEXT_POKE0); + + patched += size; + } + raw_spin_unlock_irqrestore(&patch_lock, flags); + + flush_icache_range((uintptr_t)addr, (uintptr_t)addr + len); + + return addr; +} + +static void text_poke_memcpy(void *dst, void *src, size_t patched, size_t len) +{ + copy_to_kernel_nofault(dst, src + patched, len); +} + +static void text_poke_memset(void *dst, void *src, size_t patched, size_t len) +{ + u32 c = *(u32 *)src; + + memset32(dst, c, len / 4); +} + +/** + * aarch64_insn_copy - Copy instructions into (an unused part of) RX memory + * @dst: address to modify + * @src: source of the copy + * @len: length to copy + * + * Useful for JITs to dump new code blocks into unused regions of RX memory. + */ +noinstr void *aarch64_insn_copy(void *dst, void *src, size_t len) +{ + /* A64 instructions must be word aligned */ + if ((uintptr_t)dst & 0x3) + return NULL; + + return __text_poke(text_poke_memcpy, dst, src, len); +} + +/** + * aarch64_insn_set - memset for RX memory regions. + * @dst: address to modify + * @insn: value to set + * @len: length of memory region. + * + * Useful for JITs to fill regions of RX memory with illegal instructions. + */ +noinstr void *aarch64_insn_set(void *dst, u32 insn, size_t len) +{ + if ((uintptr_t)dst & 0x3) + return NULL; + + return __text_poke(text_poke_memset, dst, &insn, len); +} + int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn) { u32 *tp = addr; diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 7f88028a00c0..66cffc5fc0be 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -7,6 +7,7 @@ #include <linux/kernel.h> #include <linux/efi.h> #include <linux/export.h> +#include <linux/filter.h> #include <linux/ftrace.h> #include <linux/kprobes.h> #include <linux/sched.h> @@ -266,6 +267,31 @@ noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry, kunwind_stack_walk(arch_kunwind_consume_entry, &data, task, regs); } +struct bpf_unwind_consume_entry_data { + bool (*consume_entry)(void *cookie, u64 ip, u64 sp, u64 fp); + void *cookie; +}; + +static bool +arch_bpf_unwind_consume_entry(const struct kunwind_state *state, void *cookie) +{ + struct bpf_unwind_consume_entry_data *data = cookie; + + return data->consume_entry(data->cookie, state->common.pc, 0, + state->common.fp); +} + +noinline noinstr void arch_bpf_stack_walk(bool (*consume_entry)(void *cookie, u64 ip, u64 sp, + u64 fp), void *cookie) +{ + struct bpf_unwind_consume_entry_data data = { + .consume_entry = consume_entry, + .cookie = cookie, + }; + + kunwind_stack_walk(arch_bpf_unwind_consume_entry, &data, current, NULL); +} + static bool dump_backtrace_entry(void *arg, unsigned long where) { char *loglvl = arg; diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index eca4d0435211..eaaff94329cd 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -12,6 +12,7 @@ #include <asm/daifflags.h> #include <asm/debug-monitors.h> #include <asm/exec.h> +#include <asm/fpsimd.h> #include <asm/mte.h> #include <asm/memory.h> #include <asm/mmu_context.h> @@ -80,6 +81,8 @@ void notrace __cpu_suspend_exit(void) */ spectre_v4_enable_mitigation(NULL); + sme_suspend_exit(); + /* Restore additional feature-specific configuration */ ptrauth_suspend_exit(); } diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index cfd5434de483..5afc7a525eca 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -76,6 +76,7 @@ struct jit_ctx { int *offset; int exentry_idx; __le32 *image; + __le32 *ro_image; u32 stack_size; int fpb_offset; }; @@ -205,6 +206,14 @@ static void jit_fill_hole(void *area, unsigned int size) *ptr++ = cpu_to_le32(AARCH64_BREAK_FAULT); } +int bpf_arch_text_invalidate(void *dst, size_t len) +{ + if (!aarch64_insn_set(dst, AARCH64_BREAK_FAULT, len)) + return -EINVAL; + + return 0; +} + static inline int epilogue_offset(const struct jit_ctx *ctx) { int to = ctx->epilogue_offset; @@ -285,7 +294,8 @@ static bool is_lsi_offset(int offset, int scale) /* Tail call offset to jump into */ #define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 8) -static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) +static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf, + bool is_exception_cb) { const struct bpf_prog *prog = ctx->prog; const bool is_main_prog = !bpf_is_subprog(prog); @@ -333,19 +343,34 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) emit(A64_MOV(1, A64_R(9), A64_LR), ctx); emit(A64_NOP, ctx); - /* Sign lr */ - if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)) - emit(A64_PACIASP, ctx); - - /* Save FP and LR registers to stay align with ARM64 AAPCS */ - emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); - emit(A64_MOV(1, A64_FP, A64_SP), ctx); - - /* Save callee-saved registers */ - emit(A64_PUSH(r6, r7, A64_SP), ctx); - emit(A64_PUSH(r8, r9, A64_SP), ctx); - emit(A64_PUSH(fp, tcc, A64_SP), ctx); - emit(A64_PUSH(fpb, A64_R(28), A64_SP), ctx); + if (!is_exception_cb) { + /* Sign lr */ + if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)) + emit(A64_PACIASP, ctx); + /* Save FP and LR registers to stay align with ARM64 AAPCS */ + emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); + emit(A64_MOV(1, A64_FP, A64_SP), ctx); + + /* Save callee-saved registers */ + emit(A64_PUSH(r6, r7, A64_SP), ctx); + emit(A64_PUSH(r8, r9, A64_SP), ctx); + emit(A64_PUSH(fp, tcc, A64_SP), ctx); + emit(A64_PUSH(fpb, A64_R(28), A64_SP), ctx); + } else { + /* + * Exception callback receives FP of Main Program as third + * parameter + */ + emit(A64_MOV(1, A64_FP, A64_R(2)), ctx); + /* + * Main Program already pushed the frame record and the + * callee-saved registers. The exception callback will not push + * anything and re-use the main program's stack. + * + * 10 registers are on the stack + */ + emit(A64_SUB_I(1, A64_SP, A64_FP, 80), ctx); + } /* Set up BPF prog stack base register */ emit(A64_MOV(1, fp, A64_SP), ctx); @@ -365,6 +390,20 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) emit_bti(A64_BTI_J, ctx); } + /* + * Program acting as exception boundary should save all ARM64 + * Callee-saved registers as the exception callback needs to recover + * all ARM64 Callee-saved registers in its epilogue. + */ + if (prog->aux->exception_boundary) { + /* + * As we are pushing two more registers, BPF_FP should be moved + * 16 bytes + */ + emit(A64_SUB_I(1, fp, fp, 16), ctx); + emit(A64_PUSH(A64_R(23), A64_R(24), A64_SP), ctx); + } + emit(A64_SUB_I(1, fpb, fp, ctx->fpb_offset), ctx); /* Stack must be multiples of 16B */ @@ -653,7 +692,7 @@ static void build_plt(struct jit_ctx *ctx) plt->target = (u64)&dummy_tramp; } -static void build_epilogue(struct jit_ctx *ctx) +static void build_epilogue(struct jit_ctx *ctx, bool is_exception_cb) { const u8 r0 = bpf2a64[BPF_REG_0]; const u8 r6 = bpf2a64[BPF_REG_6]; @@ -666,6 +705,15 @@ static void build_epilogue(struct jit_ctx *ctx) /* We're done with BPF stack */ emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); + /* + * Program acting as exception boundary pushes R23 and R24 in addition + * to BPF callee-saved registers. Exception callback uses the boundary + * program's stack frame, so recover these extra registers in the above + * two cases. + */ + if (ctx->prog->aux->exception_boundary || is_exception_cb) + emit(A64_POP(A64_R(23), A64_R(24), A64_SP), ctx); + /* Restore x27 and x28 */ emit(A64_POP(fpb, A64_R(28), A64_SP), ctx); /* Restore fs (x25) and x26 */ @@ -707,7 +755,8 @@ static int add_exception_handler(const struct bpf_insn *insn, struct jit_ctx *ctx, int dst_reg) { - off_t offset; + off_t ins_offset; + off_t fixup_offset; unsigned long pc; struct exception_table_entry *ex; @@ -724,12 +773,17 @@ static int add_exception_handler(const struct bpf_insn *insn, return -EINVAL; ex = &ctx->prog->aux->extable[ctx->exentry_idx]; - pc = (unsigned long)&ctx->image[ctx->idx - 1]; + pc = (unsigned long)&ctx->ro_image[ctx->idx - 1]; - offset = pc - (long)&ex->insn; - if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN)) + /* + * This is the relative offset of the instruction that may fault from + * the exception table itself. This will be written to the exception + * table and if this instruction faults, the destination register will + * be set to '0' and the execution will jump to the next instruction. + */ + ins_offset = pc - (long)&ex->insn; + if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN)) return -ERANGE; - ex->insn = offset; /* * Since the extable follows the program, the fixup offset is always @@ -738,12 +792,25 @@ static int add_exception_handler(const struct bpf_insn *insn, * bits. We don't need to worry about buildtime or runtime sort * modifying the upper bits because the table is already sorted, and * isn't part of the main exception table. + * + * The fixup_offset is set to the next instruction from the instruction + * that may fault. The execution will jump to this after handling the + * fault. */ - offset = (long)&ex->fixup - (pc + AARCH64_INSN_SIZE); - if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset)) + fixup_offset = (long)&ex->fixup - (pc + AARCH64_INSN_SIZE); + if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset)) return -ERANGE; - ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) | + /* + * The offsets above have been calculated using the RO buffer but we + * need to use the R/W buffer for writes. + * switch ex to rw buffer for writing. + */ + ex = (void *)ctx->image + ((void *)ex - (void *)ctx->ro_image); + + ex->insn = ins_offset; + + ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) | FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg); ex->type = EX_TYPE_BPF; @@ -1511,7 +1578,8 @@ static inline void bpf_flush_icache(void *start, void *end) struct arm64_jit_data { struct bpf_binary_header *header; - u8 *image; + u8 *ro_image; + struct bpf_binary_header *ro_header; struct jit_ctx ctx; }; @@ -1520,12 +1588,14 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) int image_size, prog_size, extable_size, extable_align, extable_offset; struct bpf_prog *tmp, *orig_prog = prog; struct bpf_binary_header *header; + struct bpf_binary_header *ro_header; struct arm64_jit_data *jit_data; bool was_classic = bpf_prog_was_classic(prog); bool tmp_blinded = false; bool extra_pass = false; struct jit_ctx ctx; u8 *image_ptr; + u8 *ro_image_ptr; if (!prog->jit_requested) return orig_prog; @@ -1552,8 +1622,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) } if (jit_data->ctx.offset) { ctx = jit_data->ctx; - image_ptr = jit_data->image; + ro_image_ptr = jit_data->ro_image; + ro_header = jit_data->ro_header; header = jit_data->header; + image_ptr = (void *)header + ((void *)ro_image_ptr + - (void *)ro_header); extra_pass = true; prog_size = sizeof(u32) * ctx.idx; goto skip_init_ctx; @@ -1575,7 +1648,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) * BPF line info needs ctx->offset[i] to be the offset of * instruction[i] in jited image, so build prologue first. */ - if (build_prologue(&ctx, was_classic)) { + if (build_prologue(&ctx, was_classic, prog->aux->exception_cb)) { prog = orig_prog; goto out_off; } @@ -1586,7 +1659,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) } ctx.epilogue_offset = ctx.idx; - build_epilogue(&ctx); + build_epilogue(&ctx, prog->aux->exception_cb); build_plt(&ctx); extable_align = __alignof__(struct exception_table_entry); @@ -1598,63 +1671,81 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) /* also allocate space for plt target */ extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align); image_size = extable_offset + extable_size; - header = bpf_jit_binary_alloc(image_size, &image_ptr, - sizeof(u32), jit_fill_hole); - if (header == NULL) { + ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr, + sizeof(u32), &header, &image_ptr, + jit_fill_hole); + if (!ro_header) { prog = orig_prog; goto out_off; } /* 2. Now, the actual pass. */ + /* + * Use the image(RW) for writing the JITed instructions. But also save + * the ro_image(RX) for calculating the offsets in the image. The RW + * image will be later copied to the RX image from where the program + * will run. The bpf_jit_binary_pack_finalize() will do this copy in the + * final step. + */ ctx.image = (__le32 *)image_ptr; + ctx.ro_image = (__le32 *)ro_image_ptr; if (extable_size) - prog->aux->extable = (void *)image_ptr + extable_offset; + prog->aux->extable = (void *)ro_image_ptr + extable_offset; skip_init_ctx: ctx.idx = 0; ctx.exentry_idx = 0; - build_prologue(&ctx, was_classic); + build_prologue(&ctx, was_classic, prog->aux->exception_cb); if (build_body(&ctx, extra_pass)) { - bpf_jit_binary_free(header); prog = orig_prog; - goto out_off; + goto out_free_hdr; } - build_epilogue(&ctx); + build_epilogue(&ctx, prog->aux->exception_cb); build_plt(&ctx); /* 3. Extra pass to validate JITed code. */ if (validate_ctx(&ctx)) { - bpf_jit_binary_free(header); prog = orig_prog; - goto out_off; + goto out_free_hdr; } /* And we're done. */ if (bpf_jit_enable > 1) bpf_jit_dump(prog->len, prog_size, 2, ctx.image); - bpf_flush_icache(header, ctx.image + ctx.idx); - if (!prog->is_func || extra_pass) { if (extra_pass && ctx.idx != jit_data->ctx.idx) { pr_err_once("multi-func JIT bug %d != %d\n", ctx.idx, jit_data->ctx.idx); - bpf_jit_binary_free(header); prog->bpf_func = NULL; prog->jited = 0; prog->jited_len = 0; + goto out_free_hdr; + } + if (WARN_ON(bpf_jit_binary_pack_finalize(prog, ro_header, + header))) { + /* ro_header has been freed */ + ro_header = NULL; + prog = orig_prog; goto out_off; } - bpf_jit_binary_lock_ro(header); + /* + * The instructions have now been copied to the ROX region from + * where they will execute. Now the data cache has to be cleaned to + * the PoU and the I-cache has to be invalidated for the VAs. + */ + bpf_flush_icache(ro_header, ctx.ro_image + ctx.idx); } else { jit_data->ctx = ctx; - jit_data->image = image_ptr; + jit_data->ro_image = ro_image_ptr; jit_data->header = header; + jit_data->ro_header = ro_header; } - prog->bpf_func = (void *)ctx.image; + + prog->bpf_func = (void *)ctx.ro_image; prog->jited = 1; prog->jited_len = prog_size; @@ -1675,6 +1766,14 @@ out: bpf_jit_prog_release_other(prog, prog == orig_prog ? tmp : orig_prog); return prog; + +out_free_hdr: + if (header) { + bpf_arch_text_copy(&ro_header->size, &header->size, + sizeof(header->size)); + bpf_jit_binary_pack_free(ro_header, header); + } + goto out_off; } bool bpf_jit_supports_kfunc_call(void) @@ -1682,6 +1781,13 @@ bool bpf_jit_supports_kfunc_call(void) return true; } +void *bpf_arch_text_copy(void *dst, void *src, size_t len) +{ + if (!aarch64_insn_copy(dst, src, len)) + return ERR_PTR(-EINVAL); + return dst; +} + u64 bpf_jit_alloc_exec_limit(void) { return VMALLOC_END - VMALLOC_START; @@ -2310,3 +2416,37 @@ bool bpf_jit_supports_ptr_xchg(void) { return true; } + +bool bpf_jit_supports_exceptions(void) +{ + /* We unwind through both kernel frames starting from within bpf_throw + * call and BPF frames. Therefore we require FP unwinder to be enabled + * to walk kernel frames and reach BPF frames in the stack trace. + * ARM64 kernel is aways compiled with CONFIG_FRAME_POINTER=y + */ + return true; +} + +void bpf_jit_free(struct bpf_prog *prog) +{ + if (prog->jited) { + struct arm64_jit_data *jit_data = prog->aux->jit_data; + struct bpf_binary_header *hdr; + + /* + * If we fail the final pass of JIT (from jit_subprogs), + * the program may not be finalized yet. Call finalize here + * before freeing it. + */ + if (jit_data) { + bpf_arch_text_copy(&jit_data->ro_header->size, &jit_data->header->size, + sizeof(jit_data->header->size)); + kfree(jit_data); + } + hdr = bpf_jit_binary_pack_hdr(prog); + bpf_jit_binary_pack_free(hdr, NULL); + WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog)); + } + + bpf_prog_unlock_free(prog); +} diff --git a/arch/loongarch/boot/dts/loongson-2k0500-ref.dts b/arch/loongarch/boot/dts/loongson-2k0500-ref.dts index b38071a4d0b0..8aefb0c12672 100644 --- a/arch/loongarch/boot/dts/loongson-2k0500-ref.dts +++ b/arch/loongarch/boot/dts/loongson-2k0500-ref.dts @@ -60,7 +60,7 @@ #address-cells = <1>; #size-cells = <0>; - eeprom@57{ + eeprom@57 { compatible = "atmel,24c16"; reg = <0x57>; pagesize = <16>; diff --git a/arch/loongarch/boot/dts/loongson-2k1000-ref.dts b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts index 132a2d1ea8bc..ed4d32434041 100644 --- a/arch/loongarch/boot/dts/loongson-2k1000-ref.dts +++ b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts @@ -78,7 +78,7 @@ #address-cells = <1>; #size-cells = <0>; - eeprom@57{ + eeprom@57 { compatible = "atmel,24c16"; reg = <0x57>; pagesize = <16>; diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index edf2bba80130..634ef17fd38b 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -357,6 +357,8 @@ void __init platform_init(void) acpi_gbl_use_default_register_widths = false; acpi_boot_table_init(); #endif + + early_init_fdt_scan_reserved_mem(); unflatten_and_copy_device_tree(); #ifdef CONFIG_NUMA @@ -390,8 +392,6 @@ static void __init arch_mem_init(char **cmdline_p) check_kernel_sections_mem(); - early_init_fdt_scan_reserved_mem(); - /* * In order to reduce the possibility of kernel panic when failed to * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 2b49d30eb7c0..aabee0b280fe 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -88,6 +88,73 @@ void show_ipi_list(struct seq_file *p, int prec) } } +static inline void set_cpu_core_map(int cpu) +{ + int i; + + cpumask_set_cpu(cpu, &cpu_core_setup_map); + + for_each_cpu(i, &cpu_core_setup_map) { + if (cpu_data[cpu].package == cpu_data[i].package) { + cpumask_set_cpu(i, &cpu_core_map[cpu]); + cpumask_set_cpu(cpu, &cpu_core_map[i]); + } + } +} + +static inline void set_cpu_sibling_map(int cpu) +{ + int i; + + cpumask_set_cpu(cpu, &cpu_sibling_setup_map); + + for_each_cpu(i, &cpu_sibling_setup_map) { + if (cpus_are_siblings(cpu, i)) { + cpumask_set_cpu(i, &cpu_sibling_map[cpu]); + cpumask_set_cpu(cpu, &cpu_sibling_map[i]); + } + } +} + +static inline void clear_cpu_sibling_map(int cpu) +{ + int i; + + for_each_cpu(i, &cpu_sibling_setup_map) { + if (cpus_are_siblings(cpu, i)) { + cpumask_clear_cpu(i, &cpu_sibling_map[cpu]); + cpumask_clear_cpu(cpu, &cpu_sibling_map[i]); + } + } + + cpumask_clear_cpu(cpu, &cpu_sibling_setup_map); +} + +/* + * Calculate a new cpu_foreign_map mask whenever a + * new cpu appears or disappears. + */ +void calculate_cpu_foreign_map(void) +{ + int i, k, core_present; + cpumask_t temp_foreign_map; + + /* Re-calculate the mask */ + cpumask_clear(&temp_foreign_map); + for_each_online_cpu(i) { + core_present = 0; + for_each_cpu(k, &temp_foreign_map) + if (cpus_are_siblings(i, k)) + core_present = 1; + if (!core_present) + cpumask_set_cpu(i, &temp_foreign_map); + } + + for_each_online_cpu(i) + cpumask_andnot(&cpu_foreign_map[i], + &temp_foreign_map, &cpu_sibling_map[i]); +} + /* Send mailbox buffer via Mail_Send */ static void csr_mail_send(uint64_t data, int cpu, int mailbox) { @@ -303,6 +370,7 @@ int loongson_cpu_disable(void) numa_remove_cpu(cpu); #endif set_cpu_online(cpu, false); + clear_cpu_sibling_map(cpu); calculate_cpu_foreign_map(); local_irq_save(flags); irq_migrate_all_off_this_cpu(); @@ -337,6 +405,7 @@ void __noreturn arch_cpu_idle_dead(void) addr = iocsr_read64(LOONGARCH_IOCSR_MBUF0); } while (addr == 0); + local_irq_disable(); init_fn = (void *)TO_CACHE(addr); iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_CLEAR); @@ -379,59 +448,6 @@ static int __init ipi_pm_init(void) core_initcall(ipi_pm_init); #endif -static inline void set_cpu_sibling_map(int cpu) -{ - int i; - - cpumask_set_cpu(cpu, &cpu_sibling_setup_map); - - for_each_cpu(i, &cpu_sibling_setup_map) { - if (cpus_are_siblings(cpu, i)) { - cpumask_set_cpu(i, &cpu_sibling_map[cpu]); - cpumask_set_cpu(cpu, &cpu_sibling_map[i]); - } - } -} - -static inline void set_cpu_core_map(int cpu) -{ - int i; - - cpumask_set_cpu(cpu, &cpu_core_setup_map); - - for_each_cpu(i, &cpu_core_setup_map) { - if (cpu_data[cpu].package == cpu_data[i].package) { - cpumask_set_cpu(i, &cpu_core_map[cpu]); - cpumask_set_cpu(cpu, &cpu_core_map[i]); - } - } -} - -/* - * Calculate a new cpu_foreign_map mask whenever a - * new cpu appears or disappears. - */ -void calculate_cpu_foreign_map(void) -{ - int i, k, core_present; - cpumask_t temp_foreign_map; - - /* Re-calculate the mask */ - cpumask_clear(&temp_foreign_map); - for_each_online_cpu(i) { - core_present = 0; - for_each_cpu(k, &temp_foreign_map) - if (cpus_are_siblings(i, k)) - core_present = 1; - if (!core_present) - cpumask_set_cpu(i, &temp_foreign_map); - } - - for_each_online_cpu(i) - cpumask_andnot(&cpu_foreign_map[i], - &temp_foreign_map, &cpu_sibling_map[i]); -} - /* Preload SMP state for boot cpu */ void smp_prepare_boot_cpu(void) { diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 27701991886d..36106922b5d7 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -298,74 +298,73 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val) return ret; } -static int _kvm_get_cpucfg(int id, u64 *v) +static int _kvm_get_cpucfg_mask(int id, u64 *v) { - int ret = 0; - - if (id < 0 && id >= KVM_MAX_CPUCFG_REGS) + if (id < 0 || id >= KVM_MAX_CPUCFG_REGS) return -EINVAL; switch (id) { case 2: - /* Return CPUCFG2 features which have been supported by KVM */ + /* CPUCFG2 features unconditionally supported by KVM */ *v = CPUCFG2_FP | CPUCFG2_FPSP | CPUCFG2_FPDP | CPUCFG2_FPVERS | CPUCFG2_LLFTP | CPUCFG2_LLFTPREV | CPUCFG2_LAM; /* - * If LSX is supported by CPU, it is also supported by KVM, - * as we implement it. + * For the ISA extensions listed below, if one is supported + * by the host, then it is also supported by KVM. */ if (cpu_has_lsx) *v |= CPUCFG2_LSX; - /* - * if LASX is supported by CPU, it is also supported by KVM, - * as we implement it. - */ if (cpu_has_lasx) *v |= CPUCFG2_LASX; - break; + return 0; default: - ret = -EINVAL; - break; + /* + * No restrictions on other valid CPUCFG IDs' values, but + * CPUCFG data is limited to 32 bits as the LoongArch ISA + * manual says (Volume 1, Section 2.2.10.5 "CPUCFG"). + */ + *v = U32_MAX; + return 0; } - return ret; } static int kvm_check_cpucfg(int id, u64 val) { - u64 mask; - int ret = 0; - - if (id < 0 && id >= KVM_MAX_CPUCFG_REGS) - return -EINVAL; + int ret; + u64 mask = 0; - if (_kvm_get_cpucfg(id, &mask)) + ret = _kvm_get_cpucfg_mask(id, &mask); + if (ret) return ret; + if (val & ~mask) + /* Unsupported features and/or the higher 32 bits should not be set */ + return -EINVAL; + switch (id) { case 2: - /* CPUCFG2 features checking */ - if (val & ~mask) - /* The unsupported features should not be set */ - ret = -EINVAL; - else if (!(val & CPUCFG2_LLFTP)) - /* The LLFTP must be set, as guest must has a constant timer */ - ret = -EINVAL; - else if ((val & CPUCFG2_FP) && (!(val & CPUCFG2_FPSP) || !(val & CPUCFG2_FPDP))) - /* Single and double float point must both be set when enable FP */ - ret = -EINVAL; - else if ((val & CPUCFG2_LSX) && !(val & CPUCFG2_FP)) - /* FP should be set when enable LSX */ - ret = -EINVAL; - else if ((val & CPUCFG2_LASX) && !(val & CPUCFG2_LSX)) - /* LSX, FP should be set when enable LASX, and FP has been checked before. */ - ret = -EINVAL; - break; + if (!(val & CPUCFG2_LLFTP)) + /* Guests must have a constant timer */ + return -EINVAL; + if ((val & CPUCFG2_FP) && (!(val & CPUCFG2_FPSP) || !(val & CPUCFG2_FPDP))) + /* Single and double float point must both be set when FP is enabled */ + return -EINVAL; + if ((val & CPUCFG2_LSX) && !(val & CPUCFG2_FP)) + /* LSX architecturally implies FP but val does not satisfy that */ + return -EINVAL; + if ((val & CPUCFG2_LASX) && !(val & CPUCFG2_LSX)) + /* LASX architecturally implies LSX and FP but val does not satisfy that */ + return -EINVAL; + return 0; default: - break; + /* + * Values for the other CPUCFG IDs are not being further validated + * besides the mask check above. + */ + return 0; } - return ret; } static int kvm_get_one_reg(struct kvm_vcpu *vcpu, @@ -566,7 +565,7 @@ static int kvm_loongarch_get_cpucfg_attr(struct kvm_vcpu *vcpu, uint64_t val; uint64_t __user *uaddr = (uint64_t __user *)attr->addr; - ret = _kvm_get_cpucfg(attr->attr, &val); + ret = _kvm_get_cpucfg_mask(attr->attr, &val); if (ret) return ret; diff --git a/arch/parisc/include/asm/kprobes.h b/arch/parisc/include/asm/kprobes.h index 0a175ac87698..0f42f5c8e3b6 100644 --- a/arch/parisc/include/asm/kprobes.h +++ b/arch/parisc/include/asm/kprobes.h @@ -10,9 +10,10 @@ #ifndef _PARISC_KPROBES_H #define _PARISC_KPROBES_H +#include <asm-generic/kprobes.h> + #ifdef CONFIG_KPROBES -#include <asm-generic/kprobes.h> #include <linux/types.h> #include <linux/ptrace.h> #include <linux/notifier.h> diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index d1defb9ede70..621a4b386ae4 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -78,7 +78,7 @@ asmlinkage void notrace __hot ftrace_function_trampoline(unsigned long parent, #endif } -#ifdef CONFIG_FUNCTION_GRAPH_TRACER +#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_FUNCTION_GRAPH_TRACER) int ftrace_enable_ftrace_graph_caller(void) { static_key_enable(&ftrace_graph_enable.key); diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index e95a977ba5f3..bf73562706b2 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -172,7 +172,6 @@ static int __init processor_probe(struct parisc_device *dev) p->cpu_num = cpu_info.cpu_num; p->cpu_loc = cpu_info.cpu_loc; - set_cpu_possible(cpuid, true); store_cpu_topology(cpuid); #ifdef CONFIG_SMP @@ -474,13 +473,6 @@ static struct parisc_driver cpu_driver __refdata = { */ void __init processor_init(void) { - unsigned int cpu; - reset_cpu_topology(); - - /* reset possible mask. We will mark those which are possible. */ - for_each_possible_cpu(cpu) - set_cpu_possible(cpu, false); - register_parisc_driver(&cpu_driver); } diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c index 27ae40a443b8..f7e0fee5ee55 100644 --- a/arch/parisc/kernel/unwind.c +++ b/arch/parisc/kernel/unwind.c @@ -228,10 +228,8 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int #ifdef CONFIG_IRQSTACKS extern void * const _call_on_stack; #endif /* CONFIG_IRQSTACKS */ - void *ptr; - ptr = dereference_kernel_function_descriptor(&handle_interruption); - if (pc_is_kernel_fn(pc, ptr)) { + if (pc_is_kernel_fn(pc, handle_interruption)) { struct pt_regs *regs = (struct pt_regs *)(info->sp - frame_size - PT_SZ_ALGN); dbg("Unwinding through handle_interruption()\n"); info->prev_sp = regs->gr[30]; @@ -239,13 +237,13 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int return 1; } - if (pc_is_kernel_fn(pc, ret_from_kernel_thread) || - pc_is_kernel_fn(pc, syscall_exit)) { + if (pc == (unsigned long)&ret_from_kernel_thread || + pc == (unsigned long)&syscall_exit) { info->prev_sp = info->prev_ip = 0; return 1; } - if (pc_is_kernel_fn(pc, intr_return)) { + if (pc == (unsigned long)&intr_return) { struct pt_regs *regs; dbg("Found intr_return()\n"); @@ -257,14 +255,14 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int } if (pc_is_kernel_fn(pc, _switch_to) || - pc_is_kernel_fn(pc, _switch_to_ret)) { + pc == (unsigned long)&_switch_to_ret) { info->prev_sp = info->sp - CALLEE_SAVE_FRAME_SIZE; info->prev_ip = *(unsigned long *)(info->prev_sp - RP_OFFSET); return 1; } #ifdef CONFIG_IRQSTACKS - if (pc_is_kernel_fn(pc, _call_on_stack)) { + if (pc == (unsigned long)&_call_on_stack) { info->prev_sp = *(unsigned long *)(info->sp - FRAME_SIZE - REG_SZ); info->prev_ip = *(unsigned long *)(info->sp - FRAME_SIZE - RP_OFFSET); return 1; diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index ce2b1b5eebdd..a8b7e8682f5b 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -30,6 +30,16 @@ void *pci_traverse_device_nodes(struct device_node *start, void *data); extern void pci_devs_phb_init_dynamic(struct pci_controller *phb); +#if defined(CONFIG_IOMMU_API) && (defined(CONFIG_PPC_PSERIES) || \ + defined(CONFIG_PPC_POWERNV)) +extern void ppc_iommu_register_device(struct pci_controller *phb); +extern void ppc_iommu_unregister_device(struct pci_controller *phb); +#else +static inline void ppc_iommu_register_device(struct pci_controller *phb) { } +static inline void ppc_iommu_unregister_device(struct pci_controller *phb) { } +#endif + + /* From rtas_pci.h */ extern void init_pci_config_tokens (void); extern unsigned long get_phb_buid (struct device_node *); diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 9bb2210c8d44..065ffd1b2f8a 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -69,7 +69,7 @@ enum rtas_function_index { RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE, RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE2, RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW, - RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOWS, + RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOW, RTAS_FNIDX__IBM_SCAN_LOG_DUMP, RTAS_FNIDX__IBM_SET_DYNAMIC_INDICATOR, RTAS_FNIDX__IBM_SET_EEH_OPTION, @@ -164,7 +164,7 @@ typedef struct { #define RTAS_FN_IBM_READ_SLOT_RESET_STATE rtas_fn_handle(RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE) #define RTAS_FN_IBM_READ_SLOT_RESET_STATE2 rtas_fn_handle(RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE2) #define RTAS_FN_IBM_REMOVE_PE_DMA_WINDOW rtas_fn_handle(RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW) -#define RTAS_FN_IBM_RESET_PE_DMA_WINDOWS rtas_fn_handle(RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOWS) +#define RTAS_FN_IBM_RESET_PE_DMA_WINDOW rtas_fn_handle(RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOW) #define RTAS_FN_IBM_SCAN_LOG_DUMP rtas_fn_handle(RTAS_FNIDX__IBM_SCAN_LOG_DUMP) #define RTAS_FN_IBM_SET_DYNAMIC_INDICATOR rtas_fn_handle(RTAS_FNIDX__IBM_SET_DYNAMIC_INDICATOR) #define RTAS_FN_IBM_SET_EEH_OPTION rtas_fn_handle(RTAS_FNIDX__IBM_SET_EEH_OPTION) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index a9bebfd56b3b..1185efebf032 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1360,7 +1360,7 @@ static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev) struct pci_controller *hose; if (!dev_is_pci(dev)) - return ERR_PTR(-EPERM); + return ERR_PTR(-ENODEV); pdev = to_pci_dev(dev); hose = pdev->bus->sysdata; @@ -1409,6 +1409,21 @@ static const struct attribute_group *spapr_tce_iommu_groups[] = { NULL, }; +void ppc_iommu_register_device(struct pci_controller *phb) +{ + iommu_device_sysfs_add(&phb->iommu, phb->parent, + spapr_tce_iommu_groups, "iommu-phb%04x", + phb->global_number); + iommu_device_register(&phb->iommu, &spapr_tce_iommu_ops, + phb->parent); +} + +void ppc_iommu_unregister_device(struct pci_controller *phb) +{ + iommu_device_unregister(&phb->iommu); + iommu_device_sysfs_remove(&phb->iommu); +} + /* * This registers IOMMU devices of PHBs. This needs to happen * after core_initcall(iommu_init) + postcore_initcall(pci_driver_init) and @@ -1419,11 +1434,7 @@ static int __init spapr_tce_setup_phb_iommus_initcall(void) struct pci_controller *hose; list_for_each_entry(hose, &hose_list, list_node) { - iommu_device_sysfs_add(&hose->iommu, hose->parent, - spapr_tce_iommu_groups, "iommu-phb%04x", - hose->global_number); - iommu_device_register(&hose->iommu, &spapr_tce_iommu_ops, - hose->parent); + ppc_iommu_register_device(hose); } return 0; } diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 7e793b503e29..8064d9c3de86 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -375,8 +375,13 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { [RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW] = { .name = "ibm,remove-pe-dma-window", }, - [RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOWS] = { - .name = "ibm,reset-pe-dma-windows", + [RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOW] = { + /* + * Note: PAPR+ v2.13 7.3.31.4.1 spells this as + * "ibm,reset-pe-dma-windows" (plural), but RTAS + * implementations use the singular form in practice. + */ + .name = "ibm,reset-pe-dma-window", }, [RTAS_FNIDX__IBM_SCAN_LOG_DUMP] = { .name = "ibm,scan-log-dump", diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 52427fc2a33f..0b921704da45 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -391,6 +391,24 @@ static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr) /* Dummy value used in computing PCR value below */ #define PCR_ARCH_31 (PCR_ARCH_300 << 1) +static inline unsigned long map_pcr_to_cap(unsigned long pcr) +{ + unsigned long cap = 0; + + switch (pcr) { + case PCR_ARCH_300: + cap = H_GUEST_CAP_POWER9; + break; + case PCR_ARCH_31: + cap = H_GUEST_CAP_POWER10; + break; + default: + break; + } + + return cap; +} + static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) { unsigned long host_pcr_bit = 0, guest_pcr_bit = 0, cap = 0; @@ -424,11 +442,9 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) break; case PVR_ARCH_300: guest_pcr_bit = PCR_ARCH_300; - cap = H_GUEST_CAP_POWER9; break; case PVR_ARCH_31: guest_pcr_bit = PCR_ARCH_31; - cap = H_GUEST_CAP_POWER10; break; default: return -EINVAL; @@ -440,6 +456,12 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) return -EINVAL; if (kvmhv_on_pseries() && kvmhv_is_nestedv2()) { + /* + * 'arch_compat == 0' would mean the guest should default to + * L1's compatibility. In this case, the guest would pick + * host's PCR and evaluate the corresponding capabilities. + */ + cap = map_pcr_to_cap(guest_pcr_bit); if (!(cap & nested_capabilities)) return -EINVAL; } diff --git a/arch/powerpc/kvm/book3s_hv_nestedv2.c b/arch/powerpc/kvm/book3s_hv_nestedv2.c index 5378eb40b162..8e6f5355f08b 100644 --- a/arch/powerpc/kvm/book3s_hv_nestedv2.c +++ b/arch/powerpc/kvm/book3s_hv_nestedv2.c @@ -138,6 +138,7 @@ static int gs_msg_ops_vcpu_fill_info(struct kvmppc_gs_buff *gsb, vector128 v; int rc, i; u16 iden; + u32 arch_compat = 0; vcpu = gsm->data; @@ -347,8 +348,23 @@ static int gs_msg_ops_vcpu_fill_info(struct kvmppc_gs_buff *gsb, break; } case KVMPPC_GSID_LOGICAL_PVR: - rc = kvmppc_gse_put_u32(gsb, iden, - vcpu->arch.vcore->arch_compat); + /* + * Though 'arch_compat == 0' would mean the default + * compatibility, arch_compat, being a Guest Wide + * Element, cannot be filled with a value of 0 in GSB + * as this would result into a kernel trap. + * Hence, when `arch_compat == 0`, arch_compat should + * default to L1's PVR. + */ + if (!vcpu->arch.vcore->arch_compat) { + if (cpu_has_feature(CPU_FTR_ARCH_31)) + arch_compat = PVR_ARCH_31; + else if (cpu_has_feature(CPU_FTR_ARCH_300)) + arch_compat = PVR_ARCH_300; + } else { + arch_compat = vcpu->arch.vcore->arch_compat; + } + rc = kvmppc_gse_put_u32(gsb, iden, arch_compat); break; } diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 496e16c588aa..e8c4129697b1 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -574,29 +574,6 @@ static void iommu_table_setparms(struct pci_controller *phb, struct iommu_table_ops iommu_table_lpar_multi_ops; -/* - * iommu_table_setparms_lpar - * - * Function: On pSeries LPAR systems, return TCE table info, given a pci bus. - */ -static void iommu_table_setparms_lpar(struct pci_controller *phb, - struct device_node *dn, - struct iommu_table *tbl, - struct iommu_table_group *table_group, - const __be32 *dma_window) -{ - unsigned long offset, size, liobn; - - of_parse_dma_window(dn, dma_window, &liobn, &offset, &size); - - iommu_table_setparms_common(tbl, phb->bus->number, liobn, offset, size, IOMMU_PAGE_SHIFT_4K, NULL, - &iommu_table_lpar_multi_ops); - - - table_group->tce32_start = offset; - table_group->tce32_size = size; -} - struct iommu_table_ops iommu_table_pseries_ops = { .set = tce_build_pSeries, .clear = tce_free_pSeries, @@ -724,26 +701,71 @@ struct iommu_table_ops iommu_table_lpar_multi_ops = { * dynamic 64bit DMA window, walking up the device tree. */ static struct device_node *pci_dma_find(struct device_node *dn, - const __be32 **dma_window) + struct dynamic_dma_window_prop *prop) { - const __be32 *dw = NULL; + const __be32 *default_prop = NULL; + const __be32 *ddw_prop = NULL; + struct device_node *rdn = NULL; + bool default_win = false, ddw_win = false; for ( ; dn && PCI_DN(dn); dn = dn->parent) { - dw = of_get_property(dn, "ibm,dma-window", NULL); - if (dw) { - if (dma_window) - *dma_window = dw; - return dn; + default_prop = of_get_property(dn, "ibm,dma-window", NULL); + if (default_prop) { + rdn = dn; + default_win = true; + } + ddw_prop = of_get_property(dn, DIRECT64_PROPNAME, NULL); + if (ddw_prop) { + rdn = dn; + ddw_win = true; + break; + } + ddw_prop = of_get_property(dn, DMA64_PROPNAME, NULL); + if (ddw_prop) { + rdn = dn; + ddw_win = true; + break; } - dw = of_get_property(dn, DIRECT64_PROPNAME, NULL); - if (dw) - return dn; - dw = of_get_property(dn, DMA64_PROPNAME, NULL); - if (dw) - return dn; + + /* At least found default window, which is the case for normal boot */ + if (default_win) + break; } - return NULL; + /* For PCI devices there will always be a DMA window, either on the device + * or parent bus + */ + WARN_ON(!(default_win | ddw_win)); + + /* caller doesn't want to get DMA window property */ + if (!prop) + return rdn; + + /* parse DMA window property. During normal system boot, only default + * DMA window is passed in OF. But, for kdump, a dedicated adapter might + * have both default and DDW in FDT. In this scenario, DDW takes precedence + * over default window. + */ + if (ddw_win) { + struct dynamic_dma_window_prop *p; + + p = (struct dynamic_dma_window_prop *)ddw_prop; + prop->liobn = p->liobn; + prop->dma_base = p->dma_base; + prop->tce_shift = p->tce_shift; + prop->window_shift = p->window_shift; + } else if (default_win) { + unsigned long offset, size, liobn; + + of_parse_dma_window(rdn, default_prop, &liobn, &offset, &size); + + prop->liobn = cpu_to_be32((u32)liobn); + prop->dma_base = cpu_to_be64(offset); + prop->tce_shift = cpu_to_be32(IOMMU_PAGE_SHIFT_4K); + prop->window_shift = cpu_to_be32(order_base_2(size)); + } + + return rdn; } static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) @@ -751,17 +773,20 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) struct iommu_table *tbl; struct device_node *dn, *pdn; struct pci_dn *ppci; - const __be32 *dma_window = NULL; + struct dynamic_dma_window_prop prop; dn = pci_bus_to_OF_node(bus); pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n", dn); - pdn = pci_dma_find(dn, &dma_window); + pdn = pci_dma_find(dn, &prop); - if (dma_window == NULL) - pr_debug(" no ibm,dma-window property !\n"); + /* In PPC architecture, there will always be DMA window on bus or one of the + * parent bus. During reboot, there will be ibm,dma-window property to + * define DMA window. For kdump, there will at least be default window or DDW + * or both. + */ ppci = PCI_DN(pdn); @@ -771,13 +796,24 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) if (!ppci->table_group) { ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node); tbl = ppci->table_group->tables[0]; - if (dma_window) { - iommu_table_setparms_lpar(ppci->phb, pdn, tbl, - ppci->table_group, dma_window); - if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) - panic("Failed to initialize iommu table"); - } + iommu_table_setparms_common(tbl, ppci->phb->bus->number, + be32_to_cpu(prop.liobn), + be64_to_cpu(prop.dma_base), + 1ULL << be32_to_cpu(prop.window_shift), + be32_to_cpu(prop.tce_shift), NULL, + &iommu_table_lpar_multi_ops); + + /* Only for normal boot with default window. Doesn't matter even + * if we set these with DDW which is 64bit during kdump, since + * these will not be used during kdump. + */ + ppci->table_group->tce32_start = be64_to_cpu(prop.dma_base); + ppci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift); + + if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) + panic("Failed to initialize iommu table"); + iommu_register_group(ppci->table_group, pci_domain_nr(bus), 0); pr_debug(" created table: %p\n", ppci->table_group); @@ -968,6 +1004,12 @@ static void find_existing_ddw_windows_named(const char *name) continue; } + /* If at the time of system initialization, there are DDWs in OF, + * it means this is during kexec. DDW could be direct or dynamic. + * We will just mark DDWs as "dynamic" since this is kdump path, + * no need to worry about perforance. ddw_list_new_entry() will + * set window->direct = false. + */ window = ddw_list_new_entry(pdn, dma64); if (!window) { of_node_put(pdn); @@ -1524,8 +1566,8 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) { struct device_node *pdn, *dn; struct iommu_table *tbl; - const __be32 *dma_window = NULL; struct pci_dn *pci; + struct dynamic_dma_window_prop prop; pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev)); @@ -1538,7 +1580,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) dn = pci_device_to_OF_node(dev); pr_debug(" node is %pOF\n", dn); - pdn = pci_dma_find(dn, &dma_window); + pdn = pci_dma_find(dn, &prop); if (!pdn || !PCI_DN(pdn)) { printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: " "no DMA window found for pci dev=%s dn=%pOF\n", @@ -1551,8 +1593,20 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) if (!pci->table_group) { pci->table_group = iommu_pseries_alloc_group(pci->phb->node); tbl = pci->table_group->tables[0]; - iommu_table_setparms_lpar(pci->phb, pdn, tbl, - pci->table_group, dma_window); + + iommu_table_setparms_common(tbl, pci->phb->bus->number, + be32_to_cpu(prop.liobn), + be64_to_cpu(prop.dma_base), + 1ULL << be32_to_cpu(prop.window_shift), + be32_to_cpu(prop.tce_shift), NULL, + &iommu_table_lpar_multi_ops); + + /* Only for normal boot with default window. Doesn't matter even + * if we set these with DDW which is 64bit during kdump, since + * these will not be used during kdump. + */ + pci->table_group->tce32_start = be64_to_cpu(prop.dma_base); + pci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift); iommu_init_table(tbl, pci->phb->node, 0, 0); iommu_register_group(pci->table_group, diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 4ba824568119..4448386268d9 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -35,6 +35,8 @@ struct pci_controller *init_phb_dynamic(struct device_node *dn) pseries_msi_allocate_domains(phb); + ppc_iommu_register_device(phb); + /* Create EEH devices for the PHB */ eeh_phb_pe_create(phb); @@ -76,6 +78,8 @@ int remove_phb_dynamic(struct pci_controller *phb) } } + ppc_iommu_unregister_device(phb); + pseries_msi_free_domains(phb); /* Keep a reference so phb isn't freed yet */ diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index bffbd869a068..e3142ce531a0 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -315,7 +315,6 @@ config AS_HAS_OPTION_ARCH # https://reviews.llvm.org/D123515 def_bool y depends on $(as-instr, .option arch$(comma) +m) - depends on !$(as-instr, .option arch$(comma) -i) source "arch/riscv/Kconfig.socs" source "arch/riscv/Kconfig.errata" diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index 07387f9c135c..72b87b08ab44 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -123,6 +123,7 @@ interrupt-parent = <&gpio>; interrupts = <1 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; + #interrupt-cells = <2>; onkey { compatible = "dlg,da9063-onkey"; diff --git a/arch/riscv/boot/dts/starfive/jh7100.dtsi b/arch/riscv/boot/dts/starfive/jh7100.dtsi index c216aaecac53..8bcf36d07f3f 100644 --- a/arch/riscv/boot/dts/starfive/jh7100.dtsi +++ b/arch/riscv/boot/dts/starfive/jh7100.dtsi @@ -96,14 +96,14 @@ thermal-sensors = <&sfctemp>; trips { - cpu_alert0 { + cpu-alert0 { /* milliCelsius */ temperature = <75000>; hysteresis = <2000>; type = "passive"; }; - cpu_crit { + cpu-crit { /* milliCelsius */ temperature = <90000>; hysteresis = <2000>; @@ -113,28 +113,28 @@ }; }; - osc_sys: osc_sys { + osc_sys: osc-sys { compatible = "fixed-clock"; #clock-cells = <0>; /* This value must be overridden by the board */ clock-frequency = <0>; }; - osc_aud: osc_aud { + osc_aud: osc-aud { compatible = "fixed-clock"; #clock-cells = <0>; /* This value must be overridden by the board */ clock-frequency = <0>; }; - gmac_rmii_ref: gmac_rmii_ref { + gmac_rmii_ref: gmac-rmii-ref { compatible = "fixed-clock"; #clock-cells = <0>; /* Should be overridden by the board when needed */ clock-frequency = <0>; }; - gmac_gr_mii_rxclk: gmac_gr_mii_rxclk { + gmac_gr_mii_rxclk: gmac-gr-mii-rxclk { compatible = "fixed-clock"; #clock-cells = <0>; /* Should be overridden by the board when needed */ diff --git a/arch/riscv/boot/dts/starfive/jh7110.dtsi b/arch/riscv/boot/dts/starfive/jh7110.dtsi index 45213cdf50dc..74ed3b9264d8 100644 --- a/arch/riscv/boot/dts/starfive/jh7110.dtsi +++ b/arch/riscv/boot/dts/starfive/jh7110.dtsi @@ -237,14 +237,14 @@ }; trips { - cpu_alert0: cpu_alert0 { + cpu_alert0: cpu-alert0 { /* milliCelsius */ temperature = <85000>; hysteresis = <2000>; type = "passive"; }; - cpu_crit { + cpu-crit { /* milliCelsius */ temperature = <100000>; hysteresis = <2000>; diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 510014051f5d..2468c55933cd 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -424,6 +424,7 @@ # define CSR_STATUS CSR_MSTATUS # define CSR_IE CSR_MIE # define CSR_TVEC CSR_MTVEC +# define CSR_ENVCFG CSR_MENVCFG # define CSR_SCRATCH CSR_MSCRATCH # define CSR_EPC CSR_MEPC # define CSR_CAUSE CSR_MCAUSE @@ -448,6 +449,7 @@ # define CSR_STATUS CSR_SSTATUS # define CSR_IE CSR_SIE # define CSR_TVEC CSR_STVEC +# define CSR_ENVCFG CSR_SENVCFG # define CSR_SCRATCH CSR_SSCRATCH # define CSR_EPC CSR_SEPC # define CSR_CAUSE CSR_SCAUSE diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index 329172122952..15055f9df4da 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -25,6 +25,11 @@ #define ARCH_SUPPORTS_FTRACE_OPS 1 #ifndef __ASSEMBLY__ + +extern void *return_address(unsigned int level); + +#define ftrace_return_address(n) return_address(n) + void MCOUNT_NAME(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) { diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h index 20f9c3ba2341..22deb7a2a6ec 100644 --- a/arch/riscv/include/asm/hugetlb.h +++ b/arch/riscv/include/asm/hugetlb.h @@ -11,8 +11,10 @@ static inline void arch_clear_hugepage_flags(struct page *page) } #define arch_clear_hugepage_flags arch_clear_hugepage_flags +#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION bool arch_hugetlb_migration_supported(struct hstate *h); #define arch_hugetlb_migration_supported arch_hugetlb_migration_supported +#endif #ifdef CONFIG_RISCV_ISA_SVNAPOT #define __HAVE_ARCH_HUGE_PTE_CLEAR diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index 5340f818746b..1f2d2599c655 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -81,6 +81,8 @@ #define RISCV_ISA_EXT_ZTSO 72 #define RISCV_ISA_EXT_ZACAS 73 +#define RISCV_ISA_EXT_XLINUXENVCFG 127 + #define RISCV_ISA_EXT_MAX 128 #define RISCV_ISA_EXT_INVALID U32_MAX diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index d169a4f41a2e..c80bb9990d32 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -95,7 +95,13 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) __pud_free(mm, pud); } -#define __pud_free_tlb(tlb, pud, addr) pud_free((tlb)->mm, pud) +#define __pud_free_tlb(tlb, pud, addr) \ +do { \ + if (pgtable_l4_enabled) { \ + pagetable_pud_dtor(virt_to_ptdesc(pud)); \ + tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pud)); \ + } \ +} while (0) #define p4d_alloc_one p4d_alloc_one static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr) @@ -124,7 +130,11 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) __p4d_free(mm, p4d); } -#define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d) +#define __p4d_free_tlb(tlb, p4d, addr) \ +do { \ + if (pgtable_l5_enabled) \ + tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(p4d)); \ +} while (0) #endif /* __PAGETABLE_PMD_FOLDED */ static inline void sync_kernel_mappings(pgd_t *pgd) @@ -149,7 +159,11 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) #ifndef __PAGETABLE_PMD_FOLDED -#define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd) +#define __pmd_free_tlb(tlb, pmd, addr) \ +do { \ + pagetable_pmd_dtor(virt_to_ptdesc(pmd)); \ + tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pmd)); \ +} while (0) #endif /* __PAGETABLE_PMD_FOLDED */ diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index b42017d76924..b99bd66107a6 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -136,7 +136,7 @@ enum napot_cont_order { * 10010 - IO Strongly-ordered, Non-cacheable, Non-bufferable, Shareable, Non-trustable */ #define _PAGE_PMA_THEAD ((1UL << 62) | (1UL << 61) | (1UL << 60)) -#define _PAGE_NOCACHE_THEAD ((1UL < 61) | (1UL << 60)) +#define _PAGE_NOCACHE_THEAD ((1UL << 61) | (1UL << 60)) #define _PAGE_IO_THEAD ((1UL << 63) | (1UL << 60)) #define _PAGE_MTMASK_THEAD (_PAGE_PMA_THEAD | _PAGE_IO_THEAD | (1UL << 59)) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 0c94260b5d0c..6066822e7396 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -84,7 +84,7 @@ * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled. */ -#define vmemmap ((struct page *)VMEMMAP_START) +#define vmemmap ((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT)) #define PCI_IO_SIZE SZ_16M #define PCI_IO_END VMEMMAP_START @@ -439,6 +439,10 @@ static inline pte_t pte_mkhuge(pte_t pte) return pte; } +#define pte_leaf_size(pte) (pte_napot(pte) ? \ + napot_cont_size(napot_cont_order(pte)) :\ + PAGE_SIZE) + #ifdef CONFIG_NUMA_BALANCING /* * See the comment in include/asm-generic/pgtable.h diff --git a/arch/riscv/include/asm/suspend.h b/arch/riscv/include/asm/suspend.h index 02f87867389a..491296a335d0 100644 --- a/arch/riscv/include/asm/suspend.h +++ b/arch/riscv/include/asm/suspend.h @@ -14,6 +14,7 @@ struct suspend_context { struct pt_regs regs; /* Saved and restored by high-level functions */ unsigned long scratch; + unsigned long envcfg; unsigned long tvec; unsigned long ie; #ifdef CONFIG_MMU diff --git a/arch/riscv/include/asm/vmalloc.h b/arch/riscv/include/asm/vmalloc.h index 924d01b56c9a..51f6dfe19745 100644 --- a/arch/riscv/include/asm/vmalloc.h +++ b/arch/riscv/include/asm/vmalloc.h @@ -19,65 +19,6 @@ static inline bool arch_vmap_pmd_supported(pgprot_t prot) return true; } -#ifdef CONFIG_RISCV_ISA_SVNAPOT -#include <linux/pgtable.h> +#endif -#define arch_vmap_pte_range_map_size arch_vmap_pte_range_map_size -static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr, unsigned long end, - u64 pfn, unsigned int max_page_shift) -{ - unsigned long map_size = PAGE_SIZE; - unsigned long size, order; - - if (!has_svnapot()) - return map_size; - - for_each_napot_order_rev(order) { - if (napot_cont_shift(order) > max_page_shift) - continue; - - size = napot_cont_size(order); - if (end - addr < size) - continue; - - if (!IS_ALIGNED(addr, size)) - continue; - - if (!IS_ALIGNED(PFN_PHYS(pfn), size)) - continue; - - map_size = size; - break; - } - - return map_size; -} - -#define arch_vmap_pte_supported_shift arch_vmap_pte_supported_shift -static inline int arch_vmap_pte_supported_shift(unsigned long size) -{ - int shift = PAGE_SHIFT; - unsigned long order; - - if (!has_svnapot()) - return shift; - - WARN_ON_ONCE(size >= PMD_SIZE); - - for_each_napot_order_rev(order) { - if (napot_cont_size(order) > size) - continue; - - if (!IS_ALIGNED(size, napot_cont_size(order))) - continue; - - shift = napot_cont_shift(order); - break; - } - - return shift; -} - -#endif /* CONFIG_RISCV_ISA_SVNAPOT */ -#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ #endif /* _ASM_RISCV_VMALLOC_H */ diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index f71910718053..604d6bf7e476 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -7,6 +7,7 @@ ifdef CONFIG_FTRACE CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_sbi.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE) endif CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,) CFLAGS_compat_syscall_table.o += $(call cc-option,-Wno-override-init,) @@ -46,6 +47,7 @@ obj-y += irq.o obj-y += process.o obj-y += ptrace.o obj-y += reset.o +obj-y += return_address.o obj-y += setup.o obj-y += signal.o obj-y += syscall_table.o diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 89920f84d0a3..79a5a35fab96 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -24,6 +24,7 @@ #include <asm/hwprobe.h> #include <asm/patch.h> #include <asm/processor.h> +#include <asm/sbi.h> #include <asm/vector.h> #include "copy-unaligned.h" @@ -202,6 +203,16 @@ static const unsigned int riscv_zvbb_exts[] = { }; /* + * While the [ms]envcfg CSRs were not defined until version 1.12 of the RISC-V + * privileged ISA, the existence of the CSRs is implied by any extension which + * specifies [ms]envcfg bit(s). Hence, we define a custom ISA extension for the + * existence of the CSR, and treat it as a subset of those other extensions. + */ +static const unsigned int riscv_xlinuxenvcfg_exts[] = { + RISCV_ISA_EXT_XLINUXENVCFG +}; + +/* * The canonical order of ISA extension names in the ISA string is defined in * chapter 27 of the unprivileged specification. * @@ -250,8 +261,8 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(c, RISCV_ISA_EXT_c), __RISCV_ISA_EXT_DATA(v, RISCV_ISA_EXT_v), __RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h), - __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM), - __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ), + __RISCV_ISA_EXT_SUPERSET(zicbom, RISCV_ISA_EXT_ZICBOM, riscv_xlinuxenvcfg_exts), + __RISCV_ISA_EXT_SUPERSET(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts), __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR), __RISCV_ISA_EXT_DATA(zicond, RISCV_ISA_EXT_ZICOND), __RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR), @@ -539,6 +550,20 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap) } /* + * "V" in ISA strings is ambiguous in practice: it should mean + * just the standard V-1.0 but vendors aren't well behaved. + * Many vendors with T-Head CPU cores which implement the 0.7.1 + * version of the vector specification put "v" into their DTs. + * CPU cores with the ratified spec will contain non-zero + * marchid. + */ + if (acpi_disabled && riscv_cached_mvendorid(cpu) == THEAD_VENDOR_ID && + riscv_cached_marchid(cpu) == 0x0) { + this_hwcap &= ~isa2hwcap[RISCV_ISA_EXT_v]; + clear_bit(RISCV_ISA_EXT_v, isainfo->isa); + } + + /* * All "okay" hart should have same isa. Set HWCAP based on * common capabilities of every "okay" hart, in case they don't * have. @@ -950,7 +975,7 @@ arch_initcall(check_unaligned_access_all_cpus); void riscv_user_isa_enable(void) { if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICBOZ)) - csr_set(CSR_SENVCFG, ENVCFG_CBZE); + csr_set(CSR_ENVCFG, ENVCFG_CBZE); } #ifdef CONFIG_RISCV_ALTERNATIVE diff --git a/arch/riscv/kernel/return_address.c b/arch/riscv/kernel/return_address.c new file mode 100644 index 000000000000..c8115ec8fb30 --- /dev/null +++ b/arch/riscv/kernel/return_address.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This code come from arch/arm64/kernel/return_address.c + * + * Copyright (C) 2023 SiFive. + */ + +#include <linux/export.h> +#include <linux/kprobes.h> +#include <linux/stacktrace.h> + +struct return_address_data { + unsigned int level; + void *addr; +}; + +static bool save_return_addr(void *d, unsigned long pc) +{ + struct return_address_data *data = d; + + if (!data->level) { + data->addr = (void *)pc; + return false; + } + + --data->level; + + return true; +} +NOKPROBE_SYMBOL(save_return_addr); + +noinline void *return_address(unsigned int level) +{ + struct return_address_data data; + + data.level = level + 3; + data.addr = NULL; + + arch_stack_walk(save_return_addr, &data, current, NULL); + + if (!data.level) + return data.addr; + else + return NULL; + +} +EXPORT_SYMBOL_GPL(return_address); +NOKPROBE_SYMBOL(return_address); diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c index 239509367e42..299795341e8a 100644 --- a/arch/riscv/kernel/suspend.c +++ b/arch/riscv/kernel/suspend.c @@ -15,6 +15,8 @@ void suspend_save_csrs(struct suspend_context *context) { context->scratch = csr_read(CSR_SCRATCH); + if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG)) + context->envcfg = csr_read(CSR_ENVCFG); context->tvec = csr_read(CSR_TVEC); context->ie = csr_read(CSR_IE); @@ -36,6 +38,8 @@ void suspend_save_csrs(struct suspend_context *context) void suspend_restore_csrs(struct suspend_context *context) { csr_write(CSR_SCRATCH, context->scratch); + if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG)) + csr_write(CSR_ENVCFG, context->envcfg); csr_write(CSR_TVEC, context->tvec); csr_write(CSR_IE, context->ie); diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index 29c7606414d2..5ef2a6891158 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -426,10 +426,12 @@ bool __init arch_hugetlb_valid_size(unsigned long size) return __hugetlb_valid_size(size); } +#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION bool arch_hugetlb_migration_supported(struct hstate *h) { return __hugetlb_valid_size(huge_page_size(h)); } +#endif #ifdef CONFIG_CONTIG_ALLOC static __init int gigantic_pages_init(void) diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h index a5ce1ab76ece..8b35f12a4452 100644 --- a/arch/riscv/net/bpf_jit.h +++ b/arch/riscv/net/bpf_jit.h @@ -18,6 +18,11 @@ static inline bool rvc_enabled(void) return IS_ENABLED(CONFIG_RISCV_ISA_C); } +static inline bool rvzbb_enabled(void) +{ + return IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && riscv_has_extension_likely(RISCV_ISA_EXT_ZBB); +} + enum { RV_REG_ZERO = 0, /* The constant value 0 */ RV_REG_RA = 1, /* Return address */ @@ -730,6 +735,33 @@ static inline u16 rvc_swsp(u32 imm8, u8 rs2) return rv_css_insn(0x6, imm, rs2, 0x2); } +/* RVZBB instrutions. */ +static inline u32 rvzbb_sextb(u8 rd, u8 rs1) +{ + return rv_i_insn(0x604, rs1, 1, rd, 0x13); +} + +static inline u32 rvzbb_sexth(u8 rd, u8 rs1) +{ + return rv_i_insn(0x605, rs1, 1, rd, 0x13); +} + +static inline u32 rvzbb_zexth(u8 rd, u8 rs) +{ + if (IS_ENABLED(CONFIG_64BIT)) + return rv_i_insn(0x80, rs, 4, rd, 0x3b); + + return rv_i_insn(0x80, rs, 4, rd, 0x33); +} + +static inline u32 rvzbb_rev8(u8 rd, u8 rs) +{ + if (IS_ENABLED(CONFIG_64BIT)) + return rv_i_insn(0x6b8, rs, 5, rd, 0x13); + + return rv_i_insn(0x698, rs, 5, rd, 0x13); +} + /* * RV64-only instructions. * @@ -1087,6 +1119,108 @@ static inline void emit_subw(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx) emit(rv_subw(rd, rs1, rs2), ctx); } +static inline void emit_sextb(u8 rd, u8 rs, struct rv_jit_context *ctx) +{ + if (rvzbb_enabled()) { + emit(rvzbb_sextb(rd, rs), ctx); + return; + } + + emit_slli(rd, rs, 56, ctx); + emit_srai(rd, rd, 56, ctx); +} + +static inline void emit_sexth(u8 rd, u8 rs, struct rv_jit_context *ctx) +{ + if (rvzbb_enabled()) { + emit(rvzbb_sexth(rd, rs), ctx); + return; + } + + emit_slli(rd, rs, 48, ctx); + emit_srai(rd, rd, 48, ctx); +} + +static inline void emit_sextw(u8 rd, u8 rs, struct rv_jit_context *ctx) +{ + emit_addiw(rd, rs, 0, ctx); +} + +static inline void emit_zexth(u8 rd, u8 rs, struct rv_jit_context *ctx) +{ + if (rvzbb_enabled()) { + emit(rvzbb_zexth(rd, rs), ctx); + return; + } + + emit_slli(rd, rs, 48, ctx); + emit_srli(rd, rd, 48, ctx); +} + +static inline void emit_zextw(u8 rd, u8 rs, struct rv_jit_context *ctx) +{ + emit_slli(rd, rs, 32, ctx); + emit_srli(rd, rd, 32, ctx); +} + +static inline void emit_bswap(u8 rd, s32 imm, struct rv_jit_context *ctx) +{ + if (rvzbb_enabled()) { + int bits = 64 - imm; + + emit(rvzbb_rev8(rd, rd), ctx); + if (bits) + emit_srli(rd, rd, bits, ctx); + return; + } + + emit_li(RV_REG_T2, 0, ctx); + + emit_andi(RV_REG_T1, rd, 0xff, ctx); + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); + emit_srli(rd, rd, 8, ctx); + if (imm == 16) + goto out_be; + + emit_andi(RV_REG_T1, rd, 0xff, ctx); + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); + emit_srli(rd, rd, 8, ctx); + + emit_andi(RV_REG_T1, rd, 0xff, ctx); + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); + emit_srli(rd, rd, 8, ctx); + if (imm == 32) + goto out_be; + + emit_andi(RV_REG_T1, rd, 0xff, ctx); + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); + emit_srli(rd, rd, 8, ctx); + + emit_andi(RV_REG_T1, rd, 0xff, ctx); + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); + emit_srli(rd, rd, 8, ctx); + + emit_andi(RV_REG_T1, rd, 0xff, ctx); + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); + emit_srli(rd, rd, 8, ctx); + + emit_andi(RV_REG_T1, rd, 0xff, ctx); + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); + emit_srli(rd, rd, 8, ctx); +out_be: + emit_andi(RV_REG_T1, rd, 0xff, ctx); + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); + + emit_mv(rd, RV_REG_T2, ctx); +} + #endif /* __riscv_xlen == 64 */ void bpf_jit_build_prologue(struct rv_jit_context *ctx); diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 719a97e7edb2..869e4282a2c4 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -141,6 +141,19 @@ static bool in_auipc_jalr_range(s64 val) val < ((1L << 31) - (1L << 11)); } +/* Modify rd pointer to alternate reg to avoid corrupting original reg */ +static void emit_sextw_alt(u8 *rd, u8 ra, struct rv_jit_context *ctx) +{ + emit_sextw(ra, *rd, ctx); + *rd = ra; +} + +static void emit_zextw_alt(u8 *rd, u8 ra, struct rv_jit_context *ctx) +{ + emit_zextw(ra, *rd, ctx); + *rd = ra; +} + /* Emit fixed-length instructions for address */ static int emit_addr(u8 rd, u64 addr, bool extra_pass, struct rv_jit_context *ctx) { @@ -326,12 +339,6 @@ static void emit_branch(u8 cond, u8 rd, u8 rs, int rvoff, emit(rv_jalr(RV_REG_ZERO, RV_REG_T1, lower), ctx); } -static void emit_zext_32(u8 reg, struct rv_jit_context *ctx) -{ - emit_slli(reg, reg, 32, ctx); - emit_srli(reg, reg, 32, ctx); -} - static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) { int tc_ninsn, off, start_insn = ctx->ninsns; @@ -346,7 +353,7 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) */ tc_ninsn = insn ? ctx->offset[insn] - ctx->offset[insn - 1] : ctx->offset[0]; - emit_zext_32(RV_REG_A2, ctx); + emit_zextw(RV_REG_A2, RV_REG_A2, ctx); off = offsetof(struct bpf_array, map.max_entries); if (is_12b_check(off, insn)) @@ -405,38 +412,6 @@ static void init_regs(u8 *rd, u8 *rs, const struct bpf_insn *insn, *rs = bpf_to_rv_reg(insn->src_reg, ctx); } -static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx) -{ - emit_mv(RV_REG_T2, *rd, ctx); - emit_zext_32(RV_REG_T2, ctx); - emit_mv(RV_REG_T1, *rs, ctx); - emit_zext_32(RV_REG_T1, ctx); - *rd = RV_REG_T2; - *rs = RV_REG_T1; -} - -static void emit_sext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx) -{ - emit_addiw(RV_REG_T2, *rd, 0, ctx); - emit_addiw(RV_REG_T1, *rs, 0, ctx); - *rd = RV_REG_T2; - *rs = RV_REG_T1; -} - -static void emit_zext_32_rd_t1(u8 *rd, struct rv_jit_context *ctx) -{ - emit_mv(RV_REG_T2, *rd, ctx); - emit_zext_32(RV_REG_T2, ctx); - emit_zext_32(RV_REG_T1, ctx); - *rd = RV_REG_T2; -} - -static void emit_sext_32_rd(u8 *rd, struct rv_jit_context *ctx) -{ - emit_addiw(RV_REG_T2, *rd, 0, ctx); - *rd = RV_REG_T2; -} - static int emit_jump_and_link(u8 rd, s64 rvoff, bool fixed_addr, struct rv_jit_context *ctx) { @@ -519,32 +494,32 @@ static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64, emit(is64 ? rv_amoadd_d(rs, rs, rd, 0, 0) : rv_amoadd_w(rs, rs, rd, 0, 0), ctx); if (!is64) - emit_zext_32(rs, ctx); + emit_zextw(rs, rs, ctx); break; case BPF_AND | BPF_FETCH: emit(is64 ? rv_amoand_d(rs, rs, rd, 0, 0) : rv_amoand_w(rs, rs, rd, 0, 0), ctx); if (!is64) - emit_zext_32(rs, ctx); + emit_zextw(rs, rs, ctx); break; case BPF_OR | BPF_FETCH: emit(is64 ? rv_amoor_d(rs, rs, rd, 0, 0) : rv_amoor_w(rs, rs, rd, 0, 0), ctx); if (!is64) - emit_zext_32(rs, ctx); + emit_zextw(rs, rs, ctx); break; case BPF_XOR | BPF_FETCH: emit(is64 ? rv_amoxor_d(rs, rs, rd, 0, 0) : rv_amoxor_w(rs, rs, rd, 0, 0), ctx); if (!is64) - emit_zext_32(rs, ctx); + emit_zextw(rs, rs, ctx); break; /* src_reg = atomic_xchg(dst_reg + off16, src_reg); */ case BPF_XCHG: emit(is64 ? rv_amoswap_d(rs, rs, rd, 0, 0) : rv_amoswap_w(rs, rs, rd, 0, 0), ctx); if (!is64) - emit_zext_32(rs, ctx); + emit_zextw(rs, rs, ctx); break; /* r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg); */ case BPF_CMPXCHG: @@ -1091,7 +1066,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, case BPF_ALU64 | BPF_MOV | BPF_X: if (imm == 1) { /* Special mov32 for zext */ - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; } switch (insn->off) { @@ -1099,16 +1074,17 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, emit_mv(rd, rs, ctx); break; case 8: + emit_sextb(rd, rs, ctx); + break; case 16: - emit_slli(RV_REG_T1, rs, 64 - insn->off, ctx); - emit_srai(rd, RV_REG_T1, 64 - insn->off, ctx); + emit_sexth(rd, rs, ctx); break; case 32: - emit_addiw(rd, rs, 0, ctx); + emit_sextw(rd, rs, ctx); break; } if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; /* dst = dst OP src */ @@ -1116,7 +1092,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, case BPF_ALU64 | BPF_ADD | BPF_X: emit_add(rd, rd, rs, ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_SUB | BPF_X: case BPF_ALU64 | BPF_SUB | BPF_X: @@ -1126,31 +1102,31 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, emit_subw(rd, rd, rs, ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_AND | BPF_X: case BPF_ALU64 | BPF_AND | BPF_X: emit_and(rd, rd, rs, ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_OR | BPF_X: case BPF_ALU64 | BPF_OR | BPF_X: emit_or(rd, rd, rs, ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_XOR | BPF_X: case BPF_ALU64 | BPF_XOR | BPF_X: emit_xor(rd, rd, rs, ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_MUL | BPF_X: case BPF_ALU64 | BPF_MUL | BPF_X: emit(is64 ? rv_mul(rd, rd, rs) : rv_mulw(rd, rd, rs), ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU64 | BPF_DIV | BPF_X: @@ -1159,7 +1135,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, else emit(is64 ? rv_divu(rd, rd, rs) : rv_divuw(rd, rd, rs), ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_MOD | BPF_X: case BPF_ALU64 | BPF_MOD | BPF_X: @@ -1168,25 +1144,25 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, else emit(is64 ? rv_remu(rd, rd, rs) : rv_remuw(rd, rd, rs), ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_LSH | BPF_X: case BPF_ALU64 | BPF_LSH | BPF_X: emit(is64 ? rv_sll(rd, rd, rs) : rv_sllw(rd, rd, rs), ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_RSH | BPF_X: case BPF_ALU64 | BPF_RSH | BPF_X: emit(is64 ? rv_srl(rd, rd, rs) : rv_srlw(rd, rd, rs), ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_ARSH | BPF_X: case BPF_ALU64 | BPF_ARSH | BPF_X: emit(is64 ? rv_sra(rd, rd, rs) : rv_sraw(rd, rd, rs), ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; /* dst = -dst */ @@ -1194,73 +1170,27 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, case BPF_ALU64 | BPF_NEG: emit_sub(rd, RV_REG_ZERO, rd, ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; /* dst = BSWAP##imm(dst) */ case BPF_ALU | BPF_END | BPF_FROM_LE: switch (imm) { case 16: - emit_slli(rd, rd, 48, ctx); - emit_srli(rd, rd, 48, ctx); + emit_zexth(rd, rd, ctx); break; case 32: if (!aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case 64: /* Do nothing */ break; } break; - case BPF_ALU | BPF_END | BPF_FROM_BE: case BPF_ALU64 | BPF_END | BPF_FROM_LE: - emit_li(RV_REG_T2, 0, ctx); - - emit_andi(RV_REG_T1, rd, 0xff, ctx); - emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); - emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); - emit_srli(rd, rd, 8, ctx); - if (imm == 16) - goto out_be; - - emit_andi(RV_REG_T1, rd, 0xff, ctx); - emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); - emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); - emit_srli(rd, rd, 8, ctx); - - emit_andi(RV_REG_T1, rd, 0xff, ctx); - emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); - emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); - emit_srli(rd, rd, 8, ctx); - if (imm == 32) - goto out_be; - - emit_andi(RV_REG_T1, rd, 0xff, ctx); - emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); - emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); - emit_srli(rd, rd, 8, ctx); - - emit_andi(RV_REG_T1, rd, 0xff, ctx); - emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); - emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); - emit_srli(rd, rd, 8, ctx); - - emit_andi(RV_REG_T1, rd, 0xff, ctx); - emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); - emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); - emit_srli(rd, rd, 8, ctx); - - emit_andi(RV_REG_T1, rd, 0xff, ctx); - emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); - emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); - emit_srli(rd, rd, 8, ctx); -out_be: - emit_andi(RV_REG_T1, rd, 0xff, ctx); - emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); - - emit_mv(rd, RV_REG_T2, ctx); + emit_bswap(rd, imm, ctx); break; /* dst = imm */ @@ -1268,7 +1198,7 @@ out_be: case BPF_ALU64 | BPF_MOV | BPF_K: emit_imm(rd, imm, ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; /* dst = dst OP imm */ @@ -1281,7 +1211,7 @@ out_be: emit_add(rd, rd, RV_REG_T1, ctx); } if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_SUB | BPF_K: case BPF_ALU64 | BPF_SUB | BPF_K: @@ -1292,7 +1222,7 @@ out_be: emit_sub(rd, rd, RV_REG_T1, ctx); } if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_AND | BPF_K: case BPF_ALU64 | BPF_AND | BPF_K: @@ -1303,7 +1233,7 @@ out_be: emit_and(rd, rd, RV_REG_T1, ctx); } if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_OR | BPF_K: case BPF_ALU64 | BPF_OR | BPF_K: @@ -1314,7 +1244,7 @@ out_be: emit_or(rd, rd, RV_REG_T1, ctx); } if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_XOR | BPF_K: case BPF_ALU64 | BPF_XOR | BPF_K: @@ -1325,7 +1255,7 @@ out_be: emit_xor(rd, rd, RV_REG_T1, ctx); } if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_MUL | BPF_K: case BPF_ALU64 | BPF_MUL | BPF_K: @@ -1333,7 +1263,7 @@ out_be: emit(is64 ? rv_mul(rd, rd, RV_REG_T1) : rv_mulw(rd, rd, RV_REG_T1), ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_DIV | BPF_K: case BPF_ALU64 | BPF_DIV | BPF_K: @@ -1345,7 +1275,7 @@ out_be: emit(is64 ? rv_divu(rd, rd, RV_REG_T1) : rv_divuw(rd, rd, RV_REG_T1), ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_MOD | BPF_K: case BPF_ALU64 | BPF_MOD | BPF_K: @@ -1357,14 +1287,14 @@ out_be: emit(is64 ? rv_remu(rd, rd, RV_REG_T1) : rv_remuw(rd, rd, RV_REG_T1), ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_LSH | BPF_K: case BPF_ALU64 | BPF_LSH | BPF_K: emit_slli(rd, rd, imm, ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_RSH | BPF_K: case BPF_ALU64 | BPF_RSH | BPF_K: @@ -1374,7 +1304,7 @@ out_be: emit(rv_srliw(rd, rd, imm), ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_ARSH | BPF_K: case BPF_ALU64 | BPF_ARSH | BPF_K: @@ -1384,7 +1314,7 @@ out_be: emit(rv_sraiw(rd, rd, imm), ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; /* JUMP off */ @@ -1425,10 +1355,13 @@ out_be: rvoff = rv_offset(i, off, ctx); if (!is64) { s = ctx->ninsns; - if (is_signed_bpf_cond(BPF_OP(code))) - emit_sext_32_rd_rs(&rd, &rs, ctx); - else - emit_zext_32_rd_rs(&rd, &rs, ctx); + if (is_signed_bpf_cond(BPF_OP(code))) { + emit_sextw_alt(&rs, RV_REG_T1, ctx); + emit_sextw_alt(&rd, RV_REG_T2, ctx); + } else { + emit_zextw_alt(&rs, RV_REG_T1, ctx); + emit_zextw_alt(&rd, RV_REG_T2, ctx); + } e = ctx->ninsns; /* Adjust for extra insns */ @@ -1439,8 +1372,7 @@ out_be: /* Adjust for and */ rvoff -= 4; emit_and(RV_REG_T1, rd, rs, ctx); - emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, - ctx); + emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx); } else { emit_branch(BPF_OP(code), rd, rs, rvoff, ctx); } @@ -1469,18 +1401,18 @@ out_be: case BPF_JMP32 | BPF_JSLE | BPF_K: rvoff = rv_offset(i, off, ctx); s = ctx->ninsns; - if (imm) { + if (imm) emit_imm(RV_REG_T1, imm, ctx); - rs = RV_REG_T1; - } else { - /* If imm is 0, simply use zero register. */ - rs = RV_REG_ZERO; - } + rs = imm ? RV_REG_T1 : RV_REG_ZERO; if (!is64) { - if (is_signed_bpf_cond(BPF_OP(code))) - emit_sext_32_rd(&rd, ctx); - else - emit_zext_32_rd_t1(&rd, ctx); + if (is_signed_bpf_cond(BPF_OP(code))) { + emit_sextw_alt(&rd, RV_REG_T2, ctx); + /* rs has been sign extended */ + } else { + emit_zextw_alt(&rd, RV_REG_T2, ctx); + if (imm) + emit_zextw(rs, rs, ctx); + } } e = ctx->ninsns; @@ -1504,7 +1436,7 @@ out_be: * as t1 is used only in comparison against zero. */ if (!is64 && imm < 0) - emit_addiw(RV_REG_T1, RV_REG_T1, 0, ctx); + emit_sextw(RV_REG_T1, RV_REG_T1, ctx); e = ctx->ninsns; rvoff -= ninsns_rvoff(e - s); emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx); @@ -1874,3 +1806,8 @@ bool bpf_jit_supports_kfunc_call(void) { return true; } + +bool bpf_jit_supports_ptr_xchg(void) +{ + return true; +} diff --git a/arch/s390/configs/compat.config b/arch/s390/configs/compat.config new file mode 100644 index 000000000000..6fd051453ae8 --- /dev/null +++ b/arch/s390/configs/compat.config @@ -0,0 +1,3 @@ +# Help: Enable compat support +CONFIG_COMPAT=y +CONFIG_COMPAT_32BIT_TIME=y diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index cae2dd34fbb4..c924be0d7ed8 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -118,7 +118,6 @@ CONFIG_UNIX=y CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m -CONFIG_SMC=m CONFIG_SMC_DIAG=m CONFIG_INET=y CONFIG_IP_MULTICAST=y @@ -374,6 +373,7 @@ CONFIG_NET_ACT_POLICE=m CONFIG_NET_ACT_GACT=m CONFIG_GACT_PROB=y CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_IPT=m CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m @@ -436,9 +436,6 @@ CONFIG_SCSI_DH_ALUA=m CONFIG_MD=y CONFIG_BLK_DEV_MD=y # CONFIG_MD_BITMAP_FILE is not set -CONFIG_MD_LINEAR=m -CONFIG_MD_MULTIPATH=m -CONFIG_MD_FAULTY=m CONFIG_MD_CLUSTER=m CONFIG_BCACHE=m CONFIG_BLK_DEV_DM=y @@ -637,7 +634,6 @@ CONFIG_FUSE_FS=y CONFIG_CUSE=m CONFIG_VIRTIO_FS=m CONFIG_OVERLAY_FS=m -CONFIG_NETFS_SUPPORT=m CONFIG_NETFS_STATS=y CONFIG_FSCACHE=y CONFIG_CACHEFILES=m @@ -709,7 +705,6 @@ CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor" -CONFIG_INIT_STACK_NONE=y CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CRYPTO_USER=m # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set @@ -739,7 +734,6 @@ CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_ARC4=m -CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_LRW=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 42b988873e54..c8f0c9fe40d7 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -109,7 +109,6 @@ CONFIG_UNIX=y CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m -CONFIG_SMC=m CONFIG_SMC_DIAG=m CONFIG_INET=y CONFIG_IP_MULTICAST=y @@ -364,6 +363,7 @@ CONFIG_NET_ACT_POLICE=m CONFIG_NET_ACT_GACT=m CONFIG_GACT_PROB=y CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_IPT=m CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m @@ -426,9 +426,6 @@ CONFIG_SCSI_DH_ALUA=m CONFIG_MD=y CONFIG_BLK_DEV_MD=y # CONFIG_MD_BITMAP_FILE is not set -CONFIG_MD_LINEAR=m -CONFIG_MD_MULTIPATH=m -CONFIG_MD_FAULTY=m CONFIG_MD_CLUSTER=m CONFIG_BCACHE=m CONFIG_BLK_DEV_DM=y @@ -622,7 +619,6 @@ CONFIG_FUSE_FS=y CONFIG_CUSE=m CONFIG_VIRTIO_FS=m CONFIG_OVERLAY_FS=m -CONFIG_NETFS_SUPPORT=m CONFIG_NETFS_STATS=y CONFIG_FSCACHE=y CONFIG_CACHEFILES=m @@ -693,7 +689,6 @@ CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor" -CONFIG_INIT_STACK_NONE=y CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_USER=m @@ -724,11 +719,9 @@ CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_ARC4=m -CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_OFB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_CHACHA20POLY1305=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 30d2a1687665..c51f3ec4eb28 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -8,6 +8,7 @@ CONFIG_BPF_SYSCALL=y # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y CONFIG_MARCH_Z13=y CONFIG_NR_CPUS=2 @@ -64,7 +65,6 @@ CONFIG_ZFCP=y # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_NETWORK_FILESYSTEMS is not set CONFIG_LSM="yama,loadpin,safesetid,integrity" -CONFIG_INIT_STACK_NONE=y # CONFIG_ZLIB_DFLTCC is not set CONFIG_XZ_DEC_MICROLZMA=y CONFIG_PRINTK_TIME=y diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 676ac74026a8..52a44e353796 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -252,7 +252,7 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res, /* combine single writes by using store-block insn */ void __iowrite64_copy(void __iomem *to, const void *from, size_t count) { - zpci_memcpy_toio(to, from, count); + zpci_memcpy_toio(to, from, count * 8); } void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index 5f6035936131..2a03daa68f28 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -60,7 +60,7 @@ libs-y += arch/sparc/prom/ libs-y += arch/sparc/lib/ drivers-$(CONFIG_PM) += arch/sparc/power/ -drivers-$(CONFIG_FB) += arch/sparc/video/ +drivers-$(CONFIG_FB_CORE) += arch/sparc/video/ boot := arch/sparc/boot diff --git a/arch/sparc/video/Makefile b/arch/sparc/video/Makefile index 6baddbd58e4d..d4d83f1702c6 100644 --- a/arch/sparc/video/Makefile +++ b/arch/sparc/video/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_FB) += fbdev.o +obj-$(CONFIG_FB_CORE) += fbdev.o diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index 8c8d38f0cb1d..003379049924 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -6,6 +6,9 @@ #include <linux/export.h> #include <linux/linkage.h> #include <asm/msr-index.h> +#include <asm/unwind_hints.h> +#include <asm/segment.h> +#include <asm/cache.h> .pushsection .noinstr.text, "ax" @@ -20,3 +23,23 @@ SYM_FUNC_END(entry_ibpb) EXPORT_SYMBOL_GPL(entry_ibpb); .popsection + +/* + * Define the VERW operand that is disguised as entry code so that + * it can be referenced with KPTI enabled. This ensure VERW can be + * used late in exit-to-user path after page tables are switched. + */ +.pushsection .entry.text, "ax" + +.align L1_CACHE_BYTES, 0xcc +SYM_CODE_START_NOALIGN(mds_verw_sel) + UNWIND_HINT_UNDEFINED + ANNOTATE_NOENDBR + .word __KERNEL_DS +.align L1_CACHE_BYTES, 0xcc +SYM_CODE_END(mds_verw_sel); +/* For KVM */ +EXPORT_SYMBOL_GPL(mds_verw_sel); + +.popsection + diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index c73047bf9f4b..fba427646805 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -885,6 +885,7 @@ SYM_FUNC_START(entry_SYSENTER_32) BUG_IF_WRONG_CR3 no_user_check=1 popfl popl %eax + CLEAR_CPU_BUFFERS /* * Return back to the vDSO, which will pop ecx and edx. @@ -954,6 +955,7 @@ restore_all_switch_stack: /* Restore user state */ RESTORE_REGS pop=4 # skip orig_eax/error_code + CLEAR_CPU_BUFFERS .Lirq_return: /* * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization @@ -1146,6 +1148,7 @@ SYM_CODE_START(asm_exc_nmi) /* Not on SYSENTER stack. */ call exc_nmi + CLEAR_CPU_BUFFERS jmp .Lnmi_return .Lnmi_from_sysenter_stack: diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index c40f89ab1b4c..9bb485977629 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -161,6 +161,7 @@ syscall_return_via_sysret: SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL) ANNOTATE_NOENDBR swapgs + CLEAR_CPU_BUFFERS sysretq SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL) ANNOTATE_NOENDBR @@ -573,6 +574,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) .Lswapgs_and_iret: swapgs + CLEAR_CPU_BUFFERS /* Assert that the IRET frame indicates user mode. */ testb $3, 8(%rsp) jnz .Lnative_iret @@ -723,6 +725,8 @@ native_irq_return_ldt: */ popq %rax /* Restore user RAX */ + CLEAR_CPU_BUFFERS + /* * RSP now points to an ordinary IRET frame, except that the page * is read-only and RSP[31:16] are preloaded with the userspace @@ -1450,6 +1454,12 @@ nmi_restore: movq $0, 5*8(%rsp) /* clear "NMI executing" */ /* + * Skip CLEAR_CPU_BUFFERS here, since it only helps in rare cases like + * NMI in kernel after user state is restored. For an unprivileged user + * these conditions are hard to meet. + */ + + /* * iretq reads the "iret" frame and exits the NMI stack in a * single instruction. We are returning to kernel mode, so this * cannot result in a fault. Similarly, we don't need to worry @@ -1466,6 +1476,7 @@ SYM_CODE_START(entry_SYSCALL32_ignore) UNWIND_HINT_END_OF_STACK ENDBR mov $-ENOSYS, %eax + CLEAR_CPU_BUFFERS sysretl SYM_CODE_END(entry_SYSCALL32_ignore) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index de94e2e84ecc..eabf48c4d4b4 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -270,6 +270,7 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL) xorl %r9d, %r9d xorl %r10d, %r10d swapgs + CLEAR_CPU_BUFFERS sysretl SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) ANNOTATE_NOENDBR diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 96e6c51515f5..cf1b78cb2d04 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -16,6 +16,11 @@ extern struct boot_params boot_params; static struct real_mode_header hv_vtl_real_mode_header; +static bool __init hv_vtl_msi_ext_dest_id(void) +{ + return true; +} + void __init hv_vtl_init_platform(void) { pr_info("Linux runs in Hyper-V Virtual Trust Level\n"); @@ -38,6 +43,8 @@ void __init hv_vtl_init_platform(void) x86_platform.legacy.warm_reset = 0; x86_platform.legacy.reserve_bios_regions = 0; x86_platform.legacy.devices.pnpbios = 0; + + x86_init.hyper.msi_ext_dest_id = hv_vtl_msi_ext_dest_id; } static inline u64 hv_vtl_system_desc_base(struct ldttss_desc *desc) diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 7dcbf153ad72..768d73de0d09 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -15,6 +15,7 @@ #include <asm/io.h> #include <asm/coco.h> #include <asm/mem_encrypt.h> +#include <asm/set_memory.h> #include <asm/mshyperv.h> #include <asm/hypervisor.h> #include <asm/mtrr.h> @@ -503,6 +504,31 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], } /* + * When transitioning memory between encrypted and decrypted, the caller + * of set_memory_encrypted() or set_memory_decrypted() is responsible for + * ensuring that the memory isn't in use and isn't referenced while the + * transition is in progress. The transition has multiple steps, and the + * memory is in an inconsistent state until all steps are complete. A + * reference while the state is inconsistent could result in an exception + * that can't be cleanly fixed up. + * + * But the Linux kernel load_unaligned_zeropad() mechanism could cause a + * stray reference that can't be prevented by the caller, so Linux has + * specific code to handle this case. But when the #VC and #VE exceptions + * routed to a paravisor, the specific code doesn't work. To avoid this + * problem, mark the pages as "not present" while the transition is in + * progress. If load_unaligned_zeropad() causes a stray reference, a normal + * page fault is generated instead of #VC or #VE, and the page-fault-based + * handlers for load_unaligned_zeropad() resolve the reference. When the + * transition is complete, hv_vtom_set_host_visibility() marks the pages + * as "present" again. + */ +static bool hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc) +{ + return !set_memory_np(kbuffer, pagecount); +} + +/* * hv_vtom_set_host_visibility - Set specified memory visible to host. * * In Isolation VM, all guest memory is encrypted from host and guest @@ -515,16 +541,28 @@ static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bo enum hv_mem_host_visibility visibility = enc ? VMBUS_PAGE_NOT_VISIBLE : VMBUS_PAGE_VISIBLE_READ_WRITE; u64 *pfn_array; + phys_addr_t paddr; + void *vaddr; int ret = 0; bool result = true; int i, pfn; pfn_array = kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); - if (!pfn_array) - return false; + if (!pfn_array) { + result = false; + goto err_set_memory_p; + } for (i = 0, pfn = 0; i < pagecount; i++) { - pfn_array[pfn] = virt_to_hvpfn((void *)kbuffer + i * HV_HYP_PAGE_SIZE); + /* + * Use slow_virt_to_phys() because the PRESENT bit has been + * temporarily cleared in the PTEs. slow_virt_to_phys() works + * without the PRESENT bit while virt_to_hvpfn() or similar + * does not. + */ + vaddr = (void *)kbuffer + (i * HV_HYP_PAGE_SIZE); + paddr = slow_virt_to_phys(vaddr); + pfn_array[pfn] = paddr >> HV_HYP_PAGE_SHIFT; pfn++; if (pfn == HV_MAX_MODIFY_GPA_REP_COUNT || i == pagecount - 1) { @@ -538,14 +576,30 @@ static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bo } } - err_free_pfn_array: +err_free_pfn_array: kfree(pfn_array); + +err_set_memory_p: + /* + * Set the PTE PRESENT bits again to revert what hv_vtom_clear_present() + * did. Do this even if there is an error earlier in this function in + * order to avoid leaving the memory range in a "broken" state. Setting + * the PRESENT bits shouldn't fail, but return an error if it does. + */ + if (set_memory_p(kbuffer, pagecount)) + result = false; + return result; } static bool hv_vtom_tlb_flush_required(bool private) { - return true; + /* + * Since hv_vtom_clear_present() marks the PTEs as "not present" + * and flushes the TLB, they can't be in the TLB. That makes the + * flush controlled by this function redundant, so return "false". + */ + return false; } static bool hv_vtom_cache_flush_required(void) @@ -608,6 +662,7 @@ void __init hv_vtom_init(void) x86_platform.hyper.is_private_mmio = hv_is_private_mmio; x86_platform.guest.enc_cache_flush_required = hv_vtom_cache_flush_required; x86_platform.guest.enc_tlb_flush_required = hv_vtom_tlb_flush_required; + x86_platform.guest.enc_status_change_prepare = hv_vtom_clear_present; x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility; /* Set WB as the default cache mode. */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index fdf723b6f6d0..2b62cdd8dd12 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -95,7 +95,7 @@ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ #define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */ -/* FREE, was #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) "" LFENCE synchronizes RDTSC */ +#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* "" Clear CPU buffers using VERW */ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ #define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index ce8f50192ae3..7e523bb3d2d3 100644 --- a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -91,7 +91,6 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, static __always_inline void arch_exit_to_user_mode(void) { - mds_user_clear_cpu_buffers(); amd_clear_divider(); } #define arch_exit_to_user_mode arch_exit_to_user_mode diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 262e65539f83..2aa52cab1e46 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -315,6 +315,17 @@ #endif .endm +/* + * Macro to execute VERW instruction that mitigate transient data sampling + * attacks such as MDS. On affected systems a microcode update overloaded VERW + * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF. + * + * Note: Only the memory operand variant of VERW clears the CPU buffers. + */ +.macro CLEAR_CPU_BUFFERS + ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF +.endm + #else /* __ASSEMBLY__ */ #define ANNOTATE_RETPOLINE_SAFE \ @@ -529,13 +540,14 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); -DECLARE_STATIC_KEY_FALSE(mds_user_clear); DECLARE_STATIC_KEY_FALSE(mds_idle_clear); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear); +extern u16 mds_verw_sel; + #include <asm/segment.h> /** @@ -562,17 +574,6 @@ static __always_inline void mds_clear_cpu_buffers(void) } /** - * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability - * - * Clear CPU buffers if the corresponding static key is enabled - */ -static __always_inline void mds_user_clear_cpu_buffers(void) -{ - if (static_branch_likely(&mds_user_clear)) - mds_clear_cpu_buffers(); -} - -/** * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability * * Clear CPU buffers if the corresponding static key is enabled diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index a5e89641bd2d..9aee31862b4a 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -47,6 +47,7 @@ int set_memory_uc(unsigned long addr, int numpages); int set_memory_wc(unsigned long addr, int numpages); int set_memory_wb(unsigned long addr, int numpages); int set_memory_np(unsigned long addr, int numpages); +int set_memory_p(unsigned long addr, int numpages); int set_memory_4k(unsigned long addr, int numpages); int set_memory_encrypted(unsigned long addr, int numpages); int set_memory_decrypted(unsigned long addr, int numpages); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index bb0ab8466b91..48d049cd74e7 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -111,9 +111,6 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); /* Control unconditional IBPB in switch_mm() */ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); -/* Control MDS CPU buffer clear before returning to user space */ -DEFINE_STATIC_KEY_FALSE(mds_user_clear); -EXPORT_SYMBOL_GPL(mds_user_clear); /* Control MDS CPU buffer clear before idling (halt, mwait) */ DEFINE_STATIC_KEY_FALSE(mds_idle_clear); EXPORT_SYMBOL_GPL(mds_idle_clear); @@ -252,7 +249,7 @@ static void __init mds_select_mitigation(void) if (!boot_cpu_has(X86_FEATURE_MD_CLEAR)) mds_mitigation = MDS_MITIGATION_VMWERV; - static_branch_enable(&mds_user_clear); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) && (mds_nosmt || cpu_mitigations_auto_nosmt())) @@ -356,7 +353,7 @@ static void __init taa_select_mitigation(void) * For guests that can't determine whether the correct microcode is * present on host, enable the mitigation for UCODE_NEEDED as well. */ - static_branch_enable(&mds_user_clear); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); if (taa_nosmt || cpu_mitigations_auto_nosmt()) cpu_smt_disable(false); @@ -424,7 +421,7 @@ static void __init mmio_select_mitigation(void) */ if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM))) - static_branch_enable(&mds_user_clear); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); else static_branch_enable(&mmio_stale_data_clear); @@ -484,12 +481,12 @@ static void __init md_clear_update_mitigation(void) if (cpu_mitigations_off()) return; - if (!static_key_enabled(&mds_user_clear)) + if (!boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF)) goto out; /* - * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data - * mitigation, if necessary. + * X86_FEATURE_CLEAR_CPU_BUF is now enabled. Update MDS, TAA and MMIO + * Stale Data mitigation, if necessary. */ if (mds_mitigation == MDS_MITIGATION_OFF && boot_cpu_has_bug(X86_BUG_MDS)) { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0b97bcde70c6..fbc4e60d027c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1589,6 +1589,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) get_cpu_vendor(c); get_cpu_cap(c); setup_force_cpu_cap(X86_FEATURE_CPUID); + get_cpu_address_sizes(c); cpu_parse_early_param(); if (this_cpu->c_early_init) @@ -1601,10 +1602,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) this_cpu->c_bsp_init(c); } else { setup_clear_cpu_cap(X86_FEATURE_CPUID); + get_cpu_address_sizes(c); } - get_cpu_address_sizes(c); - setup_force_cpu_cap(X86_FEATURE_ALWAYS); cpu_set_bug_bits(c); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index a927a8fc9624..40dec9b56f87 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -184,6 +184,90 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c) return false; } +#define MSR_IA32_TME_ACTIVATE 0x982 + +/* Helpers to access TME_ACTIVATE MSR */ +#define TME_ACTIVATE_LOCKED(x) (x & 0x1) +#define TME_ACTIVATE_ENABLED(x) (x & 0x2) + +#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */ +#define TME_ACTIVATE_POLICY_AES_XTS_128 0 + +#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */ + +#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */ +#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1 + +/* Values for mktme_status (SW only construct) */ +#define MKTME_ENABLED 0 +#define MKTME_DISABLED 1 +#define MKTME_UNINITIALIZED 2 +static int mktme_status = MKTME_UNINITIALIZED; + +static void detect_tme_early(struct cpuinfo_x86 *c) +{ + u64 tme_activate, tme_policy, tme_crypto_algs; + int keyid_bits = 0, nr_keyids = 0; + static u64 tme_activate_cpu0 = 0; + + rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate); + + if (mktme_status != MKTME_UNINITIALIZED) { + if (tme_activate != tme_activate_cpu0) { + /* Broken BIOS? */ + pr_err_once("x86/tme: configuration is inconsistent between CPUs\n"); + pr_err_once("x86/tme: MKTME is not usable\n"); + mktme_status = MKTME_DISABLED; + + /* Proceed. We may need to exclude bits from x86_phys_bits. */ + } + } else { + tme_activate_cpu0 = tme_activate; + } + + if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) { + pr_info_once("x86/tme: not enabled by BIOS\n"); + mktme_status = MKTME_DISABLED; + return; + } + + if (mktme_status != MKTME_UNINITIALIZED) + goto detect_keyid_bits; + + pr_info("x86/tme: enabled by BIOS\n"); + + tme_policy = TME_ACTIVATE_POLICY(tme_activate); + if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128) + pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy); + + tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate); + if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) { + pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n", + tme_crypto_algs); + mktme_status = MKTME_DISABLED; + } +detect_keyid_bits: + keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate); + nr_keyids = (1UL << keyid_bits) - 1; + if (nr_keyids) { + pr_info_once("x86/mktme: enabled by BIOS\n"); + pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids); + } else { + pr_info_once("x86/mktme: disabled by BIOS\n"); + } + + if (mktme_status == MKTME_UNINITIALIZED) { + /* MKTME is usable */ + mktme_status = MKTME_ENABLED; + } + + /* + * KeyID bits effectively lower the number of physical address + * bits. Update cpuinfo_x86::x86_phys_bits accordingly. + */ + c->x86_phys_bits -= keyid_bits; +} + static void early_init_intel(struct cpuinfo_x86 *c) { u64 misc_enable; @@ -322,6 +406,13 @@ static void early_init_intel(struct cpuinfo_x86 *c) */ if (detect_extended_topology_early(c) < 0) detect_ht_early(c); + + /* + * Adjust the number of physical bits early because it affects the + * valid bits of the MTRR mask registers. + */ + if (cpu_has(c, X86_FEATURE_TME)) + detect_tme_early(c); } static void bsp_init_intel(struct cpuinfo_x86 *c) @@ -482,90 +573,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c) #endif } -#define MSR_IA32_TME_ACTIVATE 0x982 - -/* Helpers to access TME_ACTIVATE MSR */ -#define TME_ACTIVATE_LOCKED(x) (x & 0x1) -#define TME_ACTIVATE_ENABLED(x) (x & 0x2) - -#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */ -#define TME_ACTIVATE_POLICY_AES_XTS_128 0 - -#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */ - -#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */ -#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1 - -/* Values for mktme_status (SW only construct) */ -#define MKTME_ENABLED 0 -#define MKTME_DISABLED 1 -#define MKTME_UNINITIALIZED 2 -static int mktme_status = MKTME_UNINITIALIZED; - -static void detect_tme(struct cpuinfo_x86 *c) -{ - u64 tme_activate, tme_policy, tme_crypto_algs; - int keyid_bits = 0, nr_keyids = 0; - static u64 tme_activate_cpu0 = 0; - - rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate); - - if (mktme_status != MKTME_UNINITIALIZED) { - if (tme_activate != tme_activate_cpu0) { - /* Broken BIOS? */ - pr_err_once("x86/tme: configuration is inconsistent between CPUs\n"); - pr_err_once("x86/tme: MKTME is not usable\n"); - mktme_status = MKTME_DISABLED; - - /* Proceed. We may need to exclude bits from x86_phys_bits. */ - } - } else { - tme_activate_cpu0 = tme_activate; - } - - if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) { - pr_info_once("x86/tme: not enabled by BIOS\n"); - mktme_status = MKTME_DISABLED; - return; - } - - if (mktme_status != MKTME_UNINITIALIZED) - goto detect_keyid_bits; - - pr_info("x86/tme: enabled by BIOS\n"); - - tme_policy = TME_ACTIVATE_POLICY(tme_activate); - if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128) - pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy); - - tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate); - if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) { - pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n", - tme_crypto_algs); - mktme_status = MKTME_DISABLED; - } -detect_keyid_bits: - keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate); - nr_keyids = (1UL << keyid_bits) - 1; - if (nr_keyids) { - pr_info_once("x86/mktme: enabled by BIOS\n"); - pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids); - } else { - pr_info_once("x86/mktme: disabled by BIOS\n"); - } - - if (mktme_status == MKTME_UNINITIALIZED) { - /* MKTME is usable */ - mktme_status = MKTME_ENABLED; - } - - /* - * KeyID bits effectively lower the number of physical address - * bits. Update cpuinfo_x86::x86_phys_bits accordingly. - */ - c->x86_phys_bits -= keyid_bits; -} - static void init_cpuid_fault(struct cpuinfo_x86 *c) { u64 msr; @@ -702,9 +709,6 @@ static void init_intel(struct cpuinfo_x86 *c) init_ia32_feat_ctl(c); - if (cpu_has(c, X86_FEATURE_TME)) - detect_tme(c); - init_intel_misc_features(c); split_lock_init(); diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index fb8cf953380d..b66f540de054 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1017,10 +1017,12 @@ void __init e820__reserve_setup_data(void) e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); /* - * SETUP_EFI and SETUP_IMA are supplied by kexec and do not need - * to be reserved. + * SETUP_EFI, SETUP_IMA and SETUP_RNG_SEED are supplied by + * kexec and do not need to be reserved. */ - if (data->type != SETUP_EFI && data->type != SETUP_IMA) + if (data->type != SETUP_EFI && + data->type != SETUP_IMA && + data->type != SETUP_RNG_SEED) e820__range_update_kexec(pa_data, sizeof(*data) + data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 17e955ab69fe..3082cf24b69e 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -563,9 +563,6 @@ nmi_restart: } if (this_cpu_dec_return(nmi_state)) goto nmi_restart; - - if (user_mode(regs)) - mds_user_clear_cpu_buffers(); } #if IS_ENABLED(CONFIG_KVM_INTEL) diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h index edc3f16cc189..6a9bfdfbb6e5 100644 --- a/arch/x86/kvm/vmx/run_flags.h +++ b/arch/x86/kvm/vmx/run_flags.h @@ -2,7 +2,10 @@ #ifndef __KVM_X86_VMX_RUN_FLAGS_H #define __KVM_X86_VMX_RUN_FLAGS_H -#define VMX_RUN_VMRESUME (1 << 0) -#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1) +#define VMX_RUN_VMRESUME_SHIFT 0 +#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1 + +#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT) +#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT) #endif /* __KVM_X86_VMX_RUN_FLAGS_H */ diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 906ecd001511..2bfbf758d061 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -139,7 +139,7 @@ SYM_FUNC_START(__vmx_vcpu_run) mov (%_ASM_SP), %_ASM_AX /* Check if vmlaunch or vmresume is needed */ - test $VMX_RUN_VMRESUME, %ebx + bt $VMX_RUN_VMRESUME_SHIFT, %ebx /* Load guest registers. Don't clobber flags. */ mov VCPU_RCX(%_ASM_AX), %_ASM_CX @@ -161,8 +161,11 @@ SYM_FUNC_START(__vmx_vcpu_run) /* Load guest RAX. This kills the @regs pointer! */ mov VCPU_RAX(%_ASM_AX), %_ASM_AX - /* Check EFLAGS.ZF from 'test VMX_RUN_VMRESUME' above */ - jz .Lvmlaunch + /* Clobbers EFLAGS.ZF */ + CLEAR_CPU_BUFFERS + + /* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. */ + jnc .Lvmlaunch /* * After a successful VMRESUME/VMLAUNCH, control flow "magically" diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 1111d9d08903..88a4ff200d04 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -388,7 +388,16 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx) static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) { - vmx->disable_fb_clear = (host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) && + /* + * Disable VERW's behavior of clearing CPU buffers for the guest if the + * CPU isn't affected by MDS/TAA, and the host hasn't forcefully enabled + * the mitigation. Disabling the clearing behavior provides a + * performance boost for guests that aren't aware that manually clearing + * CPU buffers is unnecessary, at the cost of MSR accesses on VM-Entry + * and VM-Exit. + */ + vmx->disable_fb_clear = !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) && + (host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) && !boot_cpu_has_bug(X86_BUG_MDS) && !boot_cpu_has_bug(X86_BUG_TAA); @@ -7224,11 +7233,14 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, guest_state_enter_irqoff(); - /* L1D Flush includes CPU buffer clear to mitigate MDS */ + /* + * L1D Flush includes CPU buffer clear to mitigate MDS, but VERW + * mitigation for MDS is done late in VMentry and is still + * executed in spite of L1D Flush. This is because an extra VERW + * should not matter much after the big hammer L1D Flush. + */ if (static_branch_unlikely(&vmx_l1d_should_flush)) vmx_l1d_flush(vcpu); - else if (static_branch_unlikely(&mds_user_clear)) - mds_clear_cpu_buffers(); else if (static_branch_unlikely(&mmio_stale_data_clear) && kvm_arch_has_assigned_device(vcpu->kvm)) mds_clear_cpu_buffers(); diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index adc497b93f03..65e9a6e391c0 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -934,7 +934,7 @@ static int __init cmp_memblk(const void *a, const void *b) const struct numa_memblk *ma = *(const struct numa_memblk **)a; const struct numa_memblk *mb = *(const struct numa_memblk **)b; - return ma->start - mb->start; + return (ma->start > mb->start) - (ma->start < mb->start); } static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata; @@ -944,14 +944,12 @@ static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata; * @start: address to begin fill * @end: address to end fill * - * Find and extend numa_meminfo memblks to cover the @start-@end - * physical address range, such that the first memblk includes - * @start, the last memblk includes @end, and any gaps in between - * are filled. + * Find and extend numa_meminfo memblks to cover the physical + * address range @start-@end * * RETURNS: * 0 : Success - * NUMA_NO_MEMBLK : No memblk exists in @start-@end range + * NUMA_NO_MEMBLK : No memblks exist in address range @start-@end */ int __init numa_fill_memblks(u64 start, u64 end) @@ -963,17 +961,14 @@ int __init numa_fill_memblks(u64 start, u64 end) /* * Create a list of pointers to numa_meminfo memblks that - * overlap start, end. Exclude (start == bi->end) since - * end addresses in both a CFMWS range and a memblk range - * are exclusive. - * - * This list of pointers is used to make in-place changes - * that fill out the numa_meminfo memblks. + * overlap start, end. The list is used to make in-place + * changes that fill out the numa_meminfo memblks. */ for (int i = 0; i < mi->nr_blks; i++) { struct numa_memblk *bi = &mi->blk[i]; - if (start < bi->end && end >= bi->start) { + if (memblock_addrs_overlap(start, end - start, bi->start, + bi->end - bi->start)) { blk[count] = &mi->blk[i]; count++; } diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index e9b448d1b1b7..102880404046 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -755,10 +755,14 @@ pmd_t *lookup_pmd_address(unsigned long address) * areas on 32-bit NUMA systems. The percpu areas can * end up in this kind of memory, for instance. * - * This could be optimized, but it is only intended to be - * used at initialization time, and keeping it - * unoptimized should increase the testing coverage for - * the more obscure platforms. + * Note that as long as the PTEs are well-formed with correct PFNs, this + * works without checking the PRESENT bit in the leaf PTE. This is unlike + * the similar vmalloc_to_page() and derivatives. Callers may depend on + * this behavior. + * + * This could be optimized, but it is only used in paths that are not perf + * sensitive, and keeping it unoptimized should increase the testing coverage + * for the more obscure platforms. */ phys_addr_t slow_virt_to_phys(void *__virt_addr) { @@ -2041,17 +2045,12 @@ int set_mce_nospec(unsigned long pfn) return rc; } -static int set_memory_p(unsigned long *addr, int numpages) -{ - return change_page_attr_set(addr, numpages, __pgprot(_PAGE_PRESENT), 0); -} - /* Restore full speculative operation to the pfn. */ int clear_mce_nospec(unsigned long pfn) { unsigned long addr = (unsigned long) pfn_to_kaddr(pfn); - return set_memory_p(&addr, 1); + return set_memory_p(addr, 1); } EXPORT_SYMBOL_GPL(clear_mce_nospec); #endif /* CONFIG_X86_64 */ @@ -2104,6 +2103,11 @@ int set_memory_np_noalias(unsigned long addr, int numpages) CPA_NO_CHECK_ALIAS, NULL); } +int set_memory_p(unsigned long addr, int numpages) +{ + return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_PRESENT), 0); +} + int set_memory_4k(unsigned long addr, int numpages) { return change_page_attr_set_clr(&addr, numpages, __pgprot(0), diff --git a/block/opal_proto.h b/block/opal_proto.h index dec7ce3a3edb..d247a457bf6e 100644 --- a/block/opal_proto.h +++ b/block/opal_proto.h @@ -71,6 +71,7 @@ enum opal_response_token { #define SHORT_ATOM_BYTE 0xBF #define MEDIUM_ATOM_BYTE 0xDF #define LONG_ATOM_BYTE 0xE3 +#define EMPTY_ATOM_BYTE 0xFF #define OPAL_INVAL_PARAM 12 #define OPAL_MANUFACTURED_INACTIVE 0x08 diff --git a/block/sed-opal.c b/block/sed-opal.c index 3d9e9cd250bd..fa4dba5d8531 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -1056,16 +1056,20 @@ static int response_parse(const u8 *buf, size_t length, token_length = response_parse_medium(iter, pos); else if (pos[0] <= LONG_ATOM_BYTE) /* long atom */ token_length = response_parse_long(iter, pos); + else if (pos[0] == EMPTY_ATOM_BYTE) /* empty atom */ + token_length = 1; else /* TOKEN */ token_length = response_parse_token(iter, pos); if (token_length < 0) return token_length; + if (pos[0] != EMPTY_ATOM_BYTE) + num_entries++; + pos += token_length; total -= token_length; iter++; - num_entries++; } resp->num = num_entries; diff --git a/crypto/lskcipher.c b/crypto/lskcipher.c index 0b6dd8aa21f2..0f1bd7dcde24 100644 --- a/crypto/lskcipher.c +++ b/crypto/lskcipher.c @@ -212,13 +212,12 @@ static int crypto_lskcipher_crypt_sg(struct skcipher_request *req, ivsize = crypto_lskcipher_ivsize(tfm); ivs = PTR_ALIGN(ivs, crypto_skcipher_alignmask(skcipher) + 1); + memcpy(ivs, req->iv, ivsize); flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP; if (req->base.flags & CRYPTO_SKCIPHER_REQ_CONT) flags |= CRYPTO_LSKCIPHER_FLAG_CONT; - else - memcpy(ivs, req->iv, ivsize); if (!(req->base.flags & CRYPTO_SKCIPHER_REQ_NOTFINAL)) flags |= CRYPTO_LSKCIPHER_FLAG_FINAL; @@ -234,8 +233,7 @@ static int crypto_lskcipher_crypt_sg(struct skcipher_request *req, flags |= CRYPTO_LSKCIPHER_FLAG_CONT; } - if (flags & CRYPTO_LSKCIPHER_FLAG_FINAL) - memcpy(req->iv, ivs, ivsize); + memcpy(req->iv, ivs, ivsize); return err; } diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c index 1c995307c113..a1523d0b1ef3 100644 --- a/drivers/accel/ivpu/ivpu_hw_40xx.c +++ b/drivers/accel/ivpu/ivpu_hw_40xx.c @@ -24,7 +24,7 @@ #define SKU_HW_ID_SHIFT 16u #define SKU_HW_ID_MASK 0xffff0000u -#define PLL_CONFIG_DEFAULT 0x1 +#define PLL_CONFIG_DEFAULT 0x0 #define PLL_CDYN_DEFAULT 0x80 #define PLL_EPP_DEFAULT 0x80 #define PLL_REF_CLK_FREQ (50 * 1000000) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index fe825a432c5b..ab2a82cb1b0b 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -26,7 +26,6 @@ #include <linux/interrupt.h> #include <linux/timer.h> #include <linux/cper.h> -#include <linux/cxl-event.h> #include <linux/platform_device.h> #include <linux/mutex.h> #include <linux/ratelimit.h> @@ -674,52 +673,6 @@ static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata, schedule_work(&entry->work); } -/* - * Only a single callback can be registered for CXL CPER events. - */ -static DECLARE_RWSEM(cxl_cper_rw_sem); -static cxl_cper_callback cper_callback; - -static void cxl_cper_post_event(enum cxl_event_type event_type, - struct cxl_cper_event_rec *rec) -{ - if (rec->hdr.length <= sizeof(rec->hdr) || - rec->hdr.length > sizeof(*rec)) { - pr_err(FW_WARN "CXL CPER Invalid section length (%u)\n", - rec->hdr.length); - return; - } - - if (!(rec->hdr.validation_bits & CPER_CXL_COMP_EVENT_LOG_VALID)) { - pr_err(FW_WARN "CXL CPER invalid event\n"); - return; - } - - guard(rwsem_read)(&cxl_cper_rw_sem); - if (cper_callback) - cper_callback(event_type, rec); -} - -int cxl_cper_register_callback(cxl_cper_callback callback) -{ - guard(rwsem_write)(&cxl_cper_rw_sem); - if (cper_callback) - return -EINVAL; - cper_callback = callback; - return 0; -} -EXPORT_SYMBOL_NS_GPL(cxl_cper_register_callback, CXL); - -int cxl_cper_unregister_callback(cxl_cper_callback callback) -{ - guard(rwsem_write)(&cxl_cper_rw_sem); - if (callback != cper_callback) - return -EINVAL; - cper_callback = NULL; - return 0; -} -EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_callback, CXL); - static bool ghes_do_proc(struct ghes *ghes, const struct acpi_hest_generic_status *estatus) { @@ -754,22 +707,6 @@ static bool ghes_do_proc(struct ghes *ghes, } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { queued = ghes_handle_arm_hw_error(gdata, sev, sync); - } else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID)) { - struct cxl_cper_event_rec *rec = - acpi_hest_get_payload(gdata); - - cxl_cper_post_event(CXL_CPER_EVENT_GEN_MEDIA, rec); - } else if (guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID)) { - struct cxl_cper_event_rec *rec = - acpi_hest_get_payload(gdata); - - cxl_cper_post_event(CXL_CPER_EVENT_DRAM, rec); - } else if (guid_equal(sec_type, - &CPER_SEC_CXL_MEM_MODULE_GUID)) { - struct cxl_cper_event_rec *rec = - acpi_hest_get_payload(gdata); - - cxl_cper_post_event(CXL_CPER_EVENT_MEM_MODULE, rec); } else { void *err = acpi_hest_get_payload(gdata); diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index dbdee2924594..02255795b800 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -525,10 +525,12 @@ static void acpi_ec_clear(struct acpi_ec *ec) static void acpi_ec_enable_event(struct acpi_ec *ec) { - spin_lock(&ec->lock); + unsigned long flags; + + spin_lock_irqsave(&ec->lock, flags); if (acpi_ec_started(ec)) __acpi_ec_enable_event(ec); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); /* Drain additional events if hardware requires that */ if (EC_FLAGS_CLEAR_ON_RESUME) @@ -544,9 +546,11 @@ static void __acpi_ec_flush_work(void) static void acpi_ec_disable_event(struct acpi_ec *ec) { - spin_lock(&ec->lock); + unsigned long flags; + + spin_lock_irqsave(&ec->lock, flags); __acpi_ec_disable_event(ec); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); /* * When ec_freeze_events is true, we need to flush events in @@ -567,9 +571,10 @@ void acpi_ec_flush_work(void) static bool acpi_ec_guard_event(struct acpi_ec *ec) { + unsigned long flags; bool guarded; - spin_lock(&ec->lock); + spin_lock_irqsave(&ec->lock, flags); /* * If firmware SCI_EVT clearing timing is "event", we actually * don't know when the SCI_EVT will be cleared by firmware after @@ -585,29 +590,31 @@ static bool acpi_ec_guard_event(struct acpi_ec *ec) guarded = ec_event_clearing == ACPI_EC_EVT_TIMING_EVENT && ec->event_state != EC_EVENT_READY && (!ec->curr || ec->curr->command != ACPI_EC_COMMAND_QUERY); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); return guarded; } static int ec_transaction_polled(struct acpi_ec *ec) { + unsigned long flags; int ret = 0; - spin_lock(&ec->lock); + spin_lock_irqsave(&ec->lock, flags); if (ec->curr && (ec->curr->flags & ACPI_EC_COMMAND_POLL)) ret = 1; - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); return ret; } static int ec_transaction_completed(struct acpi_ec *ec) { + unsigned long flags; int ret = 0; - spin_lock(&ec->lock); + spin_lock_irqsave(&ec->lock, flags); if (ec->curr && (ec->curr->flags & ACPI_EC_COMMAND_COMPLETE)) ret = 1; - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); return ret; } @@ -749,6 +756,7 @@ static int ec_guard(struct acpi_ec *ec) static int ec_poll(struct acpi_ec *ec) { + unsigned long flags; int repeat = 5; /* number of command restarts */ while (repeat--) { @@ -757,14 +765,14 @@ static int ec_poll(struct acpi_ec *ec) do { if (!ec_guard(ec)) return 0; - spin_lock(&ec->lock); + spin_lock_irqsave(&ec->lock, flags); advance_transaction(ec, false); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); } while (time_before(jiffies, delay)); pr_debug("controller reset, restart transaction\n"); - spin_lock(&ec->lock); + spin_lock_irqsave(&ec->lock, flags); start_transaction(ec); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); } return -ETIME; } @@ -772,10 +780,11 @@ static int ec_poll(struct acpi_ec *ec) static int acpi_ec_transaction_unlocked(struct acpi_ec *ec, struct transaction *t) { + unsigned long tmp; int ret = 0; /* start transaction */ - spin_lock(&ec->lock); + spin_lock_irqsave(&ec->lock, tmp); /* Enable GPE for command processing (IBF=0/OBF=1) */ if (!acpi_ec_submit_flushable_request(ec)) { ret = -EINVAL; @@ -786,11 +795,11 @@ static int acpi_ec_transaction_unlocked(struct acpi_ec *ec, ec->curr = t; ec_dbg_req("Command(%s) started", acpi_ec_cmd_string(t->command)); start_transaction(ec); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, tmp); ret = ec_poll(ec); - spin_lock(&ec->lock); + spin_lock_irqsave(&ec->lock, tmp); if (t->irq_count == ec_storm_threshold) acpi_ec_unmask_events(ec); ec_dbg_req("Command(%s) stopped", acpi_ec_cmd_string(t->command)); @@ -799,7 +808,7 @@ static int acpi_ec_transaction_unlocked(struct acpi_ec *ec, acpi_ec_complete_request(ec); ec_dbg_ref(ec, "Decrease command"); unlock: - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, tmp); return ret; } @@ -927,7 +936,9 @@ EXPORT_SYMBOL(ec_get_handle); static void acpi_ec_start(struct acpi_ec *ec, bool resuming) { - spin_lock(&ec->lock); + unsigned long flags; + + spin_lock_irqsave(&ec->lock, flags); if (!test_and_set_bit(EC_FLAGS_STARTED, &ec->flags)) { ec_dbg_drv("Starting EC"); /* Enable GPE for event processing (SCI_EVT=1) */ @@ -937,28 +948,31 @@ static void acpi_ec_start(struct acpi_ec *ec, bool resuming) } ec_log_drv("EC started"); } - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); } static bool acpi_ec_stopped(struct acpi_ec *ec) { + unsigned long flags; bool flushed; - spin_lock(&ec->lock); + spin_lock_irqsave(&ec->lock, flags); flushed = acpi_ec_flushed(ec); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); return flushed; } static void acpi_ec_stop(struct acpi_ec *ec, bool suspending) { - spin_lock(&ec->lock); + unsigned long flags; + + spin_lock_irqsave(&ec->lock, flags); if (acpi_ec_started(ec)) { ec_dbg_drv("Stopping EC"); set_bit(EC_FLAGS_STOPPED, &ec->flags); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); wait_event(ec->wait, acpi_ec_stopped(ec)); - spin_lock(&ec->lock); + spin_lock_irqsave(&ec->lock, flags); /* Disable GPE for event processing (SCI_EVT=1) */ if (!suspending) { acpi_ec_complete_request(ec); @@ -969,25 +983,29 @@ static void acpi_ec_stop(struct acpi_ec *ec, bool suspending) clear_bit(EC_FLAGS_STOPPED, &ec->flags); ec_log_drv("EC stopped"); } - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); } static void acpi_ec_enter_noirq(struct acpi_ec *ec) { - spin_lock(&ec->lock); + unsigned long flags; + + spin_lock_irqsave(&ec->lock, flags); ec->busy_polling = true; ec->polling_guard = 0; ec_log_drv("interrupt blocked"); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); } static void acpi_ec_leave_noirq(struct acpi_ec *ec) { - spin_lock(&ec->lock); + unsigned long flags; + + spin_lock_irqsave(&ec->lock, flags); ec->busy_polling = ec_busy_polling; ec->polling_guard = ec_polling_guard; ec_log_drv("interrupt unblocked"); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); } void acpi_ec_block_transactions(void) @@ -1119,9 +1137,9 @@ static void acpi_ec_event_processor(struct work_struct *work) ec_dbg_evt("Query(0x%02x) stopped", handler->query_bit); - spin_lock(&ec->lock); + spin_lock_irq(&ec->lock); ec->queries_in_progress--; - spin_unlock(&ec->lock); + spin_unlock_irq(&ec->lock); acpi_ec_put_query_handler(handler); kfree(q); @@ -1184,12 +1202,12 @@ static int acpi_ec_submit_query(struct acpi_ec *ec) */ ec_dbg_evt("Query(0x%02x) scheduled", value); - spin_lock(&ec->lock); + spin_lock_irq(&ec->lock); ec->queries_in_progress++; queue_work(ec_query_wq, &q->work); - spin_unlock(&ec->lock); + spin_unlock_irq(&ec->lock); return 0; @@ -1205,14 +1223,14 @@ static void acpi_ec_event_handler(struct work_struct *work) ec_dbg_evt("Event started"); - spin_lock(&ec->lock); + spin_lock_irq(&ec->lock); while (ec->events_to_process) { - spin_unlock(&ec->lock); + spin_unlock_irq(&ec->lock); acpi_ec_submit_query(ec); - spin_lock(&ec->lock); + spin_lock_irq(&ec->lock); ec->events_to_process--; } @@ -1229,11 +1247,11 @@ static void acpi_ec_event_handler(struct work_struct *work) ec_dbg_evt("Event stopped"); - spin_unlock(&ec->lock); + spin_unlock_irq(&ec->lock); guard_timeout = !!ec_guard(ec); - spin_lock(&ec->lock); + spin_lock_irq(&ec->lock); /* Take care of SCI_EVT unless someone else is doing that. */ if (guard_timeout && !ec->curr) @@ -1246,7 +1264,7 @@ static void acpi_ec_event_handler(struct work_struct *work) ec->events_in_progress--; - spin_unlock(&ec->lock); + spin_unlock_irq(&ec->lock); } static void clear_gpe_and_advance_transaction(struct acpi_ec *ec, bool interrupt) @@ -1271,11 +1289,13 @@ static void clear_gpe_and_advance_transaction(struct acpi_ec *ec, bool interrupt static void acpi_ec_handle_interrupt(struct acpi_ec *ec) { - spin_lock(&ec->lock); + unsigned long flags; + + spin_lock_irqsave(&ec->lock, flags); clear_gpe_and_advance_transaction(ec, true); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); } static u32 acpi_ec_gpe_handler(acpi_handle gpe_device, @@ -2085,7 +2105,7 @@ bool acpi_ec_dispatch_gpe(void) * Dispatch the EC GPE in-band, but do not report wakeup in any case * to allow the caller to process events properly after that. */ - spin_lock(&first_ec->lock); + spin_lock_irq(&first_ec->lock); if (acpi_ec_gpe_status_set(first_ec)) { pm_pr_dbg("ACPI EC GPE status set\n"); @@ -2094,7 +2114,7 @@ bool acpi_ec_dispatch_gpe(void) work_in_progress = acpi_ec_work_in_progress(first_ec); } - spin_unlock(&first_ec->lock); + spin_unlock_irq(&first_ec->lock); if (!work_in_progress) return false; @@ -2107,11 +2127,11 @@ bool acpi_ec_dispatch_gpe(void) pm_pr_dbg("ACPI EC work flushed\n"); - spin_lock(&first_ec->lock); + spin_lock_irq(&first_ec->lock); work_in_progress = acpi_ec_work_in_progress(first_ec); - spin_unlock(&first_ec->lock); + spin_unlock_irq(&first_ec->lock); } while (work_in_progress && !pm_wakeup_pending()); return false; diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index da2e74fce2d9..682ff550ccfb 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -671,9 +671,17 @@ MODULE_PARM_DESC(mobile_lpm_policy, "Default LPM policy for mobile chipsets"); static void ahci_pci_save_initial_config(struct pci_dev *pdev, struct ahci_host_priv *hpriv) { - if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && pdev->device == 0x1166) { - dev_info(&pdev->dev, "ASM1166 has only six ports\n"); - hpriv->saved_port_map = 0x3f; + if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA) { + switch (pdev->device) { + case 0x1166: + dev_info(&pdev->dev, "ASM1166 has only six ports\n"); + hpriv->saved_port_map = 0x3f; + break; + case 0x1064: + dev_info(&pdev->dev, "ASM1064 has only four ports\n"); + hpriv->saved_port_map = 0xf; + break; + } } if (pdev->vendor == PCI_VENDOR_ID_JMICRON && pdev->device == 0x2361) { diff --git a/drivers/ata/ahci_ceva.c b/drivers/ata/ahci_ceva.c index 64f7f7d6ba84..11a2c199a7c2 100644 --- a/drivers/ata/ahci_ceva.c +++ b/drivers/ata/ahci_ceva.c @@ -88,7 +88,6 @@ struct ceva_ahci_priv { u32 axicc; bool is_cci_enabled; int flags; - struct reset_control *rst; }; static unsigned int ceva_ahci_read_id(struct ata_device *dev, @@ -189,6 +188,60 @@ static const struct scsi_host_template ahci_platform_sht = { AHCI_SHT(DRV_NAME), }; +static int ceva_ahci_platform_enable_resources(struct ahci_host_priv *hpriv) +{ + int rc, i; + + rc = ahci_platform_enable_regulators(hpriv); + if (rc) + return rc; + + rc = ahci_platform_enable_clks(hpriv); + if (rc) + goto disable_regulator; + + /* Assert the controller reset */ + rc = ahci_platform_assert_rsts(hpriv); + if (rc) + goto disable_clks; + + for (i = 0; i < hpriv->nports; i++) { + rc = phy_init(hpriv->phys[i]); + if (rc) + goto disable_rsts; + } + + /* De-assert the controller reset */ + ahci_platform_deassert_rsts(hpriv); + + for (i = 0; i < hpriv->nports; i++) { + rc = phy_power_on(hpriv->phys[i]); + if (rc) { + phy_exit(hpriv->phys[i]); + goto disable_phys; + } + } + + return 0; + +disable_rsts: + ahci_platform_deassert_rsts(hpriv); + +disable_phys: + while (--i >= 0) { + phy_power_off(hpriv->phys[i]); + phy_exit(hpriv->phys[i]); + } + +disable_clks: + ahci_platform_disable_clks(hpriv); + +disable_regulator: + ahci_platform_disable_regulators(hpriv); + + return rc; +} + static int ceva_ahci_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; @@ -203,47 +256,19 @@ static int ceva_ahci_probe(struct platform_device *pdev) return -ENOMEM; cevapriv->ahci_pdev = pdev; - - cevapriv->rst = devm_reset_control_get_optional_exclusive(&pdev->dev, - NULL); - if (IS_ERR(cevapriv->rst)) - dev_err_probe(&pdev->dev, PTR_ERR(cevapriv->rst), - "failed to get reset\n"); - hpriv = ahci_platform_get_resources(pdev, 0); if (IS_ERR(hpriv)) return PTR_ERR(hpriv); - if (!cevapriv->rst) { - rc = ahci_platform_enable_resources(hpriv); - if (rc) - return rc; - } else { - int i; + hpriv->rsts = devm_reset_control_get_optional_exclusive(&pdev->dev, + NULL); + if (IS_ERR(hpriv->rsts)) + return dev_err_probe(&pdev->dev, PTR_ERR(hpriv->rsts), + "failed to get reset\n"); - rc = ahci_platform_enable_clks(hpriv); - if (rc) - return rc; - /* Assert the controller reset */ - reset_control_assert(cevapriv->rst); - - for (i = 0; i < hpriv->nports; i++) { - rc = phy_init(hpriv->phys[i]); - if (rc) - return rc; - } - - /* De-assert the controller reset */ - reset_control_deassert(cevapriv->rst); - - for (i = 0; i < hpriv->nports; i++) { - rc = phy_power_on(hpriv->phys[i]); - if (rc) { - phy_exit(hpriv->phys[i]); - return rc; - } - } - } + rc = ceva_ahci_platform_enable_resources(hpriv); + if (rc) + return rc; if (of_property_read_bool(np, "ceva,broken-gen2")) cevapriv->flags = CEVA_FLAG_BROKEN_GEN2; @@ -252,52 +277,60 @@ static int ceva_ahci_probe(struct platform_device *pdev) if (of_property_read_u8_array(np, "ceva,p0-cominit-params", (u8 *)&cevapriv->pp2c[0], 4) < 0) { dev_warn(dev, "ceva,p0-cominit-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } if (of_property_read_u8_array(np, "ceva,p1-cominit-params", (u8 *)&cevapriv->pp2c[1], 4) < 0) { dev_warn(dev, "ceva,p1-cominit-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } /* Read OOB timing value for COMWAKE from device-tree*/ if (of_property_read_u8_array(np, "ceva,p0-comwake-params", (u8 *)&cevapriv->pp3c[0], 4) < 0) { dev_warn(dev, "ceva,p0-comwake-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } if (of_property_read_u8_array(np, "ceva,p1-comwake-params", (u8 *)&cevapriv->pp3c[1], 4) < 0) { dev_warn(dev, "ceva,p1-comwake-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } /* Read phy BURST timing value from device-tree */ if (of_property_read_u8_array(np, "ceva,p0-burst-params", (u8 *)&cevapriv->pp4c[0], 4) < 0) { dev_warn(dev, "ceva,p0-burst-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } if (of_property_read_u8_array(np, "ceva,p1-burst-params", (u8 *)&cevapriv->pp4c[1], 4) < 0) { dev_warn(dev, "ceva,p1-burst-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } /* Read phy RETRY interval timing value from device-tree */ if (of_property_read_u16_array(np, "ceva,p0-retry-params", (u16 *)&cevapriv->pp5c[0], 2) < 0) { dev_warn(dev, "ceva,p0-retry-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } if (of_property_read_u16_array(np, "ceva,p1-retry-params", (u16 *)&cevapriv->pp5c[1], 2) < 0) { dev_warn(dev, "ceva,p1-retry-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } /* @@ -335,7 +368,7 @@ static int __maybe_unused ceva_ahci_resume(struct device *dev) struct ahci_host_priv *hpriv = host->private_data; int rc; - rc = ahci_platform_enable_resources(hpriv); + rc = ceva_ahci_platform_enable_resources(hpriv); if (rc) return rc; diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 09ed67772fae..be3412cdb22e 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2001,6 +2001,33 @@ bool ata_dev_power_init_tf(struct ata_device *dev, struct ata_taskfile *tf, return true; } +static bool ata_dev_power_is_active(struct ata_device *dev) +{ + struct ata_taskfile tf; + unsigned int err_mask; + + ata_tf_init(dev, &tf); + tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR; + tf.protocol = ATA_PROT_NODATA; + tf.command = ATA_CMD_CHK_POWER; + + err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); + if (err_mask) { + ata_dev_err(dev, "Check power mode failed (err_mask=0x%x)\n", + err_mask); + /* + * Assume we are in standby mode so that we always force a + * spinup in ata_dev_power_set_active(). + */ + return false; + } + + ata_dev_dbg(dev, "Power mode: 0x%02x\n", tf.nsect); + + /* Active or idle */ + return tf.nsect == 0xff; +} + /** * ata_dev_power_set_standby - Set a device power mode to standby * @dev: target device @@ -2017,6 +2044,11 @@ void ata_dev_power_set_standby(struct ata_device *dev) struct ata_taskfile tf; unsigned int err_mask; + /* If the device is already sleeping or in standby, do nothing. */ + if ((dev->flags & ATA_DFLAG_SLEEPING) || + !ata_dev_power_is_active(dev)) + return; + /* * Some odd clown BIOSes issue spindown on power off (ACPI S4 or S5) * causing some drives to spin up and down again. For these, do nothing @@ -2042,33 +2074,6 @@ void ata_dev_power_set_standby(struct ata_device *dev) err_mask); } -static bool ata_dev_power_is_active(struct ata_device *dev) -{ - struct ata_taskfile tf; - unsigned int err_mask; - - ata_tf_init(dev, &tf); - tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR; - tf.protocol = ATA_PROT_NODATA; - tf.command = ATA_CMD_CHK_POWER; - - err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); - if (err_mask) { - ata_dev_err(dev, "Check power mode failed (err_mask=0x%x)\n", - err_mask); - /* - * Assume we are in standby mode so that we always force a - * spinup in ata_dev_power_set_active(). - */ - return false; - } - - ata_dev_dbg(dev, "Power mode: 0x%02x\n", tf.nsect); - - /* Active or idle */ - return tf.nsect == 0xff; -} - /** * ata_dev_power_set_active - Set a device power mode to active * @dev: target device diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index 50d8ce20ae5b..9fb1575f8d88 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -2550,14 +2550,12 @@ static int fore200e_sba_probe(struct platform_device *op) return 0; } -static int fore200e_sba_remove(struct platform_device *op) +static void fore200e_sba_remove(struct platform_device *op) { struct fore200e *fore200e = dev_get_drvdata(&op->dev); fore200e_shutdown(fore200e); kfree(fore200e); - - return 0; } static const struct of_device_id fore200e_sba_match[] = { @@ -2574,7 +2572,7 @@ static struct platform_driver fore200e_sba_driver = { .of_match_table = fore200e_sba_match, }, .probe = fore200e_sba_probe, - .remove = fore200e_sba_remove, + .remove_new = fore200e_sba_remove, }; #endif diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index fdb0fae88d1c..b40b32fa7f1c 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -152,7 +152,7 @@ static int qca_send_patch_config_cmd(struct hci_dev *hdev) bt_dev_dbg(hdev, "QCA Patch config"); skb = __hci_cmd_sync_ev(hdev, EDL_PATCH_CMD_OPCODE, sizeof(cmd), - cmd, HCI_EV_VENDOR, HCI_INIT_TIMEOUT); + cmd, 0, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { err = PTR_ERR(skb); bt_dev_err(hdev, "Sending QCA Patch config failed (%d)", err); diff --git a/drivers/bluetooth/hci_bcm4377.c b/drivers/bluetooth/hci_bcm4377.c index a61757835695..9a7243d5db71 100644 --- a/drivers/bluetooth/hci_bcm4377.c +++ b/drivers/bluetooth/hci_bcm4377.c @@ -1417,7 +1417,7 @@ static int bcm4377_check_bdaddr(struct bcm4377_data *bcm4377) bda = (struct hci_rp_read_bd_addr *)skb->data; if (!bcm4377_is_valid_bdaddr(bcm4377, &bda->bdaddr)) - set_bit(HCI_QUIRK_INVALID_BDADDR, &bcm4377->hdev->quirks); + set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &bcm4377->hdev->quirks); kfree_skb(skb); return 0; @@ -2368,7 +2368,6 @@ static int bcm4377_probe(struct pci_dev *pdev, const struct pci_device_id *id) hdev->set_bdaddr = bcm4377_hci_set_bdaddr; hdev->setup = bcm4377_hci_setup; - set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); if (bcm4377->hw->broken_mws_transport_config) set_bit(HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG, &hdev->quirks); if (bcm4377->hw->broken_ext_scan) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 94b8c406f0c0..edd2a81b4d5e 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -7,6 +7,7 @@ * * Copyright (C) 2007 Texas Instruments, Inc. * Copyright (c) 2010, 2012, 2018 The Linux Foundation. All rights reserved. + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. * * Acknowledgements: * This file is based on hci_ll.c, which was... @@ -1806,13 +1807,12 @@ static int qca_power_on(struct hci_dev *hdev) static void hci_coredump_qca(struct hci_dev *hdev) { + int err; static const u8 param[] = { 0x26 }; - struct sk_buff *skb; - skb = __hci_cmd_sync(hdev, 0xfc0c, 1, param, HCI_CMD_TIMEOUT); - if (IS_ERR(skb)) - bt_dev_err(hdev, "%s: trigger crash failed (%ld)", __func__, PTR_ERR(skb)); - kfree_skb(skb); + err = __hci_cmd_send(hdev, 0xfc0c, 1, param); + if (err < 0) + bt_dev_err(hdev, "%s: trigger crash failed (%d)", __func__, err); } static int qca_get_data_path_id(struct hci_dev *hdev, __u8 *data_path_id) @@ -1904,7 +1904,17 @@ retry: case QCA_WCN6750: case QCA_WCN6855: case QCA_WCN7850: - set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); + + /* Set BDA quirk bit for reading BDA value from fwnode property + * only if that property exist in DT. + */ + if (fwnode_property_present(dev_fwnode(hdev->dev.parent), "local-bd-address")) { + set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); + bt_dev_info(hdev, "setting quirk bit to read BDA from fwnode later"); + } else { + bt_dev_dbg(hdev, "local-bd-address` is not present in the devicetree so not setting quirk bit for BDA"); + } + hci_set_aosp_capable(hdev); ret = qca_read_soc_version(hdev, &ver, soc_type); diff --git a/drivers/bus/imx-weim.c b/drivers/bus/imx-weim.c index 6b5da73c8541..837bf9d51c6e 100644 --- a/drivers/bus/imx-weim.c +++ b/drivers/bus/imx-weim.c @@ -120,7 +120,7 @@ static int imx_weim_gpr_setup(struct platform_device *pdev) i++; } - if (i == 0 || i % 4) + if (i == 0) goto err; for (i = 0; i < ARRAY_SIZE(gprvals); i++) { diff --git a/drivers/cache/ax45mp_cache.c b/drivers/cache/ax45mp_cache.c index 57186c58dc84..1d7dd3d2c101 100644 --- a/drivers/cache/ax45mp_cache.c +++ b/drivers/cache/ax45mp_cache.c @@ -129,8 +129,12 @@ static void ax45mp_dma_cache_wback(phys_addr_t paddr, size_t size) unsigned long line_size; unsigned long flags; + if (unlikely(start == end)) + return; + line_size = ax45mp_priv.ax45mp_cache_line_size; start = start & (~(line_size - 1)); + end = ((end + line_size - 1) & (~(line_size - 1))); local_irq_save(flags); ax45mp_cpu_dcache_wb_range(start, end); local_irq_restore(flags); diff --git a/drivers/clk/samsung/clk-gs101.c b/drivers/clk/samsung/clk-gs101.c index 0964bb11657f..782993951fff 100644 --- a/drivers/clk/samsung/clk-gs101.c +++ b/drivers/clk/samsung/clk-gs101.c @@ -2475,7 +2475,7 @@ static const struct samsung_cmu_info misc_cmu_info __initconst = { .nr_clk_ids = CLKS_NR_MISC, .clk_regs = misc_clk_regs, .nr_clk_regs = ARRAY_SIZE(misc_clk_regs), - .clk_name = "dout_cmu_misc_bus", + .clk_name = "bus", }; /* ---- platform_driver ----------------------------------------------------- */ diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index ca94e60e705a..79619227ea51 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2987,6 +2987,9 @@ static void intel_cpufreq_adjust_perf(unsigned int cpunum, if (min_pstate < cpu->min_perf_ratio) min_pstate = cpu->min_perf_ratio; + if (min_pstate > cpu->max_perf_ratio) + min_pstate = cpu->max_perf_ratio; + max_pstate = min(cap_pstate, cpu->max_perf_ratio); if (max_pstate < min_pstate) max_pstate = min_pstate; diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c index 1262a7773ef3..de50c00ba218 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c @@ -299,22 +299,6 @@ theend: return err; } -static void sun8i_ce_cipher_run(struct crypto_engine *engine, void *areq) -{ - struct skcipher_request *breq = container_of(areq, struct skcipher_request, base); - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(breq); - struct sun8i_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm); - struct sun8i_ce_dev *ce = op->ce; - struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(breq); - int flow, err; - - flow = rctx->flow; - err = sun8i_ce_run_task(ce, flow, crypto_tfm_alg_name(breq->base.tfm)); - local_bh_disable(); - crypto_finalize_skcipher_request(engine, breq, err); - local_bh_enable(); -} - static void sun8i_ce_cipher_unprepare(struct crypto_engine *engine, void *async_req) { @@ -360,6 +344,23 @@ static void sun8i_ce_cipher_unprepare(struct crypto_engine *engine, dma_unmap_single(ce->dev, rctx->addr_key, op->keylen, DMA_TO_DEVICE); } +static void sun8i_ce_cipher_run(struct crypto_engine *engine, void *areq) +{ + struct skcipher_request *breq = container_of(areq, struct skcipher_request, base); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(breq); + struct sun8i_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm); + struct sun8i_ce_dev *ce = op->ce; + struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(breq); + int flow, err; + + flow = rctx->flow; + err = sun8i_ce_run_task(ce, flow, crypto_tfm_alg_name(breq->base.tfm)); + sun8i_ce_cipher_unprepare(engine, areq); + local_bh_disable(); + crypto_finalize_skcipher_request(engine, breq, err); + local_bh_enable(); +} + int sun8i_ce_cipher_do_one(struct crypto_engine *engine, void *areq) { int err = sun8i_ce_cipher_prepare(engine, areq); @@ -368,7 +369,6 @@ int sun8i_ce_cipher_do_one(struct crypto_engine *engine, void *areq) return err; sun8i_ce_cipher_run(engine, areq); - sun8i_ce_cipher_unprepare(engine, areq); return 0; } diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c index 1b13b4aa16ec..a235e6c300f1 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c +++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c @@ -332,12 +332,12 @@ static int rk_hash_run(struct crypto_engine *engine, void *breq) theend: pm_runtime_put_autosuspend(rkc->dev); + rk_hash_unprepare(engine, breq); + local_bh_disable(); crypto_finalize_hash_request(engine, breq, err); local_bh_enable(); - rk_hash_unprepare(engine, breq); - return 0; } diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index dcf2b39e1048..1a3e6aafbdcc 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -316,31 +316,27 @@ static const struct cxl_root_ops acpi_root_ops = { .qos_class = cxl_acpi_qos_class, }; -static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, - const unsigned long end) +static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, + struct cxl_cfmws_context *ctx) { int target_map[CXL_DECODER_MAX_INTERLEAVE]; - struct cxl_cfmws_context *ctx = arg; struct cxl_port *root_port = ctx->root_port; struct resource *cxl_res = ctx->cxl_res; struct cxl_cxims_context cxims_ctx; struct cxl_root_decoder *cxlrd; struct device *dev = ctx->dev; - struct acpi_cedt_cfmws *cfmws; cxl_calc_hb_fn cxl_calc_hb; struct cxl_decoder *cxld; unsigned int ways, i, ig; struct resource *res; int rc; - cfmws = (struct acpi_cedt_cfmws *) header; - rc = cxl_acpi_cfmws_verify(dev, cfmws); if (rc) { dev_err(dev, "CFMWS range %#llx-%#llx not registered\n", cfmws->base_hpa, cfmws->base_hpa + cfmws->window_size - 1); - return 0; + return rc; } rc = eiw_to_ways(cfmws->interleave_ways, &ways); @@ -376,7 +372,7 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, cxlrd = cxl_root_decoder_alloc(root_port, ways, cxl_calc_hb); if (IS_ERR(cxlrd)) - return 0; + return PTR_ERR(cxlrd); cxld = &cxlrd->cxlsd.cxld; cxld->flags = cfmws_to_decoder_flags(cfmws->restrictions); @@ -420,16 +416,7 @@ err_xormap: put_device(&cxld->dev); else rc = cxl_decoder_autoremove(dev, cxld); - if (rc) { - dev_err(dev, "Failed to add decode range: %pr", res); - return rc; - } - dev_dbg(dev, "add: %s node: %d range [%#llx - %#llx]\n", - dev_name(&cxld->dev), - phys_to_target_node(cxld->hpa_range.start), - cxld->hpa_range.start, cxld->hpa_range.end); - - return 0; + return rc; err_insert: kfree(res->name); @@ -438,6 +425,29 @@ err_name: return -ENOMEM; } +static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, + const unsigned long end) +{ + struct acpi_cedt_cfmws *cfmws = (struct acpi_cedt_cfmws *)header; + struct cxl_cfmws_context *ctx = arg; + struct device *dev = ctx->dev; + int rc; + + rc = __cxl_parse_cfmws(cfmws, ctx); + if (rc) + dev_err(dev, + "Failed to add decode range: [%#llx - %#llx] (%d)\n", + cfmws->base_hpa, + cfmws->base_hpa + cfmws->window_size - 1, rc); + else + dev_dbg(dev, "decode range: node: %d range [%#llx - %#llx]\n", + phys_to_target_node(cfmws->base_hpa), cfmws->base_hpa, + cfmws->base_hpa + cfmws->window_size - 1); + + /* never fail cxl_acpi load for a single window failure */ + return 0; +} + __mock struct acpi_device *to_cxl_host_bridge(struct device *host, struct device *dev) { diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index 6fe11546889f..08fd0baea7a0 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -210,19 +210,12 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port, return 0; } -static void add_perf_entry(struct device *dev, struct dsmas_entry *dent, - struct list_head *list) +static void update_perf_entry(struct device *dev, struct dsmas_entry *dent, + struct cxl_dpa_perf *dpa_perf) { - struct cxl_dpa_perf *dpa_perf; - - dpa_perf = kzalloc(sizeof(*dpa_perf), GFP_KERNEL); - if (!dpa_perf) - return; - dpa_perf->dpa_range = dent->dpa_range; dpa_perf->coord = dent->coord; dpa_perf->qos_class = dent->qos_class; - list_add_tail(&dpa_perf->list, list); dev_dbg(dev, "DSMAS: dpa: %#llx qos: %d read_bw: %d write_bw %d read_lat: %d write_lat: %d\n", dent->dpa_range.start, dpa_perf->qos_class, @@ -230,20 +223,6 @@ static void add_perf_entry(struct device *dev, struct dsmas_entry *dent, dent->coord.read_latency, dent->coord.write_latency); } -static void free_perf_ents(void *data) -{ - struct cxl_memdev_state *mds = data; - struct cxl_dpa_perf *dpa_perf, *n; - LIST_HEAD(discard); - - list_splice_tail_init(&mds->ram_perf_list, &discard); - list_splice_tail_init(&mds->pmem_perf_list, &discard); - list_for_each_entry_safe(dpa_perf, n, &discard, list) { - list_del(&dpa_perf->list); - kfree(dpa_perf); - } -} - static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds, struct xarray *dsmas_xa) { @@ -263,16 +242,14 @@ static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds, xa_for_each(dsmas_xa, index, dent) { if (resource_size(&cxlds->ram_res) && range_contains(&ram_range, &dent->dpa_range)) - add_perf_entry(dev, dent, &mds->ram_perf_list); + update_perf_entry(dev, dent, &mds->ram_perf); else if (resource_size(&cxlds->pmem_res) && range_contains(&pmem_range, &dent->dpa_range)) - add_perf_entry(dev, dent, &mds->pmem_perf_list); + update_perf_entry(dev, dent, &mds->pmem_perf); else dev_dbg(dev, "no partition for dsmas dpa: %#llx\n", dent->dpa_range.start); } - - devm_add_action_or_reset(&cxlds->cxlmd->dev, free_perf_ents, mds); } static int match_cxlrd_qos_class(struct device *dev, void *data) @@ -293,24 +270,24 @@ static int match_cxlrd_qos_class(struct device *dev, void *data) return 0; } -static void cxl_qos_match(struct cxl_port *root_port, - struct list_head *work_list, - struct list_head *discard_list) +static void reset_dpa_perf(struct cxl_dpa_perf *dpa_perf) { - struct cxl_dpa_perf *dpa_perf, *n; + *dpa_perf = (struct cxl_dpa_perf) { + .qos_class = CXL_QOS_CLASS_INVALID, + }; +} - list_for_each_entry_safe(dpa_perf, n, work_list, list) { - int rc; +static bool cxl_qos_match(struct cxl_port *root_port, + struct cxl_dpa_perf *dpa_perf) +{ + if (dpa_perf->qos_class == CXL_QOS_CLASS_INVALID) + return false; - if (dpa_perf->qos_class == CXL_QOS_CLASS_INVALID) - return; + if (!device_for_each_child(&root_port->dev, &dpa_perf->qos_class, + match_cxlrd_qos_class)) + return false; - rc = device_for_each_child(&root_port->dev, - (void *)&dpa_perf->qos_class, - match_cxlrd_qos_class); - if (!rc) - list_move_tail(&dpa_perf->list, discard_list); - } + return true; } static int match_cxlrd_hb(struct device *dev, void *data) @@ -334,23 +311,10 @@ static int match_cxlrd_hb(struct device *dev, void *data) return 0; } -static void discard_dpa_perf(struct list_head *list) -{ - struct cxl_dpa_perf *dpa_perf, *n; - - list_for_each_entry_safe(dpa_perf, n, list, list) { - list_del(&dpa_perf->list); - kfree(dpa_perf); - } -} -DEFINE_FREE(dpa_perf, struct list_head *, if (!list_empty(_T)) discard_dpa_perf(_T)) - static int cxl_qos_class_verify(struct cxl_memdev *cxlmd) { struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); - LIST_HEAD(__discard); - struct list_head *discard __free(dpa_perf) = &__discard; struct cxl_port *root_port; int rc; @@ -363,16 +327,17 @@ static int cxl_qos_class_verify(struct cxl_memdev *cxlmd) root_port = &cxl_root->port; /* Check that the QTG IDs are all sane between end device and root decoders */ - cxl_qos_match(root_port, &mds->ram_perf_list, discard); - cxl_qos_match(root_port, &mds->pmem_perf_list, discard); + if (!cxl_qos_match(root_port, &mds->ram_perf)) + reset_dpa_perf(&mds->ram_perf); + if (!cxl_qos_match(root_port, &mds->pmem_perf)) + reset_dpa_perf(&mds->pmem_perf); /* Check to make sure that the device's host bridge is under a root decoder */ rc = device_for_each_child(&root_port->dev, - (void *)cxlmd->endpoint->host_bridge, - match_cxlrd_hb); + cxlmd->endpoint->host_bridge, match_cxlrd_hb); if (!rc) { - list_splice_tail_init(&mds->ram_perf_list, discard); - list_splice_tail_init(&mds->pmem_perf_list, discard); + reset_dpa_perf(&mds->ram_perf); + reset_dpa_perf(&mds->pmem_perf); } return rc; @@ -417,6 +382,7 @@ void cxl_endpoint_parse_cdat(struct cxl_port *port) cxl_memdev_set_qos_class(cxlds, dsmas_xa); cxl_qos_class_verify(cxlmd); + cxl_memdev_update_perf(cxlmd); } EXPORT_SYMBOL_NS_GPL(cxl_endpoint_parse_cdat, CXL); diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 27166a411705..9adda4795eb7 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1391,8 +1391,8 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev) mds->cxlds.reg_map.host = dev; mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE; mds->cxlds.type = CXL_DEVTYPE_CLASSMEM; - INIT_LIST_HEAD(&mds->ram_perf_list); - INIT_LIST_HEAD(&mds->pmem_perf_list); + mds->ram_perf.qos_class = CXL_QOS_CLASS_INVALID; + mds->pmem_perf.qos_class = CXL_QOS_CLASS_INVALID; return mds; } diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index dae8802ecdb0..d4e259f3a7e9 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -447,13 +447,41 @@ static struct attribute *cxl_memdev_attributes[] = { NULL, }; +static ssize_t pmem_qos_class_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + + return sysfs_emit(buf, "%d\n", mds->pmem_perf.qos_class); +} + +static struct device_attribute dev_attr_pmem_qos_class = + __ATTR(qos_class, 0444, pmem_qos_class_show, NULL); + static struct attribute *cxl_memdev_pmem_attributes[] = { &dev_attr_pmem_size.attr, + &dev_attr_pmem_qos_class.attr, NULL, }; +static ssize_t ram_qos_class_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + + return sysfs_emit(buf, "%d\n", mds->ram_perf.qos_class); +} + +static struct device_attribute dev_attr_ram_qos_class = + __ATTR(qos_class, 0444, ram_qos_class_show, NULL); + static struct attribute *cxl_memdev_ram_attributes[] = { &dev_attr_ram_size.attr, + &dev_attr_ram_qos_class.attr, NULL, }; @@ -477,14 +505,42 @@ static struct attribute_group cxl_memdev_attribute_group = { .is_visible = cxl_memdev_visible, }; +static umode_t cxl_ram_visible(struct kobject *kobj, struct attribute *a, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + + if (a == &dev_attr_ram_qos_class.attr) + if (mds->ram_perf.qos_class == CXL_QOS_CLASS_INVALID) + return 0; + + return a->mode; +} + static struct attribute_group cxl_memdev_ram_attribute_group = { .name = "ram", .attrs = cxl_memdev_ram_attributes, + .is_visible = cxl_ram_visible, }; +static umode_t cxl_pmem_visible(struct kobject *kobj, struct attribute *a, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + + if (a == &dev_attr_pmem_qos_class.attr) + if (mds->pmem_perf.qos_class == CXL_QOS_CLASS_INVALID) + return 0; + + return a->mode; +} + static struct attribute_group cxl_memdev_pmem_attribute_group = { .name = "pmem", .attrs = cxl_memdev_pmem_attributes, + .is_visible = cxl_pmem_visible, }; static umode_t cxl_memdev_security_visible(struct kobject *kobj, @@ -519,6 +575,13 @@ static const struct attribute_group *cxl_memdev_attribute_groups[] = { NULL, }; +void cxl_memdev_update_perf(struct cxl_memdev *cxlmd) +{ + sysfs_update_group(&cxlmd->dev.kobj, &cxl_memdev_ram_attribute_group); + sysfs_update_group(&cxlmd->dev.kobj, &cxl_memdev_pmem_attribute_group); +} +EXPORT_SYMBOL_NS_GPL(cxl_memdev_update_perf, CXL); + static const struct device_type cxl_memdev_type = { .name = "cxl_memdev", .release = cxl_memdev_release, diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 6c9c8d92f8f7..e9e6c81ce034 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -477,9 +477,9 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, allowed++; } - if (!allowed) { - cxl_set_mem_enable(cxlds, 0); - info->mem_enabled = 0; + if (!allowed && info->mem_enabled) { + dev_err(dev, "Range register decodes outside platform defined CXL ranges.\n"); + return -ENXIO; } /* @@ -932,11 +932,21 @@ static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { } void cxl_cor_error_detected(struct pci_dev *pdev) { struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + struct device *dev = &cxlds->cxlmd->dev; + + scoped_guard(device, dev) { + if (!dev->driver) { + dev_warn(&pdev->dev, + "%s: memdev disabled, abort error handling\n", + dev_name(dev)); + return; + } - if (cxlds->rcd) - cxl_handle_rdport_errors(cxlds); + if (cxlds->rcd) + cxl_handle_rdport_errors(cxlds); - cxl_handle_endpoint_cor_ras(cxlds); + cxl_handle_endpoint_cor_ras(cxlds); + } } EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL); @@ -948,16 +958,25 @@ pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, struct device *dev = &cxlmd->dev; bool ue; - if (cxlds->rcd) - cxl_handle_rdport_errors(cxlds); + scoped_guard(device, dev) { + if (!dev->driver) { + dev_warn(&pdev->dev, + "%s: memdev disabled, abort error handling\n", + dev_name(dev)); + return PCI_ERS_RESULT_DISCONNECT; + } + + if (cxlds->rcd) + cxl_handle_rdport_errors(cxlds); + /* + * A frozen channel indicates an impending reset which is fatal to + * CXL.mem operation, and will likely crash the system. On the off + * chance the situation is recoverable dump the status of the RAS + * capability registers and bounce the active state of the memdev. + */ + ue = cxl_handle_endpoint_ras(cxlds); + } - /* - * A frozen channel indicates an impending reset which is fatal to - * CXL.mem operation, and will likely crash the system. On the off - * chance the situation is recoverable dump the status of the RAS - * capability registers and bounce the active state of the memdev. - */ - ue = cxl_handle_endpoint_ras(cxlds); switch (state) { case pci_channel_io_normal: diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index ce0e2d82bb2b..4c7fd2d5cccb 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -730,12 +730,17 @@ static int match_auto_decoder(struct device *dev, void *data) return 0; } -static struct cxl_decoder *cxl_region_find_decoder(struct cxl_port *port, - struct cxl_region *cxlr) +static struct cxl_decoder * +cxl_region_find_decoder(struct cxl_port *port, + struct cxl_endpoint_decoder *cxled, + struct cxl_region *cxlr) { struct device *dev; int id = 0; + if (port == cxled_to_port(cxled)) + return &cxled->cxld; + if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) dev = device_find_child(&port->dev, &cxlr->params, match_auto_decoder); @@ -753,8 +758,31 @@ static struct cxl_decoder *cxl_region_find_decoder(struct cxl_port *port, return to_cxl_decoder(dev); } -static struct cxl_region_ref *alloc_region_ref(struct cxl_port *port, - struct cxl_region *cxlr) +static bool auto_order_ok(struct cxl_port *port, struct cxl_region *cxlr_iter, + struct cxl_decoder *cxld) +{ + struct cxl_region_ref *rr = cxl_rr_load(port, cxlr_iter); + struct cxl_decoder *cxld_iter = rr->decoder; + + /* + * Allow the out of order assembly of auto-discovered regions. + * Per CXL Spec 3.1 8.2.4.20.12 software must commit decoders + * in HPA order. Confirm that the decoder with the lesser HPA + * starting address has the lesser id. + */ + dev_dbg(&cxld->dev, "check for HPA violation %s:%d < %s:%d\n", + dev_name(&cxld->dev), cxld->id, + dev_name(&cxld_iter->dev), cxld_iter->id); + + if (cxld_iter->id > cxld->id) + return true; + + return false; +} + +static struct cxl_region_ref * +alloc_region_ref(struct cxl_port *port, struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled) { struct cxl_region_params *p = &cxlr->params; struct cxl_region_ref *cxl_rr, *iter; @@ -764,16 +792,21 @@ static struct cxl_region_ref *alloc_region_ref(struct cxl_port *port, xa_for_each(&port->regions, index, iter) { struct cxl_region_params *ip = &iter->region->params; - if (!ip->res) + if (!ip->res || ip->res->start < p->res->start) continue; - if (ip->res->start > p->res->start) { - dev_dbg(&cxlr->dev, - "%s: HPA order violation %s:%pr vs %pr\n", - dev_name(&port->dev), - dev_name(&iter->region->dev), ip->res, p->res); - return ERR_PTR(-EBUSY); + if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) { + struct cxl_decoder *cxld; + + cxld = cxl_region_find_decoder(port, cxled, cxlr); + if (auto_order_ok(port, iter->region, cxld)) + continue; } + dev_dbg(&cxlr->dev, "%s: HPA order violation %s:%pr vs %pr\n", + dev_name(&port->dev), + dev_name(&iter->region->dev), ip->res, p->res); + + return ERR_PTR(-EBUSY); } cxl_rr = kzalloc(sizeof(*cxl_rr), GFP_KERNEL); @@ -853,10 +886,7 @@ static int cxl_rr_alloc_decoder(struct cxl_port *port, struct cxl_region *cxlr, { struct cxl_decoder *cxld; - if (port == cxled_to_port(cxled)) - cxld = &cxled->cxld; - else - cxld = cxl_region_find_decoder(port, cxlr); + cxld = cxl_region_find_decoder(port, cxled, cxlr); if (!cxld) { dev_dbg(&cxlr->dev, "%s: no decoder available\n", dev_name(&port->dev)); @@ -953,7 +983,7 @@ static int cxl_port_attach_region(struct cxl_port *port, nr_targets_inc = true; } } else { - cxl_rr = alloc_region_ref(port, cxlr); + cxl_rr = alloc_region_ref(port, cxlr, cxled); if (IS_ERR(cxl_rr)) { dev_dbg(&cxlr->dev, "%s: failed to allocate region reference\n", diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index b6017c0c57b4..003feebab79b 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -880,6 +880,8 @@ void cxl_switch_parse_cdat(struct cxl_port *port); int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, struct access_coordinate *coord); +void cxl_memdev_update_perf(struct cxl_memdev *cxlmd); + /* * Unit test builds overrides this to __weak, find the 'strong' version * of these symbols in tools/testing/cxl/. diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 5303d6942b88..20fb3b35e89e 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -395,13 +395,11 @@ enum cxl_devtype { /** * struct cxl_dpa_perf - DPA performance property entry - * @list - list entry * @dpa_range - range for DPA address * @coord - QoS performance data (i.e. latency, bandwidth) * @qos_class - QoS Class cookies */ struct cxl_dpa_perf { - struct list_head list; struct range dpa_range; struct access_coordinate coord; int qos_class; @@ -471,8 +469,8 @@ struct cxl_dev_state { * @security: security driver state info * @fw: firmware upload / activation state * @mbox_send: @dev specific transport for transmitting mailbox commands - * @ram_perf_list: performance data entries matched to RAM - * @pmem_perf_list: performance data entries matched to PMEM + * @ram_perf: performance data entry matched to RAM partition + * @pmem_perf: performance data entry matched to PMEM partition * * See CXL 3.0 8.2.9.8.2 Capacity Configuration and Label Storage for * details on capacity parameters. @@ -494,8 +492,8 @@ struct cxl_memdev_state { u64 next_volatile_bytes; u64 next_persistent_bytes; - struct list_head ram_perf_list; - struct list_head pmem_perf_list; + struct cxl_dpa_perf ram_perf; + struct cxl_dpa_perf pmem_perf; struct cxl_event_state event; struct cxl_poison_state poison; diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index c5c9d8e0d88d..0c79d9ce877c 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -215,52 +215,6 @@ static ssize_t trigger_poison_list_store(struct device *dev, } static DEVICE_ATTR_WO(trigger_poison_list); -static ssize_t ram_qos_class_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); - struct cxl_dpa_perf *dpa_perf; - - if (!dev->driver) - return -ENOENT; - - if (list_empty(&mds->ram_perf_list)) - return -ENOENT; - - dpa_perf = list_first_entry(&mds->ram_perf_list, struct cxl_dpa_perf, - list); - - return sysfs_emit(buf, "%d\n", dpa_perf->qos_class); -} - -static struct device_attribute dev_attr_ram_qos_class = - __ATTR(qos_class, 0444, ram_qos_class_show, NULL); - -static ssize_t pmem_qos_class_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); - struct cxl_dpa_perf *dpa_perf; - - if (!dev->driver) - return -ENOENT; - - if (list_empty(&mds->pmem_perf_list)) - return -ENOENT; - - dpa_perf = list_first_entry(&mds->pmem_perf_list, struct cxl_dpa_perf, - list); - - return sysfs_emit(buf, "%d\n", dpa_perf->qos_class); -} - -static struct device_attribute dev_attr_pmem_qos_class = - __ATTR(qos_class, 0444, pmem_qos_class_show, NULL); - static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n) { struct device *dev = kobj_to_dev(kobj); @@ -272,21 +226,11 @@ static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n) mds->poison.enabled_cmds)) return 0; - if (a == &dev_attr_pmem_qos_class.attr) - if (list_empty(&mds->pmem_perf_list)) - return 0; - - if (a == &dev_attr_ram_qos_class.attr) - if (list_empty(&mds->ram_perf_list)) - return 0; - return a->mode; } static struct attribute *cxl_mem_attrs[] = { &dev_attr_trigger_poison_list.attr, - &dev_attr_ram_qos_class.attr, - &dev_attr_pmem_qos_class.attr, NULL }; diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 233e7c42c161..2ff361e756d6 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -974,61 +974,6 @@ static struct pci_driver cxl_pci_driver = { }, }; -#define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0) -static void cxl_cper_event_call(enum cxl_event_type ev_type, - struct cxl_cper_event_rec *rec) -{ - struct cper_cxl_event_devid *device_id = &rec->hdr.device_id; - struct pci_dev *pdev __free(pci_dev_put) = NULL; - enum cxl_event_log_type log_type; - struct cxl_dev_state *cxlds; - unsigned int devfn; - u32 hdr_flags; - - devfn = PCI_DEVFN(device_id->device_num, device_id->func_num); - pdev = pci_get_domain_bus_and_slot(device_id->segment_num, - device_id->bus_num, devfn); - if (!pdev) - return; - - guard(pci_dev)(pdev); - if (pdev->driver != &cxl_pci_driver) - return; - - cxlds = pci_get_drvdata(pdev); - if (!cxlds) - return; - - /* Fabricate a log type */ - hdr_flags = get_unaligned_le24(rec->event.generic.hdr.flags); - log_type = FIELD_GET(CXL_EVENT_HDR_FLAGS_REC_SEVERITY, hdr_flags); - - cxl_event_trace_record(cxlds->cxlmd, log_type, ev_type, - &uuid_null, &rec->event); -} - -static int __init cxl_pci_driver_init(void) -{ - int rc; - - rc = cxl_cper_register_callback(cxl_cper_event_call); - if (rc) - return rc; - - rc = pci_register_driver(&cxl_pci_driver); - if (rc) - cxl_cper_unregister_callback(cxl_cper_event_call); - - return rc; -} - -static void __exit cxl_pci_driver_exit(void) -{ - pci_unregister_driver(&cxl_pci_driver); - cxl_cper_unregister_callback(cxl_cper_event_call); -} - -module_init(cxl_pci_driver_init); -module_exit(cxl_pci_driver_exit); +module_pci_driver(cxl_pci_driver); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(CXL); diff --git a/drivers/dma/dw-edma/dw-edma-v0-core.c b/drivers/dma/dw-edma/dw-edma-v0-core.c index b38786f0ad79..b75fdaffad9a 100644 --- a/drivers/dma/dw-edma/dw-edma-v0-core.c +++ b/drivers/dma/dw-edma/dw-edma-v0-core.c @@ -346,6 +346,20 @@ static void dw_edma_v0_core_write_chunk(struct dw_edma_chunk *chunk) dw_edma_v0_write_ll_link(chunk, i, control, chunk->ll_region.paddr); } +static void dw_edma_v0_sync_ll_data(struct dw_edma_chunk *chunk) +{ + /* + * In case of remote eDMA engine setup, the DW PCIe RP/EP internal + * configuration registers and application memory are normally accessed + * over different buses. Ensure LL-data reaches the memory before the + * doorbell register is toggled by issuing the dummy-read from the remote + * LL memory in a hope that the MRd TLP will return only after the + * last MWr TLP is completed + */ + if (!(chunk->chan->dw->chip->flags & DW_EDMA_CHIP_LOCAL)) + readl(chunk->ll_region.vaddr.io); +} + static void dw_edma_v0_core_start(struct dw_edma_chunk *chunk, bool first) { struct dw_edma_chan *chan = chunk->chan; @@ -412,6 +426,9 @@ static void dw_edma_v0_core_start(struct dw_edma_chunk *chunk, bool first) SET_CH_32(dw, chan->dir, chan->id, llp.msb, upper_32_bits(chunk->ll_region.paddr)); } + + dw_edma_v0_sync_ll_data(chunk); + /* Doorbell */ SET_RW_32(dw, chan->dir, doorbell, FIELD_PREP(EDMA_V0_DOORBELL_CH_MASK, chan->id)); diff --git a/drivers/dma/dw-edma/dw-hdma-v0-core.c b/drivers/dma/dw-edma/dw-hdma-v0-core.c index 00b735a0202a..10e8f0715114 100644 --- a/drivers/dma/dw-edma/dw-hdma-v0-core.c +++ b/drivers/dma/dw-edma/dw-hdma-v0-core.c @@ -65,18 +65,12 @@ static void dw_hdma_v0_core_off(struct dw_edma *dw) static u16 dw_hdma_v0_core_ch_count(struct dw_edma *dw, enum dw_edma_dir dir) { - u32 num_ch = 0; - int id; - - for (id = 0; id < HDMA_V0_MAX_NR_CH; id++) { - if (GET_CH_32(dw, id, dir, ch_en) & BIT(0)) - num_ch++; - } - - if (num_ch > HDMA_V0_MAX_NR_CH) - num_ch = HDMA_V0_MAX_NR_CH; - - return (u16)num_ch; + /* + * The HDMA IP have no way to know the number of hardware channels + * available, we set it to maximum channels and let the platform + * set the right number of channels. + */ + return HDMA_V0_MAX_NR_CH; } static enum dma_status dw_hdma_v0_core_ch_status(struct dw_edma_chan *chan) @@ -228,6 +222,20 @@ static void dw_hdma_v0_core_write_chunk(struct dw_edma_chunk *chunk) dw_hdma_v0_write_ll_link(chunk, i, control, chunk->ll_region.paddr); } +static void dw_hdma_v0_sync_ll_data(struct dw_edma_chunk *chunk) +{ + /* + * In case of remote HDMA engine setup, the DW PCIe RP/EP internal + * configuration registers and application memory are normally accessed + * over different buses. Ensure LL-data reaches the memory before the + * doorbell register is toggled by issuing the dummy-read from the remote + * LL memory in a hope that the MRd TLP will return only after the + * last MWr TLP is completed + */ + if (!(chunk->chan->dw->chip->flags & DW_EDMA_CHIP_LOCAL)) + readl(chunk->ll_region.vaddr.io); +} + static void dw_hdma_v0_core_start(struct dw_edma_chunk *chunk, bool first) { struct dw_edma_chan *chan = chunk->chan; @@ -242,7 +250,9 @@ static void dw_hdma_v0_core_start(struct dw_edma_chunk *chunk, bool first) /* Interrupt enable&unmask - done, abort */ tmp = GET_CH_32(dw, chan->dir, chan->id, int_setup) | HDMA_V0_STOP_INT_MASK | HDMA_V0_ABORT_INT_MASK | - HDMA_V0_LOCAL_STOP_INT_EN | HDMA_V0_LOCAL_STOP_INT_EN; + HDMA_V0_LOCAL_STOP_INT_EN | HDMA_V0_LOCAL_ABORT_INT_EN; + if (!(dw->chip->flags & DW_EDMA_CHIP_LOCAL)) + tmp |= HDMA_V0_REMOTE_STOP_INT_EN | HDMA_V0_REMOTE_ABORT_INT_EN; SET_CH_32(dw, chan->dir, chan->id, int_setup, tmp); /* Channel control */ SET_CH_32(dw, chan->dir, chan->id, control1, HDMA_V0_LINKLIST_EN); @@ -256,6 +266,9 @@ static void dw_hdma_v0_core_start(struct dw_edma_chunk *chunk, bool first) /* Set consumer cycle */ SET_CH_32(dw, chan->dir, chan->id, cycle_sync, HDMA_V0_CONSUMER_CYCLE_STAT | HDMA_V0_CONSUMER_CYCLE_BIT); + + dw_hdma_v0_sync_ll_data(chunk); + /* Doorbell */ SET_CH_32(dw, chan->dir, chan->id, doorbell, HDMA_V0_DOORBELL_START); } diff --git a/drivers/dma/dw-edma/dw-hdma-v0-regs.h b/drivers/dma/dw-edma/dw-hdma-v0-regs.h index a974abdf8aaf..eab5fd7177e5 100644 --- a/drivers/dma/dw-edma/dw-hdma-v0-regs.h +++ b/drivers/dma/dw-edma/dw-hdma-v0-regs.h @@ -15,7 +15,7 @@ #define HDMA_V0_LOCAL_ABORT_INT_EN BIT(6) #define HDMA_V0_REMOTE_ABORT_INT_EN BIT(5) #define HDMA_V0_LOCAL_STOP_INT_EN BIT(4) -#define HDMA_V0_REMOTEL_STOP_INT_EN BIT(3) +#define HDMA_V0_REMOTE_STOP_INT_EN BIT(3) #define HDMA_V0_ABORT_INT_MASK BIT(2) #define HDMA_V0_STOP_INT_MASK BIT(0) #define HDMA_V0_LINKLIST_EN BIT(0) diff --git a/drivers/dma/fsl-edma-common.c b/drivers/dma/fsl-edma-common.c index b53f46245c37..793f1a7ad5e3 100644 --- a/drivers/dma/fsl-edma-common.c +++ b/drivers/dma/fsl-edma-common.c @@ -503,7 +503,7 @@ void fsl_edma_fill_tcd(struct fsl_edma_chan *fsl_chan, if (fsl_chan->is_multi_fifo) { /* set mloff to support multiple fifo */ burst = cfg->direction == DMA_DEV_TO_MEM ? - cfg->src_addr_width : cfg->dst_addr_width; + cfg->src_maxburst : cfg->dst_maxburst; nbytes |= EDMA_V3_TCD_NBYTES_MLOFF(-(burst * 4)); /* enable DMLOE/SMLOE */ if (cfg->direction == DMA_MEM_TO_DEV) { diff --git a/drivers/dma/fsl-edma-common.h b/drivers/dma/fsl-edma-common.h index bb5221158a77..f5e216b157c7 100644 --- a/drivers/dma/fsl-edma-common.h +++ b/drivers/dma/fsl-edma-common.h @@ -30,8 +30,9 @@ #define EDMA_TCD_ATTR_SSIZE(x) (((x) & GENMASK(2, 0)) << 8) #define EDMA_TCD_ATTR_SMOD(x) (((x) & GENMASK(4, 0)) << 11) -#define EDMA_TCD_CITER_CITER(x) ((x) & GENMASK(14, 0)) -#define EDMA_TCD_BITER_BITER(x) ((x) & GENMASK(14, 0)) +#define EDMA_TCD_ITER_MASK GENMASK(14, 0) +#define EDMA_TCD_CITER_CITER(x) ((x) & EDMA_TCD_ITER_MASK) +#define EDMA_TCD_BITER_BITER(x) ((x) & EDMA_TCD_ITER_MASK) #define EDMA_TCD_CSR_START BIT(0) #define EDMA_TCD_CSR_INT_MAJOR BIT(1) diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c index 45cc419b1b4a..d36e28b9c767 100644 --- a/drivers/dma/fsl-edma-main.c +++ b/drivers/dma/fsl-edma-main.c @@ -10,6 +10,7 @@ */ #include <dt-bindings/dma/fsl-edma.h> +#include <linux/bitfield.h> #include <linux/module.h> #include <linux/interrupt.h> #include <linux/clk.h> @@ -582,7 +583,8 @@ static int fsl_edma_probe(struct platform_device *pdev) DMAENGINE_ALIGN_32_BYTES; /* Per worst case 'nbytes = 1' take CITER as the max_seg_size */ - dma_set_max_seg_size(fsl_edma->dma_dev.dev, 0x3fff); + dma_set_max_seg_size(fsl_edma->dma_dev.dev, + FIELD_GET(EDMA_TCD_ITER_MASK, EDMA_TCD_ITER_MASK)); fsl_edma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c index f405c77060ad..5005e138fc23 100644 --- a/drivers/dma/fsl-qdma.c +++ b/drivers/dma/fsl-qdma.c @@ -109,6 +109,7 @@ #define FSL_QDMA_CMD_WTHROTL_OFFSET 20 #define FSL_QDMA_CMD_DSEN_OFFSET 19 #define FSL_QDMA_CMD_LWC_OFFSET 16 +#define FSL_QDMA_CMD_PF BIT(17) /* Field definition for Descriptor status */ #define QDMA_CCDF_STATUS_RTE BIT(5) @@ -160,6 +161,10 @@ struct fsl_qdma_format { u8 __reserved1[2]; u8 cfg8b_w1; } __packed; + struct { + __le32 __reserved2; + __le32 cmd; + } __packed; __le64 data; }; } __packed; @@ -354,7 +359,6 @@ static void fsl_qdma_free_chan_resources(struct dma_chan *chan) static void fsl_qdma_comp_fill_memcpy(struct fsl_qdma_comp *fsl_comp, dma_addr_t dst, dma_addr_t src, u32 len) { - u32 cmd; struct fsl_qdma_format *sdf, *ddf; struct fsl_qdma_format *ccdf, *csgf_desc, *csgf_src, *csgf_dest; @@ -383,14 +387,11 @@ static void fsl_qdma_comp_fill_memcpy(struct fsl_qdma_comp *fsl_comp, /* This entry is the last entry. */ qdma_csgf_set_f(csgf_dest, len); /* Descriptor Buffer */ - cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE << - FSL_QDMA_CMD_RWTTYPE_OFFSET); - sdf->data = QDMA_SDDF_CMD(cmd); - - cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE << - FSL_QDMA_CMD_RWTTYPE_OFFSET); - cmd |= cpu_to_le32(FSL_QDMA_CMD_LWC << FSL_QDMA_CMD_LWC_OFFSET); - ddf->data = QDMA_SDDF_CMD(cmd); + sdf->cmd = cpu_to_le32((FSL_QDMA_CMD_RWTTYPE << FSL_QDMA_CMD_RWTTYPE_OFFSET) | + FSL_QDMA_CMD_PF); + + ddf->cmd = cpu_to_le32((FSL_QDMA_CMD_RWTTYPE << FSL_QDMA_CMD_RWTTYPE_OFFSET) | + (FSL_QDMA_CMD_LWC << FSL_QDMA_CMD_LWC_OFFSET)); } /* @@ -624,7 +625,7 @@ static int fsl_qdma_halt(struct fsl_qdma_engine *fsl_qdma) static int fsl_qdma_queue_transfer_complete(struct fsl_qdma_engine *fsl_qdma, - void *block, + __iomem void *block, int id) { bool duplicate; @@ -1196,10 +1197,6 @@ static int fsl_qdma_probe(struct platform_device *pdev) if (!fsl_qdma->queue) return -ENOMEM; - ret = fsl_qdma_irq_init(pdev, fsl_qdma); - if (ret) - return ret; - fsl_qdma->irq_base = platform_get_irq_byname(pdev, "qdma-queue0"); if (fsl_qdma->irq_base < 0) return fsl_qdma->irq_base; @@ -1238,16 +1235,19 @@ static int fsl_qdma_probe(struct platform_device *pdev) platform_set_drvdata(pdev, fsl_qdma); - ret = dma_async_device_register(&fsl_qdma->dma_dev); + ret = fsl_qdma_reg_init(fsl_qdma); if (ret) { - dev_err(&pdev->dev, - "Can't register NXP Layerscape qDMA engine.\n"); + dev_err(&pdev->dev, "Can't Initialize the qDMA engine.\n"); return ret; } - ret = fsl_qdma_reg_init(fsl_qdma); + ret = fsl_qdma_irq_init(pdev, fsl_qdma); + if (ret) + return ret; + + ret = dma_async_device_register(&fsl_qdma->dma_dev); if (ret) { - dev_err(&pdev->dev, "Can't Initialize the qDMA engine.\n"); + dev_err(&pdev->dev, "Can't register NXP Layerscape qDMA engine.\n"); return ret; } diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 77f8885cf407..e5a94a93a3cc 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -345,7 +345,7 @@ static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid) spin_lock(&evl->lock); status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); t = status.tail; - h = evl->head; + h = status.head; size = evl->size; while (h != t) { diff --git a/drivers/dma/idxd/debugfs.c b/drivers/dma/idxd/debugfs.c index 9cfbd9b14c4c..f3f25ee676f3 100644 --- a/drivers/dma/idxd/debugfs.c +++ b/drivers/dma/idxd/debugfs.c @@ -68,9 +68,9 @@ static int debugfs_evl_show(struct seq_file *s, void *d) spin_lock(&evl->lock); - h = evl->head; evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); t = evl_status.tail; + h = evl_status.head; evl_size = evl->size; seq_printf(s, "Event Log head %u tail %u interrupt pending %u\n\n", diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 47de3f93ff1e..d0f5db6cf1ed 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -300,7 +300,6 @@ struct idxd_evl { unsigned int log_size; /* The number of entries in the event log. */ u16 size; - u16 head; unsigned long *bmap; bool batch_fail[IDXD_MAX_BATCH_IDENT]; }; diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 14df1f1347a8..4954adc6bb60 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -343,7 +343,9 @@ static void idxd_cleanup_internals(struct idxd_device *idxd) static int idxd_init_evl(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; + unsigned int evl_cache_size; struct idxd_evl *evl; + const char *idxd_name; if (idxd->hw.gen_cap.evl_support == 0) return 0; @@ -355,9 +357,16 @@ static int idxd_init_evl(struct idxd_device *idxd) spin_lock_init(&evl->lock); evl->size = IDXD_EVL_SIZE_MIN; - idxd->evl_cache = kmem_cache_create(dev_name(idxd_confdev(idxd)), - sizeof(struct idxd_evl_fault) + evl_ent_size(idxd), - 0, 0, NULL); + idxd_name = dev_name(idxd_confdev(idxd)); + evl_cache_size = sizeof(struct idxd_evl_fault) + evl_ent_size(idxd); + /* + * Since completion record in evl_cache will be copied to user + * when handling completion record page fault, need to create + * the cache suitable for user copy. + */ + idxd->evl_cache = kmem_cache_create_usercopy(idxd_name, evl_cache_size, + 0, 0, 0, evl_cache_size, + NULL); if (!idxd->evl_cache) { kfree(evl); return -ENOMEM; diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index c8a0aa874b11..348aa21389a9 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -367,9 +367,9 @@ static void process_evl_entries(struct idxd_device *idxd) /* Clear interrupt pending bit */ iowrite32(evl_status.bits_upper32, idxd->reg_base + IDXD_EVLSTATUS_OFFSET + sizeof(u32)); - h = evl->head; evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); t = evl_status.tail; + h = evl_status.head; size = idxd->evl->size; while (h != t) { @@ -378,7 +378,6 @@ static void process_evl_entries(struct idxd_device *idxd) h = (h + 1) % size; } - evl->head = h; evl_status.head = h; iowrite32(evl_status.bits_lower32, idxd->reg_base + IDXD_EVLSTATUS_OFFSET); spin_unlock(&evl->lock); diff --git a/drivers/dma/ptdma/ptdma-dmaengine.c b/drivers/dma/ptdma/ptdma-dmaengine.c index 1aa65e5de0f3..f79240734807 100644 --- a/drivers/dma/ptdma/ptdma-dmaengine.c +++ b/drivers/dma/ptdma/ptdma-dmaengine.c @@ -385,8 +385,6 @@ int pt_dmaengine_register(struct pt_device *pt) chan->vc.desc_free = pt_do_cleanup; vchan_init(&chan->vc, dma_dev); - dma_set_mask_and_coherent(pt->dev, DMA_BIT_MASK(64)); - ret = dma_async_device_register(dma_dev); if (ret) goto err_reg; diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c index 93c1bb7a6ef7..6b2a6236f5e3 100644 --- a/drivers/dpll/dpll_core.c +++ b/drivers/dpll/dpll_core.c @@ -510,6 +510,26 @@ err_pin_prop: return ERR_PTR(ret); } +static void dpll_netdev_pin_assign(struct net_device *dev, struct dpll_pin *dpll_pin) +{ + rtnl_lock(); + rcu_assign_pointer(dev->dpll_pin, dpll_pin); + rtnl_unlock(); +} + +void dpll_netdev_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin) +{ + WARN_ON(!dpll_pin); + dpll_netdev_pin_assign(dev, dpll_pin); +} +EXPORT_SYMBOL(dpll_netdev_pin_set); + +void dpll_netdev_pin_clear(struct net_device *dev) +{ + dpll_netdev_pin_assign(dev, NULL); +} +EXPORT_SYMBOL(dpll_netdev_pin_clear); + /** * dpll_pin_get - find existing or create new dpll pin * @clock_id: clock_id of creator @@ -566,7 +586,7 @@ void dpll_pin_put(struct dpll_pin *pin) xa_destroy(&pin->dpll_refs); xa_destroy(&pin->parent_refs); dpll_pin_prop_free(&pin->prop); - kfree(pin); + kfree_rcu(pin, rcu); } mutex_unlock(&dpll_lock); } diff --git a/drivers/dpll/dpll_core.h b/drivers/dpll/dpll_core.h index 717f715015c7..2b6d8ef1cdf3 100644 --- a/drivers/dpll/dpll_core.h +++ b/drivers/dpll/dpll_core.h @@ -47,6 +47,7 @@ struct dpll_device { * @prop: pin properties copied from the registerer * @rclk_dev_name: holds name of device when pin can recover clock from it * @refcount: refcount + * @rcu: rcu_head for kfree_rcu() **/ struct dpll_pin { u32 id; @@ -57,6 +58,7 @@ struct dpll_pin { struct xarray parent_refs; struct dpll_pin_properties prop; refcount_t refcount; + struct rcu_head rcu; }; /** diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c index 1419fd0d241c..98e6ad8528d3 100644 --- a/drivers/dpll/dpll_netlink.c +++ b/drivers/dpll/dpll_netlink.c @@ -8,6 +8,7 @@ */ #include <linux/module.h> #include <linux/kernel.h> +#include <linux/netdevice.h> #include <net/genetlink.h> #include "dpll_core.h" #include "dpll_netlink.h" @@ -48,18 +49,6 @@ dpll_msg_add_dev_parent_handle(struct sk_buff *msg, u32 id) } /** - * dpll_msg_pin_handle_size - get size of pin handle attribute for given pin - * @pin: pin pointer - * - * Return: byte size of pin handle attribute for given pin. - */ -size_t dpll_msg_pin_handle_size(struct dpll_pin *pin) -{ - return pin ? nla_total_size(4) : 0; /* DPLL_A_PIN_ID */ -} -EXPORT_SYMBOL_GPL(dpll_msg_pin_handle_size); - -/** * dpll_msg_add_pin_handle - attach pin handle attribute to a given message * @msg: pointer to sk_buff message to attach a pin handle * @pin: pin pointer @@ -68,7 +57,7 @@ EXPORT_SYMBOL_GPL(dpll_msg_pin_handle_size); * * 0 - success * * -EMSGSIZE - no space in message to attach pin handle */ -int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin) +static int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin) { if (!pin) return 0; @@ -76,7 +65,28 @@ int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin) return -EMSGSIZE; return 0; } -EXPORT_SYMBOL_GPL(dpll_msg_add_pin_handle); + +static struct dpll_pin *dpll_netdev_pin(const struct net_device *dev) +{ + return rcu_dereference_rtnl(dev->dpll_pin); +} + +/** + * dpll_netdev_pin_handle_size - get size of pin handle attribute of a netdev + * @dev: netdev from which to get the pin + * + * Return: byte size of pin handle attribute, or 0 if @dev has no pin. + */ +size_t dpll_netdev_pin_handle_size(const struct net_device *dev) +{ + return dpll_netdev_pin(dev) ? nla_total_size(4) : 0; /* DPLL_A_PIN_ID */ +} + +int dpll_netdev_add_pin_handle(struct sk_buff *msg, + const struct net_device *dev) +{ + return dpll_msg_add_pin_handle(msg, dpll_netdev_pin(dev)); +} static int dpll_msg_add_mode(struct sk_buff *msg, struct dpll_device *dpll, diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index 8aaa7fcb2630..401a77e3b5fa 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -500,7 +500,19 @@ static void bm_work(struct work_struct *work) fw_notice(card, "phy config: new root=%x, gap_count=%d\n", new_root_id, gap_count); fw_send_phy_config(card, new_root_id, generation, gap_count); - reset_bus(card, true); + /* + * Where possible, use a short bus reset to minimize + * disruption to isochronous transfers. But in the event + * of a gap count inconsistency, use a long bus reset. + * + * As noted in 1394a 8.4.6.2, nodes on a mixed 1394/1394a bus + * may set different gap counts after a bus reset. On a mixed + * 1394/1394a bus, a short bus reset can get doubled. Some + * nodes may treat the double reset as one bus reset and others + * may treat it as two, causing a gap count inconsistency + * again. Using a long bus reset prevents this. + */ + reset_bus(card, card->gap_count != 0); /* Will allocate broadcast channel after the reset. */ goto out; } diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c index 3e8d4b51a814..97bafb5f7038 100644 --- a/drivers/firmware/efi/capsule-loader.c +++ b/drivers/firmware/efi/capsule-loader.c @@ -292,7 +292,7 @@ static int efi_capsule_open(struct inode *inode, struct file *file) return -ENOMEM; } - cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL); + cap_info->phys = kzalloc(sizeof(phys_addr_t), GFP_KERNEL); if (!cap_info->phys) { kfree(cap_info->pages); kfree(cap_info); diff --git a/drivers/firmware/microchip/mpfs-auto-update.c b/drivers/firmware/microchip/mpfs-auto-update.c index 81f5f62e34fc..fbeeaee4ac85 100644 --- a/drivers/firmware/microchip/mpfs-auto-update.c +++ b/drivers/firmware/microchip/mpfs-auto-update.c @@ -167,7 +167,7 @@ static int mpfs_auto_update_verify_image(struct fw_upload *fw_uploader) u32 *response_msg; int ret; - response_msg = devm_kzalloc(priv->dev, AUTO_UPDATE_FEATURE_RESP_SIZE * sizeof(response_msg), + response_msg = devm_kzalloc(priv->dev, AUTO_UPDATE_FEATURE_RESP_SIZE * sizeof(*response_msg), GFP_KERNEL); if (!response_msg) return -ENOMEM; @@ -384,7 +384,8 @@ static int mpfs_auto_update_available(struct mpfs_auto_update_priv *priv) u32 *response_msg; int ret; - response_msg = devm_kzalloc(priv->dev, AUTO_UPDATE_FEATURE_RESP_SIZE * sizeof(response_msg), + response_msg = devm_kzalloc(priv->dev, + AUTO_UPDATE_FEATURE_RESP_SIZE * sizeof(*response_msg), GFP_KERNEL); if (!response_msg) return -ENOMEM; diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c index e00c33310517..753e7be039e4 100644 --- a/drivers/gpio/gpio-74x164.c +++ b/drivers/gpio/gpio-74x164.c @@ -127,8 +127,6 @@ static int gen_74x164_probe(struct spi_device *spi) if (IS_ERR(chip->gpiod_oe)) return PTR_ERR(chip->gpiod_oe); - gpiod_set_value_cansleep(chip->gpiod_oe, 1); - spi_set_drvdata(spi, chip); chip->gpio_chip.label = spi->modalias; @@ -153,6 +151,8 @@ static int gen_74x164_probe(struct spi_device *spi) goto exit_destroy; } + gpiod_set_value_cansleep(chip->gpiod_oe, 1); + ret = gpiochip_add_data(&chip->gpio_chip, chip); if (!ret) return 0; diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 8b3a0f45b574..75be4a3ca7f8 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -968,11 +968,11 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, ret = gpiochip_irqchip_init_valid_mask(gc); if (ret) - goto err_remove_acpi_chip; + goto err_free_hogs; ret = gpiochip_irqchip_init_hw(gc); if (ret) - goto err_remove_acpi_chip; + goto err_remove_irqchip_mask; ret = gpiochip_add_irqchip(gc, lock_key, request_key); if (ret) @@ -997,13 +997,13 @@ err_remove_irqchip: gpiochip_irqchip_remove(gc); err_remove_irqchip_mask: gpiochip_irqchip_free_valid_mask(gc); -err_remove_acpi_chip: +err_free_hogs: + gpiochip_free_hogs(gc); acpi_gpiochip_remove(gc); + gpiochip_remove_pin_ranges(gc); err_remove_of_chip: - gpiochip_free_hogs(gc); of_gpiochip_remove(gc); err_free_gpiochip_mask: - gpiochip_remove_pin_ranges(gc); gpiochip_free_valid_mask(gc); err_remove_from_list: spin_lock_irqsave(&gpio_lock, flags); @@ -2042,6 +2042,11 @@ EXPORT_SYMBOL_GPL(gpiochip_generic_free); int gpiochip_generic_config(struct gpio_chip *gc, unsigned int offset, unsigned long config) { +#ifdef CONFIG_PINCTRL + if (list_empty(&gc->gpiodev->pin_ranges)) + return -ENOTSUPP; +#endif + return pinctrl_gpio_set_config(gc, offset, config); } EXPORT_SYMBOL_GPL(gpiochip_generic_config); diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 2520db0b776e..c7edba18a6f0 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -199,7 +199,7 @@ config DRM_TTM config DRM_TTM_KUNIT_TEST tristate "KUnit tests for TTM" if !KUNIT_ALL_TESTS default n - depends on DRM && KUNIT && MMU + depends on DRM && KUNIT && MMU && (UML || COMPILE_TEST) select DRM_TTM select DRM_EXPORT_FOR_TESTS if m select DRM_KUNIT_TEST_HELPERS @@ -207,7 +207,8 @@ config DRM_TTM_KUNIT_TEST help Enables unit tests for TTM, a GPU memory manager subsystem used to manage memory buffers. This option is mostly useful for kernel - developers. + developers. It depends on (UML || COMPILE_TEST) since no other driver + which uses TTM can be loaded while running the tests. If in doubt, say "N". diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index cc21ed67a330..7099ff9cf8c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1528,6 +1528,9 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) */ void amdgpu_choose_low_power_state(struct amdgpu_device *adev) { + if (adev->in_runpm) + return; + if (amdgpu_acpi_is_s0ix_active(adev)) adev->in_s0ix = true; else if (amdgpu_acpi_is_s3_active(adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index c64c01e2944a..1c614451dead 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -574,11 +574,34 @@ soc15_asic_reset_method(struct amdgpu_device *adev) return AMD_RESET_METHOD_MODE1; } +static bool soc15_need_reset_on_resume(struct amdgpu_device *adev) +{ + u32 sol_reg; + + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + + /* Will reset for the following suspend abort cases. + * 1) Only reset limit on APU side, dGPU hasn't checked yet. + * 2) S3 suspend abort and TOS already launched. + */ + if (adev->flags & AMD_IS_APU && adev->in_s3 && + !adev->suspend_complete && + sol_reg) + return true; + + return false; +} + static int soc15_asic_reset(struct amdgpu_device *adev) { /* original raven doesn't have full asic reset */ - if ((adev->apu_flags & AMD_APU_IS_RAVEN) || - (adev->apu_flags & AMD_APU_IS_RAVEN2)) + /* On the latest Raven, the GPU reset can be performed + * successfully. So now, temporarily enable it for the + * S3 suspend abort case. + */ + if (((adev->apu_flags & AMD_APU_IS_RAVEN) || + (adev->apu_flags & AMD_APU_IS_RAVEN2)) && + !soc15_need_reset_on_resume(adev)) return 0; switch (soc15_asic_reset_method(adev)) { @@ -1298,24 +1321,6 @@ static int soc15_common_suspend(void *handle) return soc15_common_hw_fini(adev); } -static bool soc15_need_reset_on_resume(struct amdgpu_device *adev) -{ - u32 sol_reg; - - sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); - - /* Will reset for the following suspend abort cases. - * 1) Only reset limit on APU side, dGPU hasn't checked yet. - * 2) S3 suspend abort and TOS already launched. - */ - if (adev->flags & AMD_IS_APU && adev->in_s3 && - !adev->suspend_complete && - sol_reg) - return true; - - return false; -} - static int soc15_common_resume(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index cf875751971f..5853cf022917 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1843,21 +1843,12 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) DRM_ERROR("amdgpu: fail to register dmub aux callback"); goto error; } - if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD, dmub_hpd_callback, true)) { - DRM_ERROR("amdgpu: fail to register dmub hpd callback"); - goto error; - } - if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_IRQ, dmub_hpd_callback, true)) { - DRM_ERROR("amdgpu: fail to register dmub hpd callback"); - goto error; - } - } - - /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive. - * It is expected that DMUB will resend any pending notifications at this point, for - * example HPD from DPIA. - */ - if (dc_is_dmub_outbox_supported(adev->dm.dc)) { + /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive. + * It is expected that DMUB will resend any pending notifications at this point. Note + * that hpd and hpd_irq handler registration are deferred to register_hpd_handlers() to + * align legacy interface initialization sequence. Connection status will be proactivly + * detected once in the amdgpu_dm_initialize_drm_device. + */ dc_enable_dmub_outbox(adev->dm.dc); /* DPIA trace goes to dmesg logs only if outbox is enabled */ @@ -2287,6 +2278,7 @@ static int dm_sw_fini(void *handle) if (adev->dm.dmub_srv) { dmub_srv_destroy(adev->dm.dmub_srv); + kfree(adev->dm.dmub_srv); adev->dm.dmub_srv = NULL; } @@ -3536,6 +3528,14 @@ static void register_hpd_handlers(struct amdgpu_device *adev) int_params.requested_polarity = INTERRUPT_POLARITY_DEFAULT; int_params.current_polarity = INTERRUPT_POLARITY_DEFAULT; + if (dc_is_dmub_outbox_supported(adev->dm.dc)) { + if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD, dmub_hpd_callback, true)) + DRM_ERROR("amdgpu: fail to register dmub hpd callback"); + + if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_IRQ, dmub_hpd_callback, true)) + DRM_ERROR("amdgpu: fail to register dmub hpd callback"); + } + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { @@ -3564,10 +3564,6 @@ static void register_hpd_handlers(struct amdgpu_device *adev) handle_hpd_rx_irq, (void *) aconnector); } - - if (adev->dm.hpd_rx_offload_wq) - adev->dm.hpd_rx_offload_wq[connector->index].aconnector = - aconnector; } } @@ -4561,6 +4557,10 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) goto fail; } + if (dm->hpd_rx_offload_wq) + dm->hpd_rx_offload_wq[aconnector->base.index].aconnector = + aconnector; + if (!dc_link_detect_connection_type(link, &new_connection_type)) DRM_ERROR("KMS: Failed to detect connector\n"); @@ -6534,10 +6534,15 @@ amdgpu_dm_connector_late_register(struct drm_connector *connector) static void amdgpu_dm_connector_funcs_force(struct drm_connector *connector) { struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); - struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); struct dc_link *dc_link = aconnector->dc_link; struct dc_sink *dc_em_sink = aconnector->dc_em_sink; struct edid *edid; + struct i2c_adapter *ddc; + + if (dc_link->aux_mode) + ddc = &aconnector->dm_dp_aux.aux.ddc; + else + ddc = &aconnector->i2c->base; /* * Note: drm_get_edid gets edid in the following order: @@ -6545,7 +6550,7 @@ static void amdgpu_dm_connector_funcs_force(struct drm_connector *connector) * 2) firmware EDID if set via edid_firmware module parameter * 3) regular DDC read. */ - edid = drm_get_edid(connector, &amdgpu_connector->ddc_bus->aux.ddc); + edid = drm_get_edid(connector, ddc); if (!edid) { DRM_ERROR("No EDID found on connector: %s.\n", connector->name); return; @@ -6586,12 +6591,18 @@ static int get_modes(struct drm_connector *connector) static void create_eml_sink(struct amdgpu_dm_connector *aconnector) { struct drm_connector *connector = &aconnector->base; - struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(&aconnector->base); + struct dc_link *dc_link = aconnector->dc_link; struct dc_sink_init_data init_params = { .link = aconnector->dc_link, .sink_signal = SIGNAL_TYPE_VIRTUAL }; struct edid *edid; + struct i2c_adapter *ddc; + + if (dc_link->aux_mode) + ddc = &aconnector->dm_dp_aux.aux.ddc; + else + ddc = &aconnector->i2c->base; /* * Note: drm_get_edid gets edid in the following order: @@ -6599,7 +6610,7 @@ static void create_eml_sink(struct amdgpu_dm_connector *aconnector) * 2) firmware EDID if set via edid_firmware module parameter * 3) regular DDC read. */ - edid = drm_get_edid(connector, &amdgpu_connector->ddc_bus->aux.ddc); + edid = drm_get_edid(connector, ddc); if (!edid) { DRM_ERROR("No EDID found on connector: %s.\n", connector->name); return; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 85b7f58a7f35..c27063305a13 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -67,6 +67,8 @@ static void apply_edid_quirks(struct edid *edid, struct dc_edid_caps *edid_caps) /* Workaround for some monitors that do not clear DPCD 0x317 if FreeSync is unsupported */ case drm_edid_encode_panel_id('A', 'U', 'O', 0xA7AB): case drm_edid_encode_panel_id('A', 'U', 'O', 0xE69B): + case drm_edid_encode_panel_id('B', 'O', 'E', 0x092A): + case drm_edid_encode_panel_id('L', 'G', 'D', 0x06D1): DRM_DEBUG_DRIVER("Clearing DPCD 0x317 on monitor with panel id %X\n", panel_id); edid_caps->panel_patch.remove_sink_ext_caps = true; break; @@ -120,6 +122,8 @@ enum dc_edid_status dm_helpers_parse_edid_caps( edid_caps->edid_hdmi = connector->display_info.is_hdmi; + apply_edid_quirks(edid_buf, edid_caps); + sad_count = drm_edid_to_sad((struct edid *) edid->raw_edid, &sads); if (sad_count <= 0) return result; @@ -146,8 +150,6 @@ enum dc_edid_status dm_helpers_parse_edid_caps( else edid_caps->speaker_flags = DEFAULT_SPEAKER_LOCATION; - apply_edid_quirks(edid_buf, edid_caps); - kfree(sads); kfree(sadb); diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 2b79a0e5638e..363d522603a2 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -125,7 +125,7 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, unsigned int count, union dmub_rb_cmd *cmd_list) { - struct dc_context *dc_ctx = dc_dmub_srv->ctx; + struct dc_context *dc_ctx; struct dmub_srv *dmub; enum dmub_status status; int i; @@ -133,6 +133,7 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, if (!dc_dmub_srv || !dc_dmub_srv->dmub) return false; + dc_ctx = dc_dmub_srv->ctx; dmub = dc_dmub_srv->dmub; for (i = 0 ; i < count; i++) { @@ -1161,7 +1162,7 @@ void dc_dmub_srv_subvp_save_surf_addr(const struct dc_dmub_srv *dc_dmub_srv, con bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait) { - struct dc_context *dc_ctx = dc_dmub_srv->ctx; + struct dc_context *dc_ctx; enum dmub_status status; if (!dc_dmub_srv || !dc_dmub_srv->dmub) @@ -1170,6 +1171,8 @@ bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait) if (dc_dmub_srv->ctx->dc->debug.dmcub_emulation) return true; + dc_ctx = dc_dmub_srv->ctx; + if (wait) { if (dc_dmub_srv->ctx->dc->debug.disable_timeout) { do { diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c index e8570060d007..5bca67407c5b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c @@ -290,4 +290,5 @@ void dce_panel_cntl_construct( dce_panel_cntl->base.funcs = &dce_link_panel_cntl_funcs; dce_panel_cntl->base.ctx = init_data->ctx; dce_panel_cntl->base.inst = init_data->inst; + dce_panel_cntl->base.pwrseq_inst = 0; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c index ad0df1a72a90..9e96a3ace207 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c @@ -215,4 +215,5 @@ void dcn301_panel_cntl_construct( dcn301_panel_cntl->base.funcs = &dcn301_link_panel_cntl_funcs; dcn301_panel_cntl->base.ctx = init_data->ctx; dcn301_panel_cntl->base.inst = init_data->inst; + dcn301_panel_cntl->base.pwrseq_inst = 0; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c index 03248422d6ff..281be20b1a10 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c @@ -154,8 +154,24 @@ void dcn31_panel_cntl_construct( struct dcn31_panel_cntl *dcn31_panel_cntl, const struct panel_cntl_init_data *init_data) { + uint8_t pwrseq_inst = 0xF; + dcn31_panel_cntl->base.funcs = &dcn31_link_panel_cntl_funcs; dcn31_panel_cntl->base.ctx = init_data->ctx; dcn31_panel_cntl->base.inst = init_data->inst; - dcn31_panel_cntl->base.pwrseq_inst = init_data->pwrseq_inst; + + switch (init_data->eng_id) { + case ENGINE_ID_DIGA: + pwrseq_inst = 0; + break; + case ENGINE_ID_DIGB: + pwrseq_inst = 1; + break; + default: + DC_LOG_WARNING("Unsupported pwrseq engine id: %d!\n", init_data->eng_id); + ASSERT(false); + break; + } + + dcn31_panel_cntl->base.pwrseq_inst = pwrseq_inst; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 23a608274096..1ba6933d2b36 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -398,7 +398,6 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, /* Copy clocks tables entries, if available */ if (dml2->config.bbox_overrides.clks_table.num_states) { p->in_states->num_states = dml2->config.bbox_overrides.clks_table.num_states; - for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels; i++) { p->in_states->state_array[i].dcfclk_mhz = dml2->config.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz; } @@ -437,6 +436,14 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, } dml2_policy_build_synthetic_soc_states(s, p); + if (dml2->v20.dml_core_ctx.project == dml_project_dcn35 || + dml2->v20.dml_core_ctx.project == dml_project_dcn351) { + // Override last out_state with data from last in_state + // This will ensure that out_state contains max fclk + memcpy(&p->out_states->state_array[p->out_states->num_states - 1], + &p->in_states->state_array[p->in_states->num_states - 1], + sizeof(struct soc_state_bounding_box_st)); + } } void dml2_translate_ip_params(const struct dc *in, struct ip_params_st *out) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index 26307e599614..2a58a7687bdb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -76,6 +76,11 @@ static void map_hw_resources(struct dml2_context *dml2, in_out_display_cfg->hw.DLGRefClkFreqMHz = 50; } for (j = 0; j < mode_support_info->DPPPerSurface[i]; j++) { + if (i >= __DML2_WRAPPER_MAX_STREAMS_PLANES__) { + dml_print("DML::%s: Index out of bounds: i=%d, __DML2_WRAPPER_MAX_STREAMS_PLANES__=%d\n", + __func__, i, __DML2_WRAPPER_MAX_STREAMS_PLANES__); + break; + } dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[num_pipes] = dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[i]; dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[num_pipes] = true; dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[num_pipes] = dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[i]; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h index 5dcbaa2db964..e97d964a1791 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h @@ -57,7 +57,7 @@ struct panel_cntl_funcs { struct panel_cntl_init_data { struct dc_context *ctx; uint32_t inst; - uint32_t pwrseq_inst; + uint32_t eng_id; }; struct panel_cntl { diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index 37d3027c32dc..cf22b8f28ba6 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -370,30 +370,6 @@ static enum transmitter translate_encoder_to_transmitter( } } -static uint8_t translate_dig_inst_to_pwrseq_inst(struct dc_link *link) -{ - uint8_t pwrseq_inst = 0xF; - struct dc_context *dc_ctx = link->dc->ctx; - - DC_LOGGER_INIT(dc_ctx->logger); - - switch (link->eng_id) { - case ENGINE_ID_DIGA: - pwrseq_inst = 0; - break; - case ENGINE_ID_DIGB: - pwrseq_inst = 1; - break; - default: - DC_LOG_WARNING("Unsupported pwrseq engine id: %d!\n", link->eng_id); - ASSERT(false); - break; - } - - return pwrseq_inst; -} - - static void link_destruct(struct dc_link *link) { int i; @@ -657,7 +633,7 @@ static bool construct_phy(struct dc_link *link, link->link_id.id == CONNECTOR_ID_LVDS)) { panel_cntl_init_data.ctx = dc_ctx; panel_cntl_init_data.inst = panel_cntl_init_data.ctx->dc_edp_id_count; - panel_cntl_init_data.pwrseq_inst = translate_dig_inst_to_pwrseq_inst(link); + panel_cntl_init_data.eng_id = link->eng_id; link->panel_cntl = link->dc->res_pool->funcs->panel_cntl_create( &panel_cntl_init_data); diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index df4f20293c16..eb4da3666e05 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -6925,6 +6925,23 @@ static int si_dpm_enable(struct amdgpu_device *adev) return 0; } +static int si_set_temperature_range(struct amdgpu_device *adev) +{ + int ret; + + ret = si_thermal_enable_alert(adev, false); + if (ret) + return ret; + ret = si_thermal_set_temperature_range(adev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX); + if (ret) + return ret; + ret = si_thermal_enable_alert(adev, true); + if (ret) + return ret; + + return ret; +} + static void si_dpm_disable(struct amdgpu_device *adev) { struct rv7xx_power_info *pi = rv770_get_pi(adev); @@ -7608,6 +7625,18 @@ static int si_dpm_process_interrupt(struct amdgpu_device *adev, static int si_dpm_late_init(void *handle) { + int ret; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!adev->pm.dpm_enabled) + return 0; + + ret = si_set_temperature_range(adev); + if (ret) + return ret; +#if 0 //TODO ? + si_dpm_powergate_uvd(adev, true); +#endif return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index 4cd43bbec910..bcad42534da4 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -1303,13 +1303,12 @@ static int arcturus_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; - if (smu->od_enabled) { + if (smu->od_enabled) od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_ODSETTING_POWERPERCENTAGE]); - od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]); - } else { + else od_percent_upper = 0; - od_percent_lower = 100; - } + + od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]); dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", od_percent_upper, od_percent_lower, power_limit); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index 8d1d29ffb0f1..ed189a3878eb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -2357,13 +2357,12 @@ static int navi10_get_power_limit(struct smu_context *smu, *default_power_limit = power_limit; if (smu->od_enabled && - navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_POWER_LIMIT)) { + navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_POWER_LIMIT)) od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_ODSETTING_POWERPERCENTAGE]); - od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]); - } else { + else od_percent_upper = 0; - od_percent_lower = 100; - } + + od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]); dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", od_percent_upper, od_percent_lower, power_limit); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 21fc033528fa..e2ad2b972ab0 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -640,13 +640,12 @@ static int sienna_cichlid_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; - if (smu->od_enabled) { + if (smu->od_enabled) od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_7_ODSETTING_POWERPERCENTAGE]); - od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_7_ODSETTING_POWERPERCENTAGE]); - } else { + else od_percent_upper = 0; - od_percent_lower = 100; - } + + od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_7_ODSETTING_POWERPERCENTAGE]); dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", od_percent_upper, od_percent_lower, power_limit); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index a9954ffc02c5..9b80f18ea6c3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2369,13 +2369,12 @@ static int smu_v13_0_0_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; - if (smu->od_enabled) { + if (smu->od_enabled) od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_13_0_0_ODSETTING_POWERPERCENTAGE]); - od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_0_ODSETTING_POWERPERCENTAGE]); - } else { + else od_percent_upper = 0; - od_percent_lower = 100; - } + + od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_0_ODSETTING_POWERPERCENTAGE]); dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", od_percent_upper, od_percent_lower, power_limit); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 0ffdb58af74e..3dc7b60cb075 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2333,13 +2333,12 @@ static int smu_v13_0_7_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; - if (smu->od_enabled) { + if (smu->od_enabled) od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_13_0_7_ODSETTING_POWERPERCENTAGE]); - od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_7_ODSETTING_POWERPERCENTAGE]); - } else { + else od_percent_upper = 0; - od_percent_lower = 100; - } + + od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_7_ODSETTING_POWERPERCENTAGE]); dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", od_percent_upper, od_percent_lower, power_limit); diff --git a/drivers/gpu/drm/bridge/aux-hpd-bridge.c b/drivers/gpu/drm/bridge/aux-hpd-bridge.c index bb55f697a181..6886db2d9e00 100644 --- a/drivers/gpu/drm/bridge/aux-hpd-bridge.c +++ b/drivers/gpu/drm/bridge/aux-hpd-bridge.c @@ -25,20 +25,18 @@ static void drm_aux_hpd_bridge_release(struct device *dev) ida_free(&drm_aux_hpd_bridge_ida, adev->id); of_node_put(adev->dev.platform_data); + of_node_put(adev->dev.of_node); kfree(adev); } -static void drm_aux_hpd_bridge_unregister_adev(void *_adev) +static void drm_aux_hpd_bridge_free_adev(void *_adev) { - struct auxiliary_device *adev = _adev; - - auxiliary_device_delete(adev); - auxiliary_device_uninit(adev); + auxiliary_device_uninit(_adev); } /** - * drm_dp_hpd_bridge_register - Create a simple HPD DisplayPort bridge + * devm_drm_dp_hpd_bridge_alloc - allocate a HPD DisplayPort bridge * @parent: device instance providing this bridge * @np: device node pointer corresponding to this bridge instance * @@ -46,11 +44,9 @@ static void drm_aux_hpd_bridge_unregister_adev(void *_adev) * DRM_MODE_CONNECTOR_DisplayPort, which terminates the bridge chain and is * able to send the HPD events. * - * Return: device instance that will handle created bridge or an error code - * encoded into the pointer. + * Return: bridge auxiliary device pointer or an error pointer */ -struct device *drm_dp_hpd_bridge_register(struct device *parent, - struct device_node *np) +struct auxiliary_device *devm_drm_dp_hpd_bridge_alloc(struct device *parent, struct device_node *np) { struct auxiliary_device *adev; int ret; @@ -74,18 +70,62 @@ struct device *drm_dp_hpd_bridge_register(struct device *parent, ret = auxiliary_device_init(adev); if (ret) { + of_node_put(adev->dev.platform_data); + of_node_put(adev->dev.of_node); ida_free(&drm_aux_hpd_bridge_ida, adev->id); kfree(adev); return ERR_PTR(ret); } - ret = auxiliary_device_add(adev); - if (ret) { - auxiliary_device_uninit(adev); + ret = devm_add_action_or_reset(parent, drm_aux_hpd_bridge_free_adev, adev); + if (ret) return ERR_PTR(ret); - } - ret = devm_add_action_or_reset(parent, drm_aux_hpd_bridge_unregister_adev, adev); + return adev; +} +EXPORT_SYMBOL_GPL(devm_drm_dp_hpd_bridge_alloc); + +static void drm_aux_hpd_bridge_del_adev(void *_adev) +{ + auxiliary_device_delete(_adev); +} + +/** + * devm_drm_dp_hpd_bridge_add - register a HDP DisplayPort bridge + * @dev: struct device to tie registration lifetime to + * @adev: bridge auxiliary device to be registered + * + * Returns: zero on success or a negative errno + */ +int devm_drm_dp_hpd_bridge_add(struct device *dev, struct auxiliary_device *adev) +{ + int ret; + + ret = auxiliary_device_add(adev); + if (ret) + return ret; + + return devm_add_action_or_reset(dev, drm_aux_hpd_bridge_del_adev, adev); +} +EXPORT_SYMBOL_GPL(devm_drm_dp_hpd_bridge_add); + +/** + * drm_dp_hpd_bridge_register - allocate and register a HDP DisplayPort bridge + * @parent: device instance providing this bridge + * @np: device node pointer corresponding to this bridge instance + * + * Return: device instance that will handle created bridge or an error pointer + */ +struct device *drm_dp_hpd_bridge_register(struct device *parent, struct device_node *np) +{ + struct auxiliary_device *adev; + int ret; + + adev = devm_drm_dp_hpd_bridge_alloc(parent, np); + if (IS_ERR(adev)) + return ERR_CAST(adev); + + ret = devm_drm_dp_hpd_bridge_add(parent, adev); if (ret) return ERR_PTR(ret); diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index c1a99bf4dffd..5ebdd6f8f36e 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -332,6 +332,7 @@ alloc_range_bias(struct drm_buddy *mm, u64 start, u64 end, unsigned int order) { + u64 req_size = mm->chunk_size << order; struct drm_buddy_block *block; struct drm_buddy_block *buddy; LIST_HEAD(dfs); @@ -367,6 +368,15 @@ alloc_range_bias(struct drm_buddy *mm, if (drm_buddy_block_is_allocated(block)) continue; + if (block_start < start || block_end > end) { + u64 adjusted_start = max(block_start, start); + u64 adjusted_end = min(block_end, end); + + if (round_down(adjusted_end + 1, req_size) <= + round_up(adjusted_start, req_size)) + continue; + } + if (contains(start, end, block_start, block_end) && order == drm_buddy_block_order(block)) { /* @@ -538,13 +548,13 @@ static int __alloc_range(struct drm_buddy *mm, list_add(&block->left->tmp_link, dfs); } while (1); - list_splice_tail(&allocated, blocks); - if (total_allocated < size) { err = -ENOSPC; goto err_free; } + list_splice_tail(&allocated, blocks); + return 0; err_undo: @@ -761,8 +771,12 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, return -EINVAL; /* Actual range allocation */ - if (start + size == end) + if (start + size == end) { + if (!IS_ALIGNED(start | end, min_block_size)) + return -EINVAL; + return __drm_buddy_alloc_range(mm, start, size, NULL, blocks); + } original_size = size; original_min_size = min_block_size; diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 84101baeecc6..a6c19de46292 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -1040,7 +1040,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, uint64_t *points; uint32_t signaled_count, i; - if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) + if (flags & (DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE)) lockdep_assert_none_held_once(); points = kmalloc_array(count, sizeof(*points), GFP_KERNEL); @@ -1109,7 +1110,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, * fallthough and try a 0 timeout wait! */ - if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { + if (flags & (DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE)) { for (i = 0; i < count; ++i) drm_syncobj_fence_add_wait(syncobjs[i], &entries[i]); } @@ -1416,10 +1418,21 @@ syncobj_eventfd_entry_func(struct drm_syncobj *syncobj, /* This happens inside the syncobj lock */ fence = dma_fence_get(rcu_dereference_protected(syncobj->fence, 1)); + if (!fence) + return; + ret = dma_fence_chain_find_seqno(&fence, entry->point); - if (ret != 0 || !fence) { + if (ret != 0) { + /* The given seqno has not been submitted yet. */ dma_fence_put(fence); return; + } else if (!fence) { + /* If dma_fence_chain_find_seqno returns 0 but sets the fence + * to NULL, it implies that the given seqno is signaled and a + * later seqno has already been submitted. Assign a stub fence + * so that the eventfd still gets signaled below. + */ + fence = dma_fence_get_stub(); } list_del_init(&entry->node); diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index acc6b6804105..2915d7afe5cc 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -1209,7 +1209,7 @@ static bool intel_sdvo_set_tv_format(struct intel_sdvo *intel_sdvo, struct intel_sdvo_tv_format format; u32 format_map; - format_map = 1 << conn_state->tv.mode; + format_map = 1 << conn_state->tv.legacy_mode; memset(&format, 0, sizeof(format)); memcpy(&format, &format_map, min(sizeof(format), sizeof(format_map))); @@ -2298,7 +2298,7 @@ static int intel_sdvo_get_tv_modes(struct drm_connector *connector) * Read the list of supported input resolutions for the selected TV * format. */ - format_map = 1 << conn_state->tv.mode; + format_map = 1 << conn_state->tv.legacy_mode; memcpy(&tv_res, &format_map, min(sizeof(format_map), sizeof(struct intel_sdvo_sdtv_resolution_request))); @@ -2363,7 +2363,7 @@ intel_sdvo_connector_atomic_get_property(struct drm_connector *connector, int i; for (i = 0; i < intel_sdvo_connector->format_supported_num; i++) - if (state->tv.mode == intel_sdvo_connector->tv_format_supported[i]) { + if (state->tv.legacy_mode == intel_sdvo_connector->tv_format_supported[i]) { *val = i; return 0; @@ -2419,7 +2419,7 @@ intel_sdvo_connector_atomic_set_property(struct drm_connector *connector, struct intel_sdvo_connector_state *sdvo_state = to_intel_sdvo_connector_state(state); if (property == intel_sdvo_connector->tv_format) { - state->tv.mode = intel_sdvo_connector->tv_format_supported[val]; + state->tv.legacy_mode = intel_sdvo_connector->tv_format_supported[val]; if (state->crtc) { struct drm_crtc_state *crtc_state = @@ -3076,7 +3076,7 @@ static bool intel_sdvo_tv_create_property(struct intel_sdvo *intel_sdvo, drm_property_add_enum(intel_sdvo_connector->tv_format, i, tv_format_names[intel_sdvo_connector->tv_format_supported[i]]); - intel_sdvo_connector->base.base.state->tv.mode = intel_sdvo_connector->tv_format_supported[0]; + intel_sdvo_connector->base.base.state->tv.legacy_mode = intel_sdvo_connector->tv_format_supported[0]; drm_object_attach_property(&intel_sdvo_connector->base.base.base, intel_sdvo_connector->tv_format, 0); return true; diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index d4386cb3569e..992a725de751 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -949,7 +949,7 @@ intel_disable_tv(struct intel_atomic_state *state, static const struct tv_mode *intel_tv_mode_find(const struct drm_connector_state *conn_state) { - int format = conn_state->tv.mode; + int format = conn_state->tv.legacy_mode; return &tv_modes[format]; } @@ -1704,7 +1704,7 @@ static void intel_tv_find_better_format(struct drm_connector *connector) break; } - connector->state->tv.mode = i; + connector->state->tv.legacy_mode = i; } static int @@ -1859,7 +1859,7 @@ static int intel_tv_atomic_check(struct drm_connector *connector, old_state = drm_atomic_get_old_connector_state(state, connector); new_crtc_state = drm_atomic_get_new_crtc_state(state, new_state->crtc); - if (old_state->tv.mode != new_state->tv.mode || + if (old_state->tv.legacy_mode != new_state->tv.legacy_mode || old_state->tv.margins.left != new_state->tv.margins.left || old_state->tv.margins.right != new_state->tv.margins.right || old_state->tv.margins.top != new_state->tv.margins.top || @@ -1896,7 +1896,7 @@ static void intel_tv_add_properties(struct drm_connector *connector) conn_state->tv.margins.right = 46; conn_state->tv.margins.bottom = 37; - conn_state->tv.mode = 0; + conn_state->tv.legacy_mode = 0; /* Create TV properties then attach current values */ for (i = 0; i < ARRAY_SIZE(tv_modes); i++) { @@ -1910,7 +1910,7 @@ static void intel_tv_add_properties(struct drm_connector *connector) drm_object_attach_property(&connector->base, i915->drm.mode_config.legacy_tv_mode_property, - conn_state->tv.mode); + conn_state->tv.legacy_mode); drm_object_attach_property(&connector->base, i915->drm.mode_config.tv_left_margin_property, conn_state->tv.margins.left); diff --git a/drivers/gpu/drm/meson/meson_encoder_cvbs.c b/drivers/gpu/drm/meson/meson_encoder_cvbs.c index 3f73b211fa8e..3407450435e2 100644 --- a/drivers/gpu/drm/meson/meson_encoder_cvbs.c +++ b/drivers/gpu/drm/meson/meson_encoder_cvbs.c @@ -294,6 +294,5 @@ void meson_encoder_cvbs_remove(struct meson_drm *priv) if (priv->encoders[MESON_ENC_CVBS]) { meson_encoder_cvbs = priv->encoders[MESON_ENC_CVBS]; drm_bridge_remove(&meson_encoder_cvbs->bridge); - drm_bridge_remove(meson_encoder_cvbs->next_bridge); } } diff --git a/drivers/gpu/drm/meson/meson_encoder_dsi.c b/drivers/gpu/drm/meson/meson_encoder_dsi.c index 3f93c70488ca..311b91630fbe 100644 --- a/drivers/gpu/drm/meson/meson_encoder_dsi.c +++ b/drivers/gpu/drm/meson/meson_encoder_dsi.c @@ -168,6 +168,5 @@ void meson_encoder_dsi_remove(struct meson_drm *priv) if (priv->encoders[MESON_ENC_DSI]) { meson_encoder_dsi = priv->encoders[MESON_ENC_DSI]; drm_bridge_remove(&meson_encoder_dsi->bridge); - drm_bridge_remove(meson_encoder_dsi->next_bridge); } } diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c index 25ea76558690..c4686568c9ca 100644 --- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c +++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c @@ -474,6 +474,5 @@ void meson_encoder_hdmi_remove(struct meson_drm *priv) if (priv->encoders[MESON_ENC_HDMI]) { meson_encoder_hdmi = priv->encoders[MESON_ENC_HDMI]; drm_bridge_remove(&meson_encoder_hdmi->bridge); - drm_bridge_remove(meson_encoder_hdmi->next_bridge); } } diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index d37d599aec27..4c72124ffb5d 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -329,10 +329,26 @@ static const struct component_ops dp_display_comp_ops = { .unbind = dp_display_unbind, }; +static void dp_display_send_hpd_event(struct msm_dp *dp_display) +{ + struct dp_display_private *dp; + struct drm_connector *connector; + + dp = container_of(dp_display, struct dp_display_private, dp_display); + + connector = dp->dp_display.connector; + drm_helper_hpd_irq_event(connector->dev); +} + static int dp_display_send_hpd_notification(struct dp_display_private *dp, bool hpd) { - struct drm_bridge *bridge = dp->dp_display.bridge; + if ((hpd && dp->dp_display.link_ready) || + (!hpd && !dp->dp_display.link_ready)) { + drm_dbg_dp(dp->drm_dev, "HPD already %s\n", + (hpd ? "on" : "off")); + return 0; + } /* reset video pattern flag on disconnect */ if (!hpd) { @@ -348,7 +364,7 @@ static int dp_display_send_hpd_notification(struct dp_display_private *dp, drm_dbg_dp(dp->drm_dev, "type=%d hpd=%d\n", dp->dp_display.connector_type, hpd); - drm_bridge_hpd_notify(bridge, dp->dp_display.link_ready); + dp_display_send_hpd_event(&dp->dp_display); return 0; } diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index 1e6aaf95ff7c..ceef470c9fbf 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -100,3 +100,11 @@ config DRM_NOUVEAU_SVM help Say Y here if you want to enable experimental support for Shared Virtual Memory (SVM). + +config DRM_NOUVEAU_GSP_DEFAULT + bool "Use GSP firmware for Turing/Ampere (needs firmware installed)" + depends on DRM_NOUVEAU + default n + help + Say Y here if you want to use the GSP codepaths by default on + Turing and Ampere GPUs. diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index d1bb8151a1df..80f74ee0fc78 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -199,6 +199,7 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS) struct nouveau_cli *cli = nouveau_cli(file_priv); struct nouveau_drm *drm = nouveau_drm(dev); struct nvif_device *device = &drm->client.device; + struct nvkm_device *nvkm_device = nvxx_device(&drm->client.device); struct nvkm_gr *gr = nvxx_gr(device); struct drm_nouveau_getparam *getparam = data; struct pci_dev *pdev = to_pci_dev(dev->dev); @@ -263,6 +264,14 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS) getparam->value = nouveau_exec_push_max_from_ib_max(ib_max); break; } + case NOUVEAU_GETPARAM_VRAM_BAR_SIZE: + getparam->value = nvkm_device->func->resource_size(nvkm_device, 1); + break; + case NOUVEAU_GETPARAM_VRAM_USED: { + struct ttm_resource_manager *vram_mgr = ttm_manager_type(&drm->ttm.bdev, TTM_PL_VRAM); + getparam->value = (u64)ttm_resource_manager_usage(vram_mgr); + break; + } default: NV_PRINTK(dbg, cli, "unknown parameter %lld\n", getparam->param); return -EINVAL; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c index 4135690326f4..3a30bea30e36 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c @@ -168,12 +168,11 @@ r535_bar_new_(const struct nvkm_bar_func *hw, struct nvkm_device *device, rm->flush = r535_bar_flush; ret = gf100_bar_new_(rm, device, type, inst, &bar); - *pbar = bar; if (ret) { - if (!bar) - kfree(rm); + kfree(rm); return ret; } + *pbar = bar; bar->flushBAR2PhysMode = ioremap(device->func->resource_addr(device, 3), PAGE_SIZE); if (!bar->flushBAR2PhysMode) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c index 19188683c8fc..8c2bf1c16f2a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c @@ -154,11 +154,17 @@ shadow_fw_init(struct nvkm_bios *bios, const char *name) return (void *)fw; } +static void +shadow_fw_release(void *fw) +{ + release_firmware(fw); +} + static const struct nvbios_source shadow_fw = { .name = "firmware", .init = shadow_fw_init, - .fini = (void(*)(void *))release_firmware, + .fini = shadow_fw_release, .read = shadow_fw_read, .rw = false, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index a41735ab6068..a73a5b589790 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -1054,8 +1054,6 @@ r535_gsp_postinit(struct nvkm_gsp *gsp) /* Release the DMA buffers that were needed only for boot and init */ nvkm_gsp_mem_dtor(gsp, &gsp->boot.fw); nvkm_gsp_mem_dtor(gsp, &gsp->libos); - nvkm_gsp_mem_dtor(gsp, &gsp->rmargs); - nvkm_gsp_mem_dtor(gsp, &gsp->wpr_meta); return ret; } @@ -2163,6 +2161,8 @@ r535_gsp_dtor(struct nvkm_gsp *gsp) r535_gsp_dtor_fws(gsp); + nvkm_gsp_mem_dtor(gsp, &gsp->rmargs); + nvkm_gsp_mem_dtor(gsp, &gsp->wpr_meta); nvkm_gsp_mem_dtor(gsp, &gsp->shm.mem); nvkm_gsp_mem_dtor(gsp, &gsp->loginit); nvkm_gsp_mem_dtor(gsp, &gsp->logintr); @@ -2312,8 +2312,12 @@ r535_gsp_load(struct nvkm_gsp *gsp, int ver, const struct nvkm_gsp_fwif *fwif) { struct nvkm_subdev *subdev = &gsp->subdev; int ret; + bool enable_gsp = fwif->enable; - if (!nvkm_boolopt(subdev->device->cfgopt, "NvGspRm", fwif->enable)) +#if IS_ENABLED(CONFIG_DRM_NOUVEAU_GSP_DEFAULT) + enable_gsp = true; +#endif + if (!nvkm_boolopt(subdev->device->cfgopt, "NvGspRm", enable_gsp)) return -EINVAL; if ((ret = r535_gsp_load_fw(gsp, "gsp", fwif->ver, &gsp->fws.rm)) || diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index a73cff7a3070..03d1c76aec2d 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -1243,9 +1243,26 @@ static int host1x_drm_probe(struct host1x_device *dev) drm_mode_config_reset(drm); - err = drm_aperture_remove_framebuffers(&tegra_drm_driver); - if (err < 0) - goto hub; + /* + * Only take over from a potential firmware framebuffer if any CRTCs + * have been registered. This must not be a fatal error because there + * are other accelerators that are exposed via this driver. + * + * Another case where this happens is on Tegra234 where the display + * hardware is no longer part of the host1x complex, so this driver + * will not expose any modesetting features. + */ + if (drm->mode_config.num_crtc > 0) { + err = drm_aperture_remove_framebuffers(&tegra_drm_driver); + if (err < 0) + goto hub; + } else { + /* + * Indicate to userspace that this doesn't expose any display + * capabilities. + */ + drm->driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC); + } err = drm_dev_register(drm, 0); if (err < 0) diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c b/drivers/gpu/drm/tests/drm_buddy_test.c index 8a464f7f4c61..be2d9d7764be 100644 --- a/drivers/gpu/drm/tests/drm_buddy_test.c +++ b/drivers/gpu/drm/tests/drm_buddy_test.c @@ -14,11 +14,216 @@ #include "../lib/drm_random.h" +static unsigned int random_seed; + static inline u64 get_size(int order, u64 chunk_size) { return (1 << order) * chunk_size; } +static void drm_test_buddy_alloc_range_bias(struct kunit *test) +{ + u32 mm_size, ps, bias_size, bias_start, bias_end, bias_rem; + DRM_RND_STATE(prng, random_seed); + unsigned int i, count, *order; + struct drm_buddy mm; + LIST_HEAD(allocated); + + bias_size = SZ_1M; + ps = roundup_pow_of_two(prandom_u32_state(&prng) % bias_size); + ps = max(SZ_4K, ps); + mm_size = (SZ_8M-1) & ~(ps-1); /* Multiple roots */ + + kunit_info(test, "mm_size=%u, ps=%u\n", mm_size, ps); + + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps), + "buddy_init failed\n"); + + count = mm_size / bias_size; + order = drm_random_order(count, &prng); + KUNIT_EXPECT_TRUE(test, order); + + /* + * Idea is to split the address space into uniform bias ranges, and then + * in some random order allocate within each bias, using various + * patterns within. This should detect if allocations leak out from a + * given bias, for example. + */ + + for (i = 0; i < count; i++) { + LIST_HEAD(tmp); + u32 size; + + bias_start = order[i] * bias_size; + bias_end = bias_start + bias_size; + bias_rem = bias_size; + + /* internal round_up too big */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, bias_size + ps, bias_size, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, bias_size, bias_size); + + /* size too big */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, bias_size + ps, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, bias_size + ps, ps); + + /* bias range too small for size */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start + ps, + bias_end, bias_size, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", + bias_start + ps, bias_end, bias_size, ps); + + /* bias misaligned */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start + ps, + bias_end - ps, + bias_size >> 1, bias_size >> 1, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc h didn't fail with bias(%x-%x), size=%u, ps=%u\n", + bias_start + ps, bias_end - ps, bias_size >> 1, bias_size >> 1); + + /* single big page */ + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, bias_size, bias_size, + &tmp, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc i failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, bias_size, bias_size); + drm_buddy_free_list(&mm, &tmp); + + /* single page with internal round_up */ + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, ps, bias_size, + &tmp, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, ps, bias_size); + drm_buddy_free_list(&mm, &tmp); + + /* random size within */ + size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); + if (size) + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, size, ps, + &tmp, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, size, ps); + + bias_rem -= size; + /* too big for current avail */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, bias_rem + ps, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, bias_rem + ps, ps); + + if (bias_rem) { + /* random fill of the remainder */ + size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); + size = max(size, ps); + + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, size, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, size, ps); + /* + * Intentionally allow some space to be left + * unallocated, and ideally not always on the bias + * boundaries. + */ + drm_buddy_free_list(&mm, &tmp); + } else { + list_splice_tail(&tmp, &allocated); + } + } + + kfree(order); + drm_buddy_free_list(&mm, &allocated); + drm_buddy_fini(&mm); + + /* + * Something more free-form. Idea is to pick a random starting bias + * range within the address space and then start filling it up. Also + * randomly grow the bias range in both directions as we go along. This + * should give us bias start/end which is not always uniform like above, + * and in some cases will require the allocator to jump over already + * allocated nodes in the middle of the address space. + */ + + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps), + "buddy_init failed\n"); + + bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps); + bias_end = round_up(bias_start + prandom_u32_state(&prng) % (mm_size - bias_start), ps); + bias_end = max(bias_end, bias_start + ps); + bias_rem = bias_end - bias_start; + + do { + u32 size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); + + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, size, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, size); + bias_rem -= size; + + /* + * Try to randomly grow the bias range in both directions, or + * only one, or perhaps don't grow at all. + */ + do { + u32 old_bias_start = bias_start; + u32 old_bias_end = bias_end; + + if (bias_start) + bias_start -= round_up(prandom_u32_state(&prng) % bias_start, ps); + if (bias_end != mm_size) + bias_end += round_up(prandom_u32_state(&prng) % (mm_size - bias_end), ps); + + bias_rem += old_bias_start - bias_start; + bias_rem += bias_end - old_bias_end; + } while (!bias_rem && (bias_start || bias_end != mm_size)); + } while (bias_rem); + + KUNIT_ASSERT_EQ(test, bias_start, 0); + KUNIT_ASSERT_EQ(test, bias_end, mm_size); + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, bias_end, + ps, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc passed with bias(%x-%x), size=%u\n", + bias_start, bias_end, ps); + + drm_buddy_free_list(&mm, &allocated); + drm_buddy_fini(&mm); +} + static void drm_test_buddy_alloc_contiguous(struct kunit *test) { const unsigned long ps = SZ_4K, mm_size = 16 * 3 * SZ_4K; @@ -55,30 +260,30 @@ static void drm_test_buddy_alloc_contiguous(struct kunit *test) KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, ps, ps, list, 0), - "buddy_alloc hit an error size=%d\n", + "buddy_alloc hit an error size=%u\n", ps); } while (++i < n_pages); KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, 3 * ps, ps, &allocated, DRM_BUDDY_CONTIGUOUS_ALLOCATION), - "buddy_alloc didn't error size=%d\n", 3 * ps); + "buddy_alloc didn't error size=%u\n", 3 * ps); drm_buddy_free_list(&mm, &middle); KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, 3 * ps, ps, &allocated, DRM_BUDDY_CONTIGUOUS_ALLOCATION), - "buddy_alloc didn't error size=%llu\n", 3 * ps); + "buddy_alloc didn't error size=%u\n", 3 * ps); KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, 2 * ps, ps, &allocated, DRM_BUDDY_CONTIGUOUS_ALLOCATION), - "buddy_alloc didn't error size=%llu\n", 2 * ps); + "buddy_alloc didn't error size=%u\n", 2 * ps); drm_buddy_free_list(&mm, &right); KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, 3 * ps, ps, &allocated, DRM_BUDDY_CONTIGUOUS_ALLOCATION), - "buddy_alloc didn't error size=%llu\n", 3 * ps); + "buddy_alloc didn't error size=%u\n", 3 * ps); /* * At this point we should have enough contiguous space for 2 blocks, * however they are never buddies (since we freed middle and right) so @@ -87,13 +292,13 @@ static void drm_test_buddy_alloc_contiguous(struct kunit *test) KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, 2 * ps, ps, &allocated, DRM_BUDDY_CONTIGUOUS_ALLOCATION), - "buddy_alloc hit an error size=%d\n", 2 * ps); + "buddy_alloc hit an error size=%u\n", 2 * ps); drm_buddy_free_list(&mm, &left); KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, 3 * ps, ps, &allocated, DRM_BUDDY_CONTIGUOUS_ALLOCATION), - "buddy_alloc hit an error size=%d\n", 3 * ps); + "buddy_alloc hit an error size=%u\n", 3 * ps); total = 0; list_for_each_entry(block, &allocated, link) @@ -362,17 +567,30 @@ static void drm_test_buddy_alloc_limit(struct kunit *test) drm_buddy_fini(&mm); } +static int drm_buddy_suite_init(struct kunit_suite *suite) +{ + while (!random_seed) + random_seed = get_random_u32(); + + kunit_info(suite, "Testing DRM buddy manager, with random_seed=0x%x\n", + random_seed); + + return 0; +} + static struct kunit_case drm_buddy_tests[] = { KUNIT_CASE(drm_test_buddy_alloc_limit), KUNIT_CASE(drm_test_buddy_alloc_optimistic), KUNIT_CASE(drm_test_buddy_alloc_pessimistic), KUNIT_CASE(drm_test_buddy_alloc_pathological), KUNIT_CASE(drm_test_buddy_alloc_contiguous), + KUNIT_CASE(drm_test_buddy_alloc_range_bias), {} }; static struct kunit_suite drm_buddy_test_suite = { .name = "drm_buddy", + .suite_init = drm_buddy_suite_init, .test_cases = drm_buddy_tests, }; diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index b62f420a9f96..112438d965ff 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -387,7 +387,7 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt, enum ttm_caching caching, pgoff_t start_page, pgoff_t end_page) { - struct page **pages = tt->pages; + struct page **pages = &tt->pages[start_page]; unsigned int order; pgoff_t i, nr; diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.c b/drivers/gpu/drm/xe/tests/xe_mocs_test.c index ef56bd517b28..421b819fd4ba 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs_test.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs_test.c @@ -21,4 +21,5 @@ kunit_test_suite(xe_mocs_test_suite); MODULE_AUTHOR("Intel Corporation"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("xe_mocs kunit test"); MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 0b0e262e2166..4d3b80ec906d 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -28,6 +28,14 @@ #include "xe_ttm_stolen_mgr.h" #include "xe_vm.h" +const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES] = { + [XE_PL_SYSTEM] = "system", + [XE_PL_TT] = "gtt", + [XE_PL_VRAM0] = "vram0", + [XE_PL_VRAM1] = "vram1", + [XE_PL_STOLEN] = "stolen" +}; + static const struct ttm_place sys_placement_flags = { .fpfn = 0, .lpfn = 0, @@ -713,8 +721,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, migrate = xe->tiles[0].migrate; xe_assert(xe, migrate); - - trace_xe_bo_move(bo); + trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source); xe_device_mem_access_get(xe); if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) { diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 9b1279aca127..8be42ac6cd07 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -243,6 +243,7 @@ int xe_bo_evict_pinned(struct xe_bo *bo); int xe_bo_restore_pinned(struct xe_bo *bo); extern struct ttm_device_funcs xe_ttm_funcs; +extern const char *const xe_mem_type_to_name[]; int xe_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file); diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 1f0b4b9ce84f..5176c27e4b6a 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -83,9 +83,6 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file) return 0; } -static void device_kill_persistent_exec_queues(struct xe_device *xe, - struct xe_file *xef); - static void xe_file_close(struct drm_device *dev, struct drm_file *file) { struct xe_device *xe = to_xe_device(dev); @@ -102,8 +99,6 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) mutex_unlock(&xef->exec_queue.lock); xa_destroy(&xef->exec_queue.xa); mutex_destroy(&xef->exec_queue.lock); - device_kill_persistent_exec_queues(xe, xef); - mutex_lock(&xef->vm.lock); xa_for_each(&xef->vm.xa, idx, vm) xe_vm_close_and_put(vm); @@ -255,9 +250,6 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, xa_erase(&xe->usm.asid_to_vm, asid); } - drmm_mutex_init(&xe->drm, &xe->persistent_engines.lock); - INIT_LIST_HEAD(&xe->persistent_engines.list); - spin_lock_init(&xe->pinned.lock); INIT_LIST_HEAD(&xe->pinned.kernel_bo_present); INIT_LIST_HEAD(&xe->pinned.external_vram); @@ -570,37 +562,6 @@ void xe_device_shutdown(struct xe_device *xe) { } -void xe_device_add_persistent_exec_queues(struct xe_device *xe, struct xe_exec_queue *q) -{ - mutex_lock(&xe->persistent_engines.lock); - list_add_tail(&q->persistent.link, &xe->persistent_engines.list); - mutex_unlock(&xe->persistent_engines.lock); -} - -void xe_device_remove_persistent_exec_queues(struct xe_device *xe, - struct xe_exec_queue *q) -{ - mutex_lock(&xe->persistent_engines.lock); - if (!list_empty(&q->persistent.link)) - list_del(&q->persistent.link); - mutex_unlock(&xe->persistent_engines.lock); -} - -static void device_kill_persistent_exec_queues(struct xe_device *xe, - struct xe_file *xef) -{ - struct xe_exec_queue *q, *next; - - mutex_lock(&xe->persistent_engines.lock); - list_for_each_entry_safe(q, next, &xe->persistent_engines.list, - persistent.link) - if (q->persistent.xef == xef) { - xe_exec_queue_kill(q); - list_del_init(&q->persistent.link); - } - mutex_unlock(&xe->persistent_engines.lock); -} - void xe_device_wmb(struct xe_device *xe) { struct xe_gt *gt = xe_root_mmio_gt(xe); diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 3da83b233206..08d8b72c7731 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -42,10 +42,6 @@ int xe_device_probe(struct xe_device *xe); void xe_device_remove(struct xe_device *xe); void xe_device_shutdown(struct xe_device *xe); -void xe_device_add_persistent_exec_queues(struct xe_device *xe, struct xe_exec_queue *q); -void xe_device_remove_persistent_exec_queues(struct xe_device *xe, - struct xe_exec_queue *q); - void xe_device_wmb(struct xe_device *xe); static inline struct xe_file *to_xe_file(const struct drm_file *file) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 5dc9127a2029..e8491979a6f2 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -341,14 +341,6 @@ struct xe_device { struct mutex lock; } usm; - /** @persistent_engines: engines that are closed but still running */ - struct { - /** @lock: protects persistent engines */ - struct mutex lock; - /** @list: list of persistent engines */ - struct list_head list; - } persistent_engines; - /** @pinned: pinned BO state */ struct { /** @lock: protected pinned BO list state */ diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 82d1305e831f..6040e4d22b28 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -131,14 +131,6 @@ static void bo_meminfo(struct xe_bo *bo, static void show_meminfo(struct drm_printer *p, struct drm_file *file) { - static const char *const mem_type_to_name[TTM_NUM_MEM_TYPES] = { - [XE_PL_SYSTEM] = "system", - [XE_PL_TT] = "gtt", - [XE_PL_VRAM0] = "vram0", - [XE_PL_VRAM1] = "vram1", - [4 ... 6] = NULL, - [XE_PL_STOLEN] = "stolen" - }; struct drm_memory_stats stats[TTM_NUM_MEM_TYPES] = {}; struct xe_file *xef = file->driver_priv; struct ttm_device *bdev = &xef->xe->ttm; @@ -171,7 +163,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) spin_unlock(&client->bos_lock); for (mem_type = XE_PL_SYSTEM; mem_type < TTM_NUM_MEM_TYPES; ++mem_type) { - if (!mem_type_to_name[mem_type]) + if (!xe_mem_type_to_name[mem_type]) continue; man = ttm_manager_type(bdev, mem_type); @@ -182,7 +174,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) DRM_GEM_OBJECT_RESIDENT | (mem_type != XE_PL_SYSTEM ? 0 : DRM_GEM_OBJECT_PURGEABLE), - mem_type_to_name[mem_type]); + xe_mem_type_to_name[mem_type]); } } } diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 254b1d3af4cb..49223026c89f 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -60,7 +60,6 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe, q->fence_irq = >->fence_irq[hwe->class]; q->ring_ops = gt->ring_ops[hwe->class]; q->ops = gt->exec_queue_ops; - INIT_LIST_HEAD(&q->persistent.link); INIT_LIST_HEAD(&q->compute.link); INIT_LIST_HEAD(&q->multi_gt_link); @@ -310,102 +309,6 @@ static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue * return q->ops->set_timeslice(q, value); } -static int exec_queue_set_preemption_timeout(struct xe_device *xe, - struct xe_exec_queue *q, u64 value, - bool create) -{ - u32 min = 0, max = 0; - - xe_exec_queue_get_prop_minmax(q->hwe->eclass, - XE_EXEC_QUEUE_PREEMPT_TIMEOUT, &min, &max); - - if (xe_exec_queue_enforce_schedule_limit() && - !xe_hw_engine_timeout_in_range(value, min, max)) - return -EINVAL; - - return q->ops->set_preempt_timeout(q, value); -} - -static int exec_queue_set_persistence(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, xe_vm_in_preempt_fence_mode(q->vm))) - return -EINVAL; - - if (value) - q->flags |= EXEC_QUEUE_FLAG_PERSISTENT; - else - q->flags &= ~EXEC_QUEUE_FLAG_PERSISTENT; - - return 0; -} - -static int exec_queue_set_job_timeout(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - u32 min = 0, max = 0; - - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - xe_exec_queue_get_prop_minmax(q->hwe->eclass, - XE_EXEC_QUEUE_JOB_TIMEOUT, &min, &max); - - if (xe_exec_queue_enforce_schedule_limit() && - !xe_hw_engine_timeout_in_range(value, min, max)) - return -EINVAL; - - return q->ops->set_job_timeout(q, value); -} - -static int exec_queue_set_acc_trigger(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !xe->info.has_usm)) - return -EINVAL; - - q->usm.acc_trigger = value; - - return 0; -} - -static int exec_queue_set_acc_notify(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !xe->info.has_usm)) - return -EINVAL; - - q->usm.acc_notify = value; - - return 0; -} - -static int exec_queue_set_acc_granularity(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !xe->info.has_usm)) - return -EINVAL; - - if (value > DRM_XE_ACC_GRANULARITY_64M) - return -EINVAL; - - q->usm.acc_granularity = value; - - return 0; -} - typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, struct xe_exec_queue *q, u64 value, bool create); @@ -413,12 +316,6 @@ typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority, [DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT] = exec_queue_set_preemption_timeout, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE] = exec_queue_set_persistence, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT] = exec_queue_set_job_timeout, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER] = exec_queue_set_acc_trigger, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY] = exec_queue_set_acc_notify, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY] = exec_queue_set_acc_granularity, }; static int exec_queue_user_ext_set_property(struct xe_device *xe, @@ -437,10 +334,15 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe, if (XE_IOCTL_DBG(xe, ext.property >= ARRAY_SIZE(exec_queue_set_property_funcs)) || - XE_IOCTL_DBG(xe, ext.pad)) + XE_IOCTL_DBG(xe, ext.pad) || + XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY && + ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE)) return -EINVAL; idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs)); + if (!exec_queue_set_property_funcs[idx]) + return -EINVAL; + return exec_queue_set_property_funcs[idx](xe, q, ext.value, create); } @@ -704,9 +606,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, } q = xe_exec_queue_create(xe, vm, logical_mask, - args->width, hwe, - xe_vm_in_lr_mode(vm) ? 0 : - EXEC_QUEUE_FLAG_PERSISTENT); + args->width, hwe, 0); up_read(&vm->lock); xe_vm_put(vm); if (IS_ERR(q)) @@ -728,8 +628,6 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, goto kill_exec_queue; } - q->persistent.xef = xef; - mutex_lock(&xef->exec_queue.lock); err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL); mutex_unlock(&xef->exec_queue.lock); @@ -872,10 +770,7 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, !q)) return -ENOENT; - if (!(q->flags & EXEC_QUEUE_FLAG_PERSISTENT)) - xe_exec_queue_kill(q); - else - xe_device_add_persistent_exec_queues(xe, q); + xe_exec_queue_kill(q); trace_xe_exec_queue_close(q); xe_exec_queue_put(q); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 8d4b7feb8c30..36f4901d8d7e 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -105,16 +105,6 @@ struct xe_exec_queue { struct xe_guc_exec_queue *guc; }; - /** - * @persistent: persistent exec queue state - */ - struct { - /** @xef: file which this exec queue belongs to */ - struct xe_file *xef; - /** @link: link in list of persistent exec queues */ - struct list_head link; - } persistent; - union { /** * @parallel: parallel submission state @@ -160,16 +150,6 @@ struct xe_exec_queue { spinlock_t lock; } compute; - /** @usm: unified shared memory state */ - struct { - /** @acc_trigger: access counter trigger */ - u32 acc_trigger; - /** @acc_notify: access counter notify */ - u32 acc_notify; - /** @acc_granularity: access counter granularity */ - u32 acc_granularity; - } usm; - /** @ops: submission backend exec queue operations */ const struct xe_exec_queue_ops *ops; diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 96b5224eb478..acb4d9f38fd7 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -212,7 +212,7 @@ static void xe_execlist_port_wake_locked(struct xe_execlist_port *port, static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl) { struct xe_execlist_port *port = exl->port; - enum xe_exec_queue_priority priority = exl->active_priority; + enum xe_exec_queue_priority priority = exl->q->sched_props.priority; XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET); XE_WARN_ON(priority < 0); @@ -378,8 +378,6 @@ static void execlist_exec_queue_fini_async(struct work_struct *w) list_del(&exl->active_link); spin_unlock_irqrestore(&exl->port->lock, flags); - if (q->flags & EXEC_QUEUE_FLAG_PERSISTENT) - xe_device_remove_persistent_exec_queues(xe, q); drm_sched_entity_fini(&exl->entity); drm_sched_fini(&exl->sched); kfree(exl); diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index 9358f7336889..9fcae65b6469 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -145,10 +145,10 @@ void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle) } if (xe_gt_is_media_type(gt)) { - sprintf(gtidle->name, "gt%d-mc\n", gt->info.id); + sprintf(gtidle->name, "gt%d-mc", gt->info.id); gtidle->idle_residency = xe_guc_pc_mc6_residency; } else { - sprintf(gtidle->name, "gt%d-rc\n", gt->info.id); + sprintf(gtidle->name, "gt%d-rc", gt->info.id); gtidle->idle_residency = xe_guc_pc_rc6_residency; } diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 7eef23a00d77..f4c485289dbe 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -247,6 +247,14 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, xe_gt_assert(gt, vma); + /* Execlists not supported */ + if (gt_to_xe(gt)->info.force_execlist) { + if (fence) + __invalidation_fence_signal(fence); + + return 0; + } + action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */ if (!xe->info.has_range_tlb_invalidation) { @@ -317,6 +325,10 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) struct drm_printer p = drm_err_printer(__func__); int ret; + /* Execlists not supported */ + if (gt_to_xe(gt)->info.force_execlist) + return 0; + /* * XXX: See above, this algorithm only works if seqno are always in * order diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 54ffcfcdd41f..f22ae717b0b2 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1028,8 +1028,6 @@ static void __guc_exec_queue_fini_async(struct work_struct *w) if (xe_exec_queue_is_lr(q)) cancel_work_sync(&ge->lr_tdr); - if (q->flags & EXEC_QUEUE_FLAG_PERSISTENT) - xe_device_remove_persistent_exec_queues(gt_to_xe(q->gt), q); release_guc_id(guc, q); xe_sched_entity_fini(&ge->entity); xe_sched_fini(&ge->sched); diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 0ec5ad2539f1..b38319d2801e 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -682,8 +682,6 @@ static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) #define PVC_CTX_ASID (0x2e + 1) #define PVC_CTX_ACC_CTR_THOLD (0x2a + 1) -#define ACC_GRANULARITY_S 20 -#define ACC_NOTIFY_S 16 int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size) @@ -754,13 +752,7 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL, RING_CTL_SIZE(lrc->ring.size) | RING_VALID); if (xe->info.has_asid && vm) - xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, - (q->usm.acc_granularity << - ACC_GRANULARITY_S) | vm->usm.asid); - if (xe->info.has_usm && vm) - xe_lrc_write_ctx_reg(lrc, PVC_CTX_ACC_CTR_THOLD, - (q->usm.acc_notify << ACC_NOTIFY_S) | - q->usm.acc_trigger); + xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); lrc->desc = LRC_VALID; lrc->desc |= LRC_LEGACY_64B_CONTEXT << LRC_ADDRESSING_MODE_SHIFT; diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 5f6b53ea5528..02f7808f28ca 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -105,7 +105,7 @@ static void xe_resize_vram_bar(struct xe_device *xe) pci_bus_for_each_resource(root, root_res, i) { if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) && - root_res->start > 0x100000000ull) + (u64)root_res->start > 0x100000000ul) break; } diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index ac19bfa3f798..6653c045f3c9 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -499,10 +499,12 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, * this device *requires* 64K PTE size for VRAM, fail. */ if (level == 0 && !xe_parent->is_compact) { - if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) + if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) { + xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K; pte |= XE_PTE_PS64; - else if (XE_WARN_ON(xe_walk->needs_64K)) + } else if (XE_WARN_ON(xe_walk->needs_64K)) { return -EINVAL; + } } ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte); @@ -545,13 +547,16 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, *child = &xe_child->base; /* - * Prefer the compact pagetable layout for L0 if possible. + * Prefer the compact pagetable layout for L0 if possible. Only + * possible if VMA covers entire 2MB region as compact 64k and + * 4k pages cannot be mixed within a 2MB region. * TODO: Suballocate the pt bo to avoid wasting a lot of * memory. */ if (GRAPHICS_VERx100(tile_to_xe(xe_walk->tile)) >= 1250 && level == 1 && covers && xe_pt_scan_64K(addr, next, xe_walk)) { walk->shifts = xe_compact_pt_shifts; + xe_walk->vma->gpuva.flags |= XE_VMA_PTE_COMPACT; flags |= XE_PDE_64K; xe_child->is_compact = true; } diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index aab92bee1d7c..02c9577fe418 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -19,7 +19,7 @@ #include "xe_macros.h" #include "xe_sched_job_types.h" -struct user_fence { +struct xe_user_fence { struct xe_device *xe; struct kref refcount; struct dma_fence_cb cb; @@ -27,31 +27,32 @@ struct user_fence { struct mm_struct *mm; u64 __user *addr; u64 value; + int signalled; }; static void user_fence_destroy(struct kref *kref) { - struct user_fence *ufence = container_of(kref, struct user_fence, + struct xe_user_fence *ufence = container_of(kref, struct xe_user_fence, refcount); mmdrop(ufence->mm); kfree(ufence); } -static void user_fence_get(struct user_fence *ufence) +static void user_fence_get(struct xe_user_fence *ufence) { kref_get(&ufence->refcount); } -static void user_fence_put(struct user_fence *ufence) +static void user_fence_put(struct xe_user_fence *ufence) { kref_put(&ufence->refcount, user_fence_destroy); } -static struct user_fence *user_fence_create(struct xe_device *xe, u64 addr, - u64 value) +static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr, + u64 value) { - struct user_fence *ufence; + struct xe_user_fence *ufence; ufence = kmalloc(sizeof(*ufence), GFP_KERNEL); if (!ufence) @@ -69,7 +70,7 @@ static struct user_fence *user_fence_create(struct xe_device *xe, u64 addr, static void user_fence_worker(struct work_struct *w) { - struct user_fence *ufence = container_of(w, struct user_fence, worker); + struct xe_user_fence *ufence = container_of(w, struct xe_user_fence, worker); if (mmget_not_zero(ufence->mm)) { kthread_use_mm(ufence->mm); @@ -80,10 +81,11 @@ static void user_fence_worker(struct work_struct *w) } wake_up_all(&ufence->xe->ufence_wq); + WRITE_ONCE(ufence->signalled, 1); user_fence_put(ufence); } -static void kick_ufence(struct user_fence *ufence, struct dma_fence *fence) +static void kick_ufence(struct xe_user_fence *ufence, struct dma_fence *fence) { INIT_WORK(&ufence->worker, user_fence_worker); queue_work(ufence->xe->ordered_wq, &ufence->worker); @@ -92,7 +94,7 @@ static void kick_ufence(struct user_fence *ufence, struct dma_fence *fence) static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) { - struct user_fence *ufence = container_of(cb, struct user_fence, cb); + struct xe_user_fence *ufence = container_of(cb, struct xe_user_fence, cb); kick_ufence(ufence, fence); } @@ -340,3 +342,39 @@ err_out: return ERR_PTR(-ENOMEM); } + +/** + * xe_sync_ufence_get() - Get user fence from sync + * @sync: input sync + * + * Get a user fence reference from sync. + * + * Return: xe_user_fence pointer with reference + */ +struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync) +{ + user_fence_get(sync->ufence); + + return sync->ufence; +} + +/** + * xe_sync_ufence_put() - Put user fence reference + * @ufence: user fence reference + * + */ +void xe_sync_ufence_put(struct xe_user_fence *ufence) +{ + user_fence_put(ufence); +} + +/** + * xe_sync_ufence_get_status() - Get user fence status + * @ufence: user fence + * + * Return: 1 if signalled, 0 not signalled, <0 on error + */ +int xe_sync_ufence_get_status(struct xe_user_fence *ufence) +{ + return READ_ONCE(ufence->signalled); +} diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h index f43cdcaca6c5..0fd0d51208e6 100644 --- a/drivers/gpu/drm/xe/xe_sync.h +++ b/drivers/gpu/drm/xe/xe_sync.h @@ -38,4 +38,8 @@ static inline bool xe_sync_is_ufence(struct xe_sync_entry *sync) return !!sync->ufence; } +struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync); +void xe_sync_ufence_put(struct xe_user_fence *ufence); +int xe_sync_ufence_get_status(struct xe_user_fence *ufence); + #endif diff --git a/drivers/gpu/drm/xe/xe_sync_types.h b/drivers/gpu/drm/xe/xe_sync_types.h index 852db5e7884f..30ac3f51993b 100644 --- a/drivers/gpu/drm/xe/xe_sync_types.h +++ b/drivers/gpu/drm/xe/xe_sync_types.h @@ -18,7 +18,7 @@ struct xe_sync_entry { struct drm_syncobj *syncobj; struct dma_fence *fence; struct dma_fence_chain *chain_fence; - struct user_fence *ufence; + struct xe_user_fence *ufence; u64 addr; u64 timeline_value; u32 type; diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 95163c303f3e..4ddc55527f9a 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -12,6 +12,7 @@ #include <linux/tracepoint.h> #include <linux/types.h> +#include "xe_bo.h" #include "xe_bo_types.h" #include "xe_exec_queue_types.h" #include "xe_gpu_scheduler_types.h" @@ -26,16 +27,16 @@ DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence, TP_ARGS(fence), TP_STRUCT__entry( - __field(u64, fence) + __field(struct xe_gt_tlb_invalidation_fence *, fence) __field(int, seqno) ), TP_fast_assign( - __entry->fence = (u64)fence; + __entry->fence = fence; __entry->seqno = fence->seqno; ), - TP_printk("fence=0x%016llx, seqno=%d", + TP_printk("fence=%p, seqno=%d", __entry->fence, __entry->seqno) ); @@ -82,16 +83,16 @@ DECLARE_EVENT_CLASS(xe_bo, TP_STRUCT__entry( __field(size_t, size) __field(u32, flags) - __field(u64, vm) + __field(struct xe_vm *, vm) ), TP_fast_assign( __entry->size = bo->size; __entry->flags = bo->flags; - __entry->vm = (unsigned long)bo->vm; + __entry->vm = bo->vm; ), - TP_printk("size=%zu, flags=0x%02x, vm=0x%016llx", + TP_printk("size=%zu, flags=0x%02x, vm=%p", __entry->size, __entry->flags, __entry->vm) ); @@ -100,9 +101,31 @@ DEFINE_EVENT(xe_bo, xe_bo_cpu_fault, TP_ARGS(bo) ); -DEFINE_EVENT(xe_bo, xe_bo_move, - TP_PROTO(struct xe_bo *bo), - TP_ARGS(bo) +TRACE_EVENT(xe_bo_move, + TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement, + bool move_lacks_source), + TP_ARGS(bo, new_placement, old_placement, move_lacks_source), + TP_STRUCT__entry( + __field(struct xe_bo *, bo) + __field(size_t, size) + __field(u32, new_placement) + __field(u32, old_placement) + __array(char, device_id, 12) + __field(bool, move_lacks_source) + ), + + TP_fast_assign( + __entry->bo = bo; + __entry->size = bo->size; + __entry->new_placement = new_placement; + __entry->old_placement = old_placement; + strscpy(__entry->device_id, dev_name(xe_bo_device(__entry->bo)->drm.dev), 12); + __entry->move_lacks_source = move_lacks_source; + ), + TP_printk("move_lacks_source:%s, migrate object %p [size %zu] from %s to %s device_id:%s", + __entry->move_lacks_source ? "yes" : "no", __entry->bo, __entry->size, + xe_mem_type_to_name[__entry->old_placement], + xe_mem_type_to_name[__entry->new_placement], __entry->device_id) ); DECLARE_EVENT_CLASS(xe_exec_queue, @@ -327,16 +350,16 @@ DECLARE_EVENT_CLASS(xe_hw_fence, TP_STRUCT__entry( __field(u64, ctx) __field(u32, seqno) - __field(u64, fence) + __field(struct xe_hw_fence *, fence) ), TP_fast_assign( __entry->ctx = fence->dma.context; __entry->seqno = fence->dma.seqno; - __entry->fence = (unsigned long)fence; + __entry->fence = fence; ), - TP_printk("ctx=0x%016llx, fence=0x%016llx, seqno=%u", + TP_printk("ctx=0x%016llx, fence=%p, seqno=%u", __entry->ctx, __entry->fence, __entry->seqno) ); @@ -365,7 +388,7 @@ DECLARE_EVENT_CLASS(xe_vma, TP_ARGS(vma), TP_STRUCT__entry( - __field(u64, vma) + __field(struct xe_vma *, vma) __field(u32, asid) __field(u64, start) __field(u64, end) @@ -373,14 +396,14 @@ DECLARE_EVENT_CLASS(xe_vma, ), TP_fast_assign( - __entry->vma = (unsigned long)vma; + __entry->vma = vma; __entry->asid = xe_vma_vm(vma)->usm.asid; __entry->start = xe_vma_start(vma); __entry->end = xe_vma_end(vma) - 1; __entry->ptr = xe_vma_userptr(vma); ), - TP_printk("vma=0x%016llx, asid=0x%05x, start=0x%012llx, end=0x%012llx, ptr=0x%012llx,", + TP_printk("vma=%p, asid=0x%05x, start=0x%012llx, end=0x%012llx, userptr=0x%012llx,", __entry->vma, __entry->asid, __entry->start, __entry->end, __entry->ptr) ) @@ -465,16 +488,16 @@ DECLARE_EVENT_CLASS(xe_vm, TP_ARGS(vm), TP_STRUCT__entry( - __field(u64, vm) + __field(struct xe_vm *, vm) __field(u32, asid) ), TP_fast_assign( - __entry->vm = (unsigned long)vm; + __entry->vm = vm; __entry->asid = vm->usm.asid; ), - TP_printk("vm=0x%016llx, asid=0x%05x", __entry->vm, + TP_printk("vm=%p, asid=0x%05x", __entry->vm, __entry->asid) ); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 7b00faa67287..3b21afe5b488 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -897,6 +897,11 @@ static void xe_vma_destroy_late(struct xe_vma *vma) struct xe_device *xe = vm->xe; bool read_only = xe_vma_read_only(vma); + if (vma->ufence) { + xe_sync_ufence_put(vma->ufence); + vma->ufence = NULL; + } + if (xe_vma_is_userptr(vma)) { struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; @@ -1608,6 +1613,16 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q, trace_xe_vma_unbind(vma); + if (vma->ufence) { + struct xe_user_fence * const f = vma->ufence; + + if (!xe_sync_ufence_get_status(f)) + return ERR_PTR(-EBUSY); + + vma->ufence = NULL; + xe_sync_ufence_put(f); + } + if (number_tiles > 1) { fences = kmalloc_array(number_tiles, sizeof(*fences), GFP_KERNEL); @@ -1741,6 +1756,21 @@ err_fences: return ERR_PTR(err); } +static struct xe_user_fence * +find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) +{ + unsigned int i; + + for (i = 0; i < num_syncs; i++) { + struct xe_sync_entry *e = &syncs[i]; + + if (xe_sync_is_ufence(e)) + return xe_sync_ufence_get(e); + } + + return NULL; +} + static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, struct xe_sync_entry *syncs, u32 num_syncs, bool immediate, bool first_op, @@ -1748,9 +1778,16 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, { struct dma_fence *fence; struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); + struct xe_user_fence *ufence; xe_vm_assert_held(vm); + ufence = find_ufence_get(syncs, num_syncs); + if (vma->ufence && ufence) + xe_sync_ufence_put(vma->ufence); + + vma->ufence = ufence ?: vma->ufence; + if (immediate) { fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op, last_op); @@ -2117,10 +2154,6 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, struct xe_vma_op *op = gpuva_op_to_vma_op(__op); if (__op->op == DRM_GPUVA_OP_MAP) { - op->map.immediate = - flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; - op->map.read_only = - flags & DRM_XE_VM_BIND_FLAG_READONLY; op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; op->map.pat_index = pat_index; } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { @@ -2190,15 +2223,17 @@ static u64 xe_vma_max_pte_size(struct xe_vma *vma) { if (vma->gpuva.flags & XE_VMA_PTE_1G) return SZ_1G; - else if (vma->gpuva.flags & XE_VMA_PTE_2M) + else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT)) return SZ_2M; + else if (vma->gpuva.flags & XE_VMA_PTE_64K) + return SZ_64K; else if (vma->gpuva.flags & XE_VMA_PTE_4K) return SZ_4K; return SZ_1G; /* Uninitialized, used max size */ } -static u64 xe_vma_set_pte_size(struct xe_vma *vma, u64 size) +static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size) { switch (size) { case SZ_1G: @@ -2207,9 +2242,13 @@ static u64 xe_vma_set_pte_size(struct xe_vma *vma, u64 size) case SZ_2M: vma->gpuva.flags |= XE_VMA_PTE_2M; break; + case SZ_64K: + vma->gpuva.flags |= XE_VMA_PTE_64K; + break; + case SZ_4K: + vma->gpuva.flags |= XE_VMA_PTE_4K; + break; } - - return SZ_4K; } static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) @@ -2307,8 +2346,6 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, switch (op->base.op) { case DRM_GPUVA_OP_MAP: { - flags |= op->map.read_only ? - VMA_CREATE_FLAG_READ_ONLY : 0; flags |= op->map.is_null ? VMA_CREATE_FLAG_IS_NULL : 0; @@ -2439,7 +2476,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm, case DRM_GPUVA_OP_MAP: err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma), op->syncs, op->num_syncs, - op->map.immediate || !xe_vm_in_fault_mode(vm), + !xe_vm_in_fault_mode(vm), op->flags & XE_VMA_OP_FIRST, op->flags & XE_VMA_OP_LAST); break; @@ -2714,14 +2751,11 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, return 0; } -#define SUPPORTED_FLAGS \ - (DRM_XE_VM_BIND_FLAG_READONLY | \ - DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL) +#define SUPPORTED_FLAGS (DRM_XE_VM_BIND_FLAG_NULL | \ + DRM_XE_VM_BIND_FLAG_DUMPABLE) #define XE_64K_PAGE_MASK 0xffffull #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) -#define MAX_BINDS 512 /* FIXME: Picking random upper limit */ - static int vm_bind_ioctl_check_args(struct xe_device *xe, struct drm_xe_vm_bind *args, struct drm_xe_vm_bind_op **bind_ops) @@ -2733,16 +2767,16 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; - if (XE_IOCTL_DBG(xe, args->extensions) || - XE_IOCTL_DBG(xe, args->num_binds > MAX_BINDS)) + if (XE_IOCTL_DBG(xe, args->extensions)) return -EINVAL; if (args->num_binds > 1) { u64 __user *bind_user = u64_to_user_ptr(args->vector_of_binds); - *bind_ops = kmalloc(sizeof(struct drm_xe_vm_bind_op) * - args->num_binds, GFP_KERNEL); + *bind_ops = kvmalloc_array(args->num_binds, + sizeof(struct drm_xe_vm_bind_op), + GFP_KERNEL | __GFP_ACCOUNT); if (!*bind_ops) return -ENOMEM; @@ -2832,7 +2866,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, free_bind_ops: if (args->num_binds > 1) - kfree(*bind_ops); + kvfree(*bind_ops); return err; } @@ -2920,13 +2954,15 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } if (args->num_binds) { - bos = kcalloc(args->num_binds, sizeof(*bos), GFP_KERNEL); + bos = kvcalloc(args->num_binds, sizeof(*bos), + GFP_KERNEL | __GFP_ACCOUNT); if (!bos) { err = -ENOMEM; goto release_vm_lock; } - ops = kcalloc(args->num_binds, sizeof(*ops), GFP_KERNEL); + ops = kvcalloc(args->num_binds, sizeof(*ops), + GFP_KERNEL | __GFP_ACCOUNT); if (!ops) { err = -ENOMEM; goto release_vm_lock; @@ -3067,10 +3103,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) for (i = 0; bos && i < args->num_binds; ++i) xe_bo_put(bos[i]); - kfree(bos); - kfree(ops); + kvfree(bos); + kvfree(ops); if (args->num_binds > 1) - kfree(bind_ops); + kvfree(bind_ops); return err; @@ -3094,10 +3130,10 @@ put_exec_queue: if (q) xe_exec_queue_put(q); free_objs: - kfree(bos); - kfree(ops); + kvfree(bos); + kvfree(ops); if (args->num_binds > 1) - kfree(bind_ops); + kvfree(bind_ops); return err; } diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 5ac9c5bebabc..7300eea5394b 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -19,6 +19,7 @@ struct xe_bo; struct xe_sync_entry; +struct xe_user_fence; struct xe_vm; #define XE_VMA_READ_ONLY DRM_GPUVA_USERBITS @@ -29,6 +30,8 @@ struct xe_vm; #define XE_VMA_PTE_4K (DRM_GPUVA_USERBITS << 5) #define XE_VMA_PTE_2M (DRM_GPUVA_USERBITS << 6) #define XE_VMA_PTE_1G (DRM_GPUVA_USERBITS << 7) +#define XE_VMA_PTE_64K (DRM_GPUVA_USERBITS << 8) +#define XE_VMA_PTE_COMPACT (DRM_GPUVA_USERBITS << 9) /** struct xe_userptr - User pointer */ struct xe_userptr { @@ -102,6 +105,12 @@ struct xe_vma { * @pat_index: The pat index to use when encoding the PTEs for this vma. */ u16 pat_index; + + /** + * @ufence: The user fence that was provided with MAP. + * Needs to be signalled before UNMAP can be processed. + */ + struct xe_user_fence *ufence; }; /** @@ -286,10 +295,6 @@ struct xe_vm { struct xe_vma_op_map { /** @vma: VMA to map */ struct xe_vma *vma; - /** @immediate: Immediate bind */ - bool immediate; - /** @read_only: Read only */ - bool read_only; /** @is_null: is NULL binding */ bool is_null; /** @pat_index: The pat index to use for this operation. */ diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 42fd504abbcd..89983d7d73ca 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -169,6 +169,7 @@ static const struct host1x_info host1x06_info = { .num_sid_entries = ARRAY_SIZE(tegra186_sid_table), .sid_table = tegra186_sid_table, .reserve_vblank_syncpts = false, + .skip_reset_assert = true, }; static const struct host1x_sid_entry tegra194_sid_table[] = { @@ -680,13 +681,15 @@ static int __maybe_unused host1x_runtime_suspend(struct device *dev) host1x_intr_stop(host); host1x_syncpt_save(host); - err = reset_control_bulk_assert(host->nresets, host->resets); - if (err) { - dev_err(dev, "failed to assert reset: %d\n", err); - goto resume_host1x; - } + if (!host->info->skip_reset_assert) { + err = reset_control_bulk_assert(host->nresets, host->resets); + if (err) { + dev_err(dev, "failed to assert reset: %d\n", err); + goto resume_host1x; + } - usleep_range(1000, 2000); + usleep_range(1000, 2000); + } clk_disable_unprepare(host->clk); reset_control_bulk_release(host->nresets, host->resets); diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h index c8e302de7625..925a118db23f 100644 --- a/drivers/gpu/host1x/dev.h +++ b/drivers/gpu/host1x/dev.h @@ -116,6 +116,12 @@ struct host1x_info { * the display driver disables VBLANK increments. */ bool reserve_vblank_syncpts; + /* + * On Tegra186, secure world applications may require access to + * host1x during suspend/resume. To allow this, we need to leave + * host1x not in reset. + */ + bool skip_reset_assert; }; struct host1x { diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c index 470ae2c29c94..e630caf644e8 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.c +++ b/drivers/hid/bpf/hid_bpf_dispatch.c @@ -176,9 +176,9 @@ __bpf_kfunc_end_defs(); * The following set contains all functions we agree BPF programs * can use. */ -BTF_SET8_START(hid_bpf_kfunc_ids) +BTF_KFUNCS_START(hid_bpf_kfunc_ids) BTF_ID_FLAGS(func, hid_bpf_get_data, KF_RET_NULL) -BTF_SET8_END(hid_bpf_kfunc_ids) +BTF_KFUNCS_END(hid_bpf_kfunc_ids) static const struct btf_kfunc_id_set hid_bpf_kfunc_set = { .owner = THIS_MODULE, @@ -487,12 +487,12 @@ static const struct btf_kfunc_id_set hid_bpf_fmodret_set = { }; /* for syscall HID-BPF */ -BTF_SET8_START(hid_bpf_syscall_kfunc_ids) +BTF_KFUNCS_START(hid_bpf_syscall_kfunc_ids) BTF_ID_FLAGS(func, hid_bpf_attach_prog) BTF_ID_FLAGS(func, hid_bpf_allocate_context, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, hid_bpf_release_context, KF_RELEASE) BTF_ID_FLAGS(func, hid_bpf_hw_request) -BTF_SET8_END(hid_bpf_syscall_kfunc_ids) +BTF_KFUNCS_END(hid_bpf_syscall_kfunc_ids) static const struct btf_kfunc_id_set hid_bpf_syscall_kfunc_set = { .owner = THIS_MODULE, diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 56f7e06c673e..adbf674355b2 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -322,125 +322,89 @@ static int create_gpadl_header(enum hv_gpadl_type type, void *kbuffer, pagecount = hv_gpadl_size(type, size) >> HV_HYP_PAGE_SHIFT; - /* do we need a gpadl body msg */ pfnsize = MAX_SIZE_CHANNEL_MESSAGE - sizeof(struct vmbus_channel_gpadl_header) - sizeof(struct gpa_range); + pfncount = umin(pagecount, pfnsize / sizeof(u64)); + + msgsize = sizeof(struct vmbus_channel_msginfo) + + sizeof(struct vmbus_channel_gpadl_header) + + sizeof(struct gpa_range) + pfncount * sizeof(u64); + msgheader = kzalloc(msgsize, GFP_KERNEL); + if (!msgheader) + return -ENOMEM; + + INIT_LIST_HEAD(&msgheader->submsglist); + msgheader->msgsize = msgsize; + + gpadl_header = (struct vmbus_channel_gpadl_header *) + msgheader->msg; + gpadl_header->rangecount = 1; + gpadl_header->range_buflen = sizeof(struct gpa_range) + + pagecount * sizeof(u64); + gpadl_header->range[0].byte_offset = 0; + gpadl_header->range[0].byte_count = hv_gpadl_size(type, size); + for (i = 0; i < pfncount; i++) + gpadl_header->range[0].pfn_array[i] = hv_gpadl_hvpfn( + type, kbuffer, size, send_offset, i); + *msginfo = msgheader; + + pfnsum = pfncount; + pfnleft = pagecount - pfncount; + + /* how many pfns can we fit in a body message */ + pfnsize = MAX_SIZE_CHANNEL_MESSAGE - + sizeof(struct vmbus_channel_gpadl_body); pfncount = pfnsize / sizeof(u64); - if (pagecount > pfncount) { - /* we need a gpadl body */ - /* fill in the header */ + /* + * If pfnleft is zero, everything fits in the header and no body + * messages are needed + */ + while (pfnleft) { + pfncurr = umin(pfncount, pfnleft); msgsize = sizeof(struct vmbus_channel_msginfo) + - sizeof(struct vmbus_channel_gpadl_header) + - sizeof(struct gpa_range) + pfncount * sizeof(u64); - msgheader = kzalloc(msgsize, GFP_KERNEL); - if (!msgheader) - goto nomem; - - INIT_LIST_HEAD(&msgheader->submsglist); - msgheader->msgsize = msgsize; - - gpadl_header = (struct vmbus_channel_gpadl_header *) - msgheader->msg; - gpadl_header->rangecount = 1; - gpadl_header->range_buflen = sizeof(struct gpa_range) + - pagecount * sizeof(u64); - gpadl_header->range[0].byte_offset = 0; - gpadl_header->range[0].byte_count = hv_gpadl_size(type, size); - for (i = 0; i < pfncount; i++) - gpadl_header->range[0].pfn_array[i] = hv_gpadl_hvpfn( - type, kbuffer, size, send_offset, i); - *msginfo = msgheader; - - pfnsum = pfncount; - pfnleft = pagecount - pfncount; - - /* how many pfns can we fit */ - pfnsize = MAX_SIZE_CHANNEL_MESSAGE - - sizeof(struct vmbus_channel_gpadl_body); - pfncount = pfnsize / sizeof(u64); - - /* fill in the body */ - while (pfnleft) { - if (pfnleft > pfncount) - pfncurr = pfncount; - else - pfncurr = pfnleft; - - msgsize = sizeof(struct vmbus_channel_msginfo) + - sizeof(struct vmbus_channel_gpadl_body) + - pfncurr * sizeof(u64); - msgbody = kzalloc(msgsize, GFP_KERNEL); - - if (!msgbody) { - struct vmbus_channel_msginfo *pos = NULL; - struct vmbus_channel_msginfo *tmp = NULL; - /* - * Free up all the allocated messages. - */ - list_for_each_entry_safe(pos, tmp, - &msgheader->submsglist, - msglistentry) { - - list_del(&pos->msglistentry); - kfree(pos); - } - - goto nomem; - } - - msgbody->msgsize = msgsize; - gpadl_body = - (struct vmbus_channel_gpadl_body *)msgbody->msg; + sizeof(struct vmbus_channel_gpadl_body) + + pfncurr * sizeof(u64); + msgbody = kzalloc(msgsize, GFP_KERNEL); + if (!msgbody) { + struct vmbus_channel_msginfo *pos = NULL; + struct vmbus_channel_msginfo *tmp = NULL; /* - * Gpadl is u32 and we are using a pointer which could - * be 64-bit - * This is governed by the guest/host protocol and - * so the hypervisor guarantees that this is ok. + * Free up all the allocated messages. */ - for (i = 0; i < pfncurr; i++) - gpadl_body->pfn[i] = hv_gpadl_hvpfn(type, - kbuffer, size, send_offset, pfnsum + i); - - /* add to msg header */ - list_add_tail(&msgbody->msglistentry, - &msgheader->submsglist); - pfnsum += pfncurr; - pfnleft -= pfncurr; + list_for_each_entry_safe(pos, tmp, + &msgheader->submsglist, + msglistentry) { + + list_del(&pos->msglistentry); + kfree(pos); + } + kfree(msgheader); + return -ENOMEM; } - } else { - /* everything fits in a header */ - msgsize = sizeof(struct vmbus_channel_msginfo) + - sizeof(struct vmbus_channel_gpadl_header) + - sizeof(struct gpa_range) + pagecount * sizeof(u64); - msgheader = kzalloc(msgsize, GFP_KERNEL); - if (msgheader == NULL) - goto nomem; - - INIT_LIST_HEAD(&msgheader->submsglist); - msgheader->msgsize = msgsize; - - gpadl_header = (struct vmbus_channel_gpadl_header *) - msgheader->msg; - gpadl_header->rangecount = 1; - gpadl_header->range_buflen = sizeof(struct gpa_range) + - pagecount * sizeof(u64); - gpadl_header->range[0].byte_offset = 0; - gpadl_header->range[0].byte_count = hv_gpadl_size(type, size); - for (i = 0; i < pagecount; i++) - gpadl_header->range[0].pfn_array[i] = hv_gpadl_hvpfn( - type, kbuffer, size, send_offset, i); - - *msginfo = msgheader; + + msgbody->msgsize = msgsize; + gpadl_body = (struct vmbus_channel_gpadl_body *)msgbody->msg; + + /* + * Gpadl is u32 and we are using a pointer which could + * be 64-bit + * This is governed by the guest/host protocol and + * so the hypervisor guarantees that this is ok. + */ + for (i = 0; i < pfncurr; i++) + gpadl_body->pfn[i] = hv_gpadl_hvpfn(type, + kbuffer, size, send_offset, pfnsum + i); + + /* add to msg header */ + list_add_tail(&msgbody->msglistentry, &msgheader->submsglist); + pfnsum += pfncurr; + pfnleft -= pfncurr; } return 0; -nomem: - kfree(msgheader); - kfree(msgbody); - return -ENOMEM; } /* diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c index 42aec2c5606a..9c97c4065fe7 100644 --- a/drivers/hv/hv_util.c +++ b/drivers/hv/hv_util.c @@ -296,6 +296,11 @@ static struct { spinlock_t lock; } host_ts; +static bool timesync_implicit; + +module_param(timesync_implicit, bool, 0644); +MODULE_PARM_DESC(timesync_implicit, "If set treat SAMPLE as SYNC when clock is behind"); + static inline u64 reftime_to_ns(u64 reftime) { return (reftime - WLTIMEDELTA) * 100; @@ -345,6 +350,29 @@ static void hv_set_host_time(struct work_struct *work) } /* + * Due to a bug on Hyper-V hosts, the sync flag may not always be sent on resume. + * Force a sync if the guest is behind. + */ +static inline bool hv_implicit_sync(u64 host_time) +{ + struct timespec64 new_ts; + struct timespec64 threshold_ts; + + new_ts = ns_to_timespec64(reftime_to_ns(host_time)); + ktime_get_real_ts64(&threshold_ts); + + threshold_ts.tv_sec += 5; + + /* + * If guest behind the host by 5 or more seconds. + */ + if (timespec64_compare(&new_ts, &threshold_ts) >= 0) + return true; + + return false; +} + +/* * Synchronize time with host after reboot, restore, etc. * * ICTIMESYNCFLAG_SYNC flag bit indicates reboot, restore events of the VM. @@ -384,7 +412,8 @@ static inline void adj_guesttime(u64 hosttime, u64 reftime, u8 adj_flags) spin_unlock_irqrestore(&host_ts.lock, flags); /* Schedule work to do do_settimeofday64() */ - if (adj_flags & ICTIMESYNCFLAG_SYNC) + if ((adj_flags & ICTIMESYNCFLAG_SYNC) || + (timesync_implicit && hv_implicit_sync(host_ts.host_time))) schedule_work(&adj_time_work); } diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index b33d5abd9beb..7f7965f3d187 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -988,7 +988,7 @@ static const struct dev_pm_ops vmbus_pm = { }; /* The one and only one */ -static struct bus_type hv_bus = { +static const struct bus_type hv_bus = { .name = "vmbus", .match = vmbus_match, .shutdown = vmbus_shutdown, diff --git a/drivers/hwmon/nct6775-core.c b/drivers/hwmon/nct6775-core.c index 8d2ef3145bca..9fbab8f02334 100644 --- a/drivers/hwmon/nct6775-core.c +++ b/drivers/hwmon/nct6775-core.c @@ -3512,6 +3512,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, const u16 *reg_temp_mon, *reg_temp_alternate, *reg_temp_crit; const u16 *reg_temp_crit_l = NULL, *reg_temp_crit_h = NULL; int num_reg_temp, num_reg_temp_mon, num_reg_tsi_temp; + int num_reg_temp_config; struct device *hwmon_dev; struct sensor_template_group tsi_temp_tg; @@ -3594,6 +3595,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, reg_temp_over = NCT6106_REG_TEMP_OVER; reg_temp_hyst = NCT6106_REG_TEMP_HYST; reg_temp_config = NCT6106_REG_TEMP_CONFIG; + num_reg_temp_config = ARRAY_SIZE(NCT6106_REG_TEMP_CONFIG); reg_temp_alternate = NCT6106_REG_TEMP_ALTERNATE; reg_temp_crit = NCT6106_REG_TEMP_CRIT; reg_temp_crit_l = NCT6106_REG_TEMP_CRIT_L; @@ -3669,6 +3671,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, reg_temp_over = NCT6106_REG_TEMP_OVER; reg_temp_hyst = NCT6106_REG_TEMP_HYST; reg_temp_config = NCT6106_REG_TEMP_CONFIG; + num_reg_temp_config = ARRAY_SIZE(NCT6106_REG_TEMP_CONFIG); reg_temp_alternate = NCT6106_REG_TEMP_ALTERNATE; reg_temp_crit = NCT6106_REG_TEMP_CRIT; reg_temp_crit_l = NCT6106_REG_TEMP_CRIT_L; @@ -3746,6 +3749,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, reg_temp_over = NCT6775_REG_TEMP_OVER; reg_temp_hyst = NCT6775_REG_TEMP_HYST; reg_temp_config = NCT6775_REG_TEMP_CONFIG; + num_reg_temp_config = ARRAY_SIZE(NCT6775_REG_TEMP_CONFIG); reg_temp_alternate = NCT6775_REG_TEMP_ALTERNATE; reg_temp_crit = NCT6775_REG_TEMP_CRIT; @@ -3821,6 +3825,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, reg_temp_over = NCT6775_REG_TEMP_OVER; reg_temp_hyst = NCT6775_REG_TEMP_HYST; reg_temp_config = NCT6776_REG_TEMP_CONFIG; + num_reg_temp_config = ARRAY_SIZE(NCT6776_REG_TEMP_CONFIG); reg_temp_alternate = NCT6776_REG_TEMP_ALTERNATE; reg_temp_crit = NCT6776_REG_TEMP_CRIT; @@ -3900,6 +3905,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, reg_temp_over = NCT6779_REG_TEMP_OVER; reg_temp_hyst = NCT6779_REG_TEMP_HYST; reg_temp_config = NCT6779_REG_TEMP_CONFIG; + num_reg_temp_config = ARRAY_SIZE(NCT6779_REG_TEMP_CONFIG); reg_temp_alternate = NCT6779_REG_TEMP_ALTERNATE; reg_temp_crit = NCT6779_REG_TEMP_CRIT; @@ -4034,6 +4040,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, reg_temp_over = NCT6779_REG_TEMP_OVER; reg_temp_hyst = NCT6779_REG_TEMP_HYST; reg_temp_config = NCT6779_REG_TEMP_CONFIG; + num_reg_temp_config = ARRAY_SIZE(NCT6779_REG_TEMP_CONFIG); reg_temp_alternate = NCT6779_REG_TEMP_ALTERNATE; reg_temp_crit = NCT6779_REG_TEMP_CRIT; @@ -4123,6 +4130,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, reg_temp_over = NCT6798_REG_TEMP_OVER; reg_temp_hyst = NCT6798_REG_TEMP_HYST; reg_temp_config = NCT6779_REG_TEMP_CONFIG; + num_reg_temp_config = ARRAY_SIZE(NCT6779_REG_TEMP_CONFIG); reg_temp_alternate = NCT6798_REG_TEMP_ALTERNATE; reg_temp_crit = NCT6798_REG_TEMP_CRIT; @@ -4204,7 +4212,8 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, = reg_temp_crit[src - 1]; if (reg_temp_crit_l && reg_temp_crit_l[i]) data->reg_temp[4][src - 1] = reg_temp_crit_l[i]; - data->reg_temp_config[src - 1] = reg_temp_config[i]; + if (i < num_reg_temp_config) + data->reg_temp_config[src - 1] = reg_temp_config[i]; data->temp_src[src - 1] = src; continue; } @@ -4217,7 +4226,8 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, data->reg_temp[0][s] = reg_temp[i]; data->reg_temp[1][s] = reg_temp_over[i]; data->reg_temp[2][s] = reg_temp_hyst[i]; - data->reg_temp_config[s] = reg_temp_config[i]; + if (i < num_reg_temp_config) + data->reg_temp_config[s] = reg_temp_config[i]; if (reg_temp_crit_h && reg_temp_crit_h[i]) data->reg_temp[3][s] = reg_temp_crit_h[i]; else if (reg_temp_crit[src - 1]) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 88a053987403..60e813137f84 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -803,6 +803,11 @@ static irqreturn_t i2c_imx_slave_handle(struct imx_i2c_struct *i2c_imx, ctl &= ~I2CR_MTX; imx_i2c_write_reg(ctl, i2c_imx, IMX_I2C_I2CR); imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR); + + /* flag the last byte as processed */ + i2c_imx_slave_event(i2c_imx, + I2C_SLAVE_READ_PROCESSED, &value); + i2c_imx_slave_finish_op(i2c_imx); return IRQ_HANDLED; } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 05722121f00e..4a27fbdb2d84 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -292,10 +292,8 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain, struct mm_struct *mm) { int ret; - unsigned long flags; struct arm_smmu_ctx_desc *cd; struct arm_smmu_mmu_notifier *smmu_mn; - struct arm_smmu_master *master; list_for_each_entry(smmu_mn, &smmu_domain->mmu_notifiers, list) { if (smmu_mn->mn.mm == mm) { @@ -325,28 +323,9 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain, goto err_free_cd; } - spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_for_each_entry(master, &smmu_domain->devices, domain_head) { - ret = arm_smmu_write_ctx_desc(master, mm_get_enqcmd_pasid(mm), - cd); - if (ret) { - list_for_each_entry_from_reverse( - master, &smmu_domain->devices, domain_head) - arm_smmu_write_ctx_desc( - master, mm_get_enqcmd_pasid(mm), NULL); - break; - } - } - spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); - if (ret) - goto err_put_notifier; - list_add(&smmu_mn->list, &smmu_domain->mmu_notifiers); return smmu_mn; -err_put_notifier: - /* Frees smmu_mn */ - mmu_notifier_put(&smmu_mn->mn); err_free_cd: arm_smmu_free_shared_cd(cd); return ERR_PTR(ret); @@ -363,9 +342,6 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn) list_del(&smmu_mn->list); - arm_smmu_update_ctx_desc_devices(smmu_domain, mm_get_enqcmd_pasid(mm), - NULL); - /* * If we went through clear(), we've already invalidated, and no * new TLB entry can have been formed. @@ -381,7 +357,8 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn) arm_smmu_free_shared_cd(cd); } -static int __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) +static int __arm_smmu_sva_bind(struct device *dev, ioasid_t pasid, + struct mm_struct *mm) { int ret; struct arm_smmu_bond *bond; @@ -404,9 +381,15 @@ static int __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) goto err_free_bond; } + ret = arm_smmu_write_ctx_desc(master, pasid, bond->smmu_mn->cd); + if (ret) + goto err_put_notifier; + list_add(&bond->list, &master->bonds); return 0; +err_put_notifier: + arm_smmu_mmu_notifier_put(bond->smmu_mn); err_free_bond: kfree(bond); return ret; @@ -568,6 +551,9 @@ void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, struct arm_smmu_master *master = dev_iommu_priv_get(dev); mutex_lock(&sva_lock); + + arm_smmu_write_ctx_desc(master, id, NULL); + list_for_each_entry(t, &master->bonds, list) { if (t->mm == mm) { bond = t; @@ -590,7 +576,7 @@ static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain, struct mm_struct *mm = domain->mm; mutex_lock(&sva_lock); - ret = __arm_smmu_sva_bind(dev, mm); + ret = __arm_smmu_sva_bind(dev, id, mm); mutex_unlock(&sva_lock); return ret; diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 68b6bc5e7c71..6317aaf7b3ab 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -859,10 +859,14 @@ static void arm_smmu_destroy_domain_context(struct arm_smmu_domain *smmu_domain) arm_smmu_rpm_put(smmu); } -static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) +static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) { struct arm_smmu_domain *smmu_domain; + if (type != IOMMU_DOMAIN_UNMANAGED) { + if (using_legacy_binding || type != IOMMU_DOMAIN_DMA) + return NULL; + } /* * Allocate the domain and initialise some of its data structures. * We can't really do anything meaningful until we've added a @@ -875,15 +879,6 @@ static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) mutex_init(&smmu_domain->init_mutex); spin_lock_init(&smmu_domain->cb_lock); - if (dev) { - struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev); - - if (arm_smmu_init_domain_context(smmu_domain, cfg->smmu, dev)) { - kfree(smmu_domain); - return NULL; - } - } - return &smmu_domain->domain; } @@ -1600,7 +1595,7 @@ static struct iommu_ops arm_smmu_ops = { .identity_domain = &arm_smmu_identity_domain, .blocked_domain = &arm_smmu_blocked_domain, .capable = arm_smmu_capable, - .domain_alloc_paging = arm_smmu_domain_alloc_paging, + .domain_alloc = arm_smmu_domain_alloc, .probe_device = arm_smmu_probe_device, .release_device = arm_smmu_release_device, .probe_finalize = arm_smmu_probe_finalize, diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 6fb5f6fceea1..11652e0bcab3 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -396,8 +396,6 @@ static int domain_update_device_node(struct dmar_domain *domain) return nid; } -static void domain_update_iotlb(struct dmar_domain *domain); - /* Return the super pagesize bitmap if supported. */ static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain) { @@ -1218,7 +1216,7 @@ domain_lookup_dev_info(struct dmar_domain *domain, return NULL; } -static void domain_update_iotlb(struct dmar_domain *domain) +void domain_update_iotlb(struct dmar_domain *domain) { struct dev_pasid_info *dev_pasid; struct device_domain_info *info; @@ -1368,6 +1366,46 @@ static void domain_flush_pasid_iotlb(struct intel_iommu *iommu, spin_unlock_irqrestore(&domain->lock, flags); } +static void __iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, + unsigned long pfn, unsigned int pages, + int ih) +{ + unsigned int aligned_pages = __roundup_pow_of_two(pages); + unsigned long bitmask = aligned_pages - 1; + unsigned int mask = ilog2(aligned_pages); + u64 addr = (u64)pfn << VTD_PAGE_SHIFT; + + /* + * PSI masks the low order bits of the base address. If the + * address isn't aligned to the mask, then compute a mask value + * needed to ensure the target range is flushed. + */ + if (unlikely(bitmask & pfn)) { + unsigned long end_pfn = pfn + pages - 1, shared_bits; + + /* + * Since end_pfn <= pfn + bitmask, the only way bits + * higher than bitmask can differ in pfn and end_pfn is + * by carrying. This means after masking out bitmask, + * high bits starting with the first set bit in + * shared_bits are all equal in both pfn and end_pfn. + */ + shared_bits = ~(pfn ^ end_pfn) & ~bitmask; + mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG; + } + + /* + * Fallback to domain selective flush if no PSI support or + * the size is too big. + */ + if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap)) + iommu->flush.flush_iotlb(iommu, did, 0, 0, + DMA_TLB_DSI_FLUSH); + else + iommu->flush.flush_iotlb(iommu, did, addr | ih, mask, + DMA_TLB_PSI_FLUSH); +} + static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, struct dmar_domain *domain, unsigned long pfn, unsigned int pages, @@ -1384,42 +1422,10 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, if (ih) ih = 1 << 6; - if (domain->use_first_level) { + if (domain->use_first_level) domain_flush_pasid_iotlb(iommu, domain, addr, pages, ih); - } else { - unsigned long bitmask = aligned_pages - 1; - - /* - * PSI masks the low order bits of the base address. If the - * address isn't aligned to the mask, then compute a mask value - * needed to ensure the target range is flushed. - */ - if (unlikely(bitmask & pfn)) { - unsigned long end_pfn = pfn + pages - 1, shared_bits; - - /* - * Since end_pfn <= pfn + bitmask, the only way bits - * higher than bitmask can differ in pfn and end_pfn is - * by carrying. This means after masking out bitmask, - * high bits starting with the first set bit in - * shared_bits are all equal in both pfn and end_pfn. - */ - shared_bits = ~(pfn ^ end_pfn) & ~bitmask; - mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG; - } - - /* - * Fallback to domain selective flush if no PSI support or - * the size is too big. - */ - if (!cap_pgsel_inv(iommu->cap) || - mask > cap_max_amask_val(iommu->cap)) - iommu->flush.flush_iotlb(iommu, did, 0, 0, - DMA_TLB_DSI_FLUSH); - else - iommu->flush.flush_iotlb(iommu, did, addr | ih, mask, - DMA_TLB_PSI_FLUSH); - } + else + __iommu_flush_iotlb_psi(iommu, did, pfn, pages, ih); /* * In caching mode, changes of pages from non-present to present require @@ -1443,6 +1449,46 @@ static void __mapping_notify_one(struct intel_iommu *iommu, struct dmar_domain * iommu_flush_write_buffer(iommu); } +/* + * Flush the relevant caches in nested translation if the domain + * also serves as a parent + */ +static void parent_domain_flush(struct dmar_domain *domain, + unsigned long pfn, + unsigned long pages, int ih) +{ + struct dmar_domain *s1_domain; + + spin_lock(&domain->s1_lock); + list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) { + struct device_domain_info *device_info; + struct iommu_domain_info *info; + unsigned long flags; + unsigned long i; + + xa_for_each(&s1_domain->iommu_array, i, info) + __iommu_flush_iotlb_psi(info->iommu, info->did, + pfn, pages, ih); + + if (!s1_domain->has_iotlb_device) + continue; + + spin_lock_irqsave(&s1_domain->lock, flags); + list_for_each_entry(device_info, &s1_domain->devices, link) + /* + * Address translation cache in device side caches the + * result of nested translation. There is no easy way + * to identify the exact set of nested translations + * affected by a change in S2. So just flush the entire + * device cache. + */ + __iommu_flush_dev_iotlb(device_info, 0, + MAX_AGAW_PFN_WIDTH); + spin_unlock_irqrestore(&s1_domain->lock, flags); + } + spin_unlock(&domain->s1_lock); +} + static void intel_flush_iotlb_all(struct iommu_domain *domain) { struct dmar_domain *dmar_domain = to_dmar_domain(domain); @@ -1462,6 +1508,9 @@ static void intel_flush_iotlb_all(struct iommu_domain *domain) if (!cap_caching_mode(iommu->cap)) iommu_flush_dev_iotlb(dmar_domain, 0, MAX_AGAW_PFN_WIDTH); } + + if (dmar_domain->nested_parent) + parent_domain_flush(dmar_domain, 0, -1, 0); } static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) @@ -1985,6 +2034,9 @@ static void switch_to_super_page(struct dmar_domain *domain, iommu_flush_iotlb_psi(info->iommu, domain, start_pfn, lvl_pages, 0, 0); + if (domain->nested_parent) + parent_domain_flush(domain, start_pfn, + lvl_pages, 0); } pte++; @@ -3883,6 +3935,7 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags, bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING; bool nested_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT; struct intel_iommu *iommu = info->iommu; + struct dmar_domain *dmar_domain; struct iommu_domain *domain; /* Must be NESTING domain */ @@ -3908,11 +3961,16 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags, if (!domain) return ERR_PTR(-ENOMEM); - if (nested_parent) - to_dmar_domain(domain)->nested_parent = true; + dmar_domain = to_dmar_domain(domain); + + if (nested_parent) { + dmar_domain->nested_parent = true; + INIT_LIST_HEAD(&dmar_domain->s1_domains); + spin_lock_init(&dmar_domain->s1_lock); + } if (dirty_tracking) { - if (to_dmar_domain(domain)->use_first_level) { + if (dmar_domain->use_first_level) { iommu_domain_free(domain); return ERR_PTR(-EOPNOTSUPP); } @@ -3924,8 +3982,12 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags, static void intel_iommu_domain_free(struct iommu_domain *domain) { + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + + WARN_ON(dmar_domain->nested_parent && + !list_empty(&dmar_domain->s1_domains)); if (domain != &si_domain->domain) - domain_exit(to_dmar_domain(domain)); + domain_exit(dmar_domain); } int prepare_domain_attach_device(struct iommu_domain *domain, @@ -4107,6 +4169,9 @@ static void intel_iommu_tlb_sync(struct iommu_domain *domain, start_pfn, nrpages, list_empty(&gather->freelist), 0); + if (dmar_domain->nested_parent) + parent_domain_flush(dmar_domain, start_pfn, nrpages, + list_empty(&gather->freelist)); put_pages_list(&gather->freelist); } @@ -4664,21 +4729,70 @@ static void *intel_iommu_hw_info(struct device *dev, u32 *length, u32 *type) return vtd; } +/* + * Set dirty tracking for the device list of a domain. The caller must + * hold the domain->lock when calling it. + */ +static int device_set_dirty_tracking(struct list_head *devices, bool enable) +{ + struct device_domain_info *info; + int ret = 0; + + list_for_each_entry(info, devices, link) { + ret = intel_pasid_setup_dirty_tracking(info->iommu, info->dev, + IOMMU_NO_PASID, enable); + if (ret) + break; + } + + return ret; +} + +static int parent_domain_set_dirty_tracking(struct dmar_domain *domain, + bool enable) +{ + struct dmar_domain *s1_domain; + unsigned long flags; + int ret; + + spin_lock(&domain->s1_lock); + list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) { + spin_lock_irqsave(&s1_domain->lock, flags); + ret = device_set_dirty_tracking(&s1_domain->devices, enable); + spin_unlock_irqrestore(&s1_domain->lock, flags); + if (ret) + goto err_unwind; + } + spin_unlock(&domain->s1_lock); + return 0; + +err_unwind: + list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) { + spin_lock_irqsave(&s1_domain->lock, flags); + device_set_dirty_tracking(&s1_domain->devices, + domain->dirty_tracking); + spin_unlock_irqrestore(&s1_domain->lock, flags); + } + spin_unlock(&domain->s1_lock); + return ret; +} + static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain, bool enable) { struct dmar_domain *dmar_domain = to_dmar_domain(domain); - struct device_domain_info *info; int ret; spin_lock(&dmar_domain->lock); if (dmar_domain->dirty_tracking == enable) goto out_unlock; - list_for_each_entry(info, &dmar_domain->devices, link) { - ret = intel_pasid_setup_dirty_tracking(info->iommu, - info->domain, info->dev, - IOMMU_NO_PASID, enable); + ret = device_set_dirty_tracking(&dmar_domain->devices, enable); + if (ret) + goto err_unwind; + + if (dmar_domain->nested_parent) { + ret = parent_domain_set_dirty_tracking(dmar_domain, enable); if (ret) goto err_unwind; } @@ -4690,10 +4804,8 @@ out_unlock: return 0; err_unwind: - list_for_each_entry(info, &dmar_domain->devices, link) - intel_pasid_setup_dirty_tracking(info->iommu, dmar_domain, - info->dev, IOMMU_NO_PASID, - dmar_domain->dirty_tracking); + device_set_dirty_tracking(&dmar_domain->devices, + dmar_domain->dirty_tracking); spin_unlock(&dmar_domain->lock); return ret; } diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index d02f916d8e59..4145c04cb1c6 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -627,6 +627,10 @@ struct dmar_domain { int agaw; /* maximum mapped address */ u64 max_addr; + /* Protect the s1_domains list */ + spinlock_t s1_lock; + /* Track s1_domains nested on this domain */ + struct list_head s1_domains; }; /* Nested user domain */ @@ -637,6 +641,8 @@ struct dmar_domain { unsigned long s1_pgtbl; /* page table attributes */ struct iommu_hwpt_vtd_s1 s1_cfg; + /* link to parent domain siblings */ + struct list_head s2_link; }; }; @@ -1060,6 +1066,7 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc, */ #define QI_OPT_WAIT_DRAIN BIT(0) +void domain_update_iotlb(struct dmar_domain *domain); int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu); void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu); void device_block_translation(struct device *dev); diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c index f26c7f1c46cc..a7d68f3d518a 100644 --- a/drivers/iommu/intel/nested.c +++ b/drivers/iommu/intel/nested.c @@ -65,12 +65,20 @@ static int intel_nested_attach_dev(struct iommu_domain *domain, list_add(&info->link, &dmar_domain->devices); spin_unlock_irqrestore(&dmar_domain->lock, flags); + domain_update_iotlb(dmar_domain); + return 0; } static void intel_nested_domain_free(struct iommu_domain *domain) { - kfree(to_dmar_domain(domain)); + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + struct dmar_domain *s2_domain = dmar_domain->s2_domain; + + spin_lock(&s2_domain->s1_lock); + list_del(&dmar_domain->s2_link); + spin_unlock(&s2_domain->s1_lock); + kfree(dmar_domain); } static void nested_flush_dev_iotlb(struct dmar_domain *domain, u64 addr, @@ -95,7 +103,7 @@ static void nested_flush_dev_iotlb(struct dmar_domain *domain, u64 addr, } static void intel_nested_flush_cache(struct dmar_domain *domain, u64 addr, - unsigned long npages, bool ih) + u64 npages, bool ih) { struct iommu_domain_info *info; unsigned int mask; @@ -201,5 +209,9 @@ struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent, spin_lock_init(&domain->lock); xa_init(&domain->iommu_array); + spin_lock(&s2_domain->s1_lock); + list_add(&domain->s2_link, &s2_domain->s1_domains); + spin_unlock(&s2_domain->s1_lock); + return &domain->domain; } diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 3239cefa4c33..108158e2b907 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -428,7 +428,6 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, * Set up dirty tracking on a second only or nested translation type. */ int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu, - struct dmar_domain *domain, struct device *dev, u32 pasid, bool enabled) { @@ -445,7 +444,7 @@ int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu, return -ENODEV; } - did = domain_id_iommu(domain, iommu); + did = pasid_get_domain_id(pte); pgtt = pasid_pte_get_pgtt(pte); if (pgtt != PASID_ENTRY_PGTT_SL_ONLY && pgtt != PASID_ENTRY_PGTT_NESTED) { @@ -658,6 +657,8 @@ int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev, pasid_set_domain_id(pte, did); pasid_set_address_width(pte, s2_domain->agaw); pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); + if (s2_domain->dirty_tracking) + pasid_set_ssade(pte); pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED); pasid_set_present(pte); spin_unlock(&iommu->lock); diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index 8d40d4c66e31..487ede039bdd 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -307,7 +307,6 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, struct dmar_domain *domain, struct device *dev, u32 pasid); int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu, - struct dmar_domain *domain, struct device *dev, u32 pasid, bool enabled); int intel_pasid_setup_pass_through(struct intel_iommu *iommu, diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index c3fc9201d0be..65814cbc8402 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -41,6 +41,7 @@ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct de } iommu_mm->pasid = pasid; INIT_LIST_HEAD(&iommu_mm->sva_domains); + INIT_LIST_HEAD(&iommu_mm->sva_handles); /* * Make sure the write to mm->iommu_mm is not reordered in front of * initialization to iommu_mm fields. If it does, readers may see a @@ -82,6 +83,14 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm goto out_unlock; } + list_for_each_entry(handle, &mm->iommu_mm->sva_handles, handle_item) { + if (handle->dev == dev) { + refcount_inc(&handle->users); + mutex_unlock(&iommu_sva_lock); + return handle; + } + } + handle = kzalloc(sizeof(*handle), GFP_KERNEL); if (!handle) { ret = -ENOMEM; @@ -111,6 +120,8 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm list_add(&domain->next, &mm->iommu_mm->sva_domains); out: + refcount_set(&handle->users, 1); + list_add(&handle->handle_item, &mm->iommu_mm->sva_handles); mutex_unlock(&iommu_sva_lock); handle->dev = dev; handle->domain = domain; @@ -141,6 +152,12 @@ void iommu_sva_unbind_device(struct iommu_sva *handle) struct device *dev = handle->dev; mutex_lock(&iommu_sva_lock); + if (!refcount_dec_and_test(&handle->users)) { + mutex_unlock(&iommu_sva_lock); + return; + } + list_del(&handle->handle_item); + iommu_detach_device_pasid(domain, dev, iommu_mm->pasid); if (--domain->users == 0) { list_del(&domain->next); diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index 3f3f1fa1a0a9..33d142f8057d 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -263,7 +263,8 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) if (cmd->__reserved) return -EOPNOTSUPP; - if (cmd->data_type == IOMMU_HWPT_DATA_NONE && cmd->data_len) + if ((cmd->data_type == IOMMU_HWPT_DATA_NONE && cmd->data_len) || + (cmd->data_type != IOMMU_HWPT_DATA_NONE && !cmd->data_len)) return -EINVAL; idev = iommufd_get_device(ucmd, cmd->dev_id); diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index 504ac1b01b2d..05fd9d3abf1b 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -1330,20 +1330,23 @@ out_unlock: int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access) { + u32 new_id; int rc; down_write(&iopt->domains_rwsem); down_write(&iopt->iova_rwsem); - rc = xa_alloc(&iopt->access_list, &access->iopt_access_list_id, access, - xa_limit_16b, GFP_KERNEL_ACCOUNT); + rc = xa_alloc(&iopt->access_list, &new_id, access, xa_limit_16b, + GFP_KERNEL_ACCOUNT); + if (rc) goto out_unlock; rc = iopt_calculate_iova_alignment(iopt); if (rc) { - xa_erase(&iopt->access_list, access->iopt_access_list_id); + xa_erase(&iopt->access_list, new_id); goto out_unlock; } + access->iopt_access_list_id = new_id; out_unlock: up_write(&iopt->iova_rwsem); diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h index 482d4059f5db..e854d3f67205 100644 --- a/drivers/iommu/iommufd/iommufd_test.h +++ b/drivers/iommu/iommufd/iommufd_test.h @@ -45,6 +45,7 @@ enum { enum { MOCK_FLAGS_DEVICE_NO_DIRTY = 1 << 0, + MOCK_FLAGS_DEVICE_HUGE_IOVA = 1 << 1, }; enum { diff --git a/drivers/iommu/iommufd/iova_bitmap.c b/drivers/iommu/iommufd/iova_bitmap.c index 0a92c9eeaf7f..db8c46bee155 100644 --- a/drivers/iommu/iommufd/iova_bitmap.c +++ b/drivers/iommu/iommufd/iova_bitmap.c @@ -100,7 +100,7 @@ struct iova_bitmap { struct iova_bitmap_map mapped; /* userspace address of the bitmap */ - u64 __user *bitmap; + u8 __user *bitmap; /* u64 index that @mapped points to */ unsigned long mapped_base_index; @@ -113,6 +113,9 @@ struct iova_bitmap { /* length of the IOVA range for the whole bitmap */ size_t length; + + /* length of the IOVA range set ahead the pinned pages */ + unsigned long set_ahead_length; }; /* @@ -162,7 +165,7 @@ static int iova_bitmap_get(struct iova_bitmap *bitmap) { struct iova_bitmap_map *mapped = &bitmap->mapped; unsigned long npages; - u64 __user *addr; + u8 __user *addr; long ret; /* @@ -176,17 +179,18 @@ static int iova_bitmap_get(struct iova_bitmap *bitmap) sizeof(*bitmap->bitmap), PAGE_SIZE); /* - * We always cap at max number of 'struct page' a base page can fit. - * This is, for example, on x86 means 2M of bitmap data max. - */ - npages = min(npages, PAGE_SIZE / sizeof(struct page *)); - - /* * Bitmap address to be pinned is calculated via pointer arithmetic * with bitmap u64 word index. */ addr = bitmap->bitmap + bitmap->mapped_base_index; + /* + * We always cap at max number of 'struct page' a base page can fit. + * This is, for example, on x86 means 2M of bitmap data max. + */ + npages = min(npages + !!offset_in_page(addr), + PAGE_SIZE / sizeof(struct page *)); + ret = pin_user_pages_fast((unsigned long)addr, npages, FOLL_WRITE, mapped->pages); if (ret <= 0) @@ -247,7 +251,7 @@ struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length, mapped = &bitmap->mapped; mapped->pgshift = __ffs(page_size); - bitmap->bitmap = data; + bitmap->bitmap = (u8 __user *)data; bitmap->mapped_total_index = iova_bitmap_offset_to_index(bitmap, length - 1) + 1; bitmap->iova = iova; @@ -304,7 +308,7 @@ static unsigned long iova_bitmap_mapped_remaining(struct iova_bitmap *bitmap) remaining = bitmap->mapped_total_index - bitmap->mapped_base_index; remaining = min_t(unsigned long, remaining, - bytes / sizeof(*bitmap->bitmap)); + DIV_ROUND_UP(bytes, sizeof(*bitmap->bitmap))); return remaining; } @@ -341,6 +345,32 @@ static bool iova_bitmap_done(struct iova_bitmap *bitmap) return bitmap->mapped_base_index >= bitmap->mapped_total_index; } +static int iova_bitmap_set_ahead(struct iova_bitmap *bitmap, + size_t set_ahead_length) +{ + int ret = 0; + + while (set_ahead_length > 0 && !iova_bitmap_done(bitmap)) { + unsigned long length = iova_bitmap_mapped_length(bitmap); + unsigned long iova = iova_bitmap_mapped_iova(bitmap); + + ret = iova_bitmap_get(bitmap); + if (ret) + break; + + length = min(length, set_ahead_length); + iova_bitmap_set(bitmap, iova, length); + + set_ahead_length -= length; + bitmap->mapped_base_index += + iova_bitmap_offset_to_index(bitmap, length - 1) + 1; + iova_bitmap_put(bitmap); + } + + bitmap->set_ahead_length = 0; + return ret; +} + /* * Advances to the next range, releases the current pinned * pages and pins the next set of bitmap pages. @@ -357,6 +387,15 @@ static int iova_bitmap_advance(struct iova_bitmap *bitmap) if (iova_bitmap_done(bitmap)) return 0; + /* Iterate, set and skip any bits requested for next iteration */ + if (bitmap->set_ahead_length) { + int ret; + + ret = iova_bitmap_set_ahead(bitmap, bitmap->set_ahead_length); + if (ret) + return ret; + } + /* When advancing the index we pin the next set of bitmap pages */ return iova_bitmap_get(bitmap); } @@ -409,6 +448,7 @@ void iova_bitmap_set(struct iova_bitmap *bitmap, mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE; unsigned long last_bit = (((iova + length - 1) - mapped->iova) >> mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE; + unsigned long last_page_idx = mapped->npages - 1; do { unsigned int page_idx = cur_bit / BITS_PER_PAGE; @@ -417,10 +457,18 @@ void iova_bitmap_set(struct iova_bitmap *bitmap, last_bit - cur_bit + 1); void *kaddr; + if (unlikely(page_idx > last_page_idx)) + break; + kaddr = kmap_local_page(mapped->pages[page_idx]); bitmap_set(kaddr, offset, nbits); kunmap_local(kaddr); cur_bit += nbits; } while (cur_bit <= last_bit); + + if (unlikely(cur_bit <= last_bit)) { + bitmap->set_ahead_length = + ((last_bit - cur_bit + 1) << bitmap->mapped.pgshift); + } } EXPORT_SYMBOL_NS_GPL(iova_bitmap_set, IOMMUFD); diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index d9e9920c7eba..7a2199470f31 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -36,11 +36,12 @@ static struct mock_bus_type iommufd_mock_bus_type = { }, }; -static atomic_t mock_dev_num; +static DEFINE_IDA(mock_dev_ida); enum { MOCK_DIRTY_TRACK = 1, MOCK_IO_PAGE_SIZE = PAGE_SIZE / 2, + MOCK_HUGE_PAGE_SIZE = 512 * MOCK_IO_PAGE_SIZE, /* * Like a real page table alignment requires the low bits of the address @@ -53,6 +54,7 @@ enum { MOCK_PFN_START_IOVA = _MOCK_PFN_START, MOCK_PFN_LAST_IOVA = _MOCK_PFN_START, MOCK_PFN_DIRTY_IOVA = _MOCK_PFN_START << 1, + MOCK_PFN_HUGE_IOVA = _MOCK_PFN_START << 2, }; /* @@ -61,8 +63,8 @@ enum { * In syzkaller mode the 64 bit IOVA is converted into an nth area and offset * value. This has a much smaller randomization space and syzkaller can hit it. */ -static unsigned long iommufd_test_syz_conv_iova(struct io_pagetable *iopt, - u64 *iova) +static unsigned long __iommufd_test_syz_conv_iova(struct io_pagetable *iopt, + u64 *iova) { struct syz_layout { __u32 nth_area; @@ -86,6 +88,21 @@ static unsigned long iommufd_test_syz_conv_iova(struct io_pagetable *iopt, return 0; } +static unsigned long iommufd_test_syz_conv_iova(struct iommufd_access *access, + u64 *iova) +{ + unsigned long ret; + + mutex_lock(&access->ioas_lock); + if (!access->ioas) { + mutex_unlock(&access->ioas_lock); + return 0; + } + ret = __iommufd_test_syz_conv_iova(&access->ioas->iopt, iova); + mutex_unlock(&access->ioas_lock); + return ret; +} + void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd, unsigned int ioas_id, u64 *iova, u32 *flags) { @@ -98,7 +115,7 @@ void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd, ioas = iommufd_get_ioas(ucmd->ictx, ioas_id); if (IS_ERR(ioas)) return; - *iova = iommufd_test_syz_conv_iova(&ioas->iopt, iova); + *iova = __iommufd_test_syz_conv_iova(&ioas->iopt, iova); iommufd_put_object(ucmd->ictx, &ioas->obj); } @@ -121,6 +138,7 @@ enum selftest_obj_type { struct mock_dev { struct device dev; unsigned long flags; + int id; }; struct selftest_obj { @@ -191,6 +209,34 @@ static int mock_domain_set_dirty_tracking(struct iommu_domain *domain, return 0; } +static bool mock_test_and_clear_dirty(struct mock_iommu_domain *mock, + unsigned long iova, size_t page_size, + unsigned long flags) +{ + unsigned long cur, end = iova + page_size - 1; + bool dirty = false; + void *ent, *old; + + for (cur = iova; cur < end; cur += MOCK_IO_PAGE_SIZE) { + ent = xa_load(&mock->pfns, cur / MOCK_IO_PAGE_SIZE); + if (!ent || !(xa_to_value(ent) & MOCK_PFN_DIRTY_IOVA)) + continue; + + dirty = true; + /* Clear dirty */ + if (!(flags & IOMMU_DIRTY_NO_CLEAR)) { + unsigned long val; + + val = xa_to_value(ent) & ~MOCK_PFN_DIRTY_IOVA; + old = xa_store(&mock->pfns, cur / MOCK_IO_PAGE_SIZE, + xa_mk_value(val), GFP_KERNEL); + WARN_ON_ONCE(ent != old); + } + } + + return dirty; +} + static int mock_domain_read_and_clear_dirty(struct iommu_domain *domain, unsigned long iova, size_t size, unsigned long flags, @@ -198,31 +244,31 @@ static int mock_domain_read_and_clear_dirty(struct iommu_domain *domain, { struct mock_iommu_domain *mock = container_of(domain, struct mock_iommu_domain, domain); - unsigned long i, max = size / MOCK_IO_PAGE_SIZE; - void *ent, *old; + unsigned long end = iova + size; + void *ent; if (!(mock->flags & MOCK_DIRTY_TRACK) && dirty->bitmap) return -EINVAL; - for (i = 0; i < max; i++) { - unsigned long cur = iova + i * MOCK_IO_PAGE_SIZE; + do { + unsigned long pgsize = MOCK_IO_PAGE_SIZE; + unsigned long head; - ent = xa_load(&mock->pfns, cur / MOCK_IO_PAGE_SIZE); - if (ent && (xa_to_value(ent) & MOCK_PFN_DIRTY_IOVA)) { - /* Clear dirty */ - if (!(flags & IOMMU_DIRTY_NO_CLEAR)) { - unsigned long val; - - val = xa_to_value(ent) & ~MOCK_PFN_DIRTY_IOVA; - old = xa_store(&mock->pfns, - cur / MOCK_IO_PAGE_SIZE, - xa_mk_value(val), GFP_KERNEL); - WARN_ON_ONCE(ent != old); - } - iommu_dirty_bitmap_record(dirty, cur, - MOCK_IO_PAGE_SIZE); + ent = xa_load(&mock->pfns, iova / MOCK_IO_PAGE_SIZE); + if (!ent) { + iova += pgsize; + continue; } - } + + if (xa_to_value(ent) & MOCK_PFN_HUGE_IOVA) + pgsize = MOCK_HUGE_PAGE_SIZE; + head = iova & ~(pgsize - 1); + + /* Clear dirty */ + if (mock_test_and_clear_dirty(mock, head, pgsize, flags)) + iommu_dirty_bitmap_record(dirty, head, pgsize); + iova = head + pgsize; + } while (iova < end); return 0; } @@ -234,6 +280,7 @@ const struct iommu_dirty_ops dirty_ops = { static struct iommu_domain *mock_domain_alloc_paging(struct device *dev) { + struct mock_dev *mdev = container_of(dev, struct mock_dev, dev); struct mock_iommu_domain *mock; mock = kzalloc(sizeof(*mock), GFP_KERNEL); @@ -242,6 +289,8 @@ static struct iommu_domain *mock_domain_alloc_paging(struct device *dev) mock->domain.geometry.aperture_start = MOCK_APERTURE_START; mock->domain.geometry.aperture_end = MOCK_APERTURE_LAST; mock->domain.pgsize_bitmap = MOCK_IO_PAGE_SIZE; + if (dev && mdev->flags & MOCK_FLAGS_DEVICE_HUGE_IOVA) + mock->domain.pgsize_bitmap |= MOCK_HUGE_PAGE_SIZE; mock->domain.ops = mock_ops.default_domain_ops; mock->domain.type = IOMMU_DOMAIN_UNMANAGED; xa_init(&mock->pfns); @@ -287,7 +336,7 @@ mock_domain_alloc_user(struct device *dev, u32 flags, return ERR_PTR(-EOPNOTSUPP); if (user_data || (has_dirty_flag && no_dirty_ops)) return ERR_PTR(-EOPNOTSUPP); - domain = mock_domain_alloc_paging(NULL); + domain = mock_domain_alloc_paging(dev); if (!domain) return ERR_PTR(-ENOMEM); if (has_dirty_flag) @@ -350,6 +399,9 @@ static int mock_domain_map_pages(struct iommu_domain *domain, if (pgcount == 1 && cur + MOCK_IO_PAGE_SIZE == pgsize) flags = MOCK_PFN_LAST_IOVA; + if (pgsize != MOCK_IO_PAGE_SIZE) { + flags |= MOCK_PFN_HUGE_IOVA; + } old = xa_store(&mock->pfns, iova / MOCK_IO_PAGE_SIZE, xa_mk_value((paddr / MOCK_IO_PAGE_SIZE) | flags), @@ -394,20 +446,27 @@ static size_t mock_domain_unmap_pages(struct iommu_domain *domain, /* * iommufd generates unmaps that must be a strict - * superset of the map's performend So every starting - * IOVA should have been an iova passed to map, and the + * superset of the map's performend So every + * starting/ending IOVA should have been an iova passed + * to map. * - * First IOVA must be present and have been a first IOVA - * passed to map_pages + * This simple logic doesn't work when the HUGE_PAGE is + * turned on since the core code will automatically + * switch between the two page sizes creating a break in + * the unmap calls. The break can land in the middle of + * contiguous IOVA. */ - if (first) { - WARN_ON(ent && !(xa_to_value(ent) & - MOCK_PFN_START_IOVA)); - first = false; + if (!(domain->pgsize_bitmap & MOCK_HUGE_PAGE_SIZE)) { + if (first) { + WARN_ON(ent && !(xa_to_value(ent) & + MOCK_PFN_START_IOVA)); + first = false; + } + if (pgcount == 1 && + cur + MOCK_IO_PAGE_SIZE == pgsize) + WARN_ON(ent && !(xa_to_value(ent) & + MOCK_PFN_LAST_IOVA)); } - if (pgcount == 1 && cur + MOCK_IO_PAGE_SIZE == pgsize) - WARN_ON(ent && !(xa_to_value(ent) & - MOCK_PFN_LAST_IOVA)); iova += MOCK_IO_PAGE_SIZE; ret += MOCK_IO_PAGE_SIZE; @@ -595,7 +654,7 @@ static void mock_dev_release(struct device *dev) { struct mock_dev *mdev = container_of(dev, struct mock_dev, dev); - atomic_dec(&mock_dev_num); + ida_free(&mock_dev_ida, mdev->id); kfree(mdev); } @@ -604,7 +663,8 @@ static struct mock_dev *mock_dev_create(unsigned long dev_flags) struct mock_dev *mdev; int rc; - if (dev_flags & ~(MOCK_FLAGS_DEVICE_NO_DIRTY)) + if (dev_flags & + ~(MOCK_FLAGS_DEVICE_NO_DIRTY | MOCK_FLAGS_DEVICE_HUGE_IOVA)) return ERR_PTR(-EINVAL); mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); @@ -616,8 +676,12 @@ static struct mock_dev *mock_dev_create(unsigned long dev_flags) mdev->dev.release = mock_dev_release; mdev->dev.bus = &iommufd_mock_bus_type.bus; - rc = dev_set_name(&mdev->dev, "iommufd_mock%u", - atomic_inc_return(&mock_dev_num)); + rc = ida_alloc(&mock_dev_ida, GFP_KERNEL); + if (rc < 0) + goto err_put; + mdev->id = rc; + + rc = dev_set_name(&mdev->dev, "iommufd_mock%u", mdev->id); if (rc) goto err_put; @@ -1119,7 +1183,7 @@ static int iommufd_test_access_pages(struct iommufd_ucmd *ucmd, } if (flags & MOCK_FLAGS_ACCESS_SYZ) - iova = iommufd_test_syz_conv_iova(&staccess->access->ioas->iopt, + iova = iommufd_test_syz_conv_iova(staccess->access, &cmd->access_pages.iova); npages = (ALIGN(iova + length, PAGE_SIZE) - @@ -1221,8 +1285,8 @@ static int iommufd_test_access_rw(struct iommufd_ucmd *ucmd, } if (flags & MOCK_FLAGS_ACCESS_SYZ) - iova = iommufd_test_syz_conv_iova(&staccess->access->ioas->iopt, - &cmd->access_rw.iova); + iova = iommufd_test_syz_conv_iova(staccess->access, + &cmd->access_rw.iova); rc = iommufd_access_rw(staccess->access, iova, tmp, length, flags); if (rc) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 53abd4779914..b822752c4261 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3181,6 +3181,7 @@ static void its_cpu_init_lpis(void) val |= GICR_CTLR_ENABLE_LPIS; writel_relaxed(val, rbase + GICR_CTLR); +out: if (gic_rdists->has_vlpis && !gic_rdists->has_rvpeid) { void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); @@ -3216,7 +3217,6 @@ static void its_cpu_init_lpis(void) /* Make sure the GIC has seen the above */ dsb(sy); -out: gic_data_rdist()->flags |= RD_LOCAL_LPI_ENABLED; pr_info("GICv3: CPU%d: using %s LPI pending table @%pa\n", smp_processor_id(), diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c index 5101a3fb11df..58881d313979 100644 --- a/drivers/irqchip/irq-mbigen.c +++ b/drivers/irqchip/irq-mbigen.c @@ -235,22 +235,17 @@ static const struct irq_domain_ops mbigen_domain_ops = { static int mbigen_of_create_domain(struct platform_device *pdev, struct mbigen_device *mgn_chip) { - struct device *parent; struct platform_device *child; struct irq_domain *domain; struct device_node *np; u32 num_pins; int ret = 0; - parent = bus_get_dev_root(&platform_bus_type); - if (!parent) - return -ENODEV; - for_each_child_of_node(pdev->dev.of_node, np) { if (!of_property_read_bool(np, "interrupt-controller")) continue; - child = of_platform_device_create(np, NULL, parent); + child = of_platform_device_create(np, NULL, NULL); if (!child) { ret = -ENOMEM; break; @@ -273,7 +268,6 @@ static int mbigen_of_create_domain(struct platform_device *pdev, } } - put_device(parent); if (ret) of_node_put(np); diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index 5b7bc4fd9517..bf0b40b0fad4 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -148,7 +148,13 @@ static void plic_irq_eoi(struct irq_data *d) { struct plic_handler *handler = this_cpu_ptr(&plic_handlers); - writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); + if (unlikely(irqd_irq_disabled(d))) { + plic_toggle(handler, d->hwirq, 1); + writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); + plic_toggle(handler, d->hwirq, 0); + } else { + writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); + } } #ifdef CONFIG_SMP diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c index 6e80d7bd3c4d..3ed257334562 100644 --- a/drivers/isdn/capi/capi.c +++ b/drivers/isdn/capi/capi.c @@ -49,7 +49,9 @@ MODULE_LICENSE("GPL"); /* -------- driver information -------------------------------------- */ static DEFINE_MUTEX(capi_mutex); -static struct class *capi_class; +static const struct class capi_class = { + .name = "capi", +}; static int capi_major = 68; /* allocated */ module_param_named(major, capi_major, uint, 0); @@ -1393,18 +1395,19 @@ static int __init capi_init(void) kcapi_exit(); return major_ret; } - capi_class = class_create("capi"); - if (IS_ERR(capi_class)) { + + ret = class_register(&capi_class); + if (ret) { unregister_chrdev(capi_major, "capi20"); kcapi_exit(); - return PTR_ERR(capi_class); + return ret; } - device_create(capi_class, NULL, MKDEV(capi_major, 0), NULL, "capi20"); + device_create(&capi_class, NULL, MKDEV(capi_major, 0), NULL, "capi20"); if (capinc_tty_init() < 0) { - device_destroy(capi_class, MKDEV(capi_major, 0)); - class_destroy(capi_class); + device_destroy(&capi_class, MKDEV(capi_major, 0)); + class_unregister(&capi_class); unregister_chrdev(capi_major, "capi20"); kcapi_exit(); return -ENOMEM; @@ -1427,8 +1430,8 @@ static void __exit capi_exit(void) { proc_exit(); - device_destroy(capi_class, MKDEV(capi_major, 0)); - class_destroy(capi_class); + device_destroy(&capi_class, MKDEV(capi_major, 0)); + class_unregister(&capi_class); unregister_chrdev(capi_major, "capi20"); capinc_tty_exit(); diff --git a/drivers/isdn/mISDN/dsp_pipeline.c b/drivers/isdn/mISDN/dsp_pipeline.c index 09b72f14d4b7..b4ed0bb8ddfb 100644 --- a/drivers/isdn/mISDN/dsp_pipeline.c +++ b/drivers/isdn/mISDN/dsp_pipeline.c @@ -31,7 +31,9 @@ struct dsp_element_entry { static LIST_HEAD(dsp_elements); /* sysfs */ -static struct class *elements_class; +static const struct class elements_class = { + .name = "dsp_pipeline", +}; static ssize_t attr_show_args(struct device *dev, struct device_attribute *attr, char *buf) @@ -80,7 +82,7 @@ int mISDN_dsp_element_register(struct mISDN_dsp_element *elem) INIT_LIST_HEAD(&entry->list); entry->elem = elem; - entry->dev.class = elements_class; + entry->dev.class = &elements_class; entry->dev.release = mISDN_dsp_dev_release; dev_set_drvdata(&entry->dev, elem); dev_set_name(&entry->dev, "%s", elem->name); @@ -131,9 +133,11 @@ EXPORT_SYMBOL(mISDN_dsp_element_unregister); int dsp_pipeline_module_init(void) { - elements_class = class_create("dsp_pipeline"); - if (IS_ERR(elements_class)) - return PTR_ERR(elements_class); + int err; + + err = class_register(&elements_class); + if (err) + return err; dsp_hwec_init(); @@ -146,7 +150,7 @@ void dsp_pipeline_module_exit(void) dsp_hwec_exit(); - class_destroy(elements_class); + class_unregister(&elements_class); list_for_each_entry_safe(entry, n, &dsp_elements, list) { list_del(&entry->list); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index f745f8508243..59445763e55a 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -53,15 +53,17 @@ struct convert_context { struct completion restart; struct bio *bio_in; - struct bio *bio_out; struct bvec_iter iter_in; + struct bio *bio_out; struct bvec_iter iter_out; - u64 cc_sector; atomic_t cc_pending; + u64 cc_sector; union { struct skcipher_request *req; struct aead_request *req_aead; } r; + bool aead_recheck; + bool aead_failed; }; @@ -82,6 +84,8 @@ struct dm_crypt_io { blk_status_t error; sector_t sector; + struct bvec_iter saved_bi_iter; + struct rb_node rb_node; } CRYPTO_MINALIGN_ATTR; @@ -1370,10 +1374,13 @@ static int crypt_convert_block_aead(struct crypt_config *cc, if (r == -EBADMSG) { sector_t s = le64_to_cpu(*sector); - DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", - ctx->bio_in->bi_bdev, s); - dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", - ctx->bio_in, s, 0); + ctx->aead_failed = true; + if (ctx->aead_recheck) { + DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", + ctx->bio_in->bi_bdev, s); + dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", + ctx->bio_in, s, 0); + } } if (!r && cc->iv_gen_ops && cc->iv_gen_ops->post) @@ -1757,6 +1764,8 @@ static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc, io->base_bio = bio; io->sector = sector; io->error = 0; + io->ctx.aead_recheck = false; + io->ctx.aead_failed = false; io->ctx.r.req = NULL; io->integrity_metadata = NULL; io->integrity_metadata_from_pool = false; @@ -1768,6 +1777,8 @@ static void crypt_inc_pending(struct dm_crypt_io *io) atomic_inc(&io->io_pending); } +static void kcryptd_queue_read(struct dm_crypt_io *io); + /* * One of the bios was finished. Check for completion of * the whole request and correctly clean up the buffer. @@ -1781,6 +1792,15 @@ static void crypt_dec_pending(struct dm_crypt_io *io) if (!atomic_dec_and_test(&io->io_pending)) return; + if (likely(!io->ctx.aead_recheck) && unlikely(io->ctx.aead_failed) && + cc->on_disk_tag_size && bio_data_dir(base_bio) == READ) { + io->ctx.aead_recheck = true; + io->ctx.aead_failed = false; + io->error = 0; + kcryptd_queue_read(io); + return; + } + if (io->ctx.r.req) crypt_free_req(cc, io->ctx.r.req, base_bio); @@ -1816,15 +1836,19 @@ static void crypt_endio(struct bio *clone) struct dm_crypt_io *io = clone->bi_private; struct crypt_config *cc = io->cc; unsigned int rw = bio_data_dir(clone); - blk_status_t error; + blk_status_t error = clone->bi_status; + + if (io->ctx.aead_recheck && !error) { + kcryptd_queue_crypt(io); + return; + } /* * free the processed pages */ - if (rw == WRITE) + if (rw == WRITE || io->ctx.aead_recheck) crypt_free_buffer_pages(cc, clone); - error = clone->bi_status; bio_put(clone); if (rw == READ && !error) { @@ -1845,6 +1869,22 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) struct crypt_config *cc = io->cc; struct bio *clone; + if (io->ctx.aead_recheck) { + if (!(gfp & __GFP_DIRECT_RECLAIM)) + return 1; + crypt_inc_pending(io); + clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size); + if (unlikely(!clone)) { + crypt_dec_pending(io); + return 1; + } + clone->bi_iter.bi_sector = cc->start + io->sector; + crypt_convert_init(cc, &io->ctx, clone, clone, io->sector); + io->saved_bi_iter = clone->bi_iter; + dm_submit_bio_remap(io->base_bio, clone); + return 0; + } + /* * We need the original biovec array in order to decrypt the whole bio * data *afterwards* -- thanks to immutable biovecs we don't need to @@ -2071,6 +2111,12 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) io->ctx.bio_out = clone; io->ctx.iter_out = clone->bi_iter; + if (crypt_integrity_aead(cc)) { + bio_copy_data(clone, io->base_bio); + io->ctx.bio_in = clone; + io->ctx.iter_in = clone->bi_iter; + } + sector += bio_sectors(clone); crypt_inc_pending(io); @@ -2107,6 +2153,14 @@ dec: static void kcryptd_crypt_read_done(struct dm_crypt_io *io) { + if (io->ctx.aead_recheck) { + if (!io->error) { + io->ctx.bio_in->bi_iter = io->saved_bi_iter; + bio_copy_data(io->base_bio, io->ctx.bio_in); + } + crypt_free_buffer_pages(io->cc, io->ctx.bio_in); + bio_put(io->ctx.bio_in); + } crypt_dec_pending(io); } @@ -2136,11 +2190,17 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) crypt_inc_pending(io); - crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio, - io->sector); + if (io->ctx.aead_recheck) { + io->ctx.cc_sector = io->sector + cc->iv_offset; + r = crypt_convert(cc, &io->ctx, + test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true); + } else { + crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio, + io->sector); - r = crypt_convert(cc, &io->ctx, - test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true); + r = crypt_convert(cc, &io->ctx, + test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true); + } /* * Crypto API backlogged the request, because its queue was full * and we're in softirq context, so continue from a workqueue @@ -2182,10 +2242,13 @@ static void kcryptd_async_done(void *data, int error) if (error == -EBADMSG) { sector_t s = le64_to_cpu(*org_sector_of_dmreq(cc, dmreq)); - DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", - ctx->bio_in->bi_bdev, s); - dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", - ctx->bio_in, s, 0); + ctx->aead_failed = true; + if (ctx->aead_recheck) { + DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", + ctx->bio_in->bi_bdev, s); + dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", + ctx->bio_in, s, 0); + } io->error = BLK_STS_PROTECTION; } else if (error < 0) io->error = BLK_STS_IOERR; @@ -3110,7 +3173,7 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar sval = strchr(opt_string + strlen("integrity:"), ':') + 1; if (!strcasecmp(sval, "aead")) { set_bit(CRYPT_MODE_INTEGRITY_AEAD, &cc->cipher_flags); - } else if (strcasecmp(sval, "none")) { + } else if (strcasecmp(sval, "none")) { ti->error = "Unknown integrity profile"; return -EINVAL; } @@ -3639,7 +3702,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type crypt_target = { .name = "crypt", - .version = {1, 24, 0}, + .version = {1, 25, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index c5f03aab4552..1fc901df84eb 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -278,6 +278,8 @@ struct dm_integrity_c { atomic64_t number_of_mismatches; + mempool_t recheck_pool; + struct notifier_block reboot_notifier; }; @@ -1689,6 +1691,77 @@ failed: get_random_bytes(result, ic->tag_size); } +static noinline void integrity_recheck(struct dm_integrity_io *dio, char *checksum) +{ + struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); + struct dm_integrity_c *ic = dio->ic; + struct bvec_iter iter; + struct bio_vec bv; + sector_t sector, logical_sector, area, offset; + struct page *page; + void *buffer; + + get_area_and_offset(ic, dio->range.logical_sector, &area, &offset); + dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, + &dio->metadata_offset); + sector = get_data_sector(ic, area, offset); + logical_sector = dio->range.logical_sector; + + page = mempool_alloc(&ic->recheck_pool, GFP_NOIO); + buffer = page_to_virt(page); + + __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) { + unsigned pos = 0; + + do { + char *mem; + int r; + struct dm_io_request io_req; + struct dm_io_region io_loc; + io_req.bi_opf = REQ_OP_READ; + io_req.mem.type = DM_IO_KMEM; + io_req.mem.ptr.addr = buffer; + io_req.notify.fn = NULL; + io_req.client = ic->io; + io_loc.bdev = ic->dev->bdev; + io_loc.sector = sector; + io_loc.count = ic->sectors_per_block; + + r = dm_io(&io_req, 1, &io_loc, NULL); + if (unlikely(r)) { + dio->bi_status = errno_to_blk_status(r); + goto free_ret; + } + + integrity_sector_checksum(ic, logical_sector, buffer, checksum); + r = dm_integrity_rw_tag(ic, checksum, &dio->metadata_block, + &dio->metadata_offset, ic->tag_size, TAG_CMP); + if (r) { + if (r > 0) { + DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx", + bio->bi_bdev, logical_sector); + atomic64_inc(&ic->number_of_mismatches); + dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum", + bio, logical_sector, 0); + r = -EILSEQ; + } + dio->bi_status = errno_to_blk_status(r); + goto free_ret; + } + + mem = bvec_kmap_local(&bv); + memcpy(mem + pos, buffer, ic->sectors_per_block << SECTOR_SHIFT); + kunmap_local(mem); + + pos += ic->sectors_per_block << SECTOR_SHIFT; + sector += ic->sectors_per_block; + logical_sector += ic->sectors_per_block; + } while (pos < bv.bv_len); + } +free_ret: + mempool_free(page, &ic->recheck_pool); +} + static void integrity_metadata(struct work_struct *w) { struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work); @@ -1776,15 +1849,8 @@ again: checksums_ptr - checksums, dio->op == REQ_OP_READ ? TAG_CMP : TAG_WRITE); if (unlikely(r)) { if (r > 0) { - sector_t s; - - s = sector - ((r + ic->tag_size - 1) / ic->tag_size); - DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx", - bio->bi_bdev, s); - r = -EILSEQ; - atomic64_inc(&ic->number_of_mismatches); - dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum", - bio, s, 0); + integrity_recheck(dio, checksums); + goto skip_io; } if (likely(checksums != checksums_onstack)) kfree(checksums); @@ -4261,6 +4327,12 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv goto bad; } + r = mempool_init_page_pool(&ic->recheck_pool, 1, 0); + if (r) { + ti->error = "Cannot allocate mempool"; + goto bad; + } + ic->metadata_wq = alloc_workqueue("dm-integrity-metadata", WQ_MEM_RECLAIM, METADATA_WORKQUEUE_MAX_ACTIVE); if (!ic->metadata_wq) { @@ -4609,6 +4681,7 @@ static void dm_integrity_dtr(struct dm_target *ti) kvfree(ic->bbs); if (ic->bufio) dm_bufio_client_destroy(ic->bufio); + mempool_exit(&ic->recheck_pool); mempool_exit(&ic->journal_io_mempool); if (ic->io) dm_io_client_destroy(ic->io); @@ -4661,7 +4734,7 @@ static void dm_integrity_dtr(struct dm_target *ti) static struct target_type integrity_target = { .name = "integrity", - .version = {1, 10, 0}, + .version = {1, 11, 0}, .module = THIS_MODULE, .features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY, .ctr = dm_integrity_ctr, diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 82662f5769c4..1b591bfa90d5 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -482,6 +482,63 @@ int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io, return 0; } +static int verity_recheck_copy(struct dm_verity *v, struct dm_verity_io *io, + u8 *data, size_t len) +{ + memcpy(data, io->recheck_buffer, len); + io->recheck_buffer += len; + + return 0; +} + +static noinline int verity_recheck(struct dm_verity *v, struct dm_verity_io *io, + struct bvec_iter start, sector_t cur_block) +{ + struct page *page; + void *buffer; + int r; + struct dm_io_request io_req; + struct dm_io_region io_loc; + + page = mempool_alloc(&v->recheck_pool, GFP_NOIO); + buffer = page_to_virt(page); + + io_req.bi_opf = REQ_OP_READ; + io_req.mem.type = DM_IO_KMEM; + io_req.mem.ptr.addr = buffer; + io_req.notify.fn = NULL; + io_req.client = v->io; + io_loc.bdev = v->data_dev->bdev; + io_loc.sector = cur_block << (v->data_dev_block_bits - SECTOR_SHIFT); + io_loc.count = 1 << (v->data_dev_block_bits - SECTOR_SHIFT); + r = dm_io(&io_req, 1, &io_loc, NULL); + if (unlikely(r)) + goto free_ret; + + r = verity_hash(v, verity_io_hash_req(v, io), buffer, + 1 << v->data_dev_block_bits, + verity_io_real_digest(v, io), true); + if (unlikely(r)) + goto free_ret; + + if (memcmp(verity_io_real_digest(v, io), + verity_io_want_digest(v, io), v->digest_size)) { + r = -EIO; + goto free_ret; + } + + io->recheck_buffer = buffer; + r = verity_for_bv_block(v, io, &start, verity_recheck_copy); + if (unlikely(r)) + goto free_ret; + + r = 0; +free_ret: + mempool_free(page, &v->recheck_pool); + + return r; +} + static int verity_bv_zero(struct dm_verity *v, struct dm_verity_io *io, u8 *data, size_t len) { @@ -508,9 +565,7 @@ static int verity_verify_io(struct dm_verity_io *io) { bool is_zero; struct dm_verity *v = io->v; -#if defined(CONFIG_DM_VERITY_FEC) struct bvec_iter start; -#endif struct bvec_iter iter_copy; struct bvec_iter *iter; struct crypto_wait wait; @@ -561,10 +616,7 @@ static int verity_verify_io(struct dm_verity_io *io) if (unlikely(r < 0)) return r; -#if defined(CONFIG_DM_VERITY_FEC) - if (verity_fec_is_enabled(v)) - start = *iter; -#endif + start = *iter; r = verity_for_io_block(v, io, iter, &wait); if (unlikely(r < 0)) return r; @@ -586,6 +638,10 @@ static int verity_verify_io(struct dm_verity_io *io) * tasklet since it may sleep, so fallback to work-queue. */ return -EAGAIN; + } else if (verity_recheck(v, io, start, cur_block) == 0) { + if (v->validated_blocks) + set_bit(cur_block, v->validated_blocks); + continue; #if defined(CONFIG_DM_VERITY_FEC) } else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA, cur_block, NULL, &start) == 0) { @@ -941,6 +997,10 @@ static void verity_dtr(struct dm_target *ti) if (v->verify_wq) destroy_workqueue(v->verify_wq); + mempool_exit(&v->recheck_pool); + if (v->io) + dm_io_client_destroy(v->io); + if (v->bufio) dm_bufio_client_destroy(v->bufio); @@ -1379,6 +1439,20 @@ static int verity_ctr(struct dm_target *ti, unsigned int argc, char **argv) } v->hash_blocks = hash_position; + r = mempool_init_page_pool(&v->recheck_pool, 1, 0); + if (unlikely(r)) { + ti->error = "Cannot allocate mempool"; + goto bad; + } + + v->io = dm_io_client_create(); + if (IS_ERR(v->io)) { + r = PTR_ERR(v->io); + v->io = NULL; + ti->error = "Cannot allocate dm io"; + goto bad; + } + v->bufio = dm_bufio_client_create(v->hash_dev->bdev, 1 << v->hash_dev_block_bits, 1, sizeof(struct buffer_aux), dm_bufio_alloc_callback, NULL, @@ -1486,7 +1560,7 @@ int dm_verity_get_root_digest(struct dm_target *ti, u8 **root_digest, unsigned i static struct target_type verity_target = { .name = "verity", .features = DM_TARGET_IMMUTABLE, - .version = {1, 9, 0}, + .version = {1, 10, 0}, .module = THIS_MODULE, .ctr = verity_ctr, .dtr = verity_dtr, diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index f3f607008419..db93a91169d5 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -11,6 +11,7 @@ #ifndef DM_VERITY_H #define DM_VERITY_H +#include <linux/dm-io.h> #include <linux/dm-bufio.h> #include <linux/device-mapper.h> #include <linux/interrupt.h> @@ -68,6 +69,9 @@ struct dm_verity { unsigned long *validated_blocks; /* bitset blocks validated */ char *signature_key_desc; /* signature keyring reference */ + + struct dm_io_client *io; + mempool_t recheck_pool; }; struct dm_verity_io { @@ -76,14 +80,16 @@ struct dm_verity_io { /* original value of bio->bi_end_io */ bio_end_io_t *orig_bi_end_io; + struct bvec_iter iter; + sector_t block; unsigned int n_blocks; bool in_tasklet; - struct bvec_iter iter; - struct work_struct work; + char *recheck_buffer; + /* * Three variably-size fields follow this struct: * diff --git a/drivers/md/md.c b/drivers/md/md.c index 2266358d8074..9e41a9aaba8b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -579,8 +579,12 @@ static void submit_flushes(struct work_struct *ws) rcu_read_lock(); } rcu_read_unlock(); - if (atomic_dec_and_test(&mddev->flush_pending)) + if (atomic_dec_and_test(&mddev->flush_pending)) { + /* The pair is percpu_ref_get() from md_flush_request() */ + percpu_ref_put(&mddev->active_io); + queue_work(md_wq, &mddev->flush_work); + } } static void md_submit_flush_data(struct work_struct *ws) @@ -8788,12 +8792,16 @@ void md_do_sync(struct md_thread *thread) int ret; /* just incase thread restarts... */ - if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) || - test_bit(MD_RECOVERY_WAIT, &mddev->recovery)) + if (test_bit(MD_RECOVERY_DONE, &mddev->recovery)) return; - if (!md_is_rdwr(mddev)) {/* never try to sync a read-only array */ + + if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) + goto skip; + + if (test_bit(MD_RECOVERY_WAIT, &mddev->recovery) || + !md_is_rdwr(mddev)) {/* never try to sync a read-only array */ set_bit(MD_RECOVERY_INTR, &mddev->recovery); - return; + goto skip; } if (mddev_is_clustered(mddev)) { @@ -9368,13 +9376,19 @@ static void md_start_sync(struct work_struct *ws) struct mddev *mddev = container_of(ws, struct mddev, sync_work); int spares = 0; bool suspend = false; + char *name; - if (md_spares_need_change(mddev)) + /* + * If reshape is still in progress, spares won't be added or removed + * from conf until reshape is done. + */ + if (mddev->reshape_position == MaxSector && + md_spares_need_change(mddev)) { suspend = true; + mddev_suspend(mddev, false); + } - suspend ? mddev_suspend_and_lock_nointr(mddev) : - mddev_lock_nointr(mddev); - + mddev_lock_nointr(mddev); if (!md_is_rdwr(mddev)) { /* * On a read-only array we can: @@ -9400,8 +9414,10 @@ static void md_start_sync(struct work_struct *ws) if (spares) md_bitmap_write_all(mddev->bitmap); + name = test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ? + "reshape" : "resync"; rcu_assign_pointer(mddev->sync_thread, - md_register_thread(md_do_sync, mddev, "resync")); + md_register_thread(md_do_sync, mddev, name)); if (!mddev->sync_thread) { pr_warn("%s: could not start resync thread...\n", mdname(mddev)); @@ -9445,6 +9461,20 @@ not_running: sysfs_notify_dirent_safe(mddev->sysfs_action); } +static void unregister_sync_thread(struct mddev *mddev) +{ + if (!test_bit(MD_RECOVERY_DONE, &mddev->recovery)) { + /* resync/recovery still happening */ + clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + return; + } + + if (WARN_ON_ONCE(!mddev->sync_thread)) + return; + + md_reap_sync_thread(mddev); +} + /* * This routine is regularly called by all per-raid-array threads to * deal with generic issues like resync and super-block update. @@ -9469,9 +9499,6 @@ not_running: */ void md_check_recovery(struct mddev *mddev) { - if (READ_ONCE(mddev->suspended)) - return; - if (mddev->bitmap) md_bitmap_daemon_work(mddev); @@ -9485,7 +9512,8 @@ void md_check_recovery(struct mddev *mddev) } if (!md_is_rdwr(mddev) && - !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) + !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) && + !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) return; if ( ! ( (mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) || @@ -9507,8 +9535,7 @@ void md_check_recovery(struct mddev *mddev) struct md_rdev *rdev; if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { - /* sync_work already queued. */ - clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + unregister_sync_thread(mddev); goto unlock; } @@ -9571,16 +9598,7 @@ void md_check_recovery(struct mddev *mddev) * still set. */ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { - if (!test_bit(MD_RECOVERY_DONE, &mddev->recovery)) { - /* resync/recovery still happening */ - clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - goto unlock; - } - - if (WARN_ON_ONCE(!mddev->sync_thread)) - goto unlock; - - md_reap_sync_thread(mddev); + unregister_sync_thread(mddev); goto unlock; } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 7412066ea22c..a5f8419e2df1 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -4175,11 +4175,7 @@ static int raid10_run(struct mddev *mddev) clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); - set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); - rcu_assign_pointer(mddev->sync_thread, - md_register_thread(md_do_sync, mddev, "reshape")); - if (!mddev->sync_thread) - goto out_free_conf; + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); } return 0; @@ -4573,16 +4569,8 @@ out: clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); clear_bit(MD_RECOVERY_DONE, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); - set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); - - rcu_assign_pointer(mddev->sync_thread, - md_register_thread(md_do_sync, mddev, "reshape")); - if (!mddev->sync_thread) { - ret = -EAGAIN; - goto abort; - } + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); conf->reshape_checkpoint = jiffies; - md_wakeup_thread(mddev->sync_thread); md_new_event(); return 0; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 8497880135ee..6a7a32f7fb91 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7936,11 +7936,7 @@ static int raid5_run(struct mddev *mddev) clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); - set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); - rcu_assign_pointer(mddev->sync_thread, - md_register_thread(md_do_sync, mddev, "reshape")); - if (!mddev->sync_thread) - goto abort; + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); } /* Ok, everything is just fine now */ @@ -8506,29 +8502,8 @@ static int raid5_start_reshape(struct mddev *mddev) clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); clear_bit(MD_RECOVERY_DONE, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); - set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); - rcu_assign_pointer(mddev->sync_thread, - md_register_thread(md_do_sync, mddev, "reshape")); - if (!mddev->sync_thread) { - mddev->recovery = 0; - spin_lock_irq(&conf->device_lock); - write_seqcount_begin(&conf->gen_lock); - mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks; - mddev->new_chunk_sectors = - conf->chunk_sectors = conf->prev_chunk_sectors; - mddev->new_layout = conf->algorithm = conf->prev_algo; - rdev_for_each(rdev, mddev) - rdev->new_data_offset = rdev->data_offset; - smp_wmb(); - conf->generation --; - conf->reshape_progress = MaxSector; - mddev->reshape_position = MaxSector; - write_seqcount_end(&conf->gen_lock); - spin_unlock_irq(&conf->device_lock); - return -EAGAIN; - } + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); conf->reshape_checkpoint = jiffies; - md_wakeup_thread(mddev->sync_thread); md_new_event(); return 0; } diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index f410bee50132..58ed7193a3ca 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1015,10 +1015,12 @@ static int mmc_select_bus_width(struct mmc_card *card) static unsigned ext_csd_bits[] = { EXT_CSD_BUS_WIDTH_8, EXT_CSD_BUS_WIDTH_4, + EXT_CSD_BUS_WIDTH_1, }; static unsigned bus_widths[] = { MMC_BUS_WIDTH_8, MMC_BUS_WIDTH_4, + MMC_BUS_WIDTH_1, }; struct mmc_host *host = card->host; unsigned idx, bus_width = 0; diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c index 35067e1e6cd8..f5da7f9baa52 100644 --- a/drivers/mmc/host/mmci_stm32_sdmmc.c +++ b/drivers/mmc/host/mmci_stm32_sdmmc.c @@ -225,6 +225,8 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl) struct scatterlist *sg; int i; + host->dma_in_progress = true; + if (!host->variant->dma_lli || data->sg_len == 1 || idma->use_bounce_buffer) { u32 dma_addr; @@ -263,9 +265,30 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl) return 0; } +static void sdmmc_idma_error(struct mmci_host *host) +{ + struct mmc_data *data = host->data; + struct sdmmc_idma *idma = host->dma_priv; + + if (!dma_inprogress(host)) + return; + + writel_relaxed(0, host->base + MMCI_STM32_IDMACTRLR); + host->dma_in_progress = false; + data->host_cookie = 0; + + if (!idma->use_bounce_buffer) + dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, + mmc_get_dma_dir(data)); +} + static void sdmmc_idma_finalize(struct mmci_host *host, struct mmc_data *data) { + if (!dma_inprogress(host)) + return; + writel_relaxed(0, host->base + MMCI_STM32_IDMACTRLR); + host->dma_in_progress = false; if (!data->host_cookie) sdmmc_idma_unprep_data(host, data, 0); @@ -676,6 +699,7 @@ static struct mmci_host_ops sdmmc_variant_ops = { .dma_setup = sdmmc_idma_setup, .dma_start = sdmmc_idma_start, .dma_finalize = sdmmc_idma_finalize, + .dma_error = sdmmc_idma_error, .set_clkreg = mmci_sdmmc_set_clkreg, .set_pwrreg = mmci_sdmmc_set_pwrreg, .busy_complete = sdmmc_busy_complete, diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c index 8cf3a375de65..cc9d28b75eb9 100644 --- a/drivers/mmc/host/sdhci-xenon-phy.c +++ b/drivers/mmc/host/sdhci-xenon-phy.c @@ -11,6 +11,7 @@ #include <linux/slab.h> #include <linux/delay.h> #include <linux/ktime.h> +#include <linux/iopoll.h> #include <linux/of_address.h> #include "sdhci-pltfm.h" @@ -109,6 +110,8 @@ #define XENON_EMMC_PHY_LOGIC_TIMING_ADJUST (XENON_EMMC_PHY_REG_BASE + 0x18) #define XENON_LOGIC_TIMING_VALUE 0x00AA8977 +#define XENON_MAX_PHY_TIMEOUT_LOOPS 100 + /* * List offset of PHY registers and some special register values * in eMMC PHY 5.0 or eMMC PHY 5.1 @@ -216,6 +219,19 @@ static int xenon_alloc_emmc_phy(struct sdhci_host *host) return 0; } +static int xenon_check_stability_internal_clk(struct sdhci_host *host) +{ + u32 reg; + int err; + + err = read_poll_timeout(sdhci_readw, reg, reg & SDHCI_CLOCK_INT_STABLE, + 1100, 20000, false, host, SDHCI_CLOCK_CONTROL); + if (err) + dev_err(mmc_dev(host->mmc), "phy_init: Internal clock never stabilized.\n"); + + return err; +} + /* * eMMC 5.0/5.1 PHY init/re-init. * eMMC PHY init should be executed after: @@ -232,6 +248,11 @@ static int xenon_emmc_phy_init(struct sdhci_host *host) struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host); struct xenon_emmc_phy_regs *phy_regs = priv->emmc_phy_regs; + int ret = xenon_check_stability_internal_clk(host); + + if (ret) + return ret; + reg = sdhci_readl(host, phy_regs->timing_adj); reg |= XENON_PHY_INITIALIZAION; sdhci_writel(host, reg, phy_regs->timing_adj); @@ -259,18 +280,27 @@ static int xenon_emmc_phy_init(struct sdhci_host *host) /* get the wait time */ wait /= clock; wait++; - /* wait for host eMMC PHY init completes */ - udelay(wait); - reg = sdhci_readl(host, phy_regs->timing_adj); - reg &= XENON_PHY_INITIALIZAION; - if (reg) { + /* + * AC5X spec says bit must be polled until zero. + * We see cases in which timeout can take longer + * than the standard calculation on AC5X, which is + * expected following the spec comment above. + * According to the spec, we must wait as long as + * it takes for that bit to toggle on AC5X. + * Cap that with 100 delay loops so we won't get + * stuck here forever: + */ + + ret = read_poll_timeout(sdhci_readl, reg, + !(reg & XENON_PHY_INITIALIZAION), + wait, XENON_MAX_PHY_TIMEOUT_LOOPS * wait, + false, host, phy_regs->timing_adj); + if (ret) dev_err(mmc_dev(host->mmc), "eMMC PHY init cannot complete after %d us\n", - wait); - return -ETIMEDOUT; - } + wait * XENON_MAX_PHY_TIMEOUT_LOOPS); - return 0; + return ret; } #define ARMADA_3700_SOC_PAD_1_8V 0x1 diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index e451b28840d5..5887feb347a4 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -621,6 +621,7 @@ static void mtd_check_of_node(struct mtd_info *mtd) if (plen == mtd_name_len && !strncmp(mtd->name, pname + offset, plen)) { mtd_set_of_node(mtd, mtd_dn); + of_node_put(mtd_dn); break; } } diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c index a46698744850..5b0f5a9cef81 100644 --- a/drivers/mtd/nand/raw/marvell_nand.c +++ b/drivers/mtd/nand/raw/marvell_nand.c @@ -290,16 +290,13 @@ static const struct marvell_hw_ecc_layout marvell_nfc_layouts[] = { MARVELL_LAYOUT( 2048, 512, 4, 1, 1, 2048, 32, 30, 0, 0, 0), MARVELL_LAYOUT( 2048, 512, 8, 2, 1, 1024, 0, 30,1024,32, 30), MARVELL_LAYOUT( 2048, 512, 8, 2, 1, 1024, 0, 30,1024,64, 30), - MARVELL_LAYOUT( 2048, 512, 12, 3, 2, 704, 0, 30,640, 0, 30), - MARVELL_LAYOUT( 2048, 512, 16, 5, 4, 512, 0, 30, 0, 32, 30), + MARVELL_LAYOUT( 2048, 512, 16, 4, 4, 512, 0, 30, 0, 32, 30), MARVELL_LAYOUT( 4096, 512, 4, 2, 2, 2048, 32, 30, 0, 0, 0), - MARVELL_LAYOUT( 4096, 512, 8, 5, 4, 1024, 0, 30, 0, 64, 30), - MARVELL_LAYOUT( 4096, 512, 12, 6, 5, 704, 0, 30,576, 32, 30), - MARVELL_LAYOUT( 4096, 512, 16, 9, 8, 512, 0, 30, 0, 32, 30), + MARVELL_LAYOUT( 4096, 512, 8, 4, 4, 1024, 0, 30, 0, 64, 30), + MARVELL_LAYOUT( 4096, 512, 16, 8, 8, 512, 0, 30, 0, 32, 30), MARVELL_LAYOUT( 8192, 512, 4, 4, 4, 2048, 0, 30, 0, 0, 0), - MARVELL_LAYOUT( 8192, 512, 8, 9, 8, 1024, 0, 30, 0, 160, 30), - MARVELL_LAYOUT( 8192, 512, 12, 12, 11, 704, 0, 30,448, 64, 30), - MARVELL_LAYOUT( 8192, 512, 16, 17, 16, 512, 0, 30, 0, 32, 30), + MARVELL_LAYOUT( 8192, 512, 8, 8, 8, 1024, 0, 30, 0, 160, 30), + MARVELL_LAYOUT( 8192, 512, 16, 16, 16, 512, 0, 30, 0, 32, 30), }; /** diff --git a/drivers/mtd/nand/spi/gigadevice.c b/drivers/mtd/nand/spi/gigadevice.c index 987710e09441..6023cba748bb 100644 --- a/drivers/mtd/nand/spi/gigadevice.c +++ b/drivers/mtd/nand/spi/gigadevice.c @@ -186,7 +186,7 @@ static int gd5fxgq4uexxg_ecc_get_status(struct spinand_device *spinand, { u8 status2; struct spi_mem_op op = SPINAND_GET_FEATURE_OP(GD5FXGQXXEXXG_REG_STATUS2, - &status2); + spinand->scratchbuf); int ret; switch (status & STATUS_ECC_MASK) { @@ -207,6 +207,7 @@ static int gd5fxgq4uexxg_ecc_get_status(struct spinand_device *spinand, * report the maximum of 4 in this case */ /* bits sorted this way (3...0): ECCS1,ECCS0,ECCSE1,ECCSE0 */ + status2 = *(spinand->scratchbuf); return ((status & STATUS_ECC_MASK) >> 2) | ((status2 & STATUS_ECC_MASK) >> 4); @@ -228,7 +229,7 @@ static int gd5fxgq5xexxg_ecc_get_status(struct spinand_device *spinand, { u8 status2; struct spi_mem_op op = SPINAND_GET_FEATURE_OP(GD5FXGQXXEXXG_REG_STATUS2, - &status2); + spinand->scratchbuf); int ret; switch (status & STATUS_ECC_MASK) { @@ -248,6 +249,7 @@ static int gd5fxgq5xexxg_ecc_get_status(struct spinand_device *spinand, * 1 ... 4 bits are flipped (and corrected) */ /* bits sorted this way (1...0): ECCSE1, ECCSE0 */ + status2 = *(spinand->scratchbuf); return ((status2 & STATUS_ECC_MASK) >> 4) + 1; case STATUS_ECC_UNCOR_ERROR: diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 4db6122c9b43..339db6e4a1d5 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -194,15 +194,10 @@ static int bareudp_init(struct net_device *dev) struct bareudp_dev *bareudp = netdev_priv(dev); int err; - dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!dev->tstats) - return -ENOMEM; - err = gro_cells_init(&bareudp->gro_cells, dev); - if (err) { - free_percpu(dev->tstats); + if (err) return err; - } + return 0; } @@ -211,7 +206,6 @@ static void bareudp_uninit(struct net_device *dev) struct bareudp_dev *bareudp = netdev_priv(dev); gro_cells_destroy(&bareudp->gro_cells); - free_percpu(dev->tstats); } static struct socket *bareudp_create_sock(struct net *net, __be16 port) @@ -529,7 +523,6 @@ static const struct net_device_ops bareudp_netdev_ops = { .ndo_open = bareudp_open, .ndo_stop = bareudp_stop, .ndo_start_xmit = bareudp_xmit, - .ndo_get_stats64 = dev_get_tstats64, .ndo_fill_metadata_dst = bareudp_fill_metadata_dst, }; @@ -567,6 +560,7 @@ static void bareudp_setup(struct net_device *dev) netif_keep_dst(dev); dev->priv_flags |= IFF_NO_QUEUE; dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; + dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; } static int bareudp_validate(struct nlattr *tb[], struct nlattr *data[], diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index ae1a561c4a0f..2c5ed0a7cb18 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1811,7 +1811,7 @@ void bond_xdp_set_features(struct net_device *bond_dev) ASSERT_RTNL(); - if (!bond_xdp_check(bond)) { + if (!bond_xdp_check(bond) || !bond_has_slaves(bond)) { xdp_clear_features_flag(bond_dev); return; } diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig index 620766eb6bc1..2e31db55d927 100644 --- a/drivers/net/can/Kconfig +++ b/drivers/net/can/Kconfig @@ -169,6 +169,7 @@ config CAN_KVASER_PCIEFD Kvaser Mini PCI Express 1xCAN v3 Kvaser Mini PCI Express 2xCAN v3 Kvaser M.2 PCIe 4xCAN + Kvaser PCIe 8xCAN config CAN_SLCAN tristate "Serial / USB serial CAN Adaptors (slcan)" diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c index 416f10480b40..f81b598147b3 100644 --- a/drivers/net/can/kvaser_pciefd.c +++ b/drivers/net/can/kvaser_pciefd.c @@ -27,7 +27,7 @@ MODULE_DESCRIPTION("CAN driver for Kvaser CAN/PCIe devices"); #define KVASER_PCIEFD_BEC_POLL_FREQ (jiffies + msecs_to_jiffies(200)) #define KVASER_PCIEFD_MAX_ERR_REP 256U #define KVASER_PCIEFD_CAN_TX_MAX_COUNT 17U -#define KVASER_PCIEFD_MAX_CAN_CHANNELS 4UL +#define KVASER_PCIEFD_MAX_CAN_CHANNELS 8UL #define KVASER_PCIEFD_DMA_COUNT 2U #define KVASER_PCIEFD_DMA_SIZE (4U * 1024U) @@ -49,6 +49,7 @@ MODULE_DESCRIPTION("CAN driver for Kvaser CAN/PCIe devices"); /* Xilinx based devices */ #define KVASER_PCIEFD_M2_4CAN_DEVICE_ID 0x0017 +#define KVASER_PCIEFD_8CAN_DEVICE_ID 0x0019 /* Altera SerDes Enable 64-bit DMA address translation */ #define KVASER_PCIEFD_ALTERA_DMA_64BIT BIT(0) @@ -497,6 +498,10 @@ static struct pci_device_id kvaser_pciefd_id_table[] = { .driver_data = (kernel_ulong_t)&kvaser_pciefd_xilinx_driver_data, }, { + PCI_DEVICE(KVASER_PCIEFD_VENDOR, KVASER_PCIEFD_8CAN_DEVICE_ID), + .driver_data = (kernel_ulong_t)&kvaser_pciefd_xilinx_driver_data, + }, + { 0, }, }; diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index eebf967f4711..1d9057dc44f2 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -837,7 +837,7 @@ static int __mcp251xfd_get_berr_counter(const struct net_device *ndev, return err; if (trec & MCP251XFD_REG_TREC_TXBO) - bec->txerr = 256; + bec->txerr = CAN_BUS_OFF_THRESHOLD; else bec->txerr = FIELD_GET(MCP251XFD_REG_TREC_TEC_MASK, trec); bec->rxerr = FIELD_GET(MCP251XFD_REG_TREC_REC_MASK, trec); diff --git a/drivers/net/can/usb/Kconfig b/drivers/net/can/usb/Kconfig index d1450722cb3c..bd58c636d465 100644 --- a/drivers/net/can/usb/Kconfig +++ b/drivers/net/can/usb/Kconfig @@ -100,6 +100,7 @@ config CAN_KVASER_USB - Scania VCI2 (if you have the Kvaser logo on top) - Kvaser BlackBird v2 - Kvaser Leaf Pro HS v2 + - Kvaser Leaf v3 - Kvaser Hybrid CAN/LIN - Kvaser Hybrid 2xCAN/LIN - Kvaser Hybrid Pro CAN/LIN diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 95b0fdb602c8..65c962f76898 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -385,7 +385,7 @@ static struct gs_tx_context *gs_get_tx_context(struct gs_can *dev, static int gs_cmd_reset(struct gs_can *dev) { struct gs_device_mode dm = { - .mode = GS_CAN_MODE_RESET, + .mode = cpu_to_le32(GS_CAN_MODE_RESET), }; return usb_control_msg_send(dev->udev, 0, GS_USB_BREQ_MODE, diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c index 71ef4db5c09f..8faf8a462c05 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c @@ -88,6 +88,7 @@ #define USB_USBCAN_PRO_4HS_PRODUCT_ID 0x0114 #define USB_HYBRID_CANLIN_PRODUCT_ID 0x0115 #define USB_HYBRID_PRO_CANLIN_PRODUCT_ID 0x0116 +#define USB_LEAF_V3_PRODUCT_ID 0x0117 static const struct kvaser_usb_driver_info kvaser_usb_driver_info_hydra = { .quirks = KVASER_USB_QUIRK_HAS_HARDWARE_TIMESTAMP, @@ -235,6 +236,8 @@ static const struct usb_device_id kvaser_usb_table[] = { .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_PRO_CANLIN_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_V3_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, { } }; MODULE_DEVICE_TABLE(usb, kvaser_usb_table); diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index 50351cef6ca5..14923535ca7e 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -49,9 +49,9 @@ static int ksz8_ind_write8(struct ksz_device *dev, u8 table, u16 addr, u8 data) mutex_lock(&dev->alu_mutex); ctrl_addr = IND_ACC_TABLE(table) | addr; - ret = ksz_write8(dev, regs[REG_IND_BYTE], data); + ret = ksz_write16(dev, regs[REG_IND_CTRL_0], ctrl_addr); if (!ret) - ret = ksz_write16(dev, regs[REG_IND_CTRL_0], ctrl_addr); + ret = ksz_write8(dev, regs[REG_IND_BYTE], data); mutex_unlock(&dev->alu_mutex); diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 03d966fa67b2..1ae11b180d54 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -966,18 +966,10 @@ mt753x_trap_frames(struct mt7530_priv *priv) MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY)); } -static int +static void mt753x_cpu_port_enable(struct dsa_switch *ds, int port) { struct mt7530_priv *priv = ds->priv; - int ret; - - /* Setup max capability of CPU port at first */ - if (priv->info->cpu_port_config) { - ret = priv->info->cpu_port_config(ds, port); - if (ret) - return ret; - } /* Enable Mediatek header mode on the cpu port */ mt7530_write(priv, MT7530_PVC_P(port), @@ -1003,8 +995,6 @@ mt753x_cpu_port_enable(struct dsa_switch *ds, int port) /* Set to fallback mode for independent VLAN learning */ mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK, MT7530_PORT_FALLBACK_MODE); - - return 0; } static int @@ -1028,7 +1018,6 @@ mt7530_port_enable(struct dsa_switch *ds, int port, priv->ports[port].enable = true; mt7530_rmw(priv, MT7530_PCR_P(port), PCR_MATRIX_MASK, priv->ports[port].pm); - mt7530_clear(priv, MT7530_PMCR_P(port), PMCR_LINK_SETTINGS_MASK); mutex_unlock(&priv->reg_mutex); @@ -1048,7 +1037,6 @@ mt7530_port_disable(struct dsa_switch *ds, int port) priv->ports[port].enable = false; mt7530_rmw(priv, MT7530_PCR_P(port), PCR_MATRIX_MASK, PCR_MATRIX_CLR); - mt7530_clear(priv, MT7530_PMCR_P(port), PMCR_LINK_SETTINGS_MASK); mutex_unlock(&priv->reg_mutex); } @@ -2055,7 +2043,7 @@ mt7530_setup_irq(struct mt7530_priv *priv) } /* This register must be set for MT7530 to properly fire interrupts */ - if (priv->id != ID_MT7531) + if (priv->id == ID_MT7530 || priv->id == ID_MT7621) mt7530_set(priv, MT7530_TOP_SIG_CTRL, TOP_SIG_CTRL_NORMAL); ret = request_threaded_irq(priv->irq, NULL, mt7530_irq_thread_fn, @@ -2261,14 +2249,18 @@ mt7530_setup(struct dsa_switch *ds) val |= MHWTRAP_MANUAL; mt7530_write(priv, MT7530_MHWTRAP, val); - priv->p6_interface = PHY_INTERFACE_MODE_NA; - mt753x_trap_frames(priv); /* Enable and reset MIB counters */ mt7530_mib_reset(ds); for (i = 0; i < MT7530_NUM_PORTS; i++) { + /* Clear link settings and enable force mode to force link down + * on all ports until they're enabled later. + */ + mt7530_rmw(priv, MT7530_PMCR_P(i), PMCR_LINK_SETTINGS_MASK | + PMCR_FORCE_MODE, PMCR_FORCE_MODE); + /* Disable forwarding by default on all ports */ mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK, PCR_MATRIX_CLR); @@ -2277,9 +2269,7 @@ mt7530_setup(struct dsa_switch *ds) mt7530_set(priv, MT7530_PSC_P(i), SA_DIS); if (dsa_is_cpu_port(ds, i)) { - ret = mt753x_cpu_port_enable(ds, i); - if (ret) - return ret; + mt753x_cpu_port_enable(ds, i); } else { mt7530_port_disable(ds, i); @@ -2373,6 +2363,12 @@ mt7531_setup_common(struct dsa_switch *ds) UNU_FFP_MASK); for (i = 0; i < MT7530_NUM_PORTS; i++) { + /* Clear link settings and enable force mode to force link down + * on all ports until they're enabled later. + */ + mt7530_rmw(priv, MT7530_PMCR_P(i), PMCR_LINK_SETTINGS_MASK | + MT7531_FORCE_MODE, MT7531_FORCE_MODE); + /* Disable forwarding by default on all ports */ mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK, PCR_MATRIX_CLR); @@ -2383,9 +2379,7 @@ mt7531_setup_common(struct dsa_switch *ds) mt7530_set(priv, MT7531_DBG_CNT(i), MT7531_DIS_CLR); if (dsa_is_cpu_port(ds, i)) { - ret = mt753x_cpu_port_enable(ds, i); - if (ret) - return ret; + mt753x_cpu_port_enable(ds, i); } else { mt7530_port_disable(ds, i); @@ -2451,14 +2445,12 @@ mt7531_setup(struct dsa_switch *ds) val = mt7530_read(priv, MT7531_TOP_SIG_SR); priv->p5_sgmii = !!(val & PAD_DUAL_SGMII_EN); - /* all MACs must be forced link-down before sw reset */ + /* Force link down on all ports before internal reset */ for (i = 0; i < MT7530_NUM_PORTS; i++) mt7530_write(priv, MT7530_PMCR_P(i), MT7531_FORCE_LNK); /* Reset the switch through internal reset */ - mt7530_write(priv, MT7530_SYS_CTRL, - SYS_CTRL_PHY_RST | SYS_CTRL_SW_RST | - SYS_CTRL_REG_RST); + mt7530_write(priv, MT7530_SYS_CTRL, SYS_CTRL_SW_RST | SYS_CTRL_REG_RST); if (!priv->p5_sgmii) { mt7531_pll_setup(priv); @@ -2476,10 +2468,6 @@ mt7531_setup(struct dsa_switch *ds) mt7530_rmw(priv, MT7531_GPIO_MODE0, MT7531_GPIO0_MASK, MT7531_GPIO0_INTERRUPT); - /* Let phylink decide the interface later. */ - priv->p5_interface = PHY_INTERFACE_MODE_NA; - priv->p6_interface = PHY_INTERFACE_MODE_NA; - /* Enable PHY core PLL, since phy_device has not yet been created * provided for phy_[read,write]_mmd_indirect is called, we provide * our own mt7531_ind_mmd_phy_[read,write] to complete this @@ -2589,7 +2577,7 @@ static void mt7988_mac_port_get_caps(struct dsa_switch *ds, int port, } } -static int +static void mt7530_mac_config(struct dsa_switch *ds, int port, unsigned int mode, phy_interface_t interface) { @@ -2599,22 +2587,14 @@ mt7530_mac_config(struct dsa_switch *ds, int port, unsigned int mode, mt7530_setup_port5(priv->ds, interface); else if (port == 6) mt7530_setup_port6(priv->ds, interface); - - return 0; } -static int mt7531_rgmii_setup(struct mt7530_priv *priv, u32 port, - phy_interface_t interface, - struct phy_device *phydev) +static void mt7531_rgmii_setup(struct mt7530_priv *priv, u32 port, + phy_interface_t interface, + struct phy_device *phydev) { u32 val; - if (priv->p5_sgmii) { - dev_err(priv->dev, "RGMII mode is not available for port %d\n", - port); - return -EINVAL; - } - val = mt7530_read(priv, MT7531_CLKGEN_CTRL); val |= GP_CLK_EN; val &= ~GP_MODE_MASK; @@ -2642,31 +2622,14 @@ static int mt7531_rgmii_setup(struct mt7530_priv *priv, u32 port, case PHY_INTERFACE_MODE_RGMII_ID: break; default: - return -EINVAL; + break; } } - mt7530_write(priv, MT7531_CLKGEN_CTRL, val); - - return 0; -} -static bool mt753x_is_mac_port(u32 port) -{ - return (port == 5 || port == 6); -} - -static int -mt7988_mac_config(struct dsa_switch *ds, int port, unsigned int mode, - phy_interface_t interface) -{ - if (dsa_is_cpu_port(ds, port) && - interface == PHY_INTERFACE_MODE_INTERNAL) - return 0; - - return -EINVAL; + mt7530_write(priv, MT7531_CLKGEN_CTRL, val); } -static int +static void mt7531_mac_config(struct dsa_switch *ds, int port, unsigned int mode, phy_interface_t interface) { @@ -2674,39 +2637,11 @@ mt7531_mac_config(struct dsa_switch *ds, int port, unsigned int mode, struct phy_device *phydev; struct dsa_port *dp; - if (!mt753x_is_mac_port(port)) { - dev_err(priv->dev, "port %d is not a MAC port\n", port); - return -EINVAL; - } - - switch (interface) { - case PHY_INTERFACE_MODE_RGMII: - case PHY_INTERFACE_MODE_RGMII_ID: - case PHY_INTERFACE_MODE_RGMII_RXID: - case PHY_INTERFACE_MODE_RGMII_TXID: + if (phy_interface_mode_is_rgmii(interface)) { dp = dsa_to_port(ds, port); phydev = dp->user->phydev; - return mt7531_rgmii_setup(priv, port, interface, phydev); - case PHY_INTERFACE_MODE_SGMII: - case PHY_INTERFACE_MODE_NA: - case PHY_INTERFACE_MODE_1000BASEX: - case PHY_INTERFACE_MODE_2500BASEX: - /* handled in SGMII PCS driver */ - return 0; - default: - return -EINVAL; + mt7531_rgmii_setup(priv, port, interface, phydev); } - - return -EINVAL; -} - -static int -mt753x_mac_config(struct dsa_switch *ds, int port, unsigned int mode, - const struct phylink_link_state *state) -{ - struct mt7530_priv *priv = ds->priv; - - return priv->info->mac_port_config(ds, port, mode, state->interface); } static struct phylink_pcs * @@ -2732,52 +2667,13 @@ mt753x_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode, const struct phylink_link_state *state) { struct mt7530_priv *priv = ds->priv; - u32 mcr_cur, mcr_new; - - switch (port) { - case 0 ... 4: - if (state->interface != PHY_INTERFACE_MODE_GMII && - state->interface != PHY_INTERFACE_MODE_INTERNAL) - goto unsupported; - break; - case 5: - if (priv->p5_interface == state->interface) - break; - if (mt753x_mac_config(ds, port, mode, state) < 0) - goto unsupported; - - if (priv->p5_intf_sel != P5_DISABLED) - priv->p5_interface = state->interface; - break; - case 6: - if (priv->p6_interface == state->interface) - break; - - if (mt753x_mac_config(ds, port, mode, state) < 0) - goto unsupported; - - priv->p6_interface = state->interface; - break; - default: -unsupported: - dev_err(ds->dev, "%s: unsupported %s port: %i\n", - __func__, phy_modes(state->interface), port); - return; - } - - mcr_cur = mt7530_read(priv, MT7530_PMCR_P(port)); - mcr_new = mcr_cur; - mcr_new &= ~PMCR_LINK_SETTINGS_MASK; - mcr_new |= PMCR_IFG_XMIT(1) | PMCR_MAC_MODE | PMCR_BACKOFF_EN | - PMCR_BACKPR_EN | PMCR_FORCE_MODE_ID(priv->id); + if ((port == 5 || port == 6) && priv->info->mac_port_config) + priv->info->mac_port_config(ds, port, mode, state->interface); /* Are we connected to external phy */ if (port == 5 && dsa_is_user_port(ds, 5)) - mcr_new |= PMCR_EXT_PHY; - - if (mcr_new != mcr_cur) - mt7530_write(priv, MT7530_PMCR_P(port), mcr_new); + mt7530_set(priv, MT7530_PMCR_P(port), PMCR_EXT_PHY); } static void mt753x_phylink_mac_link_down(struct dsa_switch *ds, int port, @@ -2801,17 +2697,10 @@ static void mt753x_phylink_mac_link_up(struct dsa_switch *ds, int port, mcr = PMCR_RX_EN | PMCR_TX_EN | PMCR_FORCE_LNK; - /* MT753x MAC works in 1G full duplex mode for all up-clocked - * variants. - */ - if (interface == PHY_INTERFACE_MODE_TRGMII || - (phy_interface_mode_is_8023z(interface))) { - speed = SPEED_1000; - duplex = DUPLEX_FULL; - } - switch (speed) { case SPEED_1000: + case SPEED_2500: + case SPEED_10000: mcr |= PMCR_FORCE_SPEED_1000; break; case SPEED_100: @@ -2829,6 +2718,7 @@ static void mt753x_phylink_mac_link_up(struct dsa_switch *ds, int port, if (mode == MLO_AN_PHY && phydev && phy_init_eee(phydev, false) >= 0) { switch (speed) { case SPEED_1000: + case SPEED_2500: mcr |= PMCR_FORCE_EEE1G; break; case SPEED_100: @@ -2840,63 +2730,6 @@ static void mt753x_phylink_mac_link_up(struct dsa_switch *ds, int port, mt7530_set(priv, MT7530_PMCR_P(port), mcr); } -static int -mt7531_cpu_port_config(struct dsa_switch *ds, int port) -{ - struct mt7530_priv *priv = ds->priv; - phy_interface_t interface; - int speed; - int ret; - - switch (port) { - case 5: - if (!priv->p5_sgmii) - interface = PHY_INTERFACE_MODE_RGMII; - else - interface = PHY_INTERFACE_MODE_2500BASEX; - - priv->p5_interface = interface; - break; - case 6: - interface = PHY_INTERFACE_MODE_2500BASEX; - - priv->p6_interface = interface; - break; - default: - return -EINVAL; - } - - if (interface == PHY_INTERFACE_MODE_2500BASEX) - speed = SPEED_2500; - else - speed = SPEED_1000; - - ret = mt7531_mac_config(ds, port, MLO_AN_FIXED, interface); - if (ret) - return ret; - mt7530_write(priv, MT7530_PMCR_P(port), - PMCR_CPU_PORT_SETTING(priv->id)); - mt753x_phylink_mac_link_up(ds, port, MLO_AN_FIXED, interface, NULL, - speed, DUPLEX_FULL, true, true); - - return 0; -} - -static int -mt7988_cpu_port_config(struct dsa_switch *ds, int port) -{ - struct mt7530_priv *priv = ds->priv; - - mt7530_write(priv, MT7530_PMCR_P(port), - PMCR_CPU_PORT_SETTING(priv->id)); - - mt753x_phylink_mac_link_up(ds, port, MLO_AN_FIXED, - PHY_INTERFACE_MODE_INTERNAL, NULL, - SPEED_10000, DUPLEX_FULL, true, true); - - return 0; -} - static void mt753x_phylink_get_caps(struct dsa_switch *ds, int port, struct phylink_config *config) { @@ -2979,17 +2812,9 @@ static int mt753x_setup(struct dsa_switch *ds) { struct mt7530_priv *priv = ds->priv; - int i, ret; - - /* Initialise the PCS devices */ - for (i = 0; i < priv->ds->num_ports; i++) { - priv->pcs[i].pcs.ops = priv->info->pcs_ops; - priv->pcs[i].pcs.neg_mode = true; - priv->pcs[i].priv = priv; - priv->pcs[i].port = i; - } + int ret = priv->info->sw_setup(ds); + int i; - ret = priv->info->sw_setup(ds); if (ret) return ret; @@ -3001,6 +2826,14 @@ mt753x_setup(struct dsa_switch *ds) if (ret && priv->irq) mt7530_free_irq_common(priv); + /* Initialise the PCS devices */ + for (i = 0; i < priv->ds->num_ports; i++) { + priv->pcs[i].pcs.ops = priv->info->pcs_ops; + priv->pcs[i].pcs.neg_mode = true; + priv->pcs[i].priv = priv; + priv->pcs[i].port = i; + } + if (priv->create_sgmii) { ret = priv->create_sgmii(priv); if (ret && priv->irq) @@ -3155,7 +2988,6 @@ const struct mt753x_info mt753x_table[] = { .phy_write_c22 = mt7531_ind_c22_phy_write, .phy_read_c45 = mt7531_ind_c45_phy_read, .phy_write_c45 = mt7531_ind_c45_phy_write, - .cpu_port_config = mt7531_cpu_port_config, .mac_port_get_caps = mt7531_mac_port_get_caps, .mac_port_config = mt7531_mac_config, }, @@ -3167,9 +2999,7 @@ const struct mt753x_info mt753x_table[] = { .phy_write_c22 = mt7531_ind_c22_phy_write, .phy_read_c45 = mt7531_ind_c45_phy_read, .phy_write_c45 = mt7531_ind_c45_phy_write, - .cpu_port_config = mt7988_cpu_port_config, .mac_port_get_caps = mt7988_mac_port_get_caps, - .mac_port_config = mt7988_mac_config, }, }; EXPORT_SYMBOL_GPL(mt753x_table); @@ -3197,8 +3027,7 @@ mt7530_probe_common(struct mt7530_priv *priv) * properly. */ if (!priv->info->sw_setup || !priv->info->phy_read_c22 || - !priv->info->phy_write_c22 || !priv->info->mac_port_get_caps || - !priv->info->mac_port_config) + !priv->info->phy_write_c22 || !priv->info->mac_port_get_caps) return -EINVAL; priv->id = priv->info->id; diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index 26a6d2160c08..a71166e0a7fc 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -304,20 +304,11 @@ enum mt7530_vlan_port_acc_frm { MT7531_FORCE_DPX | \ MT7531_FORCE_RX_FC | \ MT7531_FORCE_TX_FC) -#define PMCR_FORCE_MODE_ID(id) ((((id) == ID_MT7531) || ((id) == ID_MT7988)) ? \ - MT7531_FORCE_MODE : PMCR_FORCE_MODE) #define PMCR_LINK_SETTINGS_MASK (PMCR_TX_EN | PMCR_FORCE_SPEED_1000 | \ PMCR_RX_EN | PMCR_FORCE_SPEED_100 | \ PMCR_TX_FC_EN | PMCR_RX_FC_EN | \ PMCR_FORCE_FDX | PMCR_FORCE_LNK | \ PMCR_FORCE_EEE1G | PMCR_FORCE_EEE100) -#define PMCR_CPU_PORT_SETTING(id) (PMCR_FORCE_MODE_ID((id)) | \ - PMCR_IFG_XMIT(1) | PMCR_MAC_MODE | \ - PMCR_BACKOFF_EN | PMCR_BACKPR_EN | \ - PMCR_TX_EN | PMCR_RX_EN | \ - PMCR_TX_FC_EN | PMCR_RX_FC_EN | \ - PMCR_FORCE_SPEED_1000 | \ - PMCR_FORCE_FDX | PMCR_FORCE_LNK) #define MT7530_PMEEECR_P(x) (0x3004 + (x) * 0x100) #define WAKEUP_TIME_1000(x) (((x) & 0xFF) << 24) @@ -724,15 +715,14 @@ struct mt753x_info { int regnum); int (*phy_write_c45)(struct mt7530_priv *priv, int port, int devad, int regnum, u16 val); - int (*cpu_port_config)(struct dsa_switch *ds, int port); void (*mac_port_get_caps)(struct dsa_switch *ds, int port, struct phylink_config *config); void (*mac_port_validate)(struct dsa_switch *ds, int port, phy_interface_t interface, unsigned long *supported); - int (*mac_port_config)(struct dsa_switch *ds, int port, - unsigned int mode, - phy_interface_t interface); + void (*mac_port_config)(struct dsa_switch *ds, int port, + unsigned int mode, + phy_interface_t interface); }; /* struct mt7530_priv - This is the main data structure for holding the state @@ -750,7 +740,6 @@ struct mt753x_info { * @ports: Holding the state among ports * @reg_mutex: The lock for protecting among process accessing * registers - * @p6_interface Holding the current port 6 interface * @p5_intf_sel: Holding the current port 5 interface select * @p5_sgmii: Flag for distinguishing if port 5 of the MT7531 switch * has got SGMII @@ -772,8 +761,6 @@ struct mt7530_priv { const struct mt753x_info *info; unsigned int id; bool mcm; - phy_interface_t p6_interface; - phy_interface_t p5_interface; enum p5_interface_select p5_intf_sel; bool p5_sgmii; u8 mirror_rx; diff --git a/drivers/net/dsa/mv88e6xxx/pcs-6185.c b/drivers/net/dsa/mv88e6xxx/pcs-6185.c index 4d677f836807..5a27d047a38e 100644 --- a/drivers/net/dsa/mv88e6xxx/pcs-6185.c +++ b/drivers/net/dsa/mv88e6xxx/pcs-6185.c @@ -95,7 +95,7 @@ static void mv88e6185_pcs_get_state(struct phylink_pcs *pcs, } } -static int mv88e6185_pcs_config(struct phylink_pcs *pcs, unsigned int mode, +static int mv88e6185_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, const unsigned long *advertising, bool permit_pause_to_mac) @@ -137,6 +137,7 @@ static int mv88e6185_pcs_init(struct mv88e6xxx_chip *chip, int port) mpcs->chip = chip; mpcs->port = port; mpcs->phylink_pcs.ops = &mv88e6185_phylink_pcs_ops; + mpcs->phylink_pcs.neg_mode = true; irq = mv88e6xxx_serdes_irq_mapping(chip, port); if (irq) { diff --git a/drivers/net/ethernet/adi/Kconfig b/drivers/net/ethernet/adi/Kconfig index c91b4dcef4ec..760a9a60bc15 100644 --- a/drivers/net/ethernet/adi/Kconfig +++ b/drivers/net/ethernet/adi/Kconfig @@ -7,7 +7,6 @@ config NET_VENDOR_ADI bool "Analog Devices devices" default y depends on SPI - select PHYLIB help If you have a network (Ethernet) card belonging to this class, say Y. @@ -22,6 +21,7 @@ config ADIN1110 tristate "Analog Devices ADIN1110 MAC-PHY" depends on SPI && NET_SWITCHDEV select CRC8 + select PHYLIB help Say yes here to build support for Analog Devices ADIN1110 Low Power 10BASE-T1L Ethernet MAC-PHY. diff --git a/drivers/net/ethernet/amd/pds_core/auxbus.c b/drivers/net/ethernet/amd/pds_core/auxbus.c index a3c79848a69a..2babea110991 100644 --- a/drivers/net/ethernet/amd/pds_core/auxbus.c +++ b/drivers/net/ethernet/amd/pds_core/auxbus.c @@ -160,23 +160,19 @@ static struct pds_auxiliary_dev *pdsc_auxbus_dev_register(struct pdsc *cf, if (err < 0) { dev_warn(cf->dev, "auxiliary_device_init of %s failed: %pe\n", name, ERR_PTR(err)); - goto err_out; + kfree(padev); + return ERR_PTR(err); } err = auxiliary_device_add(aux_dev); if (err) { dev_warn(cf->dev, "auxiliary_device_add of %s failed: %pe\n", name, ERR_PTR(err)); - goto err_out_uninit; + auxiliary_device_uninit(aux_dev); + return ERR_PTR(err); } return padev; - -err_out_uninit: - auxiliary_device_uninit(aux_dev); -err_out: - kfree(padev); - return ERR_PTR(err); } int pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf) diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.c b/drivers/net/ethernet/broadcom/asp2/bcmasp.c index 80245c65cc90..a806dadc4196 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.c @@ -31,6 +31,20 @@ static void _intr2_mask_set(struct bcmasp_priv *priv, u32 mask) priv->irq_mask |= mask; } +void bcmasp_enable_phy_irq(struct bcmasp_intf *intf, int en) +{ + struct bcmasp_priv *priv = intf->parent; + + /* Only supported with internal phys */ + if (!intf->internal_phy) + return; + + if (en) + _intr2_mask_clear(priv, ASP_INTR2_PHY_EVENT(intf->channel)); + else + _intr2_mask_set(priv, ASP_INTR2_PHY_EVENT(intf->channel)); +} + void bcmasp_enable_tx_irq(struct bcmasp_intf *intf, int en) { struct bcmasp_priv *priv = intf->parent; @@ -79,6 +93,9 @@ static void bcmasp_intr2_handling(struct bcmasp_intf *intf, u32 status) __napi_schedule_irqoff(&intf->tx_napi); } } + + if (status & ASP_INTR2_PHY_EVENT(intf->channel)) + phy_mac_interrupt(intf->ndev->phydev); } static irqreturn_t bcmasp_isr(int irq, void *data) @@ -972,7 +989,26 @@ static void bcmasp_core_init(struct bcmasp_priv *priv) ASP_INTR2_CLEAR); } -static void bcmasp_core_clock_select(struct bcmasp_priv *priv, bool slow) +static void bcmasp_core_clock_select_many(struct bcmasp_priv *priv, bool slow) +{ + u32 reg; + + reg = ctrl2_core_rl(priv, ASP_CTRL2_CORE_CLOCK_SELECT); + if (slow) + reg &= ~ASP_CTRL2_CORE_CLOCK_SELECT_MAIN; + else + reg |= ASP_CTRL2_CORE_CLOCK_SELECT_MAIN; + ctrl2_core_wl(priv, reg, ASP_CTRL2_CORE_CLOCK_SELECT); + + reg = ctrl2_core_rl(priv, ASP_CTRL2_CPU_CLOCK_SELECT); + if (slow) + reg &= ~ASP_CTRL2_CPU_CLOCK_SELECT_MAIN; + else + reg |= ASP_CTRL2_CPU_CLOCK_SELECT_MAIN; + ctrl2_core_wl(priv, reg, ASP_CTRL2_CPU_CLOCK_SELECT); +} + +static void bcmasp_core_clock_select_one(struct bcmasp_priv *priv, bool slow) { u32 reg; @@ -1166,6 +1202,24 @@ static void bcmasp_wol_irq_destroy_per_intf(struct bcmasp_priv *priv) } } +static void bcmasp_eee_fixup(struct bcmasp_intf *intf, bool en) +{ + u32 reg, phy_lpi_overwrite; + + reg = rx_edpkt_core_rl(intf->parent, ASP_EDPKT_SPARE_REG); + phy_lpi_overwrite = intf->internal_phy ? ASP_EDPKT_SPARE_REG_EPHY_LPI : + ASP_EDPKT_SPARE_REG_GPHY_LPI; + + if (en) + reg |= phy_lpi_overwrite; + else + reg &= ~phy_lpi_overwrite; + + rx_edpkt_core_wl(intf->parent, reg, ASP_EDPKT_SPARE_REG); + + usleep_range(50, 100); +} + static struct bcmasp_hw_info v20_hw_info = { .rx_ctrl_flush = ASP_RX_CTRL_FLUSH, .umac2fb = UMAC2FB_OFFSET, @@ -1178,6 +1232,7 @@ static const struct bcmasp_plat_data v20_plat_data = { .init_wol = bcmasp_init_wol_per_intf, .enable_wol = bcmasp_enable_wol_per_intf, .destroy_wol = bcmasp_wol_irq_destroy_per_intf, + .core_clock_select = bcmasp_core_clock_select_one, .hw_info = &v20_hw_info, }; @@ -1194,17 +1249,39 @@ static const struct bcmasp_plat_data v21_plat_data = { .init_wol = bcmasp_init_wol_shared, .enable_wol = bcmasp_enable_wol_shared, .destroy_wol = bcmasp_wol_irq_destroy_shared, + .core_clock_select = bcmasp_core_clock_select_one, + .hw_info = &v21_hw_info, +}; + +static const struct bcmasp_plat_data v22_plat_data = { + .init_wol = bcmasp_init_wol_shared, + .enable_wol = bcmasp_enable_wol_shared, + .destroy_wol = bcmasp_wol_irq_destroy_shared, + .core_clock_select = bcmasp_core_clock_select_many, .hw_info = &v21_hw_info, + .eee_fixup = bcmasp_eee_fixup, }; +static void bcmasp_set_pdata(struct bcmasp_priv *priv, const struct bcmasp_plat_data *pdata) +{ + priv->init_wol = pdata->init_wol; + priv->enable_wol = pdata->enable_wol; + priv->destroy_wol = pdata->destroy_wol; + priv->core_clock_select = pdata->core_clock_select; + priv->eee_fixup = pdata->eee_fixup; + priv->hw_info = pdata->hw_info; +} + static const struct of_device_id bcmasp_of_match[] = { { .compatible = "brcm,asp-v2.0", .data = &v20_plat_data }, { .compatible = "brcm,asp-v2.1", .data = &v21_plat_data }, + { .compatible = "brcm,asp-v2.2", .data = &v22_plat_data }, { /* sentinel */ }, }; MODULE_DEVICE_TABLE(of, bcmasp_of_match); static const struct of_device_id bcmasp_mdio_of_match[] = { + { .compatible = "brcm,asp-v2.2-mdio", }, { .compatible = "brcm,asp-v2.1-mdio", }, { .compatible = "brcm,asp-v2.0-mdio", }, { /* sentinel */ }, @@ -1265,16 +1342,13 @@ static int bcmasp_probe(struct platform_device *pdev) if (!pdata) return dev_err_probe(dev, -EINVAL, "unable to find platform data\n"); - priv->init_wol = pdata->init_wol; - priv->enable_wol = pdata->enable_wol; - priv->destroy_wol = pdata->destroy_wol; - priv->hw_info = pdata->hw_info; + bcmasp_set_pdata(priv, pdata); /* Enable all clocks to ensure successful probing */ bcmasp_core_clock_set(priv, ASP_CTRL_CLOCK_CTRL_ASP_ALL_DISABLE, 0); /* Switch to the main clock */ - bcmasp_core_clock_select(priv, false); + priv->core_clock_select(priv, false); bcmasp_intr2_mask_set_all(priv); bcmasp_intr2_clear_all(priv); @@ -1381,7 +1455,7 @@ static int __maybe_unused bcmasp_suspend(struct device *d) */ bcmasp_core_clock_set(priv, 0, ASP_CTRL_CLOCK_CTRL_ASP_TX_DISABLE); - bcmasp_core_clock_select(priv, true); + priv->core_clock_select(priv, true); clk_disable_unprepare(priv->clk); @@ -1399,7 +1473,7 @@ static int __maybe_unused bcmasp_resume(struct device *d) return ret; /* Switch to the main clock domain */ - bcmasp_core_clock_select(priv, false); + priv->core_clock_select(priv, false); /* Re-enable all clocks for re-initialization */ bcmasp_core_clock_set(priv, ASP_CTRL_CLOCK_CTRL_ASP_ALL_DISABLE, 0); diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.h b/drivers/net/ethernet/broadcom/asp2/bcmasp.h index 312bf9b6576e..f93cb3da44b0 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp.h +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.h @@ -19,6 +19,8 @@ #define ASP_INTR2_TX_DESC(intr) BIT((intr) + 14) #define ASP_INTR2_UMC0_WAKE BIT(22) #define ASP_INTR2_UMC1_WAKE BIT(28) +#define ASP_INTR2_PHY_EVENT(intr) ((intr) ? BIT(30) | BIT(31) : \ + BIT(24) | BIT(25)) #define ASP_WAKEUP_INTR2_OFFSET 0x1200 #define ASP_WAKEUP_INTR2_STATUS 0x0 @@ -33,6 +35,12 @@ #define ASP_WAKEUP_INTR2_FILT_1 BIT(3) #define ASP_WAKEUP_INTR2_FW BIT(4) +#define ASP_CTRL2_OFFSET 0x2000 +#define ASP_CTRL2_CORE_CLOCK_SELECT 0x0 +#define ASP_CTRL2_CORE_CLOCK_SELECT_MAIN BIT(0) +#define ASP_CTRL2_CPU_CLOCK_SELECT 0x4 +#define ASP_CTRL2_CPU_CLOCK_SELECT_MAIN BIT(0) + #define ASP_TX_ANALYTICS_OFFSET 0x4c000 #define ASP_TX_ANALYTICS_CTRL 0x0 @@ -134,8 +142,11 @@ enum asp_rx_net_filter_block { #define ASP_EDPKT_RX_PKT_CNT 0x138 #define ASP_EDPKT_HDR_EXTR_CNT 0x13c #define ASP_EDPKT_HDR_OUT_CNT 0x140 +#define ASP_EDPKT_SPARE_REG 0x174 +#define ASP_EDPKT_SPARE_REG_EPHY_LPI BIT(4) +#define ASP_EDPKT_SPARE_REG_GPHY_LPI BIT(3) -#define ASP_CTRL 0x101000 +#define ASP_CTRL_OFFSET 0x101000 #define ASP_CTRL_ASP_SW_INIT 0x04 #define ASP_CTRL_ASP_SW_INIT_ACPUSS_CORE BIT(0) #define ASP_CTRL_ASP_SW_INIT_ASP_TX BIT(1) @@ -306,6 +317,7 @@ struct bcmasp_intf { struct bcmasp_desc *rx_edpkt_cpu; dma_addr_t rx_edpkt_dma_addr; dma_addr_t rx_edpkt_dma_read; + dma_addr_t rx_edpkt_dma_valid; /* RX buffer prefetcher ring*/ void *rx_ring_cpu; @@ -372,6 +384,8 @@ struct bcmasp_plat_data { void (*init_wol)(struct bcmasp_priv *priv); void (*enable_wol)(struct bcmasp_intf *intf, bool en); void (*destroy_wol)(struct bcmasp_priv *priv); + void (*core_clock_select)(struct bcmasp_priv *priv, bool slow); + void (*eee_fixup)(struct bcmasp_intf *priv, bool en); struct bcmasp_hw_info *hw_info; }; @@ -390,6 +404,8 @@ struct bcmasp_priv { void (*init_wol)(struct bcmasp_priv *priv); void (*enable_wol)(struct bcmasp_intf *intf, bool en); void (*destroy_wol)(struct bcmasp_priv *priv); + void (*core_clock_select)(struct bcmasp_priv *priv, bool slow); + void (*eee_fixup)(struct bcmasp_intf *intf, bool en); void __iomem *base; struct bcmasp_hw_info *hw_info; @@ -530,7 +546,8 @@ BCMASP_CORE_IO_MACRO(rx_analytics, ASP_RX_ANALYTICS_OFFSET); BCMASP_CORE_IO_MACRO(rx_ctrl, ASP_RX_CTRL_OFFSET); BCMASP_CORE_IO_MACRO(rx_filter, ASP_RX_FILTER_OFFSET); BCMASP_CORE_IO_MACRO(rx_edpkt, ASP_EDPKT_OFFSET); -BCMASP_CORE_IO_MACRO(ctrl, ASP_CTRL); +BCMASP_CORE_IO_MACRO(ctrl, ASP_CTRL_OFFSET); +BCMASP_CORE_IO_MACRO(ctrl2, ASP_CTRL2_OFFSET); struct bcmasp_intf *bcmasp_interface_create(struct bcmasp_priv *priv, struct device_node *ndev_dn, int i); @@ -541,6 +558,8 @@ void bcmasp_enable_tx_irq(struct bcmasp_intf *intf, int en); void bcmasp_enable_rx_irq(struct bcmasp_intf *intf, int en); +void bcmasp_enable_phy_irq(struct bcmasp_intf *intf, int en); + void bcmasp_flush_rx_port(struct bcmasp_intf *intf); extern const struct ethtool_ops bcmasp_ethtool_ops; diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c index e429876c7291..dd06b68b33ed 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c @@ -382,6 +382,7 @@ static void bcmasp_netif_start(struct net_device *dev) bcmasp_enable_rx_irq(intf, 1); bcmasp_enable_tx_irq(intf, 1); + bcmasp_enable_phy_irq(intf, 1); phy_start(dev->phydev); } @@ -674,40 +675,78 @@ static void bcmasp_adj_link(struct net_device *dev) phy_print_status(phydev); } -static int bcmasp_init_rx(struct bcmasp_intf *intf) +static int bcmasp_alloc_buffers(struct bcmasp_intf *intf) { struct device *kdev = &intf->parent->pdev->dev; struct page *buffer_pg; - dma_addr_t dma; - void *p; - u32 reg; - int ret; + /* Alloc RX */ intf->rx_buf_order = get_order(RING_BUFFER_SIZE); buffer_pg = alloc_pages(GFP_KERNEL, intf->rx_buf_order); if (!buffer_pg) return -ENOMEM; - dma = dma_map_page(kdev, buffer_pg, 0, RING_BUFFER_SIZE, - DMA_FROM_DEVICE); - if (dma_mapping_error(kdev, dma)) { - __free_pages(buffer_pg, intf->rx_buf_order); - return -ENOMEM; - } intf->rx_ring_cpu = page_to_virt(buffer_pg); - intf->rx_ring_dma = dma; - intf->rx_ring_dma_valid = intf->rx_ring_dma + RING_BUFFER_SIZE - 1; + intf->rx_ring_dma = dma_map_page(kdev, buffer_pg, 0, RING_BUFFER_SIZE, + DMA_FROM_DEVICE); + if (dma_mapping_error(kdev, intf->rx_ring_dma)) + goto free_rx_buffer; + + intf->rx_edpkt_cpu = dma_alloc_coherent(kdev, DESC_RING_SIZE, + &intf->rx_edpkt_dma_addr, GFP_KERNEL); + if (!intf->rx_edpkt_cpu) + goto free_rx_buffer_dma; + + /* Alloc TX */ + intf->tx_spb_cpu = dma_alloc_coherent(kdev, DESC_RING_SIZE, + &intf->tx_spb_dma_addr, GFP_KERNEL); + if (!intf->tx_spb_cpu) + goto free_rx_edpkt_dma; - p = dma_alloc_coherent(kdev, DESC_RING_SIZE, &intf->rx_edpkt_dma_addr, + intf->tx_cbs = kcalloc(DESC_RING_COUNT, sizeof(struct bcmasp_tx_cb), GFP_KERNEL); - if (!p) { - ret = -ENOMEM; - goto free_rx_ring; - } - intf->rx_edpkt_cpu = p; + if (!intf->tx_cbs) + goto free_tx_spb_dma; - netif_napi_add(intf->ndev, &intf->rx_napi, bcmasp_rx_poll); + return 0; + +free_tx_spb_dma: + dma_free_coherent(kdev, DESC_RING_SIZE, intf->tx_spb_cpu, + intf->tx_spb_dma_addr); +free_rx_edpkt_dma: + dma_free_coherent(kdev, DESC_RING_SIZE, intf->rx_edpkt_cpu, + intf->rx_edpkt_dma_addr); +free_rx_buffer_dma: + dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE, + DMA_FROM_DEVICE); +free_rx_buffer: + __free_pages(buffer_pg, intf->rx_buf_order); + + return -ENOMEM; +} + +static void bcmasp_reclaim_free_buffers(struct bcmasp_intf *intf) +{ + struct device *kdev = &intf->parent->pdev->dev; + + /* RX buffers */ + dma_free_coherent(kdev, DESC_RING_SIZE, intf->rx_edpkt_cpu, + intf->rx_edpkt_dma_addr); + dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE, + DMA_FROM_DEVICE); + __free_pages(virt_to_page(intf->rx_ring_cpu), intf->rx_buf_order); + /* TX buffers */ + dma_free_coherent(kdev, DESC_RING_SIZE, intf->tx_spb_cpu, + intf->tx_spb_dma_addr); + kfree(intf->tx_cbs); +} + +static void bcmasp_init_rx(struct bcmasp_intf *intf) +{ + /* Restart from index 0 */ + intf->rx_ring_dma_valid = intf->rx_ring_dma + RING_BUFFER_SIZE - 1; + intf->rx_edpkt_dma_valid = intf->rx_edpkt_dma_addr + (DESC_RING_SIZE - 1); intf->rx_edpkt_dma_read = intf->rx_edpkt_dma_addr; intf->rx_edpkt_index = 0; @@ -733,64 +772,23 @@ static int bcmasp_init_rx(struct bcmasp_intf *intf) rx_edpkt_dma_wq(intf, intf->rx_edpkt_dma_addr, RX_EDPKT_DMA_WRITE); rx_edpkt_dma_wq(intf, intf->rx_edpkt_dma_addr, RX_EDPKT_DMA_READ); rx_edpkt_dma_wq(intf, intf->rx_edpkt_dma_addr, RX_EDPKT_DMA_BASE); - rx_edpkt_dma_wq(intf, intf->rx_edpkt_dma_addr + (DESC_RING_SIZE - 1), - RX_EDPKT_DMA_END); - rx_edpkt_dma_wq(intf, intf->rx_edpkt_dma_addr + (DESC_RING_SIZE - 1), - RX_EDPKT_DMA_VALID); - - reg = UMAC2FB_CFG_DEFAULT_EN | - ((intf->channel + 11) << UMAC2FB_CFG_CHID_SHIFT); - reg |= (0xd << UMAC2FB_CFG_OK_SEND_SHIFT); - umac2fb_wl(intf, reg, UMAC2FB_CFG); - - return 0; - -free_rx_ring: - dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE, - DMA_FROM_DEVICE); - __free_pages(virt_to_page(intf->rx_ring_cpu), intf->rx_buf_order); + rx_edpkt_dma_wq(intf, intf->rx_edpkt_dma_valid, RX_EDPKT_DMA_END); + rx_edpkt_dma_wq(intf, intf->rx_edpkt_dma_valid, RX_EDPKT_DMA_VALID); - return ret; + umac2fb_wl(intf, UMAC2FB_CFG_DEFAULT_EN | ((intf->channel + 11) << + UMAC2FB_CFG_CHID_SHIFT) | (0xd << UMAC2FB_CFG_OK_SEND_SHIFT), + UMAC2FB_CFG); } -static void bcmasp_reclaim_free_all_rx(struct bcmasp_intf *intf) -{ - struct device *kdev = &intf->parent->pdev->dev; - dma_free_coherent(kdev, DESC_RING_SIZE, intf->rx_edpkt_cpu, - intf->rx_edpkt_dma_addr); - dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE, - DMA_FROM_DEVICE); - __free_pages(virt_to_page(intf->rx_ring_cpu), intf->rx_buf_order); -} - -static int bcmasp_init_tx(struct bcmasp_intf *intf) +static void bcmasp_init_tx(struct bcmasp_intf *intf) { - struct device *kdev = &intf->parent->pdev->dev; - void *p; - int ret; - - p = dma_alloc_coherent(kdev, DESC_RING_SIZE, &intf->tx_spb_dma_addr, - GFP_KERNEL); - if (!p) - return -ENOMEM; - - intf->tx_spb_cpu = p; + /* Restart from index 0 */ intf->tx_spb_dma_valid = intf->tx_spb_dma_addr + DESC_RING_SIZE - 1; intf->tx_spb_dma_read = intf->tx_spb_dma_addr; - - intf->tx_cbs = kcalloc(DESC_RING_COUNT, sizeof(struct bcmasp_tx_cb), - GFP_KERNEL); - if (!intf->tx_cbs) { - ret = -ENOMEM; - goto free_tx_spb; - } - intf->tx_spb_index = 0; intf->tx_spb_clean_index = 0; - netif_napi_add_tx(intf->ndev, &intf->tx_napi, bcmasp_tx_poll); - /* Make sure channels are disabled */ tx_spb_ctrl_wl(intf, 0x0, TX_SPB_CTRL_ENABLE); tx_epkt_core_wl(intf, 0x0, TX_EPKT_C_CFG_MISC); @@ -806,26 +804,6 @@ static int bcmasp_init_tx(struct bcmasp_intf *intf) tx_spb_dma_wq(intf, intf->tx_spb_dma_addr, TX_SPB_DMA_BASE); tx_spb_dma_wq(intf, intf->tx_spb_dma_valid, TX_SPB_DMA_END); tx_spb_dma_wq(intf, intf->tx_spb_dma_valid, TX_SPB_DMA_VALID); - - return 0; - -free_tx_spb: - dma_free_coherent(kdev, DESC_RING_SIZE, intf->tx_spb_cpu, - intf->tx_spb_dma_addr); - - return ret; -} - -static void bcmasp_reclaim_free_all_tx(struct bcmasp_intf *intf) -{ - struct device *kdev = &intf->parent->pdev->dev; - - /* Free descriptors */ - dma_free_coherent(kdev, DESC_RING_SIZE, intf->tx_spb_cpu, - intf->tx_spb_dma_addr); - - /* Free cbs */ - kfree(intf->tx_cbs); } static void bcmasp_ephy_enable_set(struct bcmasp_intf *intf, bool enable) @@ -913,12 +891,10 @@ static void bcmasp_netif_deinit(struct net_device *dev) /* Disable interrupts */ bcmasp_enable_tx_irq(intf, 0); bcmasp_enable_rx_irq(intf, 0); + bcmasp_enable_phy_irq(intf, 0); netif_napi_del(&intf->tx_napi); - bcmasp_reclaim_free_all_tx(intf); - netif_napi_del(&intf->rx_napi); - bcmasp_reclaim_free_all_rx(intf); } static int bcmasp_stop(struct net_device *dev) @@ -932,6 +908,8 @@ static int bcmasp_stop(struct net_device *dev) bcmasp_netif_deinit(dev); + bcmasp_reclaim_free_buffers(intf); + phy_disconnect(dev->phydev); /* Disable internal EPHY or external PHY */ @@ -1052,6 +1030,9 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect) goto err_phy_disable; } + if (intf->internal_phy) + dev->phydev->irq = PHY_MAC_INTERRUPT; + /* Indicate that the MAC is responsible for PHY PM */ phydev->mac_managed_pm = true; } else if (!intf->wolopts) { @@ -1073,17 +1054,12 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect) intf->old_link = -1; intf->old_pause = -1; - ret = bcmasp_init_tx(intf); - if (ret) - goto err_phy_disconnect; - - /* Turn on asp */ + bcmasp_init_tx(intf); + netif_napi_add_tx(intf->ndev, &intf->tx_napi, bcmasp_tx_poll); bcmasp_enable_tx(intf, 1); - ret = bcmasp_init_rx(intf); - if (ret) - goto err_reclaim_tx; - + bcmasp_init_rx(intf); + netif_napi_add(intf->ndev, &intf->rx_napi, bcmasp_rx_poll); bcmasp_enable_rx(intf, 1); /* Turn on UniMAC TX/RX */ @@ -1097,12 +1073,6 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect) return 0; -err_reclaim_tx: - netif_napi_del(&intf->tx_napi); - bcmasp_reclaim_free_all_tx(intf); -err_phy_disconnect: - if (phydev) - phy_disconnect(phydev); err_phy_disable: if (intf->internal_phy) bcmasp_ephy_enable_set(intf, false); @@ -1118,13 +1088,24 @@ static int bcmasp_open(struct net_device *dev) netif_dbg(intf, ifup, dev, "bcmasp open\n"); - ret = clk_prepare_enable(intf->parent->clk); + ret = bcmasp_alloc_buffers(intf); if (ret) return ret; - ret = bcmasp_netif_init(dev, true); + ret = clk_prepare_enable(intf->parent->clk); if (ret) + goto err_free_mem; + + ret = bcmasp_netif_init(dev, true); + if (ret) { clk_disable_unprepare(intf->parent->clk); + goto err_free_mem; + } + + return ret; + +err_free_mem: + bcmasp_reclaim_free_buffers(intf); return ret; } @@ -1333,6 +1314,9 @@ static void bcmasp_suspend_to_wol(struct bcmasp_intf *intf) ASP_WAKEUP_INTR2_MASK_CLEAR); } + if (intf->eee.eee_enabled && intf->parent->eee_fixup) + intf->parent->eee_fixup(intf, true); + netif_dbg(intf, wol, ndev, "entered WOL mode\n"); } @@ -1381,6 +1365,9 @@ static void bcmasp_resume_from_wol(struct bcmasp_intf *intf) { u32 reg; + if (intf->eee.eee_enabled && intf->parent->eee_fixup) + intf->parent->eee_fixup(intf, false); + reg = umac_rl(intf, UMC_MPD_CTRL); reg &= ~UMC_MPD_CTRL_MPD_EN; umac_wl(intf, reg, UMC_MPD_CTRL); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 528441b28c4e..c9b6acd8c892 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -3538,7 +3538,7 @@ static u8 bnx2x_set_pbd_csum_enc(struct bnx2x *bp, struct sk_buff *skb, u32 *parsing_data, u32 xmit_type) { *parsing_data |= - ((((u8 *)skb_inner_transport_header(skb) - skb->data) >> 1) << + ((skb_inner_transport_offset(skb) >> 1) << ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W_SHIFT) & ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W; @@ -3570,7 +3570,7 @@ static u8 bnx2x_set_pbd_csum_e2(struct bnx2x *bp, struct sk_buff *skb, u32 *parsing_data, u32 xmit_type) { *parsing_data |= - ((((u8 *)skb_transport_header(skb) - skb->data) >> 1) << + ((skb_transport_offset(skb) >> 1) << ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W_SHIFT) & ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W; @@ -3613,7 +3613,7 @@ static u8 bnx2x_set_pbd_csum(struct bnx2x *bp, struct sk_buff *skb, struct eth_tx_parse_bd_e1x *pbd, u32 xmit_type) { - u8 hlen = (skb_network_header(skb) - skb->data) >> 1; + u8 hlen = skb_network_offset(skb) >> 1; /* for now NS flag is not used in Linux */ pbd->global_data = @@ -3621,8 +3621,7 @@ static u8 bnx2x_set_pbd_csum(struct bnx2x *bp, struct sk_buff *skb, ((skb->protocol == cpu_to_be16(ETH_P_8021Q)) << ETH_TX_PARSE_BD_E1X_LLC_SNAP_EN_SHIFT)); - pbd->ip_hlen_w = (skb_transport_header(skb) - - skb_network_header(skb)) >> 1; + pbd->ip_hlen_w = skb_network_header_len(skb) >> 1; hlen += pbd->ip_hlen_w; @@ -3667,8 +3666,7 @@ static void bnx2x_update_pbds_gso_enc(struct sk_buff *skb, u8 outerip_off, outerip_len = 0; /* from outer IP to transport */ - hlen_w = (skb_inner_transport_header(skb) - - skb_network_header(skb)) >> 1; + hlen_w = skb_inner_transport_offset(skb) >> 1; /* transport len */ hlen_w += inner_tcp_hdrlen(skb) >> 1; @@ -3714,7 +3712,7 @@ static void bnx2x_update_pbds_gso_enc(struct sk_buff *skb, 0, IPPROTO_TCP, 0)); } - outerip_off = (skb_network_header(skb) - skb->data) >> 1; + outerip_off = (skb_network_offset(skb)) >> 1; *global_data |= outerip_off | diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 207f1f66c117..d7693fdf640d 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2017,6 +2017,37 @@ static int fec_get_mac(struct net_device *ndev) /* * Phy section */ + +/* LPI Sleep Ts count base on tx clk (clk_ref). + * The lpi sleep cnt value = X us / (cycle_ns). + */ +static int fec_enet_us_to_tx_cycle(struct net_device *ndev, int us) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + + return us * (fep->clk_ref_rate / 1000) / 1000; +} + +static int fec_enet_eee_mode_set(struct net_device *ndev, bool enable) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + struct ethtool_keee *p = &fep->eee; + unsigned int sleep_cycle, wake_cycle; + + if (enable) { + sleep_cycle = fec_enet_us_to_tx_cycle(ndev, p->tx_lpi_timer); + wake_cycle = sleep_cycle; + } else { + sleep_cycle = 0; + wake_cycle = 0; + } + + writel(sleep_cycle, fep->hwp + FEC_LPI_SLEEP); + writel(wake_cycle, fep->hwp + FEC_LPI_WAKE); + + return 0; +} + static void fec_enet_adjust_link(struct net_device *ndev) { struct fec_enet_private *fep = netdev_priv(ndev); @@ -2056,6 +2087,8 @@ static void fec_enet_adjust_link(struct net_device *ndev) netif_tx_unlock_bh(ndev); napi_enable(&fep->napi); } + if (fep->quirks & FEC_QUIRK_HAS_EEE) + fec_enet_eee_mode_set(ndev, phy_dev->enable_tx_lpi); } else { if (fep->link) { netif_stop_queue(ndev); @@ -2415,6 +2448,9 @@ static int fec_enet_mii_probe(struct net_device *ndev) else phy_set_max_speed(phy_dev, 100); + if (fep->quirks & FEC_QUIRK_HAS_EEE) + phy_support_eee(phy_dev); + fep->link = 0; fep->full_duplex = 0; @@ -3121,43 +3157,6 @@ static int fec_enet_set_coalesce(struct net_device *ndev, return 0; } -/* LPI Sleep Ts count base on tx clk (clk_ref). - * The lpi sleep cnt value = X us / (cycle_ns). - */ -static int fec_enet_us_to_tx_cycle(struct net_device *ndev, int us) -{ - struct fec_enet_private *fep = netdev_priv(ndev); - - return us * (fep->clk_ref_rate / 1000) / 1000; -} - -static int fec_enet_eee_mode_set(struct net_device *ndev, bool enable) -{ - struct fec_enet_private *fep = netdev_priv(ndev); - struct ethtool_keee *p = &fep->eee; - unsigned int sleep_cycle, wake_cycle; - int ret = 0; - - if (enable) { - ret = phy_init_eee(ndev->phydev, false); - if (ret) - return ret; - - sleep_cycle = fec_enet_us_to_tx_cycle(ndev, p->tx_lpi_timer); - wake_cycle = sleep_cycle; - } else { - sleep_cycle = 0; - wake_cycle = 0; - } - - p->tx_lpi_enabled = enable; - - writel(sleep_cycle, fep->hwp + FEC_LPI_SLEEP); - writel(wake_cycle, fep->hwp + FEC_LPI_WAKE); - - return 0; -} - static int fec_enet_get_eee(struct net_device *ndev, struct ethtool_keee *edata) { @@ -3171,7 +3170,6 @@ fec_enet_get_eee(struct net_device *ndev, struct ethtool_keee *edata) return -ENETDOWN; edata->tx_lpi_timer = p->tx_lpi_timer; - edata->tx_lpi_enabled = p->tx_lpi_enabled; return phy_ethtool_get_eee(ndev->phydev, edata); } @@ -3181,7 +3179,6 @@ fec_enet_set_eee(struct net_device *ndev, struct ethtool_keee *edata) { struct fec_enet_private *fep = netdev_priv(ndev); struct ethtool_keee *p = &fep->eee; - int ret = 0; if (!(fep->quirks & FEC_QUIRK_HAS_EEE)) return -EOPNOTSUPP; @@ -3191,15 +3188,6 @@ fec_enet_set_eee(struct net_device *ndev, struct ethtool_keee *edata) p->tx_lpi_timer = edata->tx_lpi_timer; - if (!edata->eee_enabled || !edata->tx_lpi_enabled || - !edata->tx_lpi_timer) - ret = fec_enet_eee_mode_set(ndev, false); - else - ret = fec_enet_eee_mode_set(ndev, true); - - if (ret) - return ret; - return phy_ethtool_set_eee(ndev->phydev, edata); } diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c index 9ba15d3183d7..758535adc9ff 100644 --- a/drivers/net/ethernet/freescale/fman/fman_memac.c +++ b/drivers/net/ethernet/freescale/fman/fman_memac.c @@ -1073,6 +1073,14 @@ int memac_initialization(struct mac_device *mac_dev, unsigned long capabilities; unsigned long *supported; + /* The internal connection to the serdes is XGMII, but this isn't + * really correct for the phy mode (which is the external connection). + * However, this is how all older device trees say that they want + * 10GBASE-R (aka XFI), so just convert it for them. + */ + if (mac_dev->phy_if == PHY_INTERFACE_MODE_XGMII) + mac_dev->phy_if = PHY_INTERFACE_MODE_10GBASER; + mac_dev->phylink_ops = &memac_mac_ops; mac_dev->set_promisc = memac_set_promiscuous; mac_dev->change_addr = memac_modify_mac_address; @@ -1139,7 +1147,7 @@ int memac_initialization(struct mac_device *mac_dev, * (and therefore that xfi_pcs cannot be set). If we are defaulting to * XGMII, assume this is for XFI. Otherwise, assume it is for SGMII. */ - if (err && mac_dev->phy_if == PHY_INTERFACE_MODE_XGMII) + if (err && mac_dev->phy_if == PHY_INTERFACE_MODE_10GBASER) memac->xfi_pcs = pcs; else memac->sgmii_pcs = pcs; @@ -1153,14 +1161,6 @@ int memac_initialization(struct mac_device *mac_dev, goto _return_fm_mac_free; } - /* The internal connection to the serdes is XGMII, but this isn't - * really correct for the phy mode (which is the external connection). - * However, this is how all older device trees say that they want - * 10GBASE-R (aka XFI), so just convert it for them. - */ - if (mac_dev->phy_if == PHY_INTERFACE_MODE_XGMII) - mac_dev->phy_if = PHY_INTERFACE_MODE_10GBASER; - /* TODO: The following interface modes are supported by (some) hardware * but not by this driver: * - 1000BASE-KX diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index fd290f3ad6ec..4814c96d5fe7 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -9,6 +9,7 @@ #include <linux/dma-mapping.h> #include <linux/dmapool.h> +#include <linux/ethtool_netlink.h> #include <linux/netdevice.h> #include <linux/pci.h> #include <linux/u64_stats_sync.h> @@ -51,12 +52,16 @@ #define GVE_DEFAULT_RX_BUFFER_SIZE 2048 +#define GVE_MAX_RX_BUFFER_SIZE 4096 + #define GVE_DEFAULT_RX_BUFFER_OFFSET 2048 #define GVE_XDP_ACTIONS 5 #define GVE_GQ_TX_MIN_PKT_DESC_BYTES 182 +#define GVE_DEFAULT_HEADER_BUFFER_SIZE 128 + #define DQO_QPL_DEFAULT_TX_PAGES 512 #define DQO_QPL_DEFAULT_RX_PAGES 2048 @@ -150,6 +155,11 @@ struct gve_rx_compl_queue_dqo { u32 mask; /* Mask for indices to the size of the ring */ }; +struct gve_header_buf { + u8 *data; + dma_addr_t addr; +}; + /* Stores state for tracking buffers posted to HW */ struct gve_rx_buf_state_dqo { /* The page posted to HW. */ @@ -252,19 +262,26 @@ struct gve_rx_ring { /* track number of used buffers */ u16 used_buf_states_cnt; + + /* Address info of the buffers for header-split */ + struct gve_header_buf hdr_bufs; } dqo; }; u64 rbytes; /* free-running bytes received */ + u64 rx_hsplit_bytes; /* free-running header bytes received */ u64 rpackets; /* free-running packets received */ u32 cnt; /* free-running total number of completed packets */ u32 fill_cnt; /* free-running total number of descs and buffs posted */ u32 mask; /* masks the cnt and fill_cnt to the size of the ring */ + u64 rx_hsplit_pkt; /* free-running packets with headers split */ u64 rx_copybreak_pkt; /* free-running count of copybreak packets */ u64 rx_copied_pkt; /* free-running total number of copied packets */ u64 rx_skb_alloc_fail; /* free-running count of skb alloc fails */ u64 rx_buf_alloc_fail; /* free-running count of buffer alloc fails */ u64 rx_desc_err_dropped_pkt; /* free-running count of packets dropped by descriptor error */ + /* free-running count of unsplit packets due to header buffer overflow or hdr_len is 0 */ + u64 rx_hsplit_unsplit_pkt; u64 rx_cont_packet_cnt; /* free-running multi-fragment packets received */ u64 rx_frag_flip_cnt; /* free-running count of rx segments where page_flip was used */ u64 rx_frag_copy_cnt; /* free-running count of rx segments copied */ @@ -664,6 +681,7 @@ struct gve_rx_alloc_rings_cfg { struct gve_qpl_config *qpl_cfg; u16 ring_size; + u16 packet_buffer_size; bool raw_addressing; bool enable_header_split; @@ -778,13 +796,17 @@ struct gve_priv { struct gve_ptype_lut *ptype_lut_dqo; /* Must be a power of two. */ - int data_buffer_size_dqo; + u16 data_buffer_size_dqo; + u16 max_rx_buffer_size; /* device limit */ enum gve_queue_format queue_format; /* Interrupt coalescing settings */ u32 tx_coalesce_usecs; u32 rx_coalesce_usecs; + + u16 header_buf_size; /* device configured, header-split supported if non-zero */ + bool header_split_enabled; /* True if the header split is enabled by the user */ }; enum gve_service_task_flags_bit { @@ -1122,6 +1144,9 @@ void gve_rx_free_rings_gqi(struct gve_priv *priv, struct gve_rx_alloc_rings_cfg *cfg); void gve_rx_start_ring_gqi(struct gve_priv *priv, int idx); void gve_rx_stop_ring_gqi(struct gve_priv *priv, int idx); +u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hplit); +bool gve_header_split_supported(const struct gve_priv *priv); +int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split); /* Reset */ void gve_schedule_reset(struct gve_priv *priv); int gve_reset(struct gve_priv *priv, bool attempt_teardown); diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index 12fbd723ecc6..ae12ac38e18b 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -40,7 +40,8 @@ void gve_parse_device_option(struct gve_priv *priv, struct gve_device_option_gqi_qpl **dev_op_gqi_qpl, struct gve_device_option_dqo_rda **dev_op_dqo_rda, struct gve_device_option_jumbo_frames **dev_op_jumbo_frames, - struct gve_device_option_dqo_qpl **dev_op_dqo_qpl) + struct gve_device_option_dqo_qpl **dev_op_dqo_qpl, + struct gve_device_option_buffer_sizes **dev_op_buffer_sizes) { u32 req_feat_mask = be32_to_cpu(option->required_features_mask); u16 option_length = be16_to_cpu(option->option_length); @@ -147,6 +148,23 @@ void gve_parse_device_option(struct gve_priv *priv, } *dev_op_jumbo_frames = (void *)(option + 1); break; + case GVE_DEV_OPT_ID_BUFFER_SIZES: + if (option_length < sizeof(**dev_op_buffer_sizes) || + req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_BUFFER_SIZES) { + dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT, + "Buffer Sizes", + (int)sizeof(**dev_op_buffer_sizes), + GVE_DEV_OPT_REQ_FEAT_MASK_BUFFER_SIZES, + option_length, req_feat_mask); + break; + } + + if (option_length > sizeof(**dev_op_buffer_sizes)) + dev_warn(&priv->pdev->dev, + GVE_DEVICE_OPTION_TOO_BIG_FMT, + "Buffer Sizes"); + *dev_op_buffer_sizes = (void *)(option + 1); + break; default: /* If we don't recognize the option just continue * without doing anything. @@ -164,7 +182,8 @@ gve_process_device_options(struct gve_priv *priv, struct gve_device_option_gqi_qpl **dev_op_gqi_qpl, struct gve_device_option_dqo_rda **dev_op_dqo_rda, struct gve_device_option_jumbo_frames **dev_op_jumbo_frames, - struct gve_device_option_dqo_qpl **dev_op_dqo_qpl) + struct gve_device_option_dqo_qpl **dev_op_dqo_qpl, + struct gve_device_option_buffer_sizes **dev_op_buffer_sizes) { const int num_options = be16_to_cpu(descriptor->num_device_options); struct gve_device_option *dev_opt; @@ -185,7 +204,7 @@ gve_process_device_options(struct gve_priv *priv, gve_parse_device_option(priv, descriptor, dev_opt, dev_op_gqi_rda, dev_op_gqi_qpl, dev_op_dqo_rda, dev_op_jumbo_frames, - dev_op_dqo_qpl); + dev_op_dqo_qpl, dev_op_buffer_sizes); dev_opt = next_opt; } @@ -640,6 +659,9 @@ static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index) cpu_to_be16(rx_buff_ring_entries); cmd.create_rx_queue.enable_rsc = !!(priv->dev->features & NETIF_F_LRO); + if (priv->header_split_enabled) + cmd.create_rx_queue.header_buffer_size = + cpu_to_be16(priv->header_buf_size); } return gve_adminq_issue_cmd(priv, &cmd); @@ -755,7 +777,9 @@ static void gve_enable_supported_features(struct gve_priv *priv, const struct gve_device_option_jumbo_frames *dev_op_jumbo_frames, const struct gve_device_option_dqo_qpl - *dev_op_dqo_qpl) + *dev_op_dqo_qpl, + const struct gve_device_option_buffer_sizes + *dev_op_buffer_sizes) { /* Before control reaches this point, the page-size-capped max MTU from * the gve_device_descriptor field has already been stored in @@ -779,10 +803,22 @@ static void gve_enable_supported_features(struct gve_priv *priv, if (priv->rx_pages_per_qpl == 0) priv->rx_pages_per_qpl = DQO_QPL_DEFAULT_RX_PAGES; } + + if (dev_op_buffer_sizes && + (supported_features_mask & GVE_SUP_BUFFER_SIZES_MASK)) { + priv->max_rx_buffer_size = + be16_to_cpu(dev_op_buffer_sizes->packet_buffer_size); + priv->header_buf_size = + be16_to_cpu(dev_op_buffer_sizes->header_buffer_size); + dev_info(&priv->pdev->dev, + "BUFFER SIZES device option enabled with max_rx_buffer_size of %u, header_buf_size of %u.\n", + priv->max_rx_buffer_size, priv->header_buf_size); + } } int gve_adminq_describe_device(struct gve_priv *priv) { + struct gve_device_option_buffer_sizes *dev_op_buffer_sizes = NULL; struct gve_device_option_jumbo_frames *dev_op_jumbo_frames = NULL; struct gve_device_option_gqi_rda *dev_op_gqi_rda = NULL; struct gve_device_option_gqi_qpl *dev_op_gqi_qpl = NULL; @@ -816,7 +852,8 @@ int gve_adminq_describe_device(struct gve_priv *priv) err = gve_process_device_options(priv, descriptor, &dev_op_gqi_rda, &dev_op_gqi_qpl, &dev_op_dqo_rda, &dev_op_jumbo_frames, - &dev_op_dqo_qpl); + &dev_op_dqo_qpl, + &dev_op_buffer_sizes); if (err) goto free_device_descriptor; @@ -885,7 +922,8 @@ int gve_adminq_describe_device(struct gve_priv *priv) priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues); gve_enable_supported_features(priv, supported_features_mask, - dev_op_jumbo_frames, dev_op_dqo_qpl); + dev_op_jumbo_frames, dev_op_dqo_qpl, + dev_op_buffer_sizes); free_device_descriptor: dma_pool_free(priv->adminq_pool, descriptor, descriptor_bus); diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h index 5865ccdccbd0..5ac972e45ff8 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.h +++ b/drivers/net/ethernet/google/gve/gve_adminq.h @@ -125,6 +125,15 @@ struct gve_device_option_jumbo_frames { static_assert(sizeof(struct gve_device_option_jumbo_frames) == 8); +struct gve_device_option_buffer_sizes { + /* GVE_SUP_BUFFER_SIZES_MASK bit should be set */ + __be32 supported_features_mask; + __be16 packet_buffer_size; + __be16 header_buffer_size; +}; + +static_assert(sizeof(struct gve_device_option_buffer_sizes) == 8); + /* Terminology: * * RDA - Raw DMA Addressing - Buffers associated with SKBs are directly DMA @@ -140,6 +149,7 @@ enum gve_dev_opt_id { GVE_DEV_OPT_ID_DQO_RDA = 0x4, GVE_DEV_OPT_ID_DQO_QPL = 0x7, GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8, + GVE_DEV_OPT_ID_BUFFER_SIZES = 0xa, }; enum gve_dev_opt_req_feat_mask { @@ -149,10 +159,12 @@ enum gve_dev_opt_req_feat_mask { GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0, GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES = 0x0, GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_BUFFER_SIZES = 0x0, }; enum gve_sup_feature_mask { GVE_SUP_JUMBO_FRAMES_MASK = 1 << 2, + GVE_SUP_BUFFER_SIZES_MASK = 1 << 4, }; #define GVE_DEV_OPT_LEN_GQI_RAW_ADDRESSING 0x0 @@ -165,6 +177,7 @@ enum gve_driver_capbility { gve_driver_capability_dqo_qpl = 2, /* reserved for future use */ gve_driver_capability_dqo_rda = 3, gve_driver_capability_alt_miss_compl = 4, + gve_driver_capability_flexible_buffer_size = 5, }; #define GVE_CAP1(a) BIT((int)a) @@ -176,7 +189,8 @@ enum gve_driver_capbility { (GVE_CAP1(gve_driver_capability_gqi_qpl) | \ GVE_CAP1(gve_driver_capability_gqi_rda) | \ GVE_CAP1(gve_driver_capability_dqo_rda) | \ - GVE_CAP1(gve_driver_capability_alt_miss_compl)) + GVE_CAP1(gve_driver_capability_alt_miss_compl) | \ + GVE_CAP1(gve_driver_capability_flexible_buffer_size)) #define GVE_DRIVER_CAPABILITY_FLAGS2 0x0 #define GVE_DRIVER_CAPABILITY_FLAGS3 0x0 @@ -260,7 +274,9 @@ struct gve_adminq_create_rx_queue { __be16 packet_buffer_size; __be16 rx_buff_ring_size; u8 enable_rsc; - u8 padding[5]; + u8 padding1; + __be16 header_buffer_size; + u8 padding2[2]; }; static_assert(sizeof(struct gve_adminq_create_rx_queue) == 56); diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index e5397aa1e48f..9aebfb843d9d 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -4,7 +4,6 @@ * Copyright (C) 2015-2021 Google, Inc. */ -#include <linux/ethtool.h> #include <linux/rtnetlink.h> #include "gve.h" #include "gve_adminq.h" @@ -40,17 +39,18 @@ static u32 gve_get_msglevel(struct net_device *netdev) * as declared in enum xdp_action inside file uapi/linux/bpf.h . */ static const char gve_gstrings_main_stats[][ETH_GSTRING_LEN] = { - "rx_packets", "tx_packets", "rx_bytes", "tx_bytes", - "rx_dropped", "tx_dropped", "tx_timeouts", + "rx_packets", "rx_hsplit_pkt", "tx_packets", "rx_bytes", + "tx_bytes", "rx_dropped", "tx_dropped", "tx_timeouts", "rx_skb_alloc_fail", "rx_buf_alloc_fail", "rx_desc_err_dropped_pkt", + "rx_hsplit_unsplit_pkt", "interface_up_cnt", "interface_down_cnt", "reset_cnt", "page_alloc_fail", "dma_mapping_error", "stats_report_trigger_cnt", }; static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = { - "rx_posted_desc[%u]", "rx_completed_desc[%u]", "rx_consumed_desc[%u]", "rx_bytes[%u]", - "rx_cont_packet_cnt[%u]", "rx_frag_flip_cnt[%u]", "rx_frag_copy_cnt[%u]", - "rx_frag_alloc_cnt[%u]", + "rx_posted_desc[%u]", "rx_completed_desc[%u]", "rx_consumed_desc[%u]", + "rx_bytes[%u]", "rx_hsplit_bytes[%u]", "rx_cont_packet_cnt[%u]", + "rx_frag_flip_cnt[%u]", "rx_frag_copy_cnt[%u]", "rx_frag_alloc_cnt[%u]", "rx_dropped_pkt[%u]", "rx_copybreak_pkt[%u]", "rx_copied_pkt[%u]", "rx_queue_drop_cnt[%u]", "rx_no_buffers_posted[%u]", "rx_drops_packet_over_mru[%u]", "rx_drops_invalid_checksum[%u]", @@ -154,11 +154,13 @@ static void gve_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats, u64 *data) { - u64 tmp_rx_pkts, tmp_rx_bytes, tmp_rx_skb_alloc_fail, - tmp_rx_buf_alloc_fail, tmp_rx_desc_err_dropped_pkt, + u64 tmp_rx_pkts, tmp_rx_hsplit_pkt, tmp_rx_bytes, tmp_rx_hsplit_bytes, + tmp_rx_skb_alloc_fail, tmp_rx_buf_alloc_fail, + tmp_rx_desc_err_dropped_pkt, tmp_rx_hsplit_unsplit_pkt, tmp_tx_pkts, tmp_tx_bytes; - u64 rx_buf_alloc_fail, rx_desc_err_dropped_pkt, rx_pkts, - rx_skb_alloc_fail, rx_bytes, tx_pkts, tx_bytes, tx_dropped; + u64 rx_buf_alloc_fail, rx_desc_err_dropped_pkt, rx_hsplit_unsplit_pkt, + rx_pkts, rx_hsplit_pkt, rx_skb_alloc_fail, rx_bytes, tx_pkts, tx_bytes, + tx_dropped; int stats_idx, base_stats_idx, max_stats_idx; struct stats *report_stats; int *rx_qid_to_stats_idx; @@ -185,8 +187,10 @@ gve_get_ethtool_stats(struct net_device *netdev, kfree(rx_qid_to_stats_idx); return; } - for (rx_pkts = 0, rx_bytes = 0, rx_skb_alloc_fail = 0, - rx_buf_alloc_fail = 0, rx_desc_err_dropped_pkt = 0, ring = 0; + for (rx_pkts = 0, rx_bytes = 0, rx_hsplit_pkt = 0, + rx_skb_alloc_fail = 0, rx_buf_alloc_fail = 0, + rx_desc_err_dropped_pkt = 0, rx_hsplit_unsplit_pkt = 0, + ring = 0; ring < priv->rx_cfg.num_queues; ring++) { if (priv->rx) { do { @@ -195,18 +199,23 @@ gve_get_ethtool_stats(struct net_device *netdev, start = u64_stats_fetch_begin(&priv->rx[ring].statss); tmp_rx_pkts = rx->rpackets; + tmp_rx_hsplit_pkt = rx->rx_hsplit_pkt; tmp_rx_bytes = rx->rbytes; tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail; tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail; tmp_rx_desc_err_dropped_pkt = rx->rx_desc_err_dropped_pkt; + tmp_rx_hsplit_unsplit_pkt = + rx->rx_hsplit_unsplit_pkt; } while (u64_stats_fetch_retry(&priv->rx[ring].statss, start)); rx_pkts += tmp_rx_pkts; + rx_hsplit_pkt += tmp_rx_hsplit_pkt; rx_bytes += tmp_rx_bytes; rx_skb_alloc_fail += tmp_rx_skb_alloc_fail; rx_buf_alloc_fail += tmp_rx_buf_alloc_fail; rx_desc_err_dropped_pkt += tmp_rx_desc_err_dropped_pkt; + rx_hsplit_unsplit_pkt += tmp_rx_hsplit_unsplit_pkt; } } for (tx_pkts = 0, tx_bytes = 0, tx_dropped = 0, ring = 0; @@ -227,6 +236,7 @@ gve_get_ethtool_stats(struct net_device *netdev, i = 0; data[i++] = rx_pkts; + data[i++] = rx_hsplit_pkt; data[i++] = tx_pkts; data[i++] = rx_bytes; data[i++] = tx_bytes; @@ -238,6 +248,7 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = rx_skb_alloc_fail; data[i++] = rx_buf_alloc_fail; data[i++] = rx_desc_err_dropped_pkt; + data[i++] = rx_hsplit_unsplit_pkt; data[i++] = priv->interface_up_cnt; data[i++] = priv->interface_down_cnt; data[i++] = priv->reset_cnt; @@ -277,6 +288,7 @@ gve_get_ethtool_stats(struct net_device *netdev, start = u64_stats_fetch_begin(&priv->rx[ring].statss); tmp_rx_bytes = rx->rbytes; + tmp_rx_hsplit_bytes = rx->rx_hsplit_bytes; tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail; tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail; tmp_rx_desc_err_dropped_pkt = @@ -284,6 +296,7 @@ gve_get_ethtool_stats(struct net_device *netdev, } while (u64_stats_fetch_retry(&priv->rx[ring].statss, start)); data[i++] = tmp_rx_bytes; + data[i++] = tmp_rx_hsplit_bytes; data[i++] = rx->rx_cont_packet_cnt; data[i++] = rx->rx_frag_flip_cnt; data[i++] = rx->rx_frag_copy_cnt; @@ -480,6 +493,29 @@ static void gve_get_ringparam(struct net_device *netdev, cmd->tx_max_pending = priv->tx_desc_cnt; cmd->rx_pending = priv->rx_desc_cnt; cmd->tx_pending = priv->tx_desc_cnt; + + if (!gve_header_split_supported(priv)) + kernel_cmd->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_UNKNOWN; + else if (priv->header_split_enabled) + kernel_cmd->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_ENABLED; + else + kernel_cmd->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_DISABLED; +} + +static int gve_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *cmd, + struct kernel_ethtool_ringparam *kernel_cmd, + struct netlink_ext_ack *extack) +{ + struct gve_priv *priv = netdev_priv(netdev); + + if (priv->tx_desc_cnt != cmd->tx_pending || + priv->rx_desc_cnt != cmd->rx_pending) { + dev_info(&priv->pdev->dev, "Modify ring size is not supported.\n"); + return -EOPNOTSUPP; + } + + return gve_set_hsplit_config(priv, kernel_cmd->tcp_data_split); } static int gve_user_reset(struct net_device *netdev, u32 *flags) @@ -655,6 +691,7 @@ static int gve_set_coalesce(struct net_device *netdev, const struct ethtool_ops gve_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS, + .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT, .get_drvinfo = gve_get_drvinfo, .get_strings = gve_get_strings, .get_sset_count = gve_get_sset_count, @@ -667,6 +704,7 @@ const struct ethtool_ops gve_ethtool_ops = { .get_coalesce = gve_get_coalesce, .set_coalesce = gve_set_coalesce, .get_ringparam = gve_get_ringparam, + .set_ringparam = gve_set_ringparam, .reset = gve_user_reset, .get_tunable = gve_get_tunable, .set_tunable = gve_set_tunable, diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index db6d9ae7cd78..166bd827a6d7 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1276,17 +1276,10 @@ static void gve_unreg_xdp_info(struct gve_priv *priv) static void gve_drain_page_cache(struct gve_priv *priv) { - struct page_frag_cache *nc; int i; - for (i = 0; i < priv->rx_cfg.num_queues; i++) { - nc = &priv->rx[i].page_cache; - if (nc->va) { - __page_frag_cache_drain(virt_to_page(nc->va), - nc->pagecnt_bias); - nc->va = NULL; - } - } + for (i = 0; i < priv->rx_cfg.num_queues; i++) + page_frag_cache_drain(&priv->rx[i].page_cache); } static void gve_qpls_get_curr_alloc_cfg(struct gve_priv *priv, @@ -1307,9 +1300,13 @@ static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv, cfg->qcfg = &priv->rx_cfg; cfg->qcfg_tx = &priv->tx_cfg; cfg->raw_addressing = !gve_is_qpl(priv); + cfg->enable_header_split = priv->header_split_enabled; cfg->qpls = priv->qpls; cfg->qpl_cfg = &priv->qpl_cfg; cfg->ring_size = priv->rx_desc_cnt; + cfg->packet_buffer_size = gve_is_gqi(priv) ? + GVE_DEFAULT_RX_BUFFER_SIZE : + priv->data_buffer_size_dqo; cfg->rx = priv->rx; } @@ -1448,12 +1445,9 @@ static int gve_queues_start(struct gve_priv *priv, if (err) goto reset; - if (!gve_is_gqi(priv)) { - /* Hard code this for now. This may be tuned in the future for - * performance. - */ - priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE; - } + priv->header_split_enabled = rx_alloc_cfg->enable_header_split; + priv->data_buffer_size_dqo = rx_alloc_cfg->packet_buffer_size; + err = gve_create_rings(priv); if (err) goto reset; @@ -2065,6 +2059,56 @@ out: priv->tx_timeo_cnt++; } +u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit) +{ + if (enable_hsplit && priv->max_rx_buffer_size >= GVE_MAX_RX_BUFFER_SIZE) + return GVE_MAX_RX_BUFFER_SIZE; + else + return GVE_DEFAULT_RX_BUFFER_SIZE; +} + +/* header-split is not supported on non-DQO_RDA yet even if device advertises it */ +bool gve_header_split_supported(const struct gve_priv *priv) +{ + return priv->header_buf_size && priv->queue_format == GVE_DQO_RDA_FORMAT; +} + +int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split) +{ + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; + struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0}; + bool enable_hdr_split; + int err = 0; + + if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) + return 0; + + if (!gve_header_split_supported(priv)) { + dev_err(&priv->pdev->dev, "Header-split not supported\n"); + return -EOPNOTSUPP; + } + + if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED) + enable_hdr_split = true; + else + enable_hdr_split = false; + + if (enable_hdr_split == priv->header_split_enabled) + return 0; + + gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg, + &tx_alloc_cfg, &rx_alloc_cfg); + + rx_alloc_cfg.enable_header_split = enable_hdr_split; + rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split); + + if (netif_running(priv->dev)) + err = gve_adjust_config(priv, &qpls_alloc_cfg, + &tx_alloc_cfg, &rx_alloc_cfg); + return err; +} + static int gve_set_features(struct net_device *netdev, netdev_features_t features) { @@ -2511,6 +2555,8 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) priv->service_task_flags = 0x0; priv->state_flags = 0x0; priv->ethtool_flags = 0x0; + priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE; + priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; gve_set_probe_in_progress(priv); priv->gve_wq = alloc_ordered_workqueue("gve", 0); diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index 8e6aeb5b3ed4..8e8071308aeb 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -199,6 +199,18 @@ static int gve_alloc_page_dqo(struct gve_rx_ring *rx, return 0; } +static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx) +{ + struct device *hdev = &priv->pdev->dev; + int buf_count = rx->dqo.bufq.mask + 1; + + if (rx->dqo.hdr_bufs.data) { + dma_free_coherent(hdev, priv->header_buf_size * buf_count, + rx->dqo.hdr_bufs.data, rx->dqo.hdr_bufs.addr); + rx->dqo.hdr_bufs.data = NULL; + } +} + void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx) { int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); @@ -258,9 +270,24 @@ static void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, kvfree(rx->dqo.buf_states); rx->dqo.buf_states = NULL; + gve_rx_free_hdr_bufs(priv, rx); + netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx); } +static int gve_rx_alloc_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx) +{ + struct device *hdev = &priv->pdev->dev; + int buf_count = rx->dqo.bufq.mask + 1; + + rx->dqo.hdr_bufs.data = dma_alloc_coherent(hdev, priv->header_buf_size * buf_count, + &rx->dqo.hdr_bufs.addr, GFP_KERNEL); + if (!rx->dqo.hdr_bufs.data) + return -ENOMEM; + + return 0; +} + void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx) { int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); @@ -302,6 +329,11 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, if (!rx->dqo.buf_states) return -ENOMEM; + /* Allocate header buffers for header-split */ + if (cfg->enable_header_split) + if (gve_rx_alloc_hdr_bufs(priv, rx)) + goto err; + /* Set up linked list of buffer IDs */ for (i = 0; i < rx->dqo.num_buf_states - 1; i++) rx->dqo.buf_states[i].next = i + 1; @@ -443,6 +475,10 @@ void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx) desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states); desc->buf_addr = cpu_to_le64(buf_state->addr + buf_state->page_info.page_offset); + if (rx->dqo.hdr_bufs.data) + desc->header_buf_addr = + cpu_to_le64(rx->dqo.hdr_bufs.addr + + priv->header_buf_size * bufq->tail); bufq->tail = (bufq->tail + 1) & bufq->mask; complq->num_free_slots--; @@ -458,7 +494,7 @@ void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx) static void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx, struct gve_rx_buf_state_dqo *buf_state) { - const int data_buffer_size = priv->data_buffer_size_dqo; + const u16 data_buffer_size = priv->data_buffer_size_dqo; int pagecount; /* Can't reuse if we only fit one buffer per page */ @@ -645,13 +681,16 @@ static int gve_rx_append_frags(struct napi_struct *napi, */ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, const struct gve_rx_compl_desc_dqo *compl_desc, - int queue_idx) + u32 desc_idx, int queue_idx) { const u16 buffer_id = le16_to_cpu(compl_desc->buf_id); + const bool hbo = compl_desc->header_buffer_overflow; const bool eop = compl_desc->end_of_packet != 0; + const bool hsplit = compl_desc->split_header; struct gve_rx_buf_state_dqo *buf_state; struct gve_priv *priv = rx->gve; u16 buf_len; + u16 hdr_len; if (unlikely(buffer_id >= rx->dqo.num_buf_states)) { net_err_ratelimited("%s: Invalid RX buffer_id=%u\n", @@ -672,12 +711,35 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, } buf_len = compl_desc->packet_len; + hdr_len = compl_desc->header_len; /* Page might have not been used for awhile and was likely last written * by a different thread. */ prefetch(buf_state->page_info.page); + /* Copy the header into the skb in the case of header split */ + if (hsplit) { + int unsplit = 0; + + if (hdr_len && !hbo) { + rx->ctx.skb_head = gve_rx_copy_data(priv->dev, napi, + rx->dqo.hdr_bufs.data + + desc_idx * priv->header_buf_size, + hdr_len); + if (unlikely(!rx->ctx.skb_head)) + goto error; + rx->ctx.skb_tail = rx->ctx.skb_head; + } else { + unsplit = 1; + } + u64_stats_update_begin(&rx->statss); + rx->rx_hsplit_pkt++; + rx->rx_hsplit_unsplit_pkt += unsplit; + rx->rx_hsplit_bytes += hdr_len; + u64_stats_update_end(&rx->statss); + } + /* Sync the portion of dma buffer for CPU to read. */ dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr, buf_state->page_info.page_offset, @@ -820,7 +882,7 @@ int gve_rx_poll_dqo(struct gve_notify_block *block, int budget) /* Do not read data until we own the descriptor */ dma_rmb(); - err = gve_rx_dqo(napi, rx, compl_desc, rx->q_num); + err = gve_rx_dqo(napi, rx, compl_desc, complq->head, rx->q_num); if (err < 0) { gve_rx_free_skb(rx); u64_stats_update_begin(&rx->statss); diff --git a/drivers/net/ethernet/google/gve/gve_utils.c b/drivers/net/ethernet/google/gve/gve_utils.c index 535b1796b91d..2349750075a5 100644 --- a/drivers/net/ethernet/google/gve/gve_utils.c +++ b/drivers/net/ethernet/google/gve/gve_utils.c @@ -64,11 +64,9 @@ void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx) rx->ntfy_id = ntfy_idx; } -struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi, - struct gve_rx_slot_page_info *page_info, u16 len) +struct sk_buff *gve_rx_copy_data(struct net_device *dev, struct napi_struct *napi, + u8 *data, u16 len) { - void *va = page_info->page_address + page_info->page_offset + - page_info->pad; struct sk_buff *skb; skb = napi_alloc_skb(napi, len); @@ -76,12 +74,21 @@ struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi, return NULL; __skb_put(skb, len); - skb_copy_to_linear_data_offset(skb, 0, va, len); + skb_copy_to_linear_data_offset(skb, 0, data, len); skb->protocol = eth_type_trans(skb, dev); return skb; } +struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi, + struct gve_rx_slot_page_info *page_info, u16 len) +{ + void *va = page_info->page_address + page_info->page_offset + + page_info->pad; + + return gve_rx_copy_data(dev, napi, va, len); +} + void gve_dec_pagecnt_bias(struct gve_rx_slot_page_info *page_info) { page_info->pagecnt_bias--; diff --git a/drivers/net/ethernet/google/gve/gve_utils.h b/drivers/net/ethernet/google/gve/gve_utils.h index 277921a629f7..bf2e9a0adb36 100644 --- a/drivers/net/ethernet/google/gve/gve_utils.h +++ b/drivers/net/ethernet/google/gve/gve_utils.h @@ -19,6 +19,9 @@ bool gve_rx_was_added_to_block(struct gve_priv *priv, int queue_idx); void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx); void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx); +struct sk_buff *gve_rx_copy_data(struct net_device *dev, struct napi_struct *napi, + u8 *data, u16 len); + struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi, struct gve_rx_slot_page_info *page_info, u16 len); diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c index 8a1027ad340d..d4293f76d69d 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.c +++ b/drivers/net/ethernet/hisilicon/hns/hnae.c @@ -12,7 +12,9 @@ #define cls_to_ae_dev(dev) container_of(dev, struct hnae_ae_dev, cls_dev) -static struct class *hnae_class; +static const struct class hnae_class = { + .name = "hnae", +}; static void hnae_list_add(spinlock_t *lock, struct list_head *node, struct list_head *head) @@ -111,7 +113,7 @@ static struct hnae_ae_dev *find_ae(const struct fwnode_handle *fwnode) WARN_ON(!fwnode); - dev = class_find_device(hnae_class, NULL, fwnode, __ae_match); + dev = class_find_device(&hnae_class, NULL, fwnode, __ae_match); return dev ? cls_to_ae_dev(dev) : NULL; } @@ -415,7 +417,7 @@ int hnae_ae_register(struct hnae_ae_dev *hdev, struct module *owner) hdev->owner = owner; hdev->id = (int)atomic_inc_return(&id); hdev->cls_dev.parent = hdev->dev; - hdev->cls_dev.class = hnae_class; + hdev->cls_dev.class = &hnae_class; hdev->cls_dev.release = hnae_release; (void)dev_set_name(&hdev->cls_dev, "hnae%d", hdev->id); ret = device_register(&hdev->cls_dev); @@ -448,13 +450,12 @@ EXPORT_SYMBOL(hnae_ae_unregister); static int __init hnae_init(void) { - hnae_class = class_create("hnae"); - return PTR_ERR_OR_ZERO(hnae_class); + return class_register(&hnae_class); } static void __exit hnae_exit(void) { - class_destroy(hnae_class); + class_unregister(&hnae_class); } subsys_initcall(hnae_init); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index f1695c889d3a..19668a8d22f7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -2473,9 +2473,9 @@ static netdev_features_t hns3_features_check(struct sk_buff *skb, return features; if (skb->encapsulation) - len = skb_inner_transport_header(skb) - skb->data; + len = skb_inner_transport_offset(skb); else - len = skb_transport_header(skb) - skb->data; + len = skb_transport_offset(skb); /* Assume L4 is 60 byte as TCP is the only protocol with a * a flexible value, and it's max len is 60 bytes. diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index 767358b60507..639fbb12bd35 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -372,6 +372,7 @@ config IGC config IGC_LEDS def_bool LEDS_TRIGGER_NETDEV depends on IGC && LEDS_CLASS + depends on LEDS_CLASS=y || IGC=m help Optional support for controlling the NIC LED's with the netdev LED trigger. diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 01f0f12035ca..3fcb8daaa243 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -171,8 +171,8 @@ static int debug = 3; static int eeprom_bad_csum_allow = 0; static int use_io = 0; module_param(debug, int, 0); -module_param(eeprom_bad_csum_allow, int, 0); -module_param(use_io, int, 0); +module_param(eeprom_bad_csum_allow, int, 0444); +module_param(use_io, int, 0444); MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); MODULE_PARM_DESC(eeprom_bad_csum_allow, "Allow bad eeprom checksums"); MODULE_PARM_DESC(use_io, "Force use of i/o access mode"); diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index a2788fd5f8bb..19e450a5bd31 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -2559,7 +2559,7 @@ void e1000_copy_rx_addrs_to_phy_ich8lan(struct e1000_hw *hw) hw->phy.ops.write_reg_page(hw, BM_RAR_H(i), (u16)(mac_reg & 0xFFFF)); hw->phy.ops.write_reg_page(hw, BM_RAR_CTRL(i), - FIELD_GET(E1000_RAH_AV, mac_reg)); + (u16)((mac_reg & E1000_RAH_AV) >> 16)); } e1000_disable_phy_wakeup_reg_access_bm(hw, &phy_reg); diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index af5d9d97a0d6..cc8c531ec3df 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6688,14 +6688,14 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime) if (adapter->hw.phy.type == e1000_phy_igp_3) { e1000e_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw); } else if (hw->mac.type >= e1000_pch_lpt) { - if (wufc && !(wufc & (E1000_WUFC_EX | E1000_WUFC_MC | E1000_WUFC_BC))) + if (wufc && !(wufc & (E1000_WUFC_EX | E1000_WUFC_MC | E1000_WUFC_BC))) { /* ULP does not support wake from unicast, multicast * or broadcast. */ retval = e1000_enable_ulp_lpt_lp(hw, !runtime); - - if (retval) - return retval; + if (retval) + return retval; + } } /* Ensure that the appropriate bits are set in LPI_CTRL diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index f12092cdb1f0..f86578857e8a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -13208,12 +13208,12 @@ static netdev_features_t i40e_features_check(struct sk_buff *skb, features &= ~NETIF_F_GSO_MASK; /* MACLEN can support at most 63 words */ - len = skb_network_header(skb) - skb->data; + len = skb_network_offset(skb); if (len & ~(63 * 2)) goto out_err; /* IPLEN and EIPLEN can support at most 127 dwords */ - len = skb_transport_header(skb) - skb_network_header(skb); + len = skb_network_header_len(skb); if (len & ~(127 * 4)) goto out_err; @@ -13523,9 +13523,9 @@ int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair) return err; i40e_queue_pair_disable_irq(vsi, queue_pair); + i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */); err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */); i40e_clean_rx_ring(vsi->rx_rings[queue_pair]); - i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */); i40e_queue_pair_clean_rings(vsi, queue_pair); i40e_queue_pair_reset_stats(vsi, queue_pair); diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index af4269330581..ce1f11b8ad65 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -567,8 +567,7 @@ static inline bool i40e_is_fw_ver_lt(struct i40e_hw *hw, u16 maj, u16 min) **/ static inline bool i40e_is_fw_ver_eq(struct i40e_hw *hw, u16 maj, u16 min) { - return (hw->aq.fw_maj_ver > maj || - (hw->aq.fw_maj_ver == maj && hw->aq.fw_min_ver == min)); + return (hw->aq.fw_maj_ver == maj && hw->aq.fw_min_ver == min); } #endif /* _I40E_PROTOTYPE_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 335fd13e86f7..aefec6bd3b67 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -4423,12 +4423,12 @@ static netdev_features_t iavf_features_check(struct sk_buff *skb, features &= ~NETIF_F_GSO_MASK; /* MACLEN can support at most 63 words */ - len = skb_network_header(skb) - skb->data; + len = skb_network_offset(skb); if (len & ~(63 * 2)) goto out_err; /* IPLEN and EIPLEN can support at most 127 dwords */ - len = skb_transport_header(skb) - skb_network_header(skb); + len = skb_network_header_len(skb); if (len & ~(127 * 4)) goto out_err; diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index ad953208f582..d2fd315556a3 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -190,15 +190,13 @@ static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx) q_vector = vsi->q_vectors[v_idx]; ice_for_each_tx_ring(tx_ring, q_vector->tx) { - if (vsi->netdev) - netif_queue_set_napi(vsi->netdev, tx_ring->q_index, - NETDEV_QUEUE_TYPE_TX, NULL); + ice_queue_set_napi(vsi, tx_ring->q_index, NETDEV_QUEUE_TYPE_TX, + NULL); tx_ring->q_vector = NULL; } ice_for_each_rx_ring(rx_ring, q_vector->rx) { - if (vsi->netdev) - netif_queue_set_napi(vsi->netdev, rx_ring->q_index, - NETDEV_QUEUE_TYPE_RX, NULL); + ice_queue_set_napi(vsi, rx_ring->q_index, NETDEV_QUEUE_TYPE_RX, + NULL); rx_ring->q_vector = NULL; } diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 9266f25a9978..4d8111aeb0ff 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1002,9 +1002,9 @@ static void ice_get_itr_intrl_gran(struct ice_hw *hw) */ int ice_init_hw(struct ice_hw *hw) { - struct ice_aqc_get_phy_caps_data *pcaps; + struct ice_aqc_get_phy_caps_data *pcaps __free(kfree); + void *mac_buf __free(kfree); u16 mac_buf_len; - void *mac_buf; int status; /* Set MAC type based on DeviceID */ @@ -1082,7 +1082,7 @@ int ice_init_hw(struct ice_hw *hw) if (status) goto err_unroll_sched; - pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), GFP_KERNEL); + pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL); if (!pcaps) { status = -ENOMEM; goto err_unroll_sched; @@ -1092,7 +1092,6 @@ int ice_init_hw(struct ice_hw *hw) status = ice_aq_get_phy_caps(hw->port_info, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps, NULL); - devm_kfree(ice_hw_to_dev(hw), pcaps); if (status) dev_warn(ice_hw_to_dev(hw), "Get PHY capabilities failed status = %d, continuing anyway\n", status); @@ -1119,18 +1118,15 @@ int ice_init_hw(struct ice_hw *hw) /* Get MAC information */ /* A single port can report up to two (LAN and WoL) addresses */ - mac_buf = devm_kcalloc(ice_hw_to_dev(hw), 2, - sizeof(struct ice_aqc_manage_mac_read_resp), - GFP_KERNEL); - mac_buf_len = 2 * sizeof(struct ice_aqc_manage_mac_read_resp); - + mac_buf = kcalloc(2, sizeof(struct ice_aqc_manage_mac_read_resp), + GFP_KERNEL); if (!mac_buf) { status = -ENOMEM; goto err_unroll_fltr_mgmt_struct; } + mac_buf_len = 2 * sizeof(struct ice_aqc_manage_mac_read_resp); status = ice_aq_manage_mac_read(hw, mac_buf, mac_buf_len, NULL); - devm_kfree(ice_hw_to_dev(hw), mac_buf); if (status) goto err_unroll_fltr_mgmt_struct; @@ -1399,9 +1395,8 @@ static const struct ice_ctx_ele ice_rlan_ctx_info[] = { * it to HW register space and enables the hardware to prefetch descriptors * instead of only fetching them on demand */ -int -ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx, - u32 rxq_index) +int ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx, + u32 rxq_index) { u8 ctx_buf[ICE_RXQ_CTX_SZ] = { 0 }; @@ -3277,19 +3272,14 @@ int ice_update_link_info(struct ice_port_info *pi) return status; if (li->link_info & ICE_AQ_MEDIA_AVAILABLE) { - struct ice_aqc_get_phy_caps_data *pcaps; - struct ice_hw *hw; + struct ice_aqc_get_phy_caps_data *pcaps __free(kfree); - hw = pi->hw; - pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), - GFP_KERNEL); + pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL); if (!pcaps) return -ENOMEM; status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps, NULL); - - devm_kfree(ice_hw_to_dev(hw), pcaps); } return status; @@ -3430,8 +3420,8 @@ ice_cfg_phy_fc(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg, int ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update) { + struct ice_aqc_get_phy_caps_data *pcaps __free(kfree); struct ice_aqc_set_phy_cfg_data cfg = { 0 }; - struct ice_aqc_get_phy_caps_data *pcaps; struct ice_hw *hw; int status; @@ -3441,7 +3431,7 @@ ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update) *aq_failures = 0; hw = pi->hw; - pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), GFP_KERNEL); + pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL); if (!pcaps) return -ENOMEM; @@ -3493,7 +3483,6 @@ ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update) } out: - devm_kfree(ice_hw_to_dev(hw), pcaps); return status; } @@ -3572,7 +3561,7 @@ int ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg, enum ice_fec_mode fec) { - struct ice_aqc_get_phy_caps_data *pcaps; + struct ice_aqc_get_phy_caps_data *pcaps __free(kfree); struct ice_hw *hw; int status; @@ -3641,8 +3630,6 @@ ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg, } out: - kfree(pcaps); - return status; } @@ -4362,13 +4349,13 @@ ice_aq_add_rdma_qsets(struct ice_hw *hw, u8 num_qset_grps, /* End of FW Admin Queue command wrappers */ /** - * ice_write_byte - write a byte to a packed context structure - * @src_ctx: the context structure to read from - * @dest_ctx: the context to be written to - * @ce_info: a description of the struct to be filled + * ice_pack_ctx_byte - write a byte to a packed context structure + * @src_ctx: unpacked source context structure + * @dest_ctx: packed destination context data + * @ce_info: context element description */ -static void -ice_write_byte(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info) +static void ice_pack_ctx_byte(u8 *src_ctx, u8 *dest_ctx, + const struct ice_ctx_ele *ce_info) { u8 src_byte, dest_byte, mask; u8 *from, *dest; @@ -4379,14 +4366,11 @@ ice_write_byte(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info) /* prepare the bits and mask */ shift_width = ce_info->lsb % 8; - mask = (u8)(BIT(ce_info->width) - 1); + mask = GENMASK(ce_info->width - 1 + shift_width, shift_width); src_byte = *from; - src_byte &= mask; - - /* shift to correct alignment */ - mask <<= shift_width; src_byte <<= shift_width; + src_byte &= mask; /* get the current bits from the target bit string */ dest = dest_ctx + (ce_info->lsb / 8); @@ -4401,13 +4385,13 @@ ice_write_byte(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info) } /** - * ice_write_word - write a word to a packed context structure - * @src_ctx: the context structure to read from - * @dest_ctx: the context to be written to - * @ce_info: a description of the struct to be filled + * ice_pack_ctx_word - write a word to a packed context structure + * @src_ctx: unpacked source context structure + * @dest_ctx: packed destination context data + * @ce_info: context element description */ -static void -ice_write_word(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info) +static void ice_pack_ctx_word(u8 *src_ctx, u8 *dest_ctx, + const struct ice_ctx_ele *ce_info) { u16 src_word, mask; __le16 dest_word; @@ -4419,17 +4403,14 @@ ice_write_word(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info) /* prepare the bits and mask */ shift_width = ce_info->lsb % 8; - mask = BIT(ce_info->width) - 1; + mask = GENMASK(ce_info->width - 1 + shift_width, shift_width); /* don't swizzle the bits until after the mask because the mask bits * will be in a different bit position on big endian machines */ src_word = *(u16 *)from; - src_word &= mask; - - /* shift to correct alignment */ - mask <<= shift_width; src_word <<= shift_width; + src_word &= mask; /* get the current bits from the target bit string */ dest = dest_ctx + (ce_info->lsb / 8); @@ -4444,13 +4425,13 @@ ice_write_word(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info) } /** - * ice_write_dword - write a dword to a packed context structure - * @src_ctx: the context structure to read from - * @dest_ctx: the context to be written to - * @ce_info: a description of the struct to be filled + * ice_pack_ctx_dword - write a dword to a packed context structure + * @src_ctx: unpacked source context structure + * @dest_ctx: packed destination context data + * @ce_info: context element description */ -static void -ice_write_dword(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info) +static void ice_pack_ctx_dword(u8 *src_ctx, u8 *dest_ctx, + const struct ice_ctx_ele *ce_info) { u32 src_dword, mask; __le32 dest_dword; @@ -4462,25 +4443,14 @@ ice_write_dword(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info) /* prepare the bits and mask */ shift_width = ce_info->lsb % 8; - - /* if the field width is exactly 32 on an x86 machine, then the shift - * operation will not work because the SHL instructions count is masked - * to 5 bits so the shift will do nothing - */ - if (ce_info->width < 32) - mask = BIT(ce_info->width) - 1; - else - mask = (u32)~0; + mask = GENMASK(ce_info->width - 1 + shift_width, shift_width); /* don't swizzle the bits until after the mask because the mask bits * will be in a different bit position on big endian machines */ src_dword = *(u32 *)from; - src_dword &= mask; - - /* shift to correct alignment */ - mask <<= shift_width; src_dword <<= shift_width; + src_dword &= mask; /* get the current bits from the target bit string */ dest = dest_ctx + (ce_info->lsb / 8); @@ -4495,13 +4465,13 @@ ice_write_dword(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info) } /** - * ice_write_qword - write a qword to a packed context structure - * @src_ctx: the context structure to read from - * @dest_ctx: the context to be written to - * @ce_info: a description of the struct to be filled + * ice_pack_ctx_qword - write a qword to a packed context structure + * @src_ctx: unpacked source context structure + * @dest_ctx: packed destination context data + * @ce_info: context element description */ -static void -ice_write_qword(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info) +static void ice_pack_ctx_qword(u8 *src_ctx, u8 *dest_ctx, + const struct ice_ctx_ele *ce_info) { u64 src_qword, mask; __le64 dest_qword; @@ -4513,25 +4483,14 @@ ice_write_qword(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info) /* prepare the bits and mask */ shift_width = ce_info->lsb % 8; - - /* if the field width is exactly 64 on an x86 machine, then the shift - * operation will not work because the SHL instructions count is masked - * to 6 bits so the shift will do nothing - */ - if (ce_info->width < 64) - mask = BIT_ULL(ce_info->width) - 1; - else - mask = (u64)~0; + mask = GENMASK_ULL(ce_info->width - 1 + shift_width, shift_width); /* don't swizzle the bits until after the mask because the mask bits * will be in a different bit position on big endian machines */ src_qword = *(u64 *)from; - src_qword &= mask; - - /* shift to correct alignment */ - mask <<= shift_width; src_qword <<= shift_width; + src_qword &= mask; /* get the current bits from the target bit string */ dest = dest_ctx + (ce_info->lsb / 8); @@ -4550,11 +4509,10 @@ ice_write_qword(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info) * @hw: pointer to the hardware structure * @src_ctx: pointer to a generic non-packed context structure * @dest_ctx: pointer to memory for the packed structure - * @ce_info: a description of the structure to be transformed + * @ce_info: List of Rx context elements */ -int -ice_set_ctx(struct ice_hw *hw, u8 *src_ctx, u8 *dest_ctx, - const struct ice_ctx_ele *ce_info) +int ice_set_ctx(struct ice_hw *hw, u8 *src_ctx, u8 *dest_ctx, + const struct ice_ctx_ele *ce_info) { int f; @@ -4570,16 +4528,16 @@ ice_set_ctx(struct ice_hw *hw, u8 *src_ctx, u8 *dest_ctx, } switch (ce_info[f].size_of) { case sizeof(u8): - ice_write_byte(src_ctx, dest_ctx, &ce_info[f]); + ice_pack_ctx_byte(src_ctx, dest_ctx, &ce_info[f]); break; case sizeof(u16): - ice_write_word(src_ctx, dest_ctx, &ce_info[f]); + ice_pack_ctx_word(src_ctx, dest_ctx, &ce_info[f]); break; case sizeof(u32): - ice_write_dword(src_ctx, dest_ctx, &ce_info[f]); + ice_pack_ctx_dword(src_ctx, dest_ctx, &ce_info[f]); break; case sizeof(u64): - ice_write_qword(src_ctx, dest_ctx, &ce_info[f]); + ice_pack_ctx_qword(src_ctx, dest_ctx, &ce_info[f]); break; default: return -EINVAL; diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 32fd10de620c..ffb22c7ce28b 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -53,9 +53,8 @@ int ice_get_caps(struct ice_hw *hw); void ice_set_safe_mode_caps(struct ice_hw *hw); -int -ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx, - u32 rxq_index); +int ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx, + u32 rxq_index); int ice_aq_get_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *get_params); @@ -72,9 +71,8 @@ bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq); int ice_aq_q_shutdown(struct ice_hw *hw, bool unloading); void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode); extern const struct ice_ctx_ele ice_tlan_ctx_info[]; -int -ice_set_ctx(struct ice_hw *hw, u8 *src_ctx, u8 *dest_ctx, - const struct ice_ctx_ele *ce_info); +int ice_set_ctx(struct ice_hw *hw, u8 *src_ctx, u8 *dest_ctx, + const struct ice_ctx_ele *ce_info); extern struct mutex ice_global_cfg_lock_sw; diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c index c0256564e998..e92be6f130a3 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.c +++ b/drivers/net/ethernet/intel/ice/ice_dpll.c @@ -31,6 +31,26 @@ static const char * const pin_type_name[] = { }; /** + * ice_dpll_is_reset - check if reset is in progress + * @pf: private board structure + * @extack: error reporting + * + * If reset is in progress, fill extack with error. + * + * Return: + * * false - no reset in progress + * * true - reset in progress + */ +static bool ice_dpll_is_reset(struct ice_pf *pf, struct netlink_ext_ack *extack) +{ + if (ice_is_reset_in_progress(pf->state)) { + NL_SET_ERR_MSG(extack, "PF reset in progress"); + return true; + } + return false; +} + +/** * ice_dpll_pin_freq_set - set pin's frequency * @pf: private board structure * @pin: pointer to a pin @@ -109,6 +129,9 @@ ice_dpll_frequency_set(const struct dpll_pin *pin, void *pin_priv, struct ice_pf *pf = d->pf; int ret; + if (ice_dpll_is_reset(pf, extack)) + return -EBUSY; + mutex_lock(&pf->dplls.lock); ret = ice_dpll_pin_freq_set(pf, p, pin_type, frequency, extack); mutex_unlock(&pf->dplls.lock); @@ -254,6 +277,7 @@ ice_dpll_output_frequency_get(const struct dpll_pin *pin, void *pin_priv, * ice_dpll_pin_enable - enable a pin on dplls * @hw: board private hw structure * @pin: pointer to a pin + * @dpll_idx: dpll index to connect to output pin * @pin_type: type of pin being enabled * @extack: error reporting * @@ -266,7 +290,7 @@ ice_dpll_output_frequency_get(const struct dpll_pin *pin, void *pin_priv, */ static int ice_dpll_pin_enable(struct ice_hw *hw, struct ice_dpll_pin *pin, - enum ice_dpll_pin_type pin_type, + u8 dpll_idx, enum ice_dpll_pin_type pin_type, struct netlink_ext_ack *extack) { u8 flags = 0; @@ -280,10 +304,12 @@ ice_dpll_pin_enable(struct ice_hw *hw, struct ice_dpll_pin *pin, ret = ice_aq_set_input_pin_cfg(hw, pin->idx, 0, flags, 0, 0); break; case ICE_DPLL_PIN_TYPE_OUTPUT: + flags = ICE_AQC_SET_CGU_OUT_CFG_UPDATE_SRC_SEL; if (pin->flags[0] & ICE_AQC_GET_CGU_OUT_CFG_ESYNC_EN) flags |= ICE_AQC_SET_CGU_OUT_CFG_ESYNC_EN; flags |= ICE_AQC_SET_CGU_OUT_CFG_OUT_EN; - ret = ice_aq_set_output_pin_cfg(hw, pin->idx, flags, 0, 0, 0); + ret = ice_aq_set_output_pin_cfg(hw, pin->idx, flags, dpll_idx, + 0, 0); break; default: return -EINVAL; @@ -370,7 +396,7 @@ ice_dpll_pin_state_update(struct ice_pf *pf, struct ice_dpll_pin *pin, case ICE_DPLL_PIN_TYPE_INPUT: ret = ice_aq_get_input_pin_cfg(&pf->hw, pin->idx, NULL, NULL, NULL, &pin->flags[0], - &pin->freq, NULL); + &pin->freq, &pin->phase_adjust); if (ret) goto err; if (ICE_AQC_GET_CGU_IN_CFG_FLG2_INPUT_EN & pin->flags[0]) { @@ -398,14 +424,27 @@ ice_dpll_pin_state_update(struct ice_pf *pf, struct ice_dpll_pin *pin, break; case ICE_DPLL_PIN_TYPE_OUTPUT: ret = ice_aq_get_output_pin_cfg(&pf->hw, pin->idx, - &pin->flags[0], NULL, + &pin->flags[0], &parent, &pin->freq, NULL); if (ret) goto err; - if (ICE_AQC_SET_CGU_OUT_CFG_OUT_EN & pin->flags[0]) - pin->state[0] = DPLL_PIN_STATE_CONNECTED; - else - pin->state[0] = DPLL_PIN_STATE_DISCONNECTED; + + parent &= ICE_AQC_GET_CGU_OUT_CFG_DPLL_SRC_SEL; + if (ICE_AQC_SET_CGU_OUT_CFG_OUT_EN & pin->flags[0]) { + pin->state[pf->dplls.eec.dpll_idx] = + parent == pf->dplls.eec.dpll_idx ? + DPLL_PIN_STATE_CONNECTED : + DPLL_PIN_STATE_DISCONNECTED; + pin->state[pf->dplls.pps.dpll_idx] = + parent == pf->dplls.pps.dpll_idx ? + DPLL_PIN_STATE_CONNECTED : + DPLL_PIN_STATE_DISCONNECTED; + } else { + pin->state[pf->dplls.eec.dpll_idx] = + DPLL_PIN_STATE_DISCONNECTED; + pin->state[pf->dplls.pps.dpll_idx] = + DPLL_PIN_STATE_DISCONNECTED; + } break; case ICE_DPLL_PIN_TYPE_RCLK_INPUT: for (parent = 0; parent < pf->dplls.rclk.num_parents; @@ -570,9 +609,13 @@ ice_dpll_pin_state_set(const struct dpll_pin *pin, void *pin_priv, struct ice_pf *pf = d->pf; int ret; + if (ice_dpll_is_reset(pf, extack)) + return -EBUSY; + mutex_lock(&pf->dplls.lock); if (enable) - ret = ice_dpll_pin_enable(&pf->hw, p, pin_type, extack); + ret = ice_dpll_pin_enable(&pf->hw, p, d->dpll_idx, pin_type, + extack); else ret = ice_dpll_pin_disable(&pf->hw, p, pin_type, extack); if (!ret) @@ -605,6 +648,11 @@ ice_dpll_output_state_set(const struct dpll_pin *pin, void *pin_priv, struct netlink_ext_ack *extack) { bool enable = state == DPLL_PIN_STATE_CONNECTED; + struct ice_dpll_pin *p = pin_priv; + struct ice_dpll *d = dpll_priv; + + if (!enable && p->state[d->dpll_idx] == DPLL_PIN_STATE_DISCONNECTED) + return 0; return ice_dpll_pin_state_set(pin, pin_priv, dpll, dpll_priv, enable, extack, ICE_DPLL_PIN_TYPE_OUTPUT); @@ -667,14 +715,16 @@ ice_dpll_pin_state_get(const struct dpll_pin *pin, void *pin_priv, struct ice_pf *pf = d->pf; int ret; + if (ice_dpll_is_reset(pf, extack)) + return -EBUSY; + mutex_lock(&pf->dplls.lock); ret = ice_dpll_pin_state_update(pf, p, pin_type, extack); if (ret) goto unlock; - if (pin_type == ICE_DPLL_PIN_TYPE_INPUT) + if (pin_type == ICE_DPLL_PIN_TYPE_INPUT || + pin_type == ICE_DPLL_PIN_TYPE_OUTPUT) *state = p->state[d->dpll_idx]; - else if (pin_type == ICE_DPLL_PIN_TYPE_OUTPUT) - *state = p->state[0]; ret = 0; unlock: mutex_unlock(&pf->dplls.lock); @@ -792,6 +842,9 @@ ice_dpll_input_prio_set(const struct dpll_pin *pin, void *pin_priv, struct ice_pf *pf = d->pf; int ret; + if (ice_dpll_is_reset(pf, extack)) + return -EBUSY; + mutex_lock(&pf->dplls.lock); ret = ice_dpll_hw_input_prio_set(pf, d, p, prio, extack); mutex_unlock(&pf->dplls.lock); @@ -912,6 +965,9 @@ ice_dpll_pin_phase_adjust_set(const struct dpll_pin *pin, void *pin_priv, u8 flag, flags_en = 0; int ret; + if (ice_dpll_is_reset(pf, extack)) + return -EBUSY; + mutex_lock(&pf->dplls.lock); switch (type) { case ICE_DPLL_PIN_TYPE_INPUT: @@ -1071,6 +1127,9 @@ ice_dpll_rclk_state_on_pin_set(const struct dpll_pin *pin, void *pin_priv, int ret = -EINVAL; u32 hw_idx; + if (ice_dpll_is_reset(pf, extack)) + return -EBUSY; + mutex_lock(&pf->dplls.lock); hw_idx = parent->idx - pf->dplls.base_rclk_idx; if (hw_idx >= pf->dplls.num_inputs) @@ -1125,6 +1184,9 @@ ice_dpll_rclk_state_on_pin_get(const struct dpll_pin *pin, void *pin_priv, int ret = -EINVAL; u32 hw_idx; + if (ice_dpll_is_reset(pf, extack)) + return -EBUSY; + mutex_lock(&pf->dplls.lock); hw_idx = parent->idx - pf->dplls.base_rclk_idx; if (hw_idx >= pf->dplls.num_inputs) @@ -1307,8 +1369,10 @@ static void ice_dpll_periodic_work(struct kthread_work *work) struct ice_pf *pf = container_of(d, struct ice_pf, dplls); struct ice_dpll *de = &pf->dplls.eec; struct ice_dpll *dp = &pf->dplls.pps; - int ret; + int ret = 0; + if (ice_is_reset_in_progress(pf->state)) + goto resched; mutex_lock(&pf->dplls.lock); ret = ice_dpll_update_state(pf, de, false); if (!ret) @@ -1328,6 +1392,7 @@ static void ice_dpll_periodic_work(struct kthread_work *work) ice_dpll_notify_changes(de); ice_dpll_notify_changes(dp); +resched: /* Run twice a second or reschedule if update failed */ kthread_queue_delayed_work(d->kworker, &d->work, ret ? msecs_to_jiffies(10) : @@ -1534,7 +1599,7 @@ static void ice_dpll_deinit_rclk_pin(struct ice_pf *pf) } if (WARN_ON_ONCE(!vsi || !vsi->netdev)) return; - netdev_dpll_pin_clear(vsi->netdev); + dpll_netdev_pin_clear(vsi->netdev); dpll_pin_put(rclk->pin); } @@ -1578,7 +1643,7 @@ ice_dpll_init_rclk_pins(struct ice_pf *pf, struct ice_dpll_pin *pin, } if (WARN_ON((!vsi || !vsi->netdev))) return -EINVAL; - netdev_dpll_pin_set(vsi->netdev, pf->dplls.rclk.pin); + dpll_netdev_pin_set(vsi->netdev, pf->dplls.rclk.pin); return 0; @@ -2057,6 +2122,7 @@ void ice_dpll_init(struct ice_pf *pf) struct ice_dplls *d = &pf->dplls; int err = 0; + mutex_init(&d->lock); err = ice_dpll_init_info(pf, cgu); if (err) goto err_exit; @@ -2069,7 +2135,6 @@ void ice_dpll_init(struct ice_pf *pf) err = ice_dpll_init_pins(pf, cgu); if (err) goto deinit_pps; - mutex_init(&d->lock); if (cgu) { err = ice_dpll_init_worker(pf); if (err) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 3cc364a4d682..bb912155676e 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -802,7 +802,7 @@ static int ice_lbtest_create_frame(struct ice_pf *pf, u8 **ret_data, u16 size) if (!pf) return -EINVAL; - data = devm_kzalloc(ice_pf_to_dev(pf), size, GFP_KERNEL); + data = kzalloc(size, GFP_KERNEL); if (!data) return -ENOMEM; @@ -945,11 +945,9 @@ static u64 ice_loopback_test(struct net_device *netdev) int num_frames, valid_frames; struct ice_tx_ring *tx_ring; struct ice_rx_ring *rx_ring; - struct device *dev; - u8 *tx_frame; + u8 *tx_frame __free(kfree); int i; - dev = ice_pf_to_dev(pf); netdev_info(netdev, "loopback test\n"); test_vsi = ice_lb_vsi_setup(pf, pf->hw.port_info); @@ -994,7 +992,7 @@ static u64 ice_loopback_test(struct net_device *netdev) for (i = 0; i < num_frames; i++) { if (ice_diag_send(tx_ring, tx_frame, ICE_LB_FRAME_SIZE)) { ret = 8; - goto lbtest_free_frame; + goto remove_mac_filters; } } @@ -1004,8 +1002,6 @@ static u64 ice_loopback_test(struct net_device *netdev) else if (valid_frames != num_frames) ret = 10; -lbtest_free_frame: - devm_kfree(dev, tx_frame); remove_mac_filters: if (ice_fltr_remove_mac(test_vsi, broadcast, ICE_FWD_TO_VSI)) netdev_err(netdev, "Could not remove MAC filter for the test VSI\n"); diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 60e0d824195e..71f061667980 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2297,7 +2297,7 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params) ice_vsi_map_rings_to_vectors(vsi); /* Associate q_vector rings to napi */ - ice_vsi_set_napi_queues(vsi, true); + ice_vsi_set_napi_queues(vsi); vsi->stat_offsets_loaded = false; @@ -2720,74 +2720,19 @@ void ice_dis_vsi(struct ice_vsi *vsi, bool locked) } /** - * ice_vsi_dis_irq - Mask off queue interrupt generation on the VSI - * @vsi: the VSI being un-configured - */ -void ice_vsi_dis_irq(struct ice_vsi *vsi) -{ - struct ice_pf *pf = vsi->back; - struct ice_hw *hw = &pf->hw; - u32 val; - int i; - - /* disable interrupt causation from each queue */ - if (vsi->tx_rings) { - ice_for_each_txq(vsi, i) { - if (vsi->tx_rings[i]) { - u16 reg; - - reg = vsi->tx_rings[i]->reg_idx; - val = rd32(hw, QINT_TQCTL(reg)); - val &= ~QINT_TQCTL_CAUSE_ENA_M; - wr32(hw, QINT_TQCTL(reg), val); - } - } - } - - if (vsi->rx_rings) { - ice_for_each_rxq(vsi, i) { - if (vsi->rx_rings[i]) { - u16 reg; - - reg = vsi->rx_rings[i]->reg_idx; - val = rd32(hw, QINT_RQCTL(reg)); - val &= ~QINT_RQCTL_CAUSE_ENA_M; - wr32(hw, QINT_RQCTL(reg), val); - } - } - } - - /* disable each interrupt */ - ice_for_each_q_vector(vsi, i) { - if (!vsi->q_vectors[i]) - continue; - wr32(hw, GLINT_DYN_CTL(vsi->q_vectors[i]->reg_idx), 0); - } - - ice_flush(hw); - - /* don't call synchronize_irq() for VF's from the host */ - if (vsi->type == ICE_VSI_VF) - return; - - ice_for_each_q_vector(vsi, i) - synchronize_irq(vsi->q_vectors[i]->irq.virq); -} - -/** - * ice_queue_set_napi - Set the napi instance for the queue + * __ice_queue_set_napi - Set the napi instance for the queue * @dev: device to which NAPI and queue belong * @queue_index: Index of queue * @type: queue type as RX or TX * @napi: NAPI context * @locked: is the rtnl_lock already held * - * Set the napi instance for the queue + * Set the napi instance for the queue. Caller indicates the lock status. */ static void -ice_queue_set_napi(struct net_device *dev, unsigned int queue_index, - enum netdev_queue_type type, struct napi_struct *napi, - bool locked) +__ice_queue_set_napi(struct net_device *dev, unsigned int queue_index, + enum netdev_queue_type type, struct napi_struct *napi, + bool locked) { if (!locked) rtnl_lock(); @@ -2797,26 +2742,79 @@ ice_queue_set_napi(struct net_device *dev, unsigned int queue_index, } /** - * ice_q_vector_set_napi_queues - Map queue[s] associated with the napi + * ice_queue_set_napi - Set the napi instance for the queue + * @vsi: VSI being configured + * @queue_index: Index of queue + * @type: queue type as RX or TX + * @napi: NAPI context + * + * Set the napi instance for the queue. The rtnl lock state is derived from the + * execution path. + */ +void +ice_queue_set_napi(struct ice_vsi *vsi, unsigned int queue_index, + enum netdev_queue_type type, struct napi_struct *napi) +{ + struct ice_pf *pf = vsi->back; + + if (!vsi->netdev) + return; + + if (current_work() == &pf->serv_task || + test_bit(ICE_PREPARED_FOR_RESET, pf->state) || + test_bit(ICE_DOWN, pf->state) || + test_bit(ICE_SUSPENDED, pf->state)) + __ice_queue_set_napi(vsi->netdev, queue_index, type, napi, + false); + else + __ice_queue_set_napi(vsi->netdev, queue_index, type, napi, + true); +} + +/** + * __ice_q_vector_set_napi_queues - Map queue[s] associated with the napi * @q_vector: q_vector pointer * @locked: is the rtnl_lock already held * + * Associate the q_vector napi with all the queue[s] on the vector. + * Caller indicates the lock status. + */ +void __ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked) +{ + struct ice_rx_ring *rx_ring; + struct ice_tx_ring *tx_ring; + + ice_for_each_rx_ring(rx_ring, q_vector->rx) + __ice_queue_set_napi(q_vector->vsi->netdev, rx_ring->q_index, + NETDEV_QUEUE_TYPE_RX, &q_vector->napi, + locked); + + ice_for_each_tx_ring(tx_ring, q_vector->tx) + __ice_queue_set_napi(q_vector->vsi->netdev, tx_ring->q_index, + NETDEV_QUEUE_TYPE_TX, &q_vector->napi, + locked); + /* Also set the interrupt number for the NAPI */ + netif_napi_set_irq(&q_vector->napi, q_vector->irq.virq); +} + +/** + * ice_q_vector_set_napi_queues - Map queue[s] associated with the napi + * @q_vector: q_vector pointer + * * Associate the q_vector napi with all the queue[s] on the vector */ -void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked) +void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector) { struct ice_rx_ring *rx_ring; struct ice_tx_ring *tx_ring; ice_for_each_rx_ring(rx_ring, q_vector->rx) - ice_queue_set_napi(q_vector->vsi->netdev, rx_ring->q_index, - NETDEV_QUEUE_TYPE_RX, &q_vector->napi, - locked); + ice_queue_set_napi(q_vector->vsi, rx_ring->q_index, + NETDEV_QUEUE_TYPE_RX, &q_vector->napi); ice_for_each_tx_ring(tx_ring, q_vector->tx) - ice_queue_set_napi(q_vector->vsi->netdev, tx_ring->q_index, - NETDEV_QUEUE_TYPE_TX, &q_vector->napi, - locked); + ice_queue_set_napi(q_vector->vsi, tx_ring->q_index, + NETDEV_QUEUE_TYPE_TX, &q_vector->napi); /* Also set the interrupt number for the NAPI */ netif_napi_set_irq(&q_vector->napi, q_vector->irq.virq); } @@ -2824,11 +2822,10 @@ void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked) /** * ice_vsi_set_napi_queues * @vsi: VSI pointer - * @locked: is the rtnl_lock already held * * Associate queue[s] with napi for all vectors */ -void ice_vsi_set_napi_queues(struct ice_vsi *vsi, bool locked) +void ice_vsi_set_napi_queues(struct ice_vsi *vsi) { int i; @@ -2836,7 +2833,7 @@ void ice_vsi_set_napi_queues(struct ice_vsi *vsi, bool locked) return; ice_for_each_q_vector(vsi, i) - ice_q_vector_set_napi_queues(vsi->q_vectors[i], locked); + ice_q_vector_set_napi_queues(vsi->q_vectors[i]); } /** @@ -3011,7 +3008,7 @@ ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi) } } - tx_ring_stats = vsi_stat->rx_ring_stats; + tx_ring_stats = vsi_stat->tx_ring_stats; vsi_stat->tx_ring_stats = krealloc_array(vsi_stat->tx_ring_stats, req_txq, sizeof(*vsi_stat->tx_ring_stats), diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 0c77d581416a..9cd23afe5f15 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -81,9 +81,15 @@ void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc); struct ice_vsi * ice_vsi_setup(struct ice_pf *pf, struct ice_vsi_cfg_params *params); -void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked); +void +ice_queue_set_napi(struct ice_vsi *vsi, unsigned int queue_index, + enum netdev_queue_type type, struct napi_struct *napi); + +void __ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked); -void ice_vsi_set_napi_queues(struct ice_vsi *vsi, bool locked); +void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector); + +void ice_vsi_set_napi_queues(struct ice_vsi *vsi); int ice_vsi_release(struct ice_vsi *vsi); @@ -104,8 +110,6 @@ void ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio, bool ena_ts); -void ice_vsi_dis_irq(struct ice_vsi *vsi); - void ice_vsi_free_irq(struct ice_vsi *vsi); void ice_vsi_free_rx_rings(struct ice_vsi *vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 9c2c8637b4a7..5d7660bc8846 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3497,7 +3497,7 @@ static void ice_napi_add(struct ice_vsi *vsi) ice_for_each_q_vector(vsi, v_idx) { netif_napi_add(vsi->netdev, &vsi->q_vectors[v_idx]->napi, ice_napi_poll); - ice_q_vector_set_napi_queues(vsi->q_vectors[v_idx], false); + __ice_q_vector_set_napi_queues(vsi->q_vectors[v_idx], false); } } @@ -5383,6 +5383,7 @@ static int ice_reinit_interrupt_scheme(struct ice_pf *pf) if (ret) goto err_reinit; ice_vsi_map_rings_to_vectors(pf->vsi[v]); + ice_vsi_set_napi_queues(pf->vsi[v]); } ret = ice_req_irq_msix_misc(pf); @@ -7001,6 +7002,50 @@ static void ice_napi_disable_all(struct ice_vsi *vsi) } /** + * ice_vsi_dis_irq - Mask off queue interrupt generation on the VSI + * @vsi: the VSI being un-configured + */ +static void ice_vsi_dis_irq(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + u32 val; + int i; + + /* disable interrupt causation from each Rx queue; Tx queues are + * handled in ice_vsi_stop_tx_ring() + */ + if (vsi->rx_rings) { + ice_for_each_rxq(vsi, i) { + if (vsi->rx_rings[i]) { + u16 reg; + + reg = vsi->rx_rings[i]->reg_idx; + val = rd32(hw, QINT_RQCTL(reg)); + val &= ~QINT_RQCTL_CAUSE_ENA_M; + wr32(hw, QINT_RQCTL(reg), val); + } + } + } + + /* disable each interrupt */ + ice_for_each_q_vector(vsi, i) { + if (!vsi->q_vectors[i]) + continue; + wr32(hw, GLINT_DYN_CTL(vsi->q_vectors[i]->reg_idx), 0); + } + + ice_flush(hw); + + /* don't call synchronize_irq() for VF's from the host */ + if (vsi->type == ICE_VSI_VF) + return; + + ice_for_each_q_vector(vsi, i) + synchronize_irq(vsi->q_vectors[i]->irq.virq); +} + +/** * ice_down - Shutdown the connection * @vsi: The VSI being stopped * @@ -7953,6 +7998,8 @@ ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, pf_sw = pf->first_sw; /* find the attribute in the netlink message */ br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (!br_spec) + return -EINVAL; nla_for_each_nested(attr, br_spec, rem) { __u16 mode; diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index a94a1c48c3de..a958fcf3e6be 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -240,7 +240,6 @@ static struct ice_vsi *ice_vf_vsi_setup(struct ice_vf *vf) } vf->lan_vsi_idx = vsi->idx; - vf->lan_vsi_num = vsi->vsi_num; return vsi; } @@ -1068,6 +1067,7 @@ int ice_sriov_set_msix_vec_count(struct pci_dev *vf_dev, int msix_vec_count) struct ice_pf *pf = pci_get_drvdata(pdev); u16 prev_msix, prev_queues, queues; bool needs_rebuild = false; + struct ice_vsi *vsi; struct ice_vf *vf; int id; @@ -1102,6 +1102,10 @@ int ice_sriov_set_msix_vec_count(struct pci_dev *vf_dev, int msix_vec_count) if (!vf) return -ENOENT; + vsi = ice_get_vf_vsi(vf); + if (!vsi) + return -ENOENT; + prev_msix = vf->num_msix; prev_queues = vf->num_vf_qs; @@ -1122,7 +1126,7 @@ int ice_sriov_set_msix_vec_count(struct pci_dev *vf_dev, int msix_vec_count) if (vf->first_vector_idx < 0) goto unroll; - if (ice_vf_reconfig_vsi(vf)) { + if (ice_vf_reconfig_vsi(vf) || ice_vf_init_host_cfg(vf, vsi)) { /* Try to rebuild with previous values */ needs_rebuild = true; goto unroll; @@ -1148,8 +1152,10 @@ unroll: if (vf->first_vector_idx < 0) return -EINVAL; - if (needs_rebuild) + if (needs_rebuild) { ice_vf_reconfig_vsi(vf); + ice_vf_init_host_cfg(vf, vsi); + } ice_ena_vf_mappings(vf); ice_put_vf(vf); diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index 2ffdae9a82df..21d26e19338a 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -280,12 +280,6 @@ int ice_vf_reconfig_vsi(struct ice_vf *vf) return err; } - /* Update the lan_vsi_num field since it might have been changed. The - * PF lan_vsi_idx number remains the same so we don't need to change - * that. - */ - vf->lan_vsi_num = vsi->vsi_num; - return 0; } @@ -315,7 +309,6 @@ static int ice_vf_rebuild_vsi(struct ice_vf *vf) * vf->lan_vsi_idx */ vsi->vsi_num = ice_get_hw_vsi_num(&pf->hw, vsi->idx); - vf->lan_vsi_num = vsi->vsi_num; return 0; } @@ -1315,13 +1308,12 @@ int ice_vf_init_host_cfg(struct ice_vf *vf, struct ice_vsi *vsi) } /** - * ice_vf_invalidate_vsi - invalidate vsi_idx/vsi_num to remove VSI access + * ice_vf_invalidate_vsi - invalidate vsi_idx to remove VSI access * @vf: VF to remove access to VSI for */ void ice_vf_invalidate_vsi(struct ice_vf *vf) { vf->lan_vsi_idx = ICE_NO_VSI; - vf->lan_vsi_num = ICE_NO_VSI; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h index 0cc9034065c5..fec16919ec19 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h @@ -109,11 +109,6 @@ struct ice_vf { u8 spoofchk:1; u8 link_forced:1; u8 link_up:1; /* only valid if VF link is forced */ - /* VSI indices - actual VSI pointers are maintained in the PF structure - * When assigned, these will be non-zero, because VSI 0 is always - * the main LAN VSI for the PF. - */ - u16 lan_vsi_num; /* ID as used by firmware */ unsigned int min_tx_rate; /* Minimum Tx bandwidth limit in Mbps */ unsigned int max_tx_rate; /* Maximum Tx bandwidth limit in Mbps */ DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS); /* VF runtime states */ diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index c925813ec9ca..1ff9818b4c84 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -440,7 +440,6 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vf->driver_caps = *(u32 *)msg; else vf->driver_caps = VIRTCHNL_VF_OFFLOAD_L2 | - VIRTCHNL_VF_OFFLOAD_RSS_REG | VIRTCHNL_VF_OFFLOAD_VLAN; vfres->vf_cap_flags = VIRTCHNL_VF_OFFLOAD_L2; @@ -453,14 +452,8 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vfres->vf_cap_flags |= ice_vc_get_vlan_caps(hw, vf, vsi, vf->driver_caps); - if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) { + if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PF; - } else { - if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_AQ) - vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_AQ; - else - vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_REG; - } if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC; @@ -506,7 +499,7 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vfres->rss_lut_size = ICE_LUT_VSI_SIZE; vfres->max_mtu = ice_vc_get_max_frame_size(vf); - vfres->vsi_res[0].vsi_id = vf->lan_vsi_num; + vfres->vsi_res[0].vsi_id = ICE_VF_VSI_ID; vfres->vsi_res[0].vsi_type = VIRTCHNL_VSI_SRIOV; vfres->vsi_res[0].num_queue_pairs = vsi->num_txq; ether_addr_copy(vfres->vsi_res[0].default_mac_addr, @@ -552,27 +545,20 @@ static void ice_vc_reset_vf_msg(struct ice_vf *vf) */ bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id) { - struct ice_pf *pf = vf->pf; - struct ice_vsi *vsi; - - vsi = ice_find_vsi(pf, vsi_id); - - return (vsi && (vsi->vf == vf)); + return vsi_id == ICE_VF_VSI_ID; } /** * ice_vc_isvalid_q_id - * @vf: pointer to the VF info - * @vsi_id: VSI ID + * @vsi: VSI to check queue ID against * @qid: VSI relative queue ID * * check for the valid queue ID */ -static bool ice_vc_isvalid_q_id(struct ice_vf *vf, u16 vsi_id, u8 qid) +static bool ice_vc_isvalid_q_id(struct ice_vsi *vsi, u8 qid) { - struct ice_vsi *vsi = ice_find_vsi(vf->pf, vsi_id); /* allocated Tx and Rx queues should be always equal for VF VSI */ - return (vsi && (qid < vsi->alloc_txq)); + return qid < vsi->alloc_txq; } /** @@ -1330,7 +1316,7 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg) */ q_map = vqs->rx_queues; for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { - if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) { + if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } @@ -1352,7 +1338,7 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg) q_map = vqs->tx_queues; for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { - if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) { + if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } @@ -1457,7 +1443,7 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) q_map = vqs->tx_queues; for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { - if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) { + if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } @@ -1483,7 +1469,7 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) bitmap_zero(vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF); } else if (q_map) { for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { - if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) { + if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } @@ -1539,7 +1525,7 @@ ice_cfg_interrupt(struct ice_vf *vf, struct ice_vsi *vsi, u16 vector_id, for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) { vsi_q_id = vsi_q_id_idx; - if (!ice_vc_isvalid_q_id(vf, vsi->vsi_num, vsi_q_id)) + if (!ice_vc_isvalid_q_id(vsi, vsi_q_id)) return VIRTCHNL_STATUS_ERR_PARAM; q_vector->num_ring_rx++; @@ -1553,7 +1539,7 @@ ice_cfg_interrupt(struct ice_vf *vf, struct ice_vsi *vsi, u16 vector_id, for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) { vsi_q_id = vsi_q_id_idx; - if (!ice_vc_isvalid_q_id(vf, vsi->vsi_num, vsi_q_id)) + if (!ice_vc_isvalid_q_id(vsi, vsi_q_id)) return VIRTCHNL_STATUS_ERR_PARAM; q_vector->num_ring_tx++; @@ -1710,7 +1696,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) qpi->txq.headwb_enabled || !ice_vc_isvalid_ring_len(qpi->txq.ring_len) || !ice_vc_isvalid_ring_len(qpi->rxq.ring_len) || - !ice_vc_isvalid_q_id(vf, qci->vsi_id, qpi->txq.queue_id)) { + !ice_vc_isvalid_q_id(vsi, qpi->txq.queue_id)) { goto error_param; } diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.h b/drivers/net/ethernet/intel/ice/ice_virtchnl.h index 60dfbe05980a..3a4115869153 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.h @@ -19,6 +19,15 @@ #define ICE_MAX_MACADDR_PER_VF 18 #define ICE_FLEX_DESC_RXDID_MAX_NUM 64 +/* VFs only get a single VSI. For ice hardware, the VF does not need to know + * its VSI index. However, the virtchnl interface requires a VSI number, + * mainly due to legacy hardware. + * + * Since the VF doesn't need this information, report a static value to the VF + * instead of leaking any information about the PF or hardware setup. + */ +#define ICE_VF_VSI_ID 1 + struct ice_virtchnl_ops { int (*get_ver_msg)(struct ice_vf *vf, u8 *msg); int (*get_vf_res_msg)(struct ice_vf *vf, u8 *msg); diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c index 5e19d48a05b4..d796dbd2a440 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c @@ -13,8 +13,6 @@ * - opcodes needed by VF when caps are activated * * Caps that don't use new opcodes (no opcodes should be allowed): - * - VIRTCHNL_VF_OFFLOAD_RSS_AQ - * - VIRTCHNL_VF_OFFLOAD_RSS_REG * - VIRTCHNL_VF_OFFLOAD_WB_ON_ITR * - VIRTCHNL_VF_OFFLOAD_CRC * - VIRTCHNL_VF_OFFLOAD_RX_POLLING diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c index f001553e1a1a..8e4ff3af86c6 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c @@ -94,9 +94,6 @@ ice_vc_fdir_param_check(struct ice_vf *vf, u16 vsi_id) if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_FDIR_PF)) return -EINVAL; - if (vsi_id != vf->lan_vsi_num) - return -EINVAL; - if (!ice_vc_isvalid_vsi_id(vf, vsi_id)) return -EINVAL; diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 8a051420fa19..1857220d27fe 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -179,6 +179,10 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) return -EBUSY; usleep_range(1000, 2000); } + + ice_qvec_dis_irq(vsi, rx_ring, q_vector); + ice_qvec_toggle_napi(vsi, q_vector, false); + netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); ice_fill_txq_meta(vsi, tx_ring, &txq_meta); @@ -195,13 +199,10 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) if (err) return err; } - ice_qvec_dis_irq(vsi, rx_ring, q_vector); - err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true); if (err) return err; - ice_qvec_toggle_napi(vsi, q_vector, false); ice_qp_clean_rings(vsi, q_idx); ice_qp_reset_stats(vsi, q_idx); @@ -245,11 +246,11 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) if (err) return err; - clear_bit(ICE_CFG_BUSY, vsi->state); ice_qvec_toggle_napi(vsi, q_vector, true); ice_qvec_ena_irq(vsi, q_vector); netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + clear_bit(ICE_CFG_BUSY, vsi->state); return 0; } diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index 0acc125decb3..e7a036538246 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -37,8 +37,6 @@ struct idpf_vport_max_q; #define IDPF_MB_MAX_ERR 20 #define IDPF_NUM_CHUNKS_PER_MSG(struct_sz, chunk_sz) \ ((IDPF_CTLQ_MAX_BUF_LEN - (struct_sz)) / (chunk_sz)) -#define IDPF_WAIT_FOR_EVENT_TIMEO_MIN 2000 -#define IDPF_WAIT_FOR_EVENT_TIMEO 60000 #define IDPF_MAX_WAIT 500 @@ -66,14 +64,12 @@ struct idpf_mac_filter { /** * enum idpf_state - State machine to handle bring up - * @__IDPF_STARTUP: Start the state machine * @__IDPF_VER_CHECK: Negotiate virtchnl version * @__IDPF_GET_CAPS: Negotiate capabilities * @__IDPF_INIT_SW: Init based on given capabilities * @__IDPF_STATE_LAST: Must be last, used to determine size */ enum idpf_state { - __IDPF_STARTUP, __IDPF_VER_CHECK, __IDPF_GET_CAPS, __IDPF_INIT_SW, @@ -87,6 +83,7 @@ enum idpf_state { * @IDPF_HR_RESET_IN_PROG: Reset in progress * @IDPF_REMOVE_IN_PROG: Driver remove in progress * @IDPF_MB_INTR_MODE: Mailbox in interrupt mode + * @IDPF_VC_CORE_INIT: virtchnl core has been init * @IDPF_FLAGS_NBITS: Must be last */ enum idpf_flags { @@ -95,6 +92,7 @@ enum idpf_flags { IDPF_HR_RESET_IN_PROG, IDPF_REMOVE_IN_PROG, IDPF_MB_INTR_MODE, + IDPF_VC_CORE_INIT, IDPF_FLAGS_NBITS, }; @@ -209,71 +207,6 @@ struct idpf_dev_ops { struct idpf_reg_ops reg_ops; }; -/* These macros allow us to generate an enum and a matching char * array of - * stringified enums that are always in sync. Checkpatch issues a bogus warning - * about this being a complex macro; but it's wrong, these are never used as a - * statement and instead only used to define the enum and array. - */ -#define IDPF_FOREACH_VPORT_VC_STATE(STATE) \ - STATE(IDPF_VC_CREATE_VPORT) \ - STATE(IDPF_VC_CREATE_VPORT_ERR) \ - STATE(IDPF_VC_ENA_VPORT) \ - STATE(IDPF_VC_ENA_VPORT_ERR) \ - STATE(IDPF_VC_DIS_VPORT) \ - STATE(IDPF_VC_DIS_VPORT_ERR) \ - STATE(IDPF_VC_DESTROY_VPORT) \ - STATE(IDPF_VC_DESTROY_VPORT_ERR) \ - STATE(IDPF_VC_CONFIG_TXQ) \ - STATE(IDPF_VC_CONFIG_TXQ_ERR) \ - STATE(IDPF_VC_CONFIG_RXQ) \ - STATE(IDPF_VC_CONFIG_RXQ_ERR) \ - STATE(IDPF_VC_ENA_QUEUES) \ - STATE(IDPF_VC_ENA_QUEUES_ERR) \ - STATE(IDPF_VC_DIS_QUEUES) \ - STATE(IDPF_VC_DIS_QUEUES_ERR) \ - STATE(IDPF_VC_MAP_IRQ) \ - STATE(IDPF_VC_MAP_IRQ_ERR) \ - STATE(IDPF_VC_UNMAP_IRQ) \ - STATE(IDPF_VC_UNMAP_IRQ_ERR) \ - STATE(IDPF_VC_ADD_QUEUES) \ - STATE(IDPF_VC_ADD_QUEUES_ERR) \ - STATE(IDPF_VC_DEL_QUEUES) \ - STATE(IDPF_VC_DEL_QUEUES_ERR) \ - STATE(IDPF_VC_ALLOC_VECTORS) \ - STATE(IDPF_VC_ALLOC_VECTORS_ERR) \ - STATE(IDPF_VC_DEALLOC_VECTORS) \ - STATE(IDPF_VC_DEALLOC_VECTORS_ERR) \ - STATE(IDPF_VC_SET_SRIOV_VFS) \ - STATE(IDPF_VC_SET_SRIOV_VFS_ERR) \ - STATE(IDPF_VC_GET_RSS_LUT) \ - STATE(IDPF_VC_GET_RSS_LUT_ERR) \ - STATE(IDPF_VC_SET_RSS_LUT) \ - STATE(IDPF_VC_SET_RSS_LUT_ERR) \ - STATE(IDPF_VC_GET_RSS_KEY) \ - STATE(IDPF_VC_GET_RSS_KEY_ERR) \ - STATE(IDPF_VC_SET_RSS_KEY) \ - STATE(IDPF_VC_SET_RSS_KEY_ERR) \ - STATE(IDPF_VC_GET_STATS) \ - STATE(IDPF_VC_GET_STATS_ERR) \ - STATE(IDPF_VC_ADD_MAC_ADDR) \ - STATE(IDPF_VC_ADD_MAC_ADDR_ERR) \ - STATE(IDPF_VC_DEL_MAC_ADDR) \ - STATE(IDPF_VC_DEL_MAC_ADDR_ERR) \ - STATE(IDPF_VC_GET_PTYPE_INFO) \ - STATE(IDPF_VC_GET_PTYPE_INFO_ERR) \ - STATE(IDPF_VC_LOOPBACK_STATE) \ - STATE(IDPF_VC_LOOPBACK_STATE_ERR) \ - STATE(IDPF_VC_NBITS) - -#define IDPF_GEN_ENUM(ENUM) ENUM, -#define IDPF_GEN_STRING(STRING) #STRING, - -enum idpf_vport_vc_state { - IDPF_FOREACH_VPORT_VC_STATE(IDPF_GEN_ENUM) -}; - -extern const char * const idpf_vport_vc_state_str[]; - /** * enum idpf_vport_reset_cause - Vport soft reset causes * @IDPF_SR_Q_CHANGE: Soft reset queue change @@ -358,11 +291,7 @@ struct idpf_port_stats { * @port_stats: per port csum, header split, and other offload stats * @link_up: True if link is up * @link_speed_mbps: Link speed in mbps - * @vc_msg: Virtchnl message buffer - * @vc_state: Virtchnl message state - * @vchnl_wq: Wait queue for virtchnl messages * @sw_marker_wq: workqueue for marker packets - * @vc_buf_lock: Lock to protect virtchnl buffer */ struct idpf_vport { u16 num_txq; @@ -408,12 +337,7 @@ struct idpf_vport { bool link_up; u32 link_speed_mbps; - char vc_msg[IDPF_CTLQ_MAX_BUF_LEN]; - DECLARE_BITMAP(vc_state, IDPF_VC_NBITS); - - wait_queue_head_t vchnl_wq; wait_queue_head_t sw_marker_wq; - struct mutex vc_buf_lock; }; /** @@ -476,15 +400,11 @@ struct idpf_vport_user_config_data { * enum idpf_vport_config_flags - Vport config flags * @IDPF_VPORT_REG_NETDEV: Register netdev * @IDPF_VPORT_UP_REQUESTED: Set if interface up is requested on core reset - * @IDPF_VPORT_ADD_MAC_REQ: Asynchronous add ether address in flight - * @IDPF_VPORT_DEL_MAC_REQ: Asynchronous delete ether address in flight * @IDPF_VPORT_CONFIG_FLAGS_NBITS: Must be last */ enum idpf_vport_config_flags { IDPF_VPORT_REG_NETDEV, IDPF_VPORT_UP_REQUESTED, - IDPF_VPORT_ADD_MAC_REQ, - IDPF_VPORT_DEL_MAC_REQ, IDPF_VPORT_CONFIG_FLAGS_NBITS, }; @@ -555,11 +475,13 @@ struct idpf_vector_lifo { struct idpf_vport_config { struct idpf_vport_user_config_data user_config; struct idpf_vport_max_q max_q; - void *req_qs_chunks; + struct virtchnl2_add_queues *req_qs_chunks; spinlock_t mac_filter_list_lock; DECLARE_BITMAP(flags, IDPF_VPORT_CONFIG_FLAGS_NBITS); }; +struct idpf_vc_xn_manager; + /** * struct idpf_adapter - Device data struct generated on probe * @pdev: PCI device struct given on probe @@ -601,9 +523,7 @@ struct idpf_vport_config { * @stats_task: Periodic statistics retrieval task * @stats_wq: Workqueue for statistics task * @caps: Negotiated capabilities with device - * @vchnl_wq: Wait queue for virtchnl messages - * @vc_state: Virtchnl message state - * @vc_msg: Virtchnl message buffer + * @vcxn_mngr: Virtchnl transaction manager * @dev_ops: See idpf_dev_ops * @num_vfs: Number of allocated VFs through sysfs. PF does not directly talk * to VFs but is used to initialize them @@ -659,10 +579,8 @@ struct idpf_adapter { struct delayed_work stats_task; struct workqueue_struct *stats_wq; struct virtchnl2_get_capabilities caps; + struct idpf_vc_xn_manager *vcxn_mngr; - wait_queue_head_t vchnl_wq; - DECLARE_BITMAP(vc_state, IDPF_VC_NBITS); - char vc_msg[IDPF_CTLQ_MAX_BUF_LEN]; struct idpf_dev_ops dev_ops; int num_vfs; bool crc_enable; @@ -903,68 +821,18 @@ void idpf_mbx_task(struct work_struct *work); void idpf_vc_event_task(struct work_struct *work); void idpf_dev_ops_init(struct idpf_adapter *adapter); void idpf_vf_dev_ops_init(struct idpf_adapter *adapter); -int idpf_vport_adjust_qs(struct idpf_vport *vport); -int idpf_init_dflt_mbx(struct idpf_adapter *adapter); -void idpf_deinit_dflt_mbx(struct idpf_adapter *adapter); -int idpf_vc_core_init(struct idpf_adapter *adapter); -void idpf_vc_core_deinit(struct idpf_adapter *adapter); int idpf_intr_req(struct idpf_adapter *adapter); void idpf_intr_rel(struct idpf_adapter *adapter); -int idpf_get_reg_intr_vecs(struct idpf_vport *vport, - struct idpf_vec_regs *reg_vals); u16 idpf_get_max_tx_hdr_size(struct idpf_adapter *adapter); -int idpf_send_delete_queues_msg(struct idpf_vport *vport); -int idpf_send_add_queues_msg(const struct idpf_vport *vport, u16 num_tx_q, - u16 num_complq, u16 num_rx_q, u16 num_rx_bufq); int idpf_initiate_soft_reset(struct idpf_vport *vport, enum idpf_vport_reset_cause reset_cause); -int idpf_send_enable_vport_msg(struct idpf_vport *vport); -int idpf_send_disable_vport_msg(struct idpf_vport *vport); -int idpf_send_destroy_vport_msg(struct idpf_vport *vport); -int idpf_send_get_rx_ptype_msg(struct idpf_vport *vport); -int idpf_send_ena_dis_loopback_msg(struct idpf_vport *vport); -int idpf_send_get_set_rss_key_msg(struct idpf_vport *vport, bool get); -int idpf_send_get_set_rss_lut_msg(struct idpf_vport *vport, bool get); -int idpf_send_dealloc_vectors_msg(struct idpf_adapter *adapter); -int idpf_send_alloc_vectors_msg(struct idpf_adapter *adapter, u16 num_vectors); void idpf_deinit_task(struct idpf_adapter *adapter); int idpf_req_rel_vector_indexes(struct idpf_adapter *adapter, u16 *q_vector_idxs, struct idpf_vector_info *vec_info); -int idpf_vport_alloc_vec_indexes(struct idpf_vport *vport); -int idpf_send_get_stats_msg(struct idpf_vport *vport); -int idpf_get_vec_ids(struct idpf_adapter *adapter, - u16 *vecids, int num_vecids, - struct virtchnl2_vector_chunks *chunks); -int idpf_recv_mb_msg(struct idpf_adapter *adapter, u32 op, - void *msg, int msg_size); -int idpf_send_mb_msg(struct idpf_adapter *adapter, u32 op, - u16 msg_size, u8 *msg); void idpf_set_ethtool_ops(struct net_device *netdev); -int idpf_vport_alloc_max_qs(struct idpf_adapter *adapter, - struct idpf_vport_max_q *max_q); -void idpf_vport_dealloc_max_qs(struct idpf_adapter *adapter, - struct idpf_vport_max_q *max_q); -int idpf_add_del_mac_filters(struct idpf_vport *vport, - struct idpf_netdev_priv *np, - bool add, bool async); -int idpf_set_promiscuous(struct idpf_adapter *adapter, - struct idpf_vport_user_config_data *config_data, - u32 vport_id); -int idpf_send_disable_queues_msg(struct idpf_vport *vport); -void idpf_vport_init(struct idpf_vport *vport, struct idpf_vport_max_q *max_q); -u32 idpf_get_vport_id(struct idpf_vport *vport); -int idpf_vport_queue_ids_init(struct idpf_vport *vport); -int idpf_queue_reg_init(struct idpf_vport *vport); -int idpf_send_config_queues_msg(struct idpf_vport *vport); -int idpf_send_enable_queues_msg(struct idpf_vport *vport); -int idpf_send_create_vport_msg(struct idpf_adapter *adapter, - struct idpf_vport_max_q *max_q); -int idpf_check_supported_desc_ids(struct idpf_vport *vport); void idpf_vport_intr_write_itr(struct idpf_q_vector *q_vector, u16 itr, bool tx); -int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map); -int idpf_send_set_sriov_vfs_msg(struct idpf_adapter *adapter, u16 num_vfs); int idpf_sriov_configure(struct pci_dev *pdev, int num_vfs); u8 idpf_vport_get_hsplit(const struct idpf_vport *vport); diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq.c b/drivers/net/ethernet/intel/idpf/idpf_controlq.c index c7f43d2fcd13..4849590a5591 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_controlq.c +++ b/drivers/net/ethernet/intel/idpf/idpf_controlq.c @@ -516,6 +516,8 @@ post_buffs_out: /* Wrap to end of end ring since current ntp is 0 */ cq->next_to_post = cq->ring_size - 1; + dma_wmb(); + wr32(hw, cq->reg.tail, cq->next_to_post); } @@ -546,11 +548,6 @@ int idpf_ctlq_recv(struct idpf_ctlq_info *cq, u16 *num_q_msg, int err = 0; u16 i; - if (*num_q_msg == 0) - return 0; - else if (*num_q_msg > cq->ring_size) - return -EBADR; - /* take the lock before we start messing with the ring */ mutex_lock(&cq->cq_lock); diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h b/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h index 8dee098bbfb0..e8e046ef2f0d 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h +++ b/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h @@ -69,6 +69,11 @@ struct idpf_ctlq_msg { u8 context[IDPF_INDIRECT_CTX_SIZE]; struct idpf_dma_mem *payload; } indirect; + struct { + u32 rsvd; + u16 data; + u16 flags; + } sw_cookie; } ctx; }; diff --git a/drivers/net/ethernet/intel/idpf/idpf_dev.c b/drivers/net/ethernet/intel/idpf/idpf_dev.c index 34ad1ac46b78..3df9935685e9 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_dev.c +++ b/drivers/net/ethernet/intel/idpf/idpf_dev.c @@ -3,6 +3,7 @@ #include "idpf.h" #include "idpf_lan_pf_regs.h" +#include "idpf_virtchnl.h" #define IDPF_PF_ITR_IDX_SPACING 0x4 diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 58179bd733ff..5d3532c27d57 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -2,14 +2,11 @@ /* Copyright (C) 2023 Intel Corporation */ #include "idpf.h" +#include "idpf_virtchnl.h" static const struct net_device_ops idpf_netdev_ops_splitq; static const struct net_device_ops idpf_netdev_ops_singleq; -const char * const idpf_vport_vc_state_str[] = { - IDPF_FOREACH_VPORT_VC_STATE(IDPF_GEN_STRING) -}; - /** * idpf_init_vector_stack - Fill the MSIX vector stack with vector index * @adapter: private data struct @@ -82,19 +79,12 @@ static void idpf_mb_intr_rel_irq(struct idpf_adapter *adapter) */ void idpf_intr_rel(struct idpf_adapter *adapter) { - int err; - if (!adapter->msix_entries) return; idpf_mb_intr_rel_irq(adapter); pci_free_irq_vectors(adapter->pdev); - - err = idpf_send_dealloc_vectors_msg(adapter); - if (err) - dev_err(&adapter->pdev->dev, - "Failed to deallocate vectors: %d\n", err); - + idpf_send_dealloc_vectors_msg(adapter); idpf_deinit_vector_stack(adapter); kfree(adapter->msix_entries); adapter->msix_entries = NULL; @@ -975,7 +965,6 @@ static void idpf_vport_rel(struct idpf_vport *vport) struct idpf_rss_data *rss_data; struct idpf_vport_max_q max_q; u16 idx = vport->idx; - int i; vport_config = adapter->vport_config[vport->idx]; idpf_deinit_rss(vport); @@ -985,20 +974,6 @@ static void idpf_vport_rel(struct idpf_vport *vport) idpf_send_destroy_vport_msg(vport); - /* Set all bits as we dont know on which vc_state the vport vhnl_wq - * is waiting on and wakeup the virtchnl workqueue even if it is - * waiting for the response as we are going down - */ - for (i = 0; i < IDPF_VC_NBITS; i++) - set_bit(i, vport->vc_state); - wake_up(&vport->vchnl_wq); - - mutex_destroy(&vport->vc_buf_lock); - - /* Clear all the bits */ - for (i = 0; i < IDPF_VC_NBITS; i++) - clear_bit(i, vport->vc_state); - /* Release all max queues allocated to the adapter's pool */ max_q.max_rxq = vport_config->max_q.max_rxq; max_q.max_txq = vport_config->max_q.max_txq; @@ -1253,7 +1228,7 @@ void idpf_mbx_task(struct work_struct *work) queue_delayed_work(adapter->mbx_wq, &adapter->mbx_task, msecs_to_jiffies(300)); - idpf_recv_mb_msg(adapter, VIRTCHNL2_OP_UNKNOWN, NULL, 0); + idpf_recv_mb_msg(adapter); } /** @@ -1543,9 +1518,7 @@ void idpf_init_task(struct work_struct *work) vport_config = adapter->vport_config[index]; init_waitqueue_head(&vport->sw_marker_wq); - init_waitqueue_head(&vport->vchnl_wq); - mutex_init(&vport->vc_buf_lock); spin_lock_init(&vport_config->mac_filter_list_lock); INIT_LIST_HEAD(&vport_config->user_config.mac_filter_list); @@ -1823,6 +1796,8 @@ static int idpf_init_hard_reset(struct idpf_adapter *adapter) goto unlock_mutex; } + queue_delayed_work(adapter->mbx_wq, &adapter->mbx_task, 0); + /* Initialize the state machine, also allocate memory and request * resources */ @@ -1902,7 +1877,7 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, * mess with. Nothing below should use those variables from new_vport * and should instead always refer to them in vport if they need to. */ - memcpy(new_vport, vport, offsetof(struct idpf_vport, vc_state)); + memcpy(new_vport, vport, offsetof(struct idpf_vport, link_speed_mbps)); /* Adjust resource parameters prior to reallocating resources */ switch (reset_cause) { @@ -1951,7 +1926,7 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, /* Same comment as above regarding avoiding copying the wait_queues and * mutexes applies here. We do not want to mess with those if possible. */ - memcpy(vport, new_vport, offsetof(struct idpf_vport, vc_state)); + memcpy(vport, new_vport, offsetof(struct idpf_vport, link_speed_mbps)); /* Since idpf_vport_queues_alloc was called with new_port, the queue * back pointers are currently pointing to the local new_vport. Reset diff --git a/drivers/net/ethernet/intel/idpf/idpf_main.c b/drivers/net/ethernet/intel/idpf/idpf_main.c index e1febc74cefd..f784eea044bd 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_main.c +++ b/drivers/net/ethernet/intel/idpf/idpf_main.c @@ -3,6 +3,7 @@ #include "idpf.h" #include "idpf_devids.h" +#include "idpf_virtchnl.h" #define DRV_SUMMARY "Intel(R) Infrastructure Data Path Function Linux Driver" @@ -30,6 +31,7 @@ static void idpf_remove(struct pci_dev *pdev) idpf_sriov_configure(pdev, 0); idpf_vc_core_deinit(adapter); + /* Be a good citizen and leave the device clean on exit */ adapter->dev_ops.reg_ops.trigger_reset(adapter, IDPF_HR_FUNC_RESET); idpf_deinit_dflt_mbx(adapter); @@ -66,6 +68,8 @@ destroy_wqs: adapter->vport_config = NULL; kfree(adapter->netdevs); adapter->netdevs = NULL; + kfree(adapter->vcxn_mngr); + adapter->vcxn_mngr = NULL; mutex_destroy(&adapter->vport_ctrl_lock); mutex_destroy(&adapter->vector_lock); @@ -229,8 +233,6 @@ static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) mutex_init(&adapter->queue_lock); mutex_init(&adapter->vc_buf_lock); - init_waitqueue_head(&adapter->vchnl_wq); - INIT_DELAYED_WORK(&adapter->init_task, idpf_init_task); INIT_DELAYED_WORK(&adapter->serv_task, idpf_service_task); INIT_DELAYED_WORK(&adapter->mbx_task, idpf_mbx_task); diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 2f8ad79ae3f0..6dd7a66bb897 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -2,6 +2,7 @@ /* Copyright (C) 2023 Intel Corporation */ #include "idpf.h" +#include "idpf_virtchnl.h" /** * idpf_buf_lifo_push - push a buffer pointer onto stack diff --git a/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c b/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c index 8ade4e3a9fe1..629cb5cb7c9f 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c +++ b/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c @@ -3,6 +3,7 @@ #include "idpf.h" #include "idpf_lan_vf_regs.h" +#include "idpf_virtchnl.h" #define IDPF_VF_ITR_IDX_SPACING 0x40 @@ -137,7 +138,7 @@ static void idpf_vf_trigger_reset(struct idpf_adapter *adapter, /* Do not send VIRTCHNL2_OP_RESET_VF message on driver unload */ if (trig_cause == IDPF_HR_FUNC_RESET && !test_bit(IDPF_REMOVE_IN_PROG, adapter->flags)) - idpf_send_mb_msg(adapter, VIRTCHNL2_OP_RESET_VF, 0, NULL); + idpf_send_mb_msg(adapter, VIRTCHNL2_OP_RESET_VF, 0, NULL, 0); } /** diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index d0cdd63b3d5b..a5f9b7a5effe 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -2,46 +2,192 @@ /* Copyright (C) 2023 Intel Corporation */ #include "idpf.h" +#include "idpf_virtchnl.h" + +#define IDPF_VC_XN_MIN_TIMEOUT_MSEC 2000 +#define IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC (60 * 1000) +#define IDPF_VC_XN_IDX_M GENMASK(7, 0) +#define IDPF_VC_XN_SALT_M GENMASK(15, 8) +#define IDPF_VC_XN_RING_LEN U8_MAX + +/** + * enum idpf_vc_xn_state - Virtchnl transaction status + * @IDPF_VC_XN_IDLE: not expecting a reply, ready to be used + * @IDPF_VC_XN_WAITING: expecting a reply, not yet received + * @IDPF_VC_XN_COMPLETED_SUCCESS: a reply was expected and received, + * buffer updated + * @IDPF_VC_XN_COMPLETED_FAILED: a reply was expected and received, but there + * was an error, buffer not updated + * @IDPF_VC_XN_SHUTDOWN: transaction object cannot be used, VC torn down + * @IDPF_VC_XN_ASYNC: transaction sent asynchronously and doesn't have the + * return context; a callback may be provided to handle + * return + */ +enum idpf_vc_xn_state { + IDPF_VC_XN_IDLE = 1, + IDPF_VC_XN_WAITING, + IDPF_VC_XN_COMPLETED_SUCCESS, + IDPF_VC_XN_COMPLETED_FAILED, + IDPF_VC_XN_SHUTDOWN, + IDPF_VC_XN_ASYNC, +}; + +struct idpf_vc_xn; +/* Callback for asynchronous messages */ +typedef int (*async_vc_cb) (struct idpf_adapter *, struct idpf_vc_xn *, + const struct idpf_ctlq_msg *); + +/** + * struct idpf_vc_xn - Data structure representing virtchnl transactions + * @completed: virtchnl event loop uses that to signal when a reply is + * available, uses kernel completion API + * @state: virtchnl event loop stores the data below, protected by the + * completion's lock. + * @reply_sz: Original size of reply, may be > reply_buf.iov_len; it will be + * truncated on its way to the receiver thread according to + * reply_buf.iov_len. + * @reply: Reference to the buffer(s) where the reply data should be written + * to. May be 0-length (then NULL address permitted) if the reply data + * should be ignored. + * @async_handler: if sent asynchronously, a callback can be provided to handle + * the reply when it's received + * @vc_op: corresponding opcode sent with this transaction + * @idx: index used as retrieval on reply receive, used for cookie + * @salt: changed every message to make unique, used for cookie + */ +struct idpf_vc_xn { + struct completion completed; + enum idpf_vc_xn_state state; + size_t reply_sz; + struct kvec reply; + async_vc_cb async_handler; + u32 vc_op; + u8 idx; + u8 salt; +}; + +/** + * struct idpf_vc_xn_params - Parameters for executing transaction + * @send_buf: kvec for send buffer + * @recv_buf: kvec for recv buffer, may be NULL, must then have zero length + * @timeout_ms: timeout to wait for reply + * @async: send message asynchronously, will not wait on completion + * @async_handler: If sent asynchronously, optional callback handler. The user + * must be careful when using async handlers as the memory for + * the recv_buf _cannot_ be on stack if this is async. + * @vc_op: virtchnl op to send + */ +struct idpf_vc_xn_params { + struct kvec send_buf; + struct kvec recv_buf; + int timeout_ms; + bool async; + async_vc_cb async_handler; + u32 vc_op; +}; + +/** + * struct idpf_vc_xn_manager - Manager for tracking transactions + * @ring: backing and lookup for transactions + * @free_xn_bm: bitmap for free transactions + * @xn_bm_lock: make bitmap access synchronous where necessary + * @salt: used to make cookie unique every message + */ +struct idpf_vc_xn_manager { + struct idpf_vc_xn ring[IDPF_VC_XN_RING_LEN]; + DECLARE_BITMAP(free_xn_bm, IDPF_VC_XN_RING_LEN); + spinlock_t xn_bm_lock; + u8 salt; +}; + +/** + * idpf_vid_to_vport - Translate vport id to vport pointer + * @adapter: private data struct + * @v_id: vport id to translate + * + * Returns vport matching v_id, NULL if not found. + */ +static +struct idpf_vport *idpf_vid_to_vport(struct idpf_adapter *adapter, u32 v_id) +{ + u16 num_max_vports = idpf_get_max_vports(adapter); + int i; + + for (i = 0; i < num_max_vports; i++) + if (adapter->vport_ids[i] == v_id) + return adapter->vports[i]; + + return NULL; +} + +/** + * idpf_handle_event_link - Handle link event message + * @adapter: private data struct + * @v2e: virtchnl event message + */ +static void idpf_handle_event_link(struct idpf_adapter *adapter, + const struct virtchnl2_event *v2e) +{ + struct idpf_netdev_priv *np; + struct idpf_vport *vport; + + vport = idpf_vid_to_vport(adapter, le32_to_cpu(v2e->vport_id)); + if (!vport) { + dev_err_ratelimited(&adapter->pdev->dev, "Failed to find vport_id %d for link event\n", + v2e->vport_id); + return; + } + np = netdev_priv(vport->netdev); + + vport->link_speed_mbps = le32_to_cpu(v2e->link_speed); + + if (vport->link_up == v2e->link_status) + return; + + vport->link_up = v2e->link_status; + + if (np->state != __IDPF_VPORT_UP) + return; + + if (vport->link_up) { + netif_tx_start_all_queues(vport->netdev); + netif_carrier_on(vport->netdev); + } else { + netif_tx_stop_all_queues(vport->netdev); + netif_carrier_off(vport->netdev); + } +} /** * idpf_recv_event_msg - Receive virtchnl event message - * @vport: virtual port structure + * @adapter: Driver specific private structure * @ctlq_msg: message to copy from * * Receive virtchnl event message */ -static void idpf_recv_event_msg(struct idpf_vport *vport, +static void idpf_recv_event_msg(struct idpf_adapter *adapter, struct idpf_ctlq_msg *ctlq_msg) { - struct idpf_netdev_priv *np = netdev_priv(vport->netdev); + int payload_size = ctlq_msg->ctx.indirect.payload->size; struct virtchnl2_event *v2e; - bool link_status; u32 event; + if (payload_size < sizeof(*v2e)) { + dev_err_ratelimited(&adapter->pdev->dev, "Failed to receive valid payload for event msg (op %d len %d)\n", + ctlq_msg->cookie.mbx.chnl_opcode, + payload_size); + return; + } + v2e = (struct virtchnl2_event *)ctlq_msg->ctx.indirect.payload->va; event = le32_to_cpu(v2e->event); switch (event) { case VIRTCHNL2_EVENT_LINK_CHANGE: - vport->link_speed_mbps = le32_to_cpu(v2e->link_speed); - link_status = v2e->link_status; - - if (vport->link_up == link_status) - break; - - vport->link_up = link_status; - if (np->state == __IDPF_VPORT_UP) { - if (vport->link_up) { - netif_carrier_on(vport->netdev); - netif_tx_start_all_queues(vport->netdev); - } else { - netif_tx_stop_all_queues(vport->netdev); - netif_carrier_off(vport->netdev); - } - } - break; + idpf_handle_event_link(adapter, v2e); + return; default: - dev_err(&vport->adapter->pdev->dev, + dev_err(&adapter->pdev->dev, "Unknown event %d from PF\n", event); break; } @@ -93,13 +239,14 @@ err_kfree: * @op: virtchnl opcode * @msg_size: size of the payload * @msg: pointer to buffer holding the payload + * @cookie: unique SW generated cookie per message * * Will prepare the control queue message and initiates the send api * * Returns 0 on success, negative on failure */ int idpf_send_mb_msg(struct idpf_adapter *adapter, u32 op, - u16 msg_size, u8 *msg) + u16 msg_size, u8 *msg, u16 cookie) { struct idpf_ctlq_msg *ctlq_msg; struct idpf_dma_mem *dma_mem; @@ -139,8 +286,12 @@ int idpf_send_mb_msg(struct idpf_adapter *adapter, u32 op, err = -ENOMEM; goto dma_alloc_error; } - memcpy(dma_mem->va, msg, msg_size); + + /* It's possible we're just sending an opcode but no buffer */ + if (msg && msg_size) + memcpy(dma_mem->va, msg, msg_size); ctlq_msg->ctx.indirect.payload = dma_mem; + ctlq_msg->ctx.sw_cookie.data = cookie; err = idpf_ctlq_send(&adapter->hw, adapter->hw.asq, 1, ctlq_msg); if (err) @@ -159,592 +310,432 @@ dma_mem_error: return err; } -/** - * idpf_find_vport - Find vport pointer from control queue message - * @adapter: driver specific private structure - * @vport: address of vport pointer to copy the vport from adapters vport list - * @ctlq_msg: control queue message +/* API for virtchnl "transaction" support ("xn" for short). * - * Return 0 on success, error value on failure. Also this function does check - * for the opcodes which expect to receive payload and return error value if - * it is not the case. + * We are reusing the completion lock to serialize the accesses to the + * transaction state for simplicity, but it could be its own separate synchro + * as well. For now, this API is only used from within a workqueue context; + * raw_spin_lock() is enough. */ -static int idpf_find_vport(struct idpf_adapter *adapter, - struct idpf_vport **vport, - struct idpf_ctlq_msg *ctlq_msg) -{ - bool no_op = false, vid_found = false; - int i, err = 0; - char *vc_msg; - u32 v_id; +/** + * idpf_vc_xn_lock - Request exclusive access to vc transaction + * @xn: struct idpf_vc_xn* to access + */ +#define idpf_vc_xn_lock(xn) \ + raw_spin_lock(&(xn)->completed.wait.lock) - vc_msg = kcalloc(IDPF_CTLQ_MAX_BUF_LEN, sizeof(char), GFP_KERNEL); - if (!vc_msg) - return -ENOMEM; +/** + * idpf_vc_xn_unlock - Release exclusive access to vc transaction + * @xn: struct idpf_vc_xn* to access + */ +#define idpf_vc_xn_unlock(xn) \ + raw_spin_unlock(&(xn)->completed.wait.lock) - if (ctlq_msg->data_len) { - size_t payload_size = ctlq_msg->ctx.indirect.payload->size; +/** + * idpf_vc_xn_release_bufs - Release reference to reply buffer(s) and + * reset the transaction state. + * @xn: struct idpf_vc_xn to update + */ +static void idpf_vc_xn_release_bufs(struct idpf_vc_xn *xn) +{ + xn->reply.iov_base = NULL; + xn->reply.iov_len = 0; - if (!payload_size) { - dev_err(&adapter->pdev->dev, "Failed to receive payload buffer\n"); - kfree(vc_msg); + if (xn->state != IDPF_VC_XN_SHUTDOWN) + xn->state = IDPF_VC_XN_IDLE; +} - return -EINVAL; - } +/** + * idpf_vc_xn_init - Initialize virtchnl transaction object + * @vcxn_mngr: pointer to vc transaction manager struct + */ +static void idpf_vc_xn_init(struct idpf_vc_xn_manager *vcxn_mngr) +{ + int i; - memcpy(vc_msg, ctlq_msg->ctx.indirect.payload->va, - min_t(size_t, payload_size, IDPF_CTLQ_MAX_BUF_LEN)); - } - - switch (ctlq_msg->cookie.mbx.chnl_opcode) { - case VIRTCHNL2_OP_VERSION: - case VIRTCHNL2_OP_GET_CAPS: - case VIRTCHNL2_OP_CREATE_VPORT: - case VIRTCHNL2_OP_SET_SRIOV_VFS: - case VIRTCHNL2_OP_ALLOC_VECTORS: - case VIRTCHNL2_OP_DEALLOC_VECTORS: - case VIRTCHNL2_OP_GET_PTYPE_INFO: - goto free_vc_msg; - case VIRTCHNL2_OP_ENABLE_VPORT: - case VIRTCHNL2_OP_DISABLE_VPORT: - case VIRTCHNL2_OP_DESTROY_VPORT: - v_id = le32_to_cpu(((struct virtchnl2_vport *)vc_msg)->vport_id); - break; - case VIRTCHNL2_OP_CONFIG_TX_QUEUES: - v_id = le32_to_cpu(((struct virtchnl2_config_tx_queues *)vc_msg)->vport_id); - break; - case VIRTCHNL2_OP_CONFIG_RX_QUEUES: - v_id = le32_to_cpu(((struct virtchnl2_config_rx_queues *)vc_msg)->vport_id); - break; - case VIRTCHNL2_OP_ENABLE_QUEUES: - case VIRTCHNL2_OP_DISABLE_QUEUES: - case VIRTCHNL2_OP_DEL_QUEUES: - v_id = le32_to_cpu(((struct virtchnl2_del_ena_dis_queues *)vc_msg)->vport_id); - break; - case VIRTCHNL2_OP_ADD_QUEUES: - v_id = le32_to_cpu(((struct virtchnl2_add_queues *)vc_msg)->vport_id); - break; - case VIRTCHNL2_OP_MAP_QUEUE_VECTOR: - case VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR: - v_id = le32_to_cpu(((struct virtchnl2_queue_vector_maps *)vc_msg)->vport_id); - break; - case VIRTCHNL2_OP_GET_STATS: - v_id = le32_to_cpu(((struct virtchnl2_vport_stats *)vc_msg)->vport_id); - break; - case VIRTCHNL2_OP_GET_RSS_LUT: - case VIRTCHNL2_OP_SET_RSS_LUT: - v_id = le32_to_cpu(((struct virtchnl2_rss_lut *)vc_msg)->vport_id); - break; - case VIRTCHNL2_OP_GET_RSS_KEY: - case VIRTCHNL2_OP_SET_RSS_KEY: - v_id = le32_to_cpu(((struct virtchnl2_rss_key *)vc_msg)->vport_id); - break; - case VIRTCHNL2_OP_EVENT: - v_id = le32_to_cpu(((struct virtchnl2_event *)vc_msg)->vport_id); - break; - case VIRTCHNL2_OP_LOOPBACK: - v_id = le32_to_cpu(((struct virtchnl2_loopback *)vc_msg)->vport_id); - break; - case VIRTCHNL2_OP_CONFIG_PROMISCUOUS_MODE: - v_id = le32_to_cpu(((struct virtchnl2_promisc_info *)vc_msg)->vport_id); - break; - case VIRTCHNL2_OP_ADD_MAC_ADDR: - case VIRTCHNL2_OP_DEL_MAC_ADDR: - v_id = le32_to_cpu(((struct virtchnl2_mac_addr_list *)vc_msg)->vport_id); - break; - default: - no_op = true; - break; - } + spin_lock_init(&vcxn_mngr->xn_bm_lock); - if (no_op) - goto free_vc_msg; + for (i = 0; i < ARRAY_SIZE(vcxn_mngr->ring); i++) { + struct idpf_vc_xn *xn = &vcxn_mngr->ring[i]; - for (i = 0; i < idpf_get_max_vports(adapter); i++) { - if (adapter->vport_ids[i] == v_id) { - vid_found = true; - break; - } + xn->state = IDPF_VC_XN_IDLE; + xn->idx = i; + idpf_vc_xn_release_bufs(xn); + init_completion(&xn->completed); } - if (vid_found) - *vport = adapter->vports[i]; - else - err = -EINVAL; - -free_vc_msg: - kfree(vc_msg); - - return err; + bitmap_fill(vcxn_mngr->free_xn_bm, IDPF_VC_XN_RING_LEN); } /** - * idpf_copy_data_to_vc_buf - Copy the virtchnl response data into the buffer. - * @adapter: driver specific private structure - * @vport: virtual port structure - * @ctlq_msg: msg to copy from - * @err_enum: err bit to set on error + * idpf_vc_xn_shutdown - Uninitialize virtchnl transaction object + * @vcxn_mngr: pointer to vc transaction manager struct * - * Copies the payload from ctlq_msg into virtchnl buffer. Returns 0 on success, - * negative on failure. + * All waiting threads will be woken-up and their transaction aborted. Further + * operations on that object will fail. */ -static int idpf_copy_data_to_vc_buf(struct idpf_adapter *adapter, - struct idpf_vport *vport, - struct idpf_ctlq_msg *ctlq_msg, - enum idpf_vport_vc_state err_enum) +static void idpf_vc_xn_shutdown(struct idpf_vc_xn_manager *vcxn_mngr) { - if (ctlq_msg->cookie.mbx.chnl_retval) { - if (vport) - set_bit(err_enum, vport->vc_state); - else - set_bit(err_enum, adapter->vc_state); + int i; - return -EINVAL; - } + spin_lock_bh(&vcxn_mngr->xn_bm_lock); + bitmap_zero(vcxn_mngr->free_xn_bm, IDPF_VC_XN_RING_LEN); + spin_unlock_bh(&vcxn_mngr->xn_bm_lock); - if (vport) - memcpy(vport->vc_msg, ctlq_msg->ctx.indirect.payload->va, - min_t(int, ctlq_msg->ctx.indirect.payload->size, - IDPF_CTLQ_MAX_BUF_LEN)); - else - memcpy(adapter->vc_msg, ctlq_msg->ctx.indirect.payload->va, - min_t(int, ctlq_msg->ctx.indirect.payload->size, - IDPF_CTLQ_MAX_BUF_LEN)); + for (i = 0; i < ARRAY_SIZE(vcxn_mngr->ring); i++) { + struct idpf_vc_xn *xn = &vcxn_mngr->ring[i]; - return 0; + idpf_vc_xn_lock(xn); + xn->state = IDPF_VC_XN_SHUTDOWN; + idpf_vc_xn_release_bufs(xn); + idpf_vc_xn_unlock(xn); + complete_all(&xn->completed); + } } /** - * idpf_recv_vchnl_op - helper function with common logic when handling the - * reception of VIRTCHNL OPs. - * @adapter: driver specific private structure - * @vport: virtual port structure - * @ctlq_msg: msg to copy from - * @state: state bit used on timeout check - * @err_state: err bit to set on error + * idpf_vc_xn_pop_free - Pop a free transaction from free list + * @vcxn_mngr: transaction manager to pop from + * + * Returns NULL if no free transactions */ -static void idpf_recv_vchnl_op(struct idpf_adapter *adapter, - struct idpf_vport *vport, - struct idpf_ctlq_msg *ctlq_msg, - enum idpf_vport_vc_state state, - enum idpf_vport_vc_state err_state) +static +struct idpf_vc_xn *idpf_vc_xn_pop_free(struct idpf_vc_xn_manager *vcxn_mngr) { - wait_queue_head_t *vchnl_wq; - int err; + struct idpf_vc_xn *xn = NULL; + unsigned long free_idx; - if (vport) - vchnl_wq = &vport->vchnl_wq; - else - vchnl_wq = &adapter->vchnl_wq; + spin_lock_bh(&vcxn_mngr->xn_bm_lock); + free_idx = find_first_bit(vcxn_mngr->free_xn_bm, IDPF_VC_XN_RING_LEN); + if (free_idx == IDPF_VC_XN_RING_LEN) + goto do_unlock; - err = idpf_copy_data_to_vc_buf(adapter, vport, ctlq_msg, err_state); - if (wq_has_sleeper(vchnl_wq)) { - if (vport) - set_bit(state, vport->vc_state); - else - set_bit(state, adapter->vc_state); + clear_bit(free_idx, vcxn_mngr->free_xn_bm); + xn = &vcxn_mngr->ring[free_idx]; + xn->salt = vcxn_mngr->salt++; - wake_up(vchnl_wq); - } else { - if (!err) { - dev_warn(&adapter->pdev->dev, "opcode %d received without waiting thread\n", - ctlq_msg->cookie.mbx.chnl_opcode); - } else { - /* Clear the errors since there is no sleeper to pass - * them on - */ - if (vport) - clear_bit(err_state, vport->vc_state); - else - clear_bit(err_state, adapter->vc_state); - } - } +do_unlock: + spin_unlock_bh(&vcxn_mngr->xn_bm_lock); + + return xn; } /** - * idpf_recv_mb_msg - Receive message over mailbox - * @adapter: Driver specific private structure - * @op: virtchannel operation code - * @msg: Received message holding buffer - * @msg_size: message size - * - * Will receive control queue message and posts the receive buffer. Returns 0 - * on success and negative on failure. + * idpf_vc_xn_push_free - Push a free transaction to free list + * @vcxn_mngr: transaction manager to push to + * @xn: transaction to push */ -int idpf_recv_mb_msg(struct idpf_adapter *adapter, u32 op, - void *msg, int msg_size) +static void idpf_vc_xn_push_free(struct idpf_vc_xn_manager *vcxn_mngr, + struct idpf_vc_xn *xn) { - struct idpf_vport *vport = NULL; - struct idpf_ctlq_msg ctlq_msg; - struct idpf_dma_mem *dma_mem; - bool work_done = false; - int num_retry = 2000; - u16 num_q_msg; - int err; - - while (1) { - struct idpf_vport_config *vport_config; - int payload_size = 0; - - /* Try to get one message */ - num_q_msg = 1; - dma_mem = NULL; - err = idpf_ctlq_recv(adapter->hw.arq, &num_q_msg, &ctlq_msg); - /* If no message then decide if we have to retry based on - * opcode - */ - if (err || !num_q_msg) { - /* Increasing num_retry to consider the delayed - * responses because of large number of VF's mailbox - * messages. If the mailbox message is received from - * the other side, we come out of the sleep cycle - * immediately else we wait for more time. - */ - if (!op || !num_retry--) - break; - if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags)) { - err = -EIO; - break; - } - msleep(20); - continue; - } + idpf_vc_xn_release_bufs(xn); + set_bit(xn->idx, vcxn_mngr->free_xn_bm); +} - /* If we are here a message is received. Check if we are looking - * for a specific message based on opcode. If it is different - * ignore and post buffers +/** + * idpf_vc_xn_exec - Perform a send/recv virtchnl transaction + * @adapter: driver specific private structure with vcxn_mngr + * @params: parameters for this particular transaction including + * -vc_op: virtchannel operation to send + * -send_buf: kvec iov for send buf and len + * -recv_buf: kvec iov for recv buf and len (ignored if NULL) + * -timeout_ms: timeout waiting for a reply (milliseconds) + * -async: don't wait for message reply, will lose caller context + * -async_handler: callback to handle async replies + * + * @returns >= 0 for success, the size of the initial reply (may or may not be + * >= @recv_buf.iov_len, but we never overflow @@recv_buf_iov_base). < 0 for + * error. + */ +static ssize_t idpf_vc_xn_exec(struct idpf_adapter *adapter, + const struct idpf_vc_xn_params *params) +{ + const struct kvec *send_buf = ¶ms->send_buf; + struct idpf_vc_xn *xn; + ssize_t retval; + u16 cookie; + + xn = idpf_vc_xn_pop_free(adapter->vcxn_mngr); + /* no free transactions available */ + if (!xn) + return -ENOSPC; + + idpf_vc_xn_lock(xn); + if (xn->state == IDPF_VC_XN_SHUTDOWN) { + retval = -ENXIO; + goto only_unlock; + } else if (xn->state != IDPF_VC_XN_IDLE) { + /* We're just going to clobber this transaction even though + * it's not IDLE. If we don't reuse it we could theoretically + * eventually leak all the free transactions and not be able to + * send any messages. At least this way we make an attempt to + * remain functional even though something really bad is + * happening that's corrupting what was supposed to be free + * transactions. */ - if (op && ctlq_msg.cookie.mbx.chnl_opcode != op) - goto post_buffs; + WARN_ONCE(1, "There should only be idle transactions in free list (idx %d op %d)\n", + xn->idx, xn->vc_op); + } - err = idpf_find_vport(adapter, &vport, &ctlq_msg); - if (err) - goto post_buffs; + xn->reply = params->recv_buf; + xn->reply_sz = 0; + xn->state = params->async ? IDPF_VC_XN_ASYNC : IDPF_VC_XN_WAITING; + xn->vc_op = params->vc_op; + xn->async_handler = params->async_handler; + idpf_vc_xn_unlock(xn); - if (ctlq_msg.data_len) - payload_size = ctlq_msg.ctx.indirect.payload->size; + if (!params->async) + reinit_completion(&xn->completed); + cookie = FIELD_PREP(IDPF_VC_XN_SALT_M, xn->salt) | + FIELD_PREP(IDPF_VC_XN_IDX_M, xn->idx); - /* All conditions are met. Either a message requested is - * received or we received a message to be processed - */ - switch (ctlq_msg.cookie.mbx.chnl_opcode) { - case VIRTCHNL2_OP_VERSION: - case VIRTCHNL2_OP_GET_CAPS: - if (ctlq_msg.cookie.mbx.chnl_retval) { - dev_err(&adapter->pdev->dev, "Failure initializing, vc op: %u retval: %u\n", - ctlq_msg.cookie.mbx.chnl_opcode, - ctlq_msg.cookie.mbx.chnl_retval); - err = -EBADMSG; - } else if (msg) { - memcpy(msg, ctlq_msg.ctx.indirect.payload->va, - min_t(int, payload_size, msg_size)); - } - work_done = true; - break; - case VIRTCHNL2_OP_CREATE_VPORT: - idpf_recv_vchnl_op(adapter, NULL, &ctlq_msg, - IDPF_VC_CREATE_VPORT, - IDPF_VC_CREATE_VPORT_ERR); - break; - case VIRTCHNL2_OP_ENABLE_VPORT: - idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, - IDPF_VC_ENA_VPORT, - IDPF_VC_ENA_VPORT_ERR); - break; - case VIRTCHNL2_OP_DISABLE_VPORT: - idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, - IDPF_VC_DIS_VPORT, - IDPF_VC_DIS_VPORT_ERR); - break; - case VIRTCHNL2_OP_DESTROY_VPORT: - idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, - IDPF_VC_DESTROY_VPORT, - IDPF_VC_DESTROY_VPORT_ERR); - break; - case VIRTCHNL2_OP_CONFIG_TX_QUEUES: - idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, - IDPF_VC_CONFIG_TXQ, - IDPF_VC_CONFIG_TXQ_ERR); - break; - case VIRTCHNL2_OP_CONFIG_RX_QUEUES: - idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, - IDPF_VC_CONFIG_RXQ, - IDPF_VC_CONFIG_RXQ_ERR); - break; - case VIRTCHNL2_OP_ENABLE_QUEUES: - idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, - IDPF_VC_ENA_QUEUES, - IDPF_VC_ENA_QUEUES_ERR); - break; - case VIRTCHNL2_OP_DISABLE_QUEUES: - idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, - IDPF_VC_DIS_QUEUES, - IDPF_VC_DIS_QUEUES_ERR); - break; - case VIRTCHNL2_OP_ADD_QUEUES: - idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, - IDPF_VC_ADD_QUEUES, - IDPF_VC_ADD_QUEUES_ERR); - break; - case VIRTCHNL2_OP_DEL_QUEUES: - idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, - IDPF_VC_DEL_QUEUES, - IDPF_VC_DEL_QUEUES_ERR); - break; - case VIRTCHNL2_OP_MAP_QUEUE_VECTOR: - idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, - IDPF_VC_MAP_IRQ, - IDPF_VC_MAP_IRQ_ERR); - break; - case VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR: - idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, - IDPF_VC_UNMAP_IRQ, - IDPF_VC_UNMAP_IRQ_ERR); - break; - case VIRTCHNL2_OP_GET_STATS: - idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, - IDPF_VC_GET_STATS, - IDPF_VC_GET_STATS_ERR); - break; - case VIRTCHNL2_OP_GET_RSS_LUT: - idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, - IDPF_VC_GET_RSS_LUT, - IDPF_VC_GET_RSS_LUT_ERR); - break; - case VIRTCHNL2_OP_SET_RSS_LUT: - idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, - IDPF_VC_SET_RSS_LUT, - IDPF_VC_SET_RSS_LUT_ERR); - break; - case VIRTCHNL2_OP_GET_RSS_KEY: - idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, - IDPF_VC_GET_RSS_KEY, - IDPF_VC_GET_RSS_KEY_ERR); - break; - case VIRTCHNL2_OP_SET_RSS_KEY: - idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, - IDPF_VC_SET_RSS_KEY, - IDPF_VC_SET_RSS_KEY_ERR); - break; - case VIRTCHNL2_OP_SET_SRIOV_VFS: - idpf_recv_vchnl_op(adapter, NULL, &ctlq_msg, - IDPF_VC_SET_SRIOV_VFS, - IDPF_VC_SET_SRIOV_VFS_ERR); - break; - case VIRTCHNL2_OP_ALLOC_VECTORS: - idpf_recv_vchnl_op(adapter, NULL, &ctlq_msg, - IDPF_VC_ALLOC_VECTORS, - IDPF_VC_ALLOC_VECTORS_ERR); - break; - case VIRTCHNL2_OP_DEALLOC_VECTORS: - idpf_recv_vchnl_op(adapter, NULL, &ctlq_msg, - IDPF_VC_DEALLOC_VECTORS, - IDPF_VC_DEALLOC_VECTORS_ERR); - break; - case VIRTCHNL2_OP_GET_PTYPE_INFO: - idpf_recv_vchnl_op(adapter, NULL, &ctlq_msg, - IDPF_VC_GET_PTYPE_INFO, - IDPF_VC_GET_PTYPE_INFO_ERR); - break; - case VIRTCHNL2_OP_LOOPBACK: - idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, - IDPF_VC_LOOPBACK_STATE, - IDPF_VC_LOOPBACK_STATE_ERR); - break; - case VIRTCHNL2_OP_CONFIG_PROMISCUOUS_MODE: - /* This message can only be sent asynchronously. As - * such we'll have lost the context in which it was - * called and thus can only really report if it looks - * like an error occurred. Don't bother setting ERR bit - * or waking chnl_wq since no work queue will be waiting - * to read the message. - */ - if (ctlq_msg.cookie.mbx.chnl_retval) { - dev_err(&adapter->pdev->dev, "Failed to set promiscuous mode: %d\n", - ctlq_msg.cookie.mbx.chnl_retval); - } - break; - case VIRTCHNL2_OP_ADD_MAC_ADDR: - vport_config = adapter->vport_config[vport->idx]; - if (test_and_clear_bit(IDPF_VPORT_ADD_MAC_REQ, - vport_config->flags)) { - /* Message was sent asynchronously. We don't - * normally print errors here, instead - * prefer to handle errors in the function - * calling wait_for_event. However, if - * asynchronous, the context in which the - * message was sent is lost. We can't really do - * anything about at it this point, but we - * should at a minimum indicate that it looks - * like something went wrong. Also don't bother - * setting ERR bit or waking vchnl_wq since no - * one will be waiting to read the async - * message. - */ - if (ctlq_msg.cookie.mbx.chnl_retval) - dev_err(&adapter->pdev->dev, "Failed to add MAC address: %d\n", - ctlq_msg.cookie.mbx.chnl_retval); - break; - } - idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, - IDPF_VC_ADD_MAC_ADDR, - IDPF_VC_ADD_MAC_ADDR_ERR); - break; - case VIRTCHNL2_OP_DEL_MAC_ADDR: - vport_config = adapter->vport_config[vport->idx]; - if (test_and_clear_bit(IDPF_VPORT_DEL_MAC_REQ, - vport_config->flags)) { - /* Message was sent asynchronously like the - * VIRTCHNL2_OP_ADD_MAC_ADDR - */ - if (ctlq_msg.cookie.mbx.chnl_retval) - dev_err(&adapter->pdev->dev, "Failed to delete MAC address: %d\n", - ctlq_msg.cookie.mbx.chnl_retval); - break; - } - idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, - IDPF_VC_DEL_MAC_ADDR, - IDPF_VC_DEL_MAC_ADDR_ERR); - break; - case VIRTCHNL2_OP_EVENT: - idpf_recv_event_msg(vport, &ctlq_msg); - break; - default: - dev_warn(&adapter->pdev->dev, - "Unhandled virtchnl response %d\n", - ctlq_msg.cookie.mbx.chnl_opcode); - break; - } + retval = idpf_send_mb_msg(adapter, params->vc_op, + send_buf->iov_len, send_buf->iov_base, + cookie); + if (retval) { + idpf_vc_xn_lock(xn); + goto release_and_unlock; + } -post_buffs: - if (ctlq_msg.data_len) - dma_mem = ctlq_msg.ctx.indirect.payload; - else - num_q_msg = 0; + if (params->async) + return 0; - err = idpf_ctlq_post_rx_buffs(&adapter->hw, adapter->hw.arq, - &num_q_msg, &dma_mem); - /* If post failed clear the only buffer we supplied */ - if (err && dma_mem) - dma_free_coherent(&adapter->pdev->dev, dma_mem->size, - dma_mem->va, dma_mem->pa); + wait_for_completion_timeout(&xn->completed, + msecs_to_jiffies(params->timeout_ms)); - /* Applies only if we are looking for a specific opcode */ - if (work_done) - break; + /* No need to check the return value; we check the final state of the + * transaction below. It's possible the transaction actually gets more + * timeout than specified if we get preempted here but after + * wait_for_completion_timeout returns. This should be non-issue + * however. + */ + idpf_vc_xn_lock(xn); + switch (xn->state) { + case IDPF_VC_XN_SHUTDOWN: + retval = -ENXIO; + goto only_unlock; + case IDPF_VC_XN_WAITING: + dev_notice_ratelimited(&adapter->pdev->dev, "Transaction timed-out (op %d, %dms)\n", + params->vc_op, params->timeout_ms); + retval = -ETIME; + break; + case IDPF_VC_XN_COMPLETED_SUCCESS: + retval = xn->reply_sz; + break; + case IDPF_VC_XN_COMPLETED_FAILED: + dev_notice_ratelimited(&adapter->pdev->dev, "Transaction failed (op %d)\n", + params->vc_op); + retval = -EIO; + break; + default: + /* Invalid state. */ + WARN_ON_ONCE(1); + retval = -EIO; + break; } - return err; +release_and_unlock: + idpf_vc_xn_push_free(adapter->vcxn_mngr, xn); + /* If we receive a VC reply after here, it will be dropped. */ +only_unlock: + idpf_vc_xn_unlock(xn); + + return retval; } /** - * __idpf_wait_for_event - wrapper function for wait on virtchannel response - * @adapter: Driver private data structure - * @vport: virtual port structure - * @state: check on state upon timeout - * @err_check: check if this specific error bit is set - * @timeout: Max time to wait + * idpf_vc_xn_forward_async - Handle async reply receives + * @adapter: private data struct + * @xn: transaction to handle + * @ctlq_msg: corresponding ctlq_msg * - * Checks if state is set upon expiry of timeout. Returns 0 on success, - * negative on failure. + * For async sends we're going to lose the caller's context so, if an + * async_handler was provided, it can deal with the reply, otherwise we'll just + * check and report if there is an error. */ -static int __idpf_wait_for_event(struct idpf_adapter *adapter, - struct idpf_vport *vport, - enum idpf_vport_vc_state state, - enum idpf_vport_vc_state err_check, - int timeout) +static int +idpf_vc_xn_forward_async(struct idpf_adapter *adapter, struct idpf_vc_xn *xn, + const struct idpf_ctlq_msg *ctlq_msg) { - int time_to_wait, num_waits; - wait_queue_head_t *vchnl_wq; - unsigned long *vc_state; + int err = 0; - time_to_wait = ((timeout <= IDPF_MAX_WAIT) ? timeout : IDPF_MAX_WAIT); - num_waits = ((timeout <= IDPF_MAX_WAIT) ? 1 : timeout / IDPF_MAX_WAIT); + if (ctlq_msg->cookie.mbx.chnl_opcode != xn->vc_op) { + dev_err_ratelimited(&adapter->pdev->dev, "Async message opcode does not match transaction opcode (msg: %d) (xn: %d)\n", + ctlq_msg->cookie.mbx.chnl_opcode, xn->vc_op); + xn->reply_sz = 0; + err = -EINVAL; + goto release_bufs; + } - if (vport) { - vchnl_wq = &vport->vchnl_wq; - vc_state = vport->vc_state; - } else { - vchnl_wq = &adapter->vchnl_wq; - vc_state = adapter->vc_state; + if (xn->async_handler) { + err = xn->async_handler(adapter, xn, ctlq_msg); + goto release_bufs; + } + + if (ctlq_msg->cookie.mbx.chnl_retval) { + xn->reply_sz = 0; + dev_err_ratelimited(&adapter->pdev->dev, "Async message failure (op %d)\n", + ctlq_msg->cookie.mbx.chnl_opcode); + err = -EINVAL; } - while (num_waits) { - int event; +release_bufs: + idpf_vc_xn_push_free(adapter->vcxn_mngr, xn); + + return err; +} + +/** + * idpf_vc_xn_forward_reply - copy a reply back to receiving thread + * @adapter: driver specific private structure with vcxn_mngr + * @ctlq_msg: controlq message to send back to receiving thread + */ +static int +idpf_vc_xn_forward_reply(struct idpf_adapter *adapter, + const struct idpf_ctlq_msg *ctlq_msg) +{ + const void *payload = NULL; + size_t payload_size = 0; + struct idpf_vc_xn *xn; + u16 msg_info; + int err = 0; + u16 xn_idx; + u16 salt; + + msg_info = ctlq_msg->ctx.sw_cookie.data; + xn_idx = FIELD_GET(IDPF_VC_XN_IDX_M, msg_info); + if (xn_idx >= ARRAY_SIZE(adapter->vcxn_mngr->ring)) { + dev_err_ratelimited(&adapter->pdev->dev, "Out of bounds cookie received: %02x\n", + xn_idx); + return -EINVAL; + } + xn = &adapter->vcxn_mngr->ring[xn_idx]; + salt = FIELD_GET(IDPF_VC_XN_SALT_M, msg_info); + if (xn->salt != salt) { + dev_err_ratelimited(&adapter->pdev->dev, "Transaction salt does not match (%02x != %02x)\n", + xn->salt, salt); + return -EINVAL; + } - /* If we are here and a reset is detected do not wait but - * return. Reset timing is out of drivers control. So - * while we are cleaning resources as part of reset if the - * underlying HW mailbox is gone, wait on mailbox messages - * is not meaningful + idpf_vc_xn_lock(xn); + switch (xn->state) { + case IDPF_VC_XN_WAITING: + /* success */ + break; + case IDPF_VC_XN_IDLE: + dev_err_ratelimited(&adapter->pdev->dev, "Unexpected or belated VC reply (op %d)\n", + ctlq_msg->cookie.mbx.chnl_opcode); + err = -EINVAL; + goto out_unlock; + case IDPF_VC_XN_SHUTDOWN: + /* ENXIO is a bit special here as the recv msg loop uses that + * know if it should stop trying to clean the ring if we lost + * the virtchnl. We need to stop playing with registers and + * yield. */ - if (idpf_is_reset_detected(adapter)) - return 0; + err = -ENXIO; + goto out_unlock; + case IDPF_VC_XN_ASYNC: + err = idpf_vc_xn_forward_async(adapter, xn, ctlq_msg); + idpf_vc_xn_unlock(xn); + return err; + default: + dev_err_ratelimited(&adapter->pdev->dev, "Overwriting VC reply (op %d)\n", + ctlq_msg->cookie.mbx.chnl_opcode); + err = -EBUSY; + goto out_unlock; + } - event = wait_event_timeout(*vchnl_wq, - test_and_clear_bit(state, vc_state), - msecs_to_jiffies(time_to_wait)); - if (event) { - if (test_and_clear_bit(err_check, vc_state)) { - dev_err(&adapter->pdev->dev, "VC response error %s\n", - idpf_vport_vc_state_str[err_check]); + if (ctlq_msg->cookie.mbx.chnl_opcode != xn->vc_op) { + dev_err_ratelimited(&adapter->pdev->dev, "Message opcode does not match transaction opcode (msg: %d) (xn: %d)\n", + ctlq_msg->cookie.mbx.chnl_opcode, xn->vc_op); + xn->reply_sz = 0; + xn->state = IDPF_VC_XN_COMPLETED_FAILED; + err = -EINVAL; + goto out_unlock; + } - return -EINVAL; - } + if (ctlq_msg->cookie.mbx.chnl_retval) { + xn->reply_sz = 0; + xn->state = IDPF_VC_XN_COMPLETED_FAILED; + err = -EINVAL; + goto out_unlock; + } - return 0; - } - num_waits--; + if (ctlq_msg->data_len) { + payload = ctlq_msg->ctx.indirect.payload->va; + payload_size = ctlq_msg->ctx.indirect.payload->size; } - /* Timeout occurred */ - dev_err(&adapter->pdev->dev, "VC timeout, state = %s\n", - idpf_vport_vc_state_str[state]); + xn->reply_sz = payload_size; + xn->state = IDPF_VC_XN_COMPLETED_SUCCESS; - return -ETIMEDOUT; + if (xn->reply.iov_base && xn->reply.iov_len && payload_size) + memcpy(xn->reply.iov_base, payload, + min_t(size_t, xn->reply.iov_len, payload_size)); + +out_unlock: + idpf_vc_xn_unlock(xn); + /* we _cannot_ hold lock while calling complete */ + complete(&xn->completed); + + return err; } /** - * idpf_min_wait_for_event - wait for virtchannel response - * @adapter: Driver private data structure - * @vport: virtual port structure - * @state: check on state upon timeout - * @err_check: check if this specific error bit is set + * idpf_recv_mb_msg - Receive message over mailbox + * @adapter: Driver specific private structure * - * Returns 0 on success, negative on failure. + * Will receive control queue message and posts the receive buffer. Returns 0 + * on success and negative on failure. */ -static int idpf_min_wait_for_event(struct idpf_adapter *adapter, - struct idpf_vport *vport, - enum idpf_vport_vc_state state, - enum idpf_vport_vc_state err_check) +int idpf_recv_mb_msg(struct idpf_adapter *adapter) { - return __idpf_wait_for_event(adapter, vport, state, err_check, - IDPF_WAIT_FOR_EVENT_TIMEO_MIN); -} + struct idpf_ctlq_msg ctlq_msg; + struct idpf_dma_mem *dma_mem; + int post_err, err; + u16 num_recv; -/** - * idpf_wait_for_event - wait for virtchannel response - * @adapter: Driver private data structure - * @vport: virtual port structure - * @state: check on state upon timeout after 500ms - * @err_check: check if this specific error bit is set - * - * Returns 0 on success, negative on failure. - */ -static int idpf_wait_for_event(struct idpf_adapter *adapter, - struct idpf_vport *vport, - enum idpf_vport_vc_state state, - enum idpf_vport_vc_state err_check) -{ - /* Increasing the timeout in __IDPF_INIT_SW flow to consider large - * number of VF's mailbox message responses. When a message is received - * on mailbox, this thread is woken up by the idpf_recv_mb_msg before - * the timeout expires. Only in the error case i.e. if no message is - * received on mailbox, we wait for the complete timeout which is - * less likely to happen. - */ - return __idpf_wait_for_event(adapter, vport, state, err_check, - IDPF_WAIT_FOR_EVENT_TIMEO); + while (1) { + /* This will get <= num_recv messages and output how many + * actually received on num_recv. + */ + num_recv = 1; + err = idpf_ctlq_recv(adapter->hw.arq, &num_recv, &ctlq_msg); + if (err || !num_recv) + break; + + if (ctlq_msg.data_len) { + dma_mem = ctlq_msg.ctx.indirect.payload; + } else { + dma_mem = NULL; + num_recv = 0; + } + + if (ctlq_msg.cookie.mbx.chnl_opcode == VIRTCHNL2_OP_EVENT) + idpf_recv_event_msg(adapter, &ctlq_msg); + else + err = idpf_vc_xn_forward_reply(adapter, &ctlq_msg); + + post_err = idpf_ctlq_post_rx_buffs(&adapter->hw, + adapter->hw.arq, + &num_recv, &dma_mem); + + /* If post failed clear the only buffer we supplied */ + if (post_err) { + if (dma_mem) + dmam_free_coherent(&adapter->pdev->dev, + dma_mem->size, dma_mem->va, + dma_mem->pa); + break; + } + + /* virtchnl trying to shutdown, stop cleaning */ + if (err == -ENXIO) + break; + } + + return err; } /** @@ -785,7 +776,11 @@ static int idpf_wait_for_marker_event(struct idpf_vport *vport) */ static int idpf_send_ver_msg(struct idpf_adapter *adapter) { + struct idpf_vc_xn_params xn_params = {}; struct virtchnl2_version_info vvi; + ssize_t reply_sz; + u32 major, minor; + int err = 0; if (adapter->virt_ver_maj) { vvi.major = cpu_to_le32(adapter->virt_ver_maj); @@ -795,43 +790,29 @@ static int idpf_send_ver_msg(struct idpf_adapter *adapter) vvi.minor = cpu_to_le32(IDPF_VIRTCHNL_VERSION_MINOR); } - return idpf_send_mb_msg(adapter, VIRTCHNL2_OP_VERSION, sizeof(vvi), - (u8 *)&vvi); -} - -/** - * idpf_recv_ver_msg - Receive virtchnl version message - * @adapter: Driver specific private structure - * - * Receive virtchnl version message. Returns 0 on success, -EAGAIN if we need - * to send version message again, otherwise negative on failure. - */ -static int idpf_recv_ver_msg(struct idpf_adapter *adapter) -{ - struct virtchnl2_version_info vvi; - u32 major, minor; - int err; + xn_params.vc_op = VIRTCHNL2_OP_VERSION; + xn_params.send_buf.iov_base = &vvi; + xn_params.send_buf.iov_len = sizeof(vvi); + xn_params.recv_buf = xn_params.send_buf; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; - err = idpf_recv_mb_msg(adapter, VIRTCHNL2_OP_VERSION, &vvi, - sizeof(vvi)); - if (err) - return err; + reply_sz = idpf_vc_xn_exec(adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; + if (reply_sz < sizeof(vvi)) + return -EIO; major = le32_to_cpu(vvi.major); minor = le32_to_cpu(vvi.minor); if (major > IDPF_VIRTCHNL_VERSION_MAJOR) { - dev_warn(&adapter->pdev->dev, - "Virtchnl major version (%d) greater than supported\n", - major); - + dev_warn(&adapter->pdev->dev, "Virtchnl major version greater than supported\n"); return -EINVAL; } if (major == IDPF_VIRTCHNL_VERSION_MAJOR && minor > IDPF_VIRTCHNL_VERSION_MINOR) - dev_warn(&adapter->pdev->dev, - "Virtchnl minor version (%d) didn't match\n", minor); + dev_warn(&adapter->pdev->dev, "Virtchnl minor version didn't match\n"); /* If we have a mismatch, resend version to update receiver on what * version we will use. @@ -856,7 +837,9 @@ static int idpf_recv_ver_msg(struct idpf_adapter *adapter) */ static int idpf_send_get_caps_msg(struct idpf_adapter *adapter) { - struct virtchnl2_get_capabilities caps = { }; + struct virtchnl2_get_capabilities caps = {}; + struct idpf_vc_xn_params xn_params = {}; + ssize_t reply_sz; caps.csum_caps = cpu_to_le32(VIRTCHNL2_CAP_TX_CSUM_L3_IPV4 | @@ -913,21 +896,20 @@ static int idpf_send_get_caps_msg(struct idpf_adapter *adapter) VIRTCHNL2_CAP_PROMISC | VIRTCHNL2_CAP_LOOPBACK); - return idpf_send_mb_msg(adapter, VIRTCHNL2_OP_GET_CAPS, sizeof(caps), - (u8 *)&caps); -} + xn_params.vc_op = VIRTCHNL2_OP_GET_CAPS; + xn_params.send_buf.iov_base = ∩︀ + xn_params.send_buf.iov_len = sizeof(caps); + xn_params.recv_buf.iov_base = &adapter->caps; + xn_params.recv_buf.iov_len = sizeof(adapter->caps); + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; -/** - * idpf_recv_get_caps_msg - Receive virtchnl get capabilities message - * @adapter: Driver specific private structure - * - * Receive virtchnl get capabilities message. Returns 0 on success, negative on - * failure. - */ -static int idpf_recv_get_caps_msg(struct idpf_adapter *adapter) -{ - return idpf_recv_mb_msg(adapter, VIRTCHNL2_OP_GET_CAPS, &adapter->caps, - sizeof(struct virtchnl2_get_capabilities)); + reply_sz = idpf_vc_xn_exec(adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; + if (reply_sz < sizeof(adapter->caps)) + return -EIO; + + return 0; } /** @@ -1254,8 +1236,10 @@ int idpf_send_create_vport_msg(struct idpf_adapter *adapter, struct idpf_vport_max_q *max_q) { struct virtchnl2_create_vport *vport_msg; + struct idpf_vc_xn_params xn_params = {}; u16 idx = adapter->next_vport; int err, buf_size; + ssize_t reply_sz; buf_size = sizeof(struct virtchnl2_create_vport); if (!adapter->vport_params_reqd[idx]) { @@ -1286,35 +1270,38 @@ int idpf_send_create_vport_msg(struct idpf_adapter *adapter, return err; } - mutex_lock(&adapter->vc_buf_lock); - - err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_CREATE_VPORT, buf_size, - (u8 *)vport_msg); - if (err) - goto rel_lock; - - err = idpf_wait_for_event(adapter, NULL, IDPF_VC_CREATE_VPORT, - IDPF_VC_CREATE_VPORT_ERR); - if (err) { - dev_err(&adapter->pdev->dev, "Failed to receive create vport message"); - - goto rel_lock; - } - if (!adapter->vport_params_recvd[idx]) { adapter->vport_params_recvd[idx] = kzalloc(IDPF_CTLQ_MAX_BUF_LEN, GFP_KERNEL); if (!adapter->vport_params_recvd[idx]) { err = -ENOMEM; - goto rel_lock; + goto free_vport_params; } } - vport_msg = adapter->vport_params_recvd[idx]; - memcpy(vport_msg, adapter->vc_msg, IDPF_CTLQ_MAX_BUF_LEN); + xn_params.vc_op = VIRTCHNL2_OP_CREATE_VPORT; + xn_params.send_buf.iov_base = vport_msg; + xn_params.send_buf.iov_len = buf_size; + xn_params.recv_buf.iov_base = adapter->vport_params_recvd[idx]; + xn_params.recv_buf.iov_len = IDPF_CTLQ_MAX_BUF_LEN; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + reply_sz = idpf_vc_xn_exec(adapter, &xn_params); + if (reply_sz < 0) { + err = reply_sz; + goto free_vport_params; + } + if (reply_sz < IDPF_CTLQ_MAX_BUF_LEN) { + err = -EIO; + goto free_vport_params; + } -rel_lock: - mutex_unlock(&adapter->vc_buf_lock); + return 0; + +free_vport_params: + kfree(adapter->vport_params_recvd[idx]); + adapter->vport_params_recvd[idx] = NULL; + kfree(adapter->vport_params_reqd[idx]); + adapter->vport_params_reqd[idx] = NULL; return err; } @@ -1366,26 +1353,19 @@ int idpf_check_supported_desc_ids(struct idpf_vport *vport) */ int idpf_send_destroy_vport_msg(struct idpf_vport *vport) { - struct idpf_adapter *adapter = vport->adapter; + struct idpf_vc_xn_params xn_params = {}; struct virtchnl2_vport v_id; - int err; + ssize_t reply_sz; v_id.vport_id = cpu_to_le32(vport->vport_id); - mutex_lock(&vport->vc_buf_lock); + xn_params.vc_op = VIRTCHNL2_OP_DESTROY_VPORT; + xn_params.send_buf.iov_base = &v_id; + xn_params.send_buf.iov_len = sizeof(v_id); + xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC; + reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); - err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_DESTROY_VPORT, - sizeof(v_id), (u8 *)&v_id); - if (err) - goto rel_lock; - - err = idpf_min_wait_for_event(adapter, vport, IDPF_VC_DESTROY_VPORT, - IDPF_VC_DESTROY_VPORT_ERR); - -rel_lock: - mutex_unlock(&vport->vc_buf_lock); - - return err; + return reply_sz < 0 ? reply_sz : 0; } /** @@ -1397,26 +1377,19 @@ rel_lock: */ int idpf_send_enable_vport_msg(struct idpf_vport *vport) { - struct idpf_adapter *adapter = vport->adapter; + struct idpf_vc_xn_params xn_params = {}; struct virtchnl2_vport v_id; - int err; + ssize_t reply_sz; v_id.vport_id = cpu_to_le32(vport->vport_id); - mutex_lock(&vport->vc_buf_lock); - - err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_ENABLE_VPORT, - sizeof(v_id), (u8 *)&v_id); - if (err) - goto rel_lock; - - err = idpf_wait_for_event(adapter, vport, IDPF_VC_ENA_VPORT, - IDPF_VC_ENA_VPORT_ERR); + xn_params.vc_op = VIRTCHNL2_OP_ENABLE_VPORT; + xn_params.send_buf.iov_base = &v_id; + xn_params.send_buf.iov_len = sizeof(v_id); + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); -rel_lock: - mutex_unlock(&vport->vc_buf_lock); - - return err; + return reply_sz < 0 ? reply_sz : 0; } /** @@ -1428,26 +1401,19 @@ rel_lock: */ int idpf_send_disable_vport_msg(struct idpf_vport *vport) { - struct idpf_adapter *adapter = vport->adapter; + struct idpf_vc_xn_params xn_params = {}; struct virtchnl2_vport v_id; - int err; + ssize_t reply_sz; v_id.vport_id = cpu_to_le32(vport->vport_id); - mutex_lock(&vport->vc_buf_lock); + xn_params.vc_op = VIRTCHNL2_OP_DISABLE_VPORT; + xn_params.send_buf.iov_base = &v_id; + xn_params.send_buf.iov_len = sizeof(v_id); + xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC; + reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); - err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_DISABLE_VPORT, - sizeof(v_id), (u8 *)&v_id); - if (err) - goto rel_lock; - - err = idpf_min_wait_for_event(adapter, vport, IDPF_VC_DIS_VPORT, - IDPF_VC_DIS_VPORT_ERR); - -rel_lock: - mutex_unlock(&vport->vc_buf_lock); - - return err; + return reply_sz < 0 ? reply_sz : 0; } /** @@ -1459,11 +1425,13 @@ rel_lock: */ static int idpf_send_config_tx_queues_msg(struct idpf_vport *vport) { - struct virtchnl2_config_tx_queues *ctq; + struct virtchnl2_config_tx_queues *ctq __free(kfree) = NULL; + struct virtchnl2_txq_info *qi __free(kfree) = NULL; + struct idpf_vc_xn_params xn_params = {}; u32 config_sz, chunk_sz, buf_sz; int totqs, num_msgs, num_chunks; - struct virtchnl2_txq_info *qi; - int err = 0, i, k = 0; + ssize_t reply_sz; + int i, k = 0; totqs = vport->num_txq + vport->num_complq; qi = kcalloc(totqs, sizeof(struct virtchnl2_txq_info), GFP_KERNEL); @@ -1524,10 +1492,8 @@ static int idpf_send_config_tx_queues_msg(struct idpf_vport *vport) } /* Make sure accounting agrees */ - if (k != totqs) { - err = -EINVAL; - goto error; - } + if (k != totqs) + return -EINVAL; /* Chunk up the queue contexts into multiple messages to avoid * sending a control queue message buffer that is too large @@ -1541,12 +1507,11 @@ static int idpf_send_config_tx_queues_msg(struct idpf_vport *vport) buf_sz = struct_size(ctq, qinfo, num_chunks); ctq = kzalloc(buf_sz, GFP_KERNEL); - if (!ctq) { - err = -ENOMEM; - goto error; - } + if (!ctq) + return -ENOMEM; - mutex_lock(&vport->vc_buf_lock); + xn_params.vc_op = VIRTCHNL2_OP_CONFIG_TX_QUEUES; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; for (i = 0, k = 0; i < num_msgs; i++) { memset(ctq, 0, buf_sz); @@ -1554,17 +1519,11 @@ static int idpf_send_config_tx_queues_msg(struct idpf_vport *vport) ctq->num_qinfo = cpu_to_le16(num_chunks); memcpy(ctq->qinfo, &qi[k], chunk_sz * num_chunks); - err = idpf_send_mb_msg(vport->adapter, - VIRTCHNL2_OP_CONFIG_TX_QUEUES, - buf_sz, (u8 *)ctq); - if (err) - goto mbx_error; - - err = idpf_wait_for_event(vport->adapter, vport, - IDPF_VC_CONFIG_TXQ, - IDPF_VC_CONFIG_TXQ_ERR); - if (err) - goto mbx_error; + xn_params.send_buf.iov_base = ctq; + xn_params.send_buf.iov_len = buf_sz; + reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; k += num_chunks; totqs -= num_chunks; @@ -1573,13 +1532,7 @@ static int idpf_send_config_tx_queues_msg(struct idpf_vport *vport) buf_sz = struct_size(ctq, qinfo, num_chunks); } -mbx_error: - mutex_unlock(&vport->vc_buf_lock); - kfree(ctq); -error: - kfree(qi); - - return err; + return 0; } /** @@ -1591,11 +1544,13 @@ error: */ static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport) { - struct virtchnl2_config_rx_queues *crq; + struct virtchnl2_config_rx_queues *crq __free(kfree) = NULL; + struct virtchnl2_rxq_info *qi __free(kfree) = NULL; + struct idpf_vc_xn_params xn_params = {}; u32 config_sz, chunk_sz, buf_sz; int totqs, num_msgs, num_chunks; - struct virtchnl2_rxq_info *qi; - int err = 0, i, k = 0; + ssize_t reply_sz; + int i, k = 0; totqs = vport->num_rxq + vport->num_bufq; qi = kcalloc(totqs, sizeof(struct virtchnl2_rxq_info), GFP_KERNEL); @@ -1676,10 +1631,8 @@ common_qi_fields: } /* Make sure accounting agrees */ - if (k != totqs) { - err = -EINVAL; - goto error; - } + if (k != totqs) + return -EINVAL; /* Chunk up the queue contexts into multiple messages to avoid * sending a control queue message buffer that is too large @@ -1693,12 +1646,11 @@ common_qi_fields: buf_sz = struct_size(crq, qinfo, num_chunks); crq = kzalloc(buf_sz, GFP_KERNEL); - if (!crq) { - err = -ENOMEM; - goto error; - } + if (!crq) + return -ENOMEM; - mutex_lock(&vport->vc_buf_lock); + xn_params.vc_op = VIRTCHNL2_OP_CONFIG_RX_QUEUES; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; for (i = 0, k = 0; i < num_msgs; i++) { memset(crq, 0, buf_sz); @@ -1706,17 +1658,11 @@ common_qi_fields: crq->num_qinfo = cpu_to_le16(num_chunks); memcpy(crq->qinfo, &qi[k], chunk_sz * num_chunks); - err = idpf_send_mb_msg(vport->adapter, - VIRTCHNL2_OP_CONFIG_RX_QUEUES, - buf_sz, (u8 *)crq); - if (err) - goto mbx_error; - - err = idpf_wait_for_event(vport->adapter, vport, - IDPF_VC_CONFIG_RXQ, - IDPF_VC_CONFIG_RXQ_ERR); - if (err) - goto mbx_error; + xn_params.send_buf.iov_base = crq; + xn_params.send_buf.iov_len = buf_sz; + reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; k += num_chunks; totqs -= num_chunks; @@ -1725,42 +1671,28 @@ common_qi_fields: buf_sz = struct_size(crq, qinfo, num_chunks); } -mbx_error: - mutex_unlock(&vport->vc_buf_lock); - kfree(crq); -error: - kfree(qi); - - return err; + return 0; } /** * idpf_send_ena_dis_queues_msg - Send virtchnl enable or disable * queues message * @vport: virtual port data structure - * @vc_op: virtchnl op code to send + * @ena: if true enable, false disable * * Send enable or disable queues virtchnl message. Returns 0 on success, * negative on failure. */ -static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, u32 vc_op) +static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, bool ena) { + struct virtchnl2_del_ena_dis_queues *eq __free(kfree) = NULL; + struct virtchnl2_queue_chunk *qc __free(kfree) = NULL; u32 num_msgs, num_chunks, num_txq, num_rxq, num_q; - struct idpf_adapter *adapter = vport->adapter; - struct virtchnl2_del_ena_dis_queues *eq; + struct idpf_vc_xn_params xn_params = {}; struct virtchnl2_queue_chunks *qcs; - struct virtchnl2_queue_chunk *qc; u32 config_sz, chunk_sz, buf_sz; - int i, j, k = 0, err = 0; - - /* validate virtchnl op */ - switch (vc_op) { - case VIRTCHNL2_OP_ENABLE_QUEUES: - case VIRTCHNL2_OP_DISABLE_QUEUES: - break; - default: - return -EINVAL; - } + ssize_t reply_sz; + int i, j, k = 0; num_txq = vport->num_txq + vport->num_complq; num_rxq = vport->num_rxq + vport->num_bufq; @@ -1779,10 +1711,8 @@ static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, u32 vc_op) qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); } } - if (vport->num_txq != k) { - err = -EINVAL; - goto error; - } + if (vport->num_txq != k) + return -EINVAL; if (!idpf_is_queue_model_split(vport->txq_model)) goto setup_rx; @@ -1794,10 +1724,8 @@ static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, u32 vc_op) qc[k].start_queue_id = cpu_to_le32(tx_qgrp->complq->q_id); qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); } - if (vport->num_complq != (k - vport->num_txq)) { - err = -EINVAL; - goto error; - } + if (vport->num_complq != (k - vport->num_txq)) + return -EINVAL; setup_rx: for (i = 0; i < vport->num_rxq_grp; i++) { @@ -1823,10 +1751,8 @@ setup_rx: qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); } } - if (vport->num_rxq != k - (vport->num_txq + vport->num_complq)) { - err = -EINVAL; - goto error; - } + if (vport->num_rxq != k - (vport->num_txq + vport->num_complq)) + return -EINVAL; if (!idpf_is_queue_model_split(vport->rxq_model)) goto send_msg; @@ -1845,10 +1771,8 @@ setup_rx: } if (vport->num_bufq != k - (vport->num_txq + vport->num_complq + - vport->num_rxq)) { - err = -EINVAL; - goto error; - } + vport->num_rxq)) + return -EINVAL; send_msg: /* Chunk up the queue info into multiple messages */ @@ -1861,12 +1785,16 @@ send_msg: buf_sz = struct_size(eq, chunks.chunks, num_chunks); eq = kzalloc(buf_sz, GFP_KERNEL); - if (!eq) { - err = -ENOMEM; - goto error; - } + if (!eq) + return -ENOMEM; - mutex_lock(&vport->vc_buf_lock); + if (ena) { + xn_params.vc_op = VIRTCHNL2_OP_ENABLE_QUEUES; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + } else { + xn_params.vc_op = VIRTCHNL2_OP_DISABLE_QUEUES; + xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC; + } for (i = 0, k = 0; i < num_msgs; i++) { memset(eq, 0, buf_sz); @@ -1875,20 +1803,11 @@ send_msg: qcs = &eq->chunks; memcpy(qcs->chunks, &qc[k], chunk_sz * num_chunks); - err = idpf_send_mb_msg(adapter, vc_op, buf_sz, (u8 *)eq); - if (err) - goto mbx_error; - - if (vc_op == VIRTCHNL2_OP_ENABLE_QUEUES) - err = idpf_wait_for_event(adapter, vport, - IDPF_VC_ENA_QUEUES, - IDPF_VC_ENA_QUEUES_ERR); - else - err = idpf_min_wait_for_event(adapter, vport, - IDPF_VC_DIS_QUEUES, - IDPF_VC_DIS_QUEUES_ERR); - if (err) - goto mbx_error; + xn_params.send_buf.iov_base = eq; + xn_params.send_buf.iov_len = buf_sz; + reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; k += num_chunks; num_q -= num_chunks; @@ -1897,13 +1816,7 @@ send_msg: buf_sz = struct_size(eq, chunks.chunks, num_chunks); } -mbx_error: - mutex_unlock(&vport->vc_buf_lock); - kfree(eq); -error: - kfree(qc); - - return err; + return 0; } /** @@ -1917,12 +1830,13 @@ error: */ int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map) { - struct idpf_adapter *adapter = vport->adapter; - struct virtchnl2_queue_vector_maps *vqvm; - struct virtchnl2_queue_vector *vqv; + struct virtchnl2_queue_vector_maps *vqvm __free(kfree) = NULL; + struct virtchnl2_queue_vector *vqv __free(kfree) = NULL; + struct idpf_vc_xn_params xn_params = {}; u32 config_sz, chunk_sz, buf_sz; u32 num_msgs, num_chunks, num_q; - int i, j, k = 0, err = 0; + ssize_t reply_sz; + int i, j, k = 0; num_q = vport->num_txq + vport->num_rxq; @@ -1952,10 +1866,8 @@ int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map) } } - if (vport->num_txq != k) { - err = -EINVAL; - goto error; - } + if (vport->num_txq != k) + return -EINVAL; for (i = 0; i < vport->num_rxq_grp; i++) { struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; @@ -1982,15 +1894,11 @@ int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map) } if (idpf_is_queue_model_split(vport->txq_model)) { - if (vport->num_rxq != k - vport->num_complq) { - err = -EINVAL; - goto error; - } + if (vport->num_rxq != k - vport->num_complq) + return -EINVAL; } else { - if (vport->num_rxq != k - vport->num_txq) { - err = -EINVAL; - goto error; - } + if (vport->num_rxq != k - vport->num_txq) + return -EINVAL; } /* Chunk up the vector info into multiple messages */ @@ -2003,39 +1911,28 @@ int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map) buf_sz = struct_size(vqvm, qv_maps, num_chunks); vqvm = kzalloc(buf_sz, GFP_KERNEL); - if (!vqvm) { - err = -ENOMEM; - goto error; - } + if (!vqvm) + return -ENOMEM; - mutex_lock(&vport->vc_buf_lock); + if (map) { + xn_params.vc_op = VIRTCHNL2_OP_MAP_QUEUE_VECTOR; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + } else { + xn_params.vc_op = VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR; + xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC; + } for (i = 0, k = 0; i < num_msgs; i++) { memset(vqvm, 0, buf_sz); + xn_params.send_buf.iov_base = vqvm; + xn_params.send_buf.iov_len = buf_sz; vqvm->vport_id = cpu_to_le32(vport->vport_id); vqvm->num_qv_maps = cpu_to_le16(num_chunks); memcpy(vqvm->qv_maps, &vqv[k], chunk_sz * num_chunks); - if (map) { - err = idpf_send_mb_msg(adapter, - VIRTCHNL2_OP_MAP_QUEUE_VECTOR, - buf_sz, (u8 *)vqvm); - if (!err) - err = idpf_wait_for_event(adapter, vport, - IDPF_VC_MAP_IRQ, - IDPF_VC_MAP_IRQ_ERR); - } else { - err = idpf_send_mb_msg(adapter, - VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR, - buf_sz, (u8 *)vqvm); - if (!err) - err = - idpf_min_wait_for_event(adapter, vport, - IDPF_VC_UNMAP_IRQ, - IDPF_VC_UNMAP_IRQ_ERR); - } - if (err) - goto mbx_error; + reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; k += num_chunks; num_q -= num_chunks; @@ -2044,13 +1941,7 @@ int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map) buf_sz = struct_size(vqvm, qv_maps, num_chunks); } -mbx_error: - mutex_unlock(&vport->vc_buf_lock); - kfree(vqvm); -error: - kfree(vqv); - - return err; + return 0; } /** @@ -2062,7 +1953,7 @@ error: */ int idpf_send_enable_queues_msg(struct idpf_vport *vport) { - return idpf_send_ena_dis_queues_msg(vport, VIRTCHNL2_OP_ENABLE_QUEUES); + return idpf_send_ena_dis_queues_msg(vport, true); } /** @@ -2076,7 +1967,7 @@ int idpf_send_disable_queues_msg(struct idpf_vport *vport) { int err, i; - err = idpf_send_ena_dis_queues_msg(vport, VIRTCHNL2_OP_DISABLE_QUEUES); + err = idpf_send_ena_dis_queues_msg(vport, false); if (err) return err; @@ -2087,8 +1978,10 @@ int idpf_send_disable_queues_msg(struct idpf_vport *vport) set_bit(__IDPF_Q_POLL_MODE, vport->txqs[i]->flags); /* schedule the napi to receive all the marker packets */ + local_bh_disable(); for (i = 0; i < vport->num_q_vectors; i++) napi_schedule(&vport->q_vectors[i].napi); + local_bh_enable(); return idpf_wait_for_marker_event(vport); } @@ -2122,22 +2015,21 @@ static void idpf_convert_reg_to_queue_chunks(struct virtchnl2_queue_chunk *dchun */ int idpf_send_delete_queues_msg(struct idpf_vport *vport) { - struct idpf_adapter *adapter = vport->adapter; + struct virtchnl2_del_ena_dis_queues *eq __free(kfree) = NULL; struct virtchnl2_create_vport *vport_params; struct virtchnl2_queue_reg_chunks *chunks; - struct virtchnl2_del_ena_dis_queues *eq; + struct idpf_vc_xn_params xn_params = {}; struct idpf_vport_config *vport_config; u16 vport_idx = vport->idx; - int buf_size, err; + ssize_t reply_sz; u16 num_chunks; + int buf_size; - vport_config = adapter->vport_config[vport_idx]; + vport_config = vport->adapter->vport_config[vport_idx]; if (vport_config->req_qs_chunks) { - struct virtchnl2_add_queues *vc_aq = - (struct virtchnl2_add_queues *)vport_config->req_qs_chunks; - chunks = &vc_aq->chunks; + chunks = &vport_config->req_qs_chunks->chunks; } else { - vport_params = adapter->vport_params_recvd[vport_idx]; + vport_params = vport->adapter->vport_params_recvd[vport_idx]; chunks = &vport_params->chunks; } @@ -2154,21 +2046,13 @@ int idpf_send_delete_queues_msg(struct idpf_vport *vport) idpf_convert_reg_to_queue_chunks(eq->chunks.chunks, chunks->chunks, num_chunks); - mutex_lock(&vport->vc_buf_lock); - - err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_DEL_QUEUES, - buf_size, (u8 *)eq); - if (err) - goto rel_lock; - - err = idpf_min_wait_for_event(adapter, vport, IDPF_VC_DEL_QUEUES, - IDPF_VC_DEL_QUEUES_ERR); - -rel_lock: - mutex_unlock(&vport->vc_buf_lock); - kfree(eq); + xn_params.vc_op = VIRTCHNL2_OP_DEL_QUEUES; + xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC; + xn_params.send_buf.iov_base = eq; + xn_params.send_buf.iov_len = buf_size; + reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); - return err; + return reply_sz < 0 ? reply_sz : 0; } /** @@ -2203,14 +2087,21 @@ int idpf_send_config_queues_msg(struct idpf_vport *vport) int idpf_send_add_queues_msg(const struct idpf_vport *vport, u16 num_tx_q, u16 num_complq, u16 num_rx_q, u16 num_rx_bufq) { - struct idpf_adapter *adapter = vport->adapter; + struct virtchnl2_add_queues *vc_msg __free(kfree) = NULL; + struct idpf_vc_xn_params xn_params = {}; struct idpf_vport_config *vport_config; - struct virtchnl2_add_queues aq = { }; - struct virtchnl2_add_queues *vc_msg; + struct virtchnl2_add_queues aq = {}; u16 vport_idx = vport->idx; - int size, err; + ssize_t reply_sz; + int size; + + vc_msg = kzalloc(IDPF_CTLQ_MAX_BUF_LEN, GFP_KERNEL); + if (!vc_msg) + return -ENOMEM; - vport_config = adapter->vport_config[vport_idx]; + vport_config = vport->adapter->vport_config[vport_idx]; + kfree(vport_config->req_qs_chunks); + vport_config->req_qs_chunks = NULL; aq.vport_id = cpu_to_le32(vport->vport_id); aq.num_tx_q = cpu_to_le16(num_tx_q); @@ -2218,47 +2109,33 @@ int idpf_send_add_queues_msg(const struct idpf_vport *vport, u16 num_tx_q, aq.num_rx_q = cpu_to_le16(num_rx_q); aq.num_rx_bufq = cpu_to_le16(num_rx_bufq); - mutex_lock(&((struct idpf_vport *)vport)->vc_buf_lock); - - err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_ADD_QUEUES, - sizeof(struct virtchnl2_add_queues), (u8 *)&aq); - if (err) - goto rel_lock; - - /* We want vport to be const to prevent incidental code changes making - * changes to the vport config. We're making a special exception here - * to discard const to use the virtchnl. - */ - err = idpf_wait_for_event(adapter, (struct idpf_vport *)vport, - IDPF_VC_ADD_QUEUES, IDPF_VC_ADD_QUEUES_ERR); - if (err) - goto rel_lock; - - kfree(vport_config->req_qs_chunks); - vport_config->req_qs_chunks = NULL; + xn_params.vc_op = VIRTCHNL2_OP_ADD_QUEUES; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + xn_params.send_buf.iov_base = &aq; + xn_params.send_buf.iov_len = sizeof(aq); + xn_params.recv_buf.iov_base = vc_msg; + xn_params.recv_buf.iov_len = IDPF_CTLQ_MAX_BUF_LEN; + reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; - vc_msg = (struct virtchnl2_add_queues *)vport->vc_msg; /* compare vc_msg num queues with vport num queues */ if (le16_to_cpu(vc_msg->num_tx_q) != num_tx_q || le16_to_cpu(vc_msg->num_rx_q) != num_rx_q || le16_to_cpu(vc_msg->num_tx_complq) != num_complq || - le16_to_cpu(vc_msg->num_rx_bufq) != num_rx_bufq) { - err = -EINVAL; - goto rel_lock; - } + le16_to_cpu(vc_msg->num_rx_bufq) != num_rx_bufq) + return -EINVAL; size = struct_size(vc_msg, chunks.chunks, le16_to_cpu(vc_msg->chunks.num_chunks)); - vport_config->req_qs_chunks = kmemdup(vc_msg, size, GFP_KERNEL); - if (!vport_config->req_qs_chunks) { - err = -ENOMEM; - goto rel_lock; - } + if (reply_sz < size) + return -EIO; -rel_lock: - mutex_unlock(&((struct idpf_vport *)vport)->vc_buf_lock); + vport_config->req_qs_chunks = kmemdup(vc_msg, size, GFP_KERNEL); + if (!vport_config->req_qs_chunks) + return -ENOMEM; - return err; + return 0; } /** @@ -2270,53 +2147,49 @@ rel_lock: */ int idpf_send_alloc_vectors_msg(struct idpf_adapter *adapter, u16 num_vectors) { - struct virtchnl2_alloc_vectors *alloc_vec, *rcvd_vec; - struct virtchnl2_alloc_vectors ac = { }; + struct virtchnl2_alloc_vectors *rcvd_vec __free(kfree) = NULL; + struct idpf_vc_xn_params xn_params = {}; + struct virtchnl2_alloc_vectors ac = {}; + ssize_t reply_sz; u16 num_vchunks; - int size, err; + int size; ac.num_vectors = cpu_to_le16(num_vectors); - mutex_lock(&adapter->vc_buf_lock); - - err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_ALLOC_VECTORS, - sizeof(ac), (u8 *)&ac); - if (err) - goto rel_lock; + rcvd_vec = kzalloc(IDPF_CTLQ_MAX_BUF_LEN, GFP_KERNEL); + if (!rcvd_vec) + return -ENOMEM; - err = idpf_wait_for_event(adapter, NULL, IDPF_VC_ALLOC_VECTORS, - IDPF_VC_ALLOC_VECTORS_ERR); - if (err) - goto rel_lock; + xn_params.vc_op = VIRTCHNL2_OP_ALLOC_VECTORS; + xn_params.send_buf.iov_base = ∾ + xn_params.send_buf.iov_len = sizeof(ac); + xn_params.recv_buf.iov_base = rcvd_vec; + xn_params.recv_buf.iov_len = IDPF_CTLQ_MAX_BUF_LEN; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + reply_sz = idpf_vc_xn_exec(adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; - rcvd_vec = (struct virtchnl2_alloc_vectors *)adapter->vc_msg; num_vchunks = le16_to_cpu(rcvd_vec->vchunks.num_vchunks); - size = struct_size(rcvd_vec, vchunks.vchunks, num_vchunks); - if (size > sizeof(adapter->vc_msg)) { - err = -EINVAL; - goto rel_lock; - } + if (reply_sz < size) + return -EIO; + + if (size > IDPF_CTLQ_MAX_BUF_LEN) + return -EINVAL; kfree(adapter->req_vec_chunks); - adapter->req_vec_chunks = NULL; - adapter->req_vec_chunks = kmemdup(adapter->vc_msg, size, GFP_KERNEL); - if (!adapter->req_vec_chunks) { - err = -ENOMEM; - goto rel_lock; - } + adapter->req_vec_chunks = kmemdup(rcvd_vec, size, GFP_KERNEL); + if (!adapter->req_vec_chunks) + return -ENOMEM; - alloc_vec = adapter->req_vec_chunks; - if (le16_to_cpu(alloc_vec->num_vectors) < num_vectors) { + if (le16_to_cpu(adapter->req_vec_chunks->num_vectors) < num_vectors) { kfree(adapter->req_vec_chunks); adapter->req_vec_chunks = NULL; - err = -EINVAL; + return -EINVAL; } -rel_lock: - mutex_unlock(&adapter->vc_buf_lock); - - return err; + return 0; } /** @@ -2329,29 +2202,24 @@ int idpf_send_dealloc_vectors_msg(struct idpf_adapter *adapter) { struct virtchnl2_alloc_vectors *ac = adapter->req_vec_chunks; struct virtchnl2_vector_chunks *vcs = &ac->vchunks; - int buf_size, err; + struct idpf_vc_xn_params xn_params = {}; + ssize_t reply_sz; + int buf_size; buf_size = struct_size(vcs, vchunks, le16_to_cpu(vcs->num_vchunks)); - mutex_lock(&adapter->vc_buf_lock); - - err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_DEALLOC_VECTORS, buf_size, - (u8 *)vcs); - if (err) - goto rel_lock; - - err = idpf_min_wait_for_event(adapter, NULL, IDPF_VC_DEALLOC_VECTORS, - IDPF_VC_DEALLOC_VECTORS_ERR); - if (err) - goto rel_lock; + xn_params.vc_op = VIRTCHNL2_OP_DEALLOC_VECTORS; + xn_params.send_buf.iov_base = vcs; + xn_params.send_buf.iov_len = buf_size; + xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC; + reply_sz = idpf_vc_xn_exec(adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; kfree(adapter->req_vec_chunks); adapter->req_vec_chunks = NULL; -rel_lock: - mutex_unlock(&adapter->vc_buf_lock); - - return err; + return 0; } /** @@ -2374,25 +2242,18 @@ static int idpf_get_max_vfs(struct idpf_adapter *adapter) */ int idpf_send_set_sriov_vfs_msg(struct idpf_adapter *adapter, u16 num_vfs) { - struct virtchnl2_sriov_vfs_info svi = { }; - int err; + struct virtchnl2_sriov_vfs_info svi = {}; + struct idpf_vc_xn_params xn_params = {}; + ssize_t reply_sz; svi.num_vfs = cpu_to_le16(num_vfs); + xn_params.vc_op = VIRTCHNL2_OP_SET_SRIOV_VFS; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + xn_params.send_buf.iov_base = &svi; + xn_params.send_buf.iov_len = sizeof(svi); + reply_sz = idpf_vc_xn_exec(adapter, &xn_params); - mutex_lock(&adapter->vc_buf_lock); - - err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_SET_SRIOV_VFS, - sizeof(svi), (u8 *)&svi); - if (err) - goto rel_lock; - - err = idpf_wait_for_event(adapter, NULL, IDPF_VC_SET_SRIOV_VFS, - IDPF_VC_SET_SRIOV_VFS_ERR); - -rel_lock: - mutex_unlock(&adapter->vc_buf_lock); - - return err; + return reply_sz < 0 ? reply_sz : 0; } /** @@ -2405,10 +2266,10 @@ int idpf_send_get_stats_msg(struct idpf_vport *vport) { struct idpf_netdev_priv *np = netdev_priv(vport->netdev); struct rtnl_link_stats64 *netstats = &np->netstats; - struct idpf_adapter *adapter = vport->adapter; - struct virtchnl2_vport_stats stats_msg = { }; - struct virtchnl2_vport_stats *stats; - int err; + struct virtchnl2_vport_stats stats_msg = {}; + struct idpf_vc_xn_params xn_params = {}; + ssize_t reply_sz; + /* Don't send get_stats message if the link is down */ if (np->state <= __IDPF_VPORT_DOWN) @@ -2416,46 +2277,38 @@ int idpf_send_get_stats_msg(struct idpf_vport *vport) stats_msg.vport_id = cpu_to_le32(vport->vport_id); - mutex_lock(&vport->vc_buf_lock); + xn_params.vc_op = VIRTCHNL2_OP_GET_STATS; + xn_params.send_buf.iov_base = &stats_msg; + xn_params.send_buf.iov_len = sizeof(stats_msg); + xn_params.recv_buf = xn_params.send_buf; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; - err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_GET_STATS, - sizeof(struct virtchnl2_vport_stats), - (u8 *)&stats_msg); - if (err) - goto rel_lock; - - err = idpf_wait_for_event(adapter, vport, IDPF_VC_GET_STATS, - IDPF_VC_GET_STATS_ERR); - if (err) - goto rel_lock; - - stats = (struct virtchnl2_vport_stats *)vport->vc_msg; + reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; + if (reply_sz < sizeof(stats_msg)) + return -EIO; spin_lock_bh(&np->stats_lock); - netstats->rx_packets = le64_to_cpu(stats->rx_unicast) + - le64_to_cpu(stats->rx_multicast) + - le64_to_cpu(stats->rx_broadcast); - netstats->rx_bytes = le64_to_cpu(stats->rx_bytes); - netstats->rx_dropped = le64_to_cpu(stats->rx_discards); - netstats->rx_over_errors = le64_to_cpu(stats->rx_overflow_drop); - netstats->rx_length_errors = le64_to_cpu(stats->rx_invalid_frame_length); - - netstats->tx_packets = le64_to_cpu(stats->tx_unicast) + - le64_to_cpu(stats->tx_multicast) + - le64_to_cpu(stats->tx_broadcast); - netstats->tx_bytes = le64_to_cpu(stats->tx_bytes); - netstats->tx_errors = le64_to_cpu(stats->tx_errors); - netstats->tx_dropped = le64_to_cpu(stats->tx_discards); - - vport->port_stats.vport_stats = *stats; + netstats->rx_packets = le64_to_cpu(stats_msg.rx_unicast) + + le64_to_cpu(stats_msg.rx_multicast) + + le64_to_cpu(stats_msg.rx_broadcast); + netstats->tx_packets = le64_to_cpu(stats_msg.tx_unicast) + + le64_to_cpu(stats_msg.tx_multicast) + + le64_to_cpu(stats_msg.tx_broadcast); + netstats->rx_bytes = le64_to_cpu(stats_msg.rx_bytes); + netstats->tx_bytes = le64_to_cpu(stats_msg.tx_bytes); + netstats->rx_errors = le64_to_cpu(stats_msg.rx_errors); + netstats->tx_errors = le64_to_cpu(stats_msg.tx_errors); + netstats->rx_dropped = le64_to_cpu(stats_msg.rx_discards); + netstats->tx_dropped = le64_to_cpu(stats_msg.tx_discards); + + vport->port_stats.vport_stats = stats_msg; spin_unlock_bh(&np->stats_lock); -rel_lock: - mutex_unlock(&vport->vc_buf_lock); - - return err; + return 0; } /** @@ -2467,70 +2320,70 @@ rel_lock: */ int idpf_send_get_set_rss_lut_msg(struct idpf_vport *vport, bool get) { - struct idpf_adapter *adapter = vport->adapter; - struct virtchnl2_rss_lut *recv_rl; + struct virtchnl2_rss_lut *recv_rl __free(kfree) = NULL; + struct virtchnl2_rss_lut *rl __free(kfree) = NULL; + struct idpf_vc_xn_params xn_params = {}; struct idpf_rss_data *rss_data; - struct virtchnl2_rss_lut *rl; int buf_size, lut_buf_size; - int i, err; + ssize_t reply_sz; + int i; - rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data; + rss_data = + &vport->adapter->vport_config[vport->idx]->user_config.rss_data; buf_size = struct_size(rl, lut, rss_data->rss_lut_size); rl = kzalloc(buf_size, GFP_KERNEL); if (!rl) return -ENOMEM; rl->vport_id = cpu_to_le32(vport->vport_id); - mutex_lock(&vport->vc_buf_lock); - if (!get) { + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + xn_params.send_buf.iov_base = rl; + xn_params.send_buf.iov_len = buf_size; + + if (get) { + recv_rl = kzalloc(IDPF_CTLQ_MAX_BUF_LEN, GFP_KERNEL); + if (!recv_rl) + return -ENOMEM; + xn_params.vc_op = VIRTCHNL2_OP_GET_RSS_LUT; + xn_params.recv_buf.iov_base = recv_rl; + xn_params.recv_buf.iov_len = IDPF_CTLQ_MAX_BUF_LEN; + } else { rl->lut_entries = cpu_to_le16(rss_data->rss_lut_size); for (i = 0; i < rss_data->rss_lut_size; i++) rl->lut[i] = cpu_to_le32(rss_data->rss_lut[i]); - err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_SET_RSS_LUT, - buf_size, (u8 *)rl); - if (err) - goto free_mem; - - err = idpf_wait_for_event(adapter, vport, IDPF_VC_SET_RSS_LUT, - IDPF_VC_SET_RSS_LUT_ERR); - - goto free_mem; + xn_params.vc_op = VIRTCHNL2_OP_SET_RSS_LUT; } + reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; + if (!get) + return 0; + if (reply_sz < sizeof(struct virtchnl2_rss_lut)) + return -EIO; - err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_GET_RSS_LUT, - buf_size, (u8 *)rl); - if (err) - goto free_mem; + lut_buf_size = le16_to_cpu(recv_rl->lut_entries) * sizeof(u32); + if (reply_sz < lut_buf_size) + return -EIO; - err = idpf_wait_for_event(adapter, vport, IDPF_VC_GET_RSS_LUT, - IDPF_VC_GET_RSS_LUT_ERR); - if (err) - goto free_mem; - - recv_rl = (struct virtchnl2_rss_lut *)vport->vc_msg; + /* size didn't change, we can reuse existing lut buf */ if (rss_data->rss_lut_size == le16_to_cpu(recv_rl->lut_entries)) goto do_memcpy; rss_data->rss_lut_size = le16_to_cpu(recv_rl->lut_entries); kfree(rss_data->rss_lut); - lut_buf_size = rss_data->rss_lut_size * sizeof(u32); rss_data->rss_lut = kzalloc(lut_buf_size, GFP_KERNEL); if (!rss_data->rss_lut) { rss_data->rss_lut_size = 0; - err = -ENOMEM; - goto free_mem; + return -ENOMEM; } do_memcpy: - memcpy(rss_data->rss_lut, vport->vc_msg, rss_data->rss_lut_size); -free_mem: - mutex_unlock(&vport->vc_buf_lock); - kfree(rl); + memcpy(rss_data->rss_lut, recv_rl->lut, rss_data->rss_lut_size); - return err; + return 0; } /** @@ -2542,68 +2395,70 @@ free_mem: */ int idpf_send_get_set_rss_key_msg(struct idpf_vport *vport, bool get) { - struct idpf_adapter *adapter = vport->adapter; - struct virtchnl2_rss_key *recv_rk; + struct virtchnl2_rss_key *recv_rk __free(kfree) = NULL; + struct virtchnl2_rss_key *rk __free(kfree) = NULL; + struct idpf_vc_xn_params xn_params = {}; struct idpf_rss_data *rss_data; - struct virtchnl2_rss_key *rk; - int i, buf_size, err; + ssize_t reply_sz; + int i, buf_size; + u16 key_size; - rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data; + rss_data = + &vport->adapter->vport_config[vport->idx]->user_config.rss_data; buf_size = struct_size(rk, key_flex, rss_data->rss_key_size); rk = kzalloc(buf_size, GFP_KERNEL); if (!rk) return -ENOMEM; rk->vport_id = cpu_to_le32(vport->vport_id); - mutex_lock(&vport->vc_buf_lock); - + xn_params.send_buf.iov_base = rk; + xn_params.send_buf.iov_len = buf_size; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; if (get) { - err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_GET_RSS_KEY, - buf_size, (u8 *)rk); - if (err) - goto error; - - err = idpf_wait_for_event(adapter, vport, IDPF_VC_GET_RSS_KEY, - IDPF_VC_GET_RSS_KEY_ERR); - if (err) - goto error; - - recv_rk = (struct virtchnl2_rss_key *)vport->vc_msg; - if (rss_data->rss_key_size != - le16_to_cpu(recv_rk->key_len)) { - rss_data->rss_key_size = - min_t(u16, NETDEV_RSS_KEY_LEN, - le16_to_cpu(recv_rk->key_len)); - kfree(rss_data->rss_key); - rss_data->rss_key = kzalloc(rss_data->rss_key_size, - GFP_KERNEL); - if (!rss_data->rss_key) { - rss_data->rss_key_size = 0; - err = -ENOMEM; - goto error; - } - } - memcpy(rss_data->rss_key, recv_rk->key_flex, - rss_data->rss_key_size); + recv_rk = kzalloc(IDPF_CTLQ_MAX_BUF_LEN, GFP_KERNEL); + if (!recv_rk) + return -ENOMEM; + + xn_params.vc_op = VIRTCHNL2_OP_GET_RSS_KEY; + xn_params.recv_buf.iov_base = recv_rk; + xn_params.recv_buf.iov_len = IDPF_CTLQ_MAX_BUF_LEN; } else { rk->key_len = cpu_to_le16(rss_data->rss_key_size); for (i = 0; i < rss_data->rss_key_size; i++) rk->key_flex[i] = rss_data->rss_key[i]; - err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_SET_RSS_KEY, - buf_size, (u8 *)rk); - if (err) - goto error; + xn_params.vc_op = VIRTCHNL2_OP_SET_RSS_KEY; + } + + reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; + if (!get) + return 0; + if (reply_sz < sizeof(struct virtchnl2_rss_key)) + return -EIO; + + key_size = min_t(u16, NETDEV_RSS_KEY_LEN, + le16_to_cpu(recv_rk->key_len)); + if (reply_sz < key_size) + return -EIO; - err = idpf_wait_for_event(adapter, vport, IDPF_VC_SET_RSS_KEY, - IDPF_VC_SET_RSS_KEY_ERR); + /* key len didn't change, reuse existing buf */ + if (rss_data->rss_key_size == key_size) + goto do_memcpy; + + rss_data->rss_key_size = key_size; + kfree(rss_data->rss_key); + rss_data->rss_key = kzalloc(key_size, GFP_KERNEL); + if (!rss_data->rss_key) { + rss_data->rss_key_size = 0; + return -ENOMEM; } -error: - mutex_unlock(&vport->vc_buf_lock); - kfree(rk); +do_memcpy: + memcpy(rss_data->rss_key, recv_rk->key_flex, rss_data->rss_key_size); - return err; + return 0; } /** @@ -2655,13 +2510,15 @@ static void idpf_fill_ptype_lookup(struct idpf_rx_ptype_decoded *ptype, */ int idpf_send_get_rx_ptype_msg(struct idpf_vport *vport) { + struct virtchnl2_get_ptype_info *get_ptype_info __free(kfree) = NULL; + struct virtchnl2_get_ptype_info *ptype_info __free(kfree) = NULL; struct idpf_rx_ptype_decoded *ptype_lkup = vport->rx_ptype_lkup; - struct virtchnl2_get_ptype_info get_ptype_info; int max_ptype, ptypes_recvd = 0, ptype_offset; struct idpf_adapter *adapter = vport->adapter; - struct virtchnl2_get_ptype_info *ptype_info; + struct idpf_vc_xn_params xn_params = {}; u16 next_ptype_id = 0; - int err = 0, i, j, k; + ssize_t reply_sz; + int i, j, k; if (idpf_is_queue_model_split(vport->rxq_model)) max_ptype = IDPF_RX_MAX_PTYPE; @@ -2670,43 +2527,44 @@ int idpf_send_get_rx_ptype_msg(struct idpf_vport *vport) memset(vport->rx_ptype_lkup, 0, sizeof(vport->rx_ptype_lkup)); + get_ptype_info = kzalloc(sizeof(*get_ptype_info), GFP_KERNEL); + if (!get_ptype_info) + return -ENOMEM; + ptype_info = kzalloc(IDPF_CTLQ_MAX_BUF_LEN, GFP_KERNEL); if (!ptype_info) return -ENOMEM; - mutex_lock(&adapter->vc_buf_lock); + xn_params.vc_op = VIRTCHNL2_OP_GET_PTYPE_INFO; + xn_params.send_buf.iov_base = get_ptype_info; + xn_params.send_buf.iov_len = sizeof(*get_ptype_info); + xn_params.recv_buf.iov_base = ptype_info; + xn_params.recv_buf.iov_len = IDPF_CTLQ_MAX_BUF_LEN; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; while (next_ptype_id < max_ptype) { - get_ptype_info.start_ptype_id = cpu_to_le16(next_ptype_id); + get_ptype_info->start_ptype_id = cpu_to_le16(next_ptype_id); if ((next_ptype_id + IDPF_RX_MAX_PTYPES_PER_BUF) > max_ptype) - get_ptype_info.num_ptypes = + get_ptype_info->num_ptypes = cpu_to_le16(max_ptype - next_ptype_id); else - get_ptype_info.num_ptypes = + get_ptype_info->num_ptypes = cpu_to_le16(IDPF_RX_MAX_PTYPES_PER_BUF); - err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_GET_PTYPE_INFO, - sizeof(struct virtchnl2_get_ptype_info), - (u8 *)&get_ptype_info); - if (err) - goto vc_buf_unlock; - - err = idpf_wait_for_event(adapter, NULL, IDPF_VC_GET_PTYPE_INFO, - IDPF_VC_GET_PTYPE_INFO_ERR); - if (err) - goto vc_buf_unlock; + reply_sz = idpf_vc_xn_exec(adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; - memcpy(ptype_info, adapter->vc_msg, IDPF_CTLQ_MAX_BUF_LEN); + if (reply_sz < IDPF_CTLQ_MAX_BUF_LEN) + return -EIO; ptypes_recvd += le16_to_cpu(ptype_info->num_ptypes); - if (ptypes_recvd > max_ptype) { - err = -EINVAL; - goto vc_buf_unlock; - } + if (ptypes_recvd > max_ptype) + return -EINVAL; - next_ptype_id = le16_to_cpu(get_ptype_info.start_ptype_id) + - le16_to_cpu(get_ptype_info.num_ptypes); + next_ptype_id = le16_to_cpu(get_ptype_info->start_ptype_id) + + le16_to_cpu(get_ptype_info->num_ptypes); ptype_offset = IDPF_RX_PTYPE_HDR_SZ; @@ -2719,17 +2577,13 @@ int idpf_send_get_rx_ptype_msg(struct idpf_vport *vport) ((u8 *)ptype_info + ptype_offset); ptype_offset += IDPF_GET_PTYPE_SIZE(ptype); - if (ptype_offset > IDPF_CTLQ_MAX_BUF_LEN) { - err = -EINVAL; - goto vc_buf_unlock; - } + if (ptype_offset > IDPF_CTLQ_MAX_BUF_LEN) + return -EINVAL; /* 0xFFFF indicates end of ptypes */ if (le16_to_cpu(ptype->ptype_id_10) == - IDPF_INVALID_PTYPE_ID) { - err = 0; - goto vc_buf_unlock; - } + IDPF_INVALID_PTYPE_ID) + return 0; if (idpf_is_queue_model_split(vport->rxq_model)) k = le16_to_cpu(ptype->ptype_id_10); @@ -2857,11 +2711,7 @@ int idpf_send_get_rx_ptype_msg(struct idpf_vport *vport) } } -vc_buf_unlock: - mutex_unlock(&adapter->vc_buf_lock); - kfree(ptype_info); - - return err; + return 0; } /** @@ -2873,27 +2723,20 @@ vc_buf_unlock: */ int idpf_send_ena_dis_loopback_msg(struct idpf_vport *vport) { + struct idpf_vc_xn_params xn_params = {}; struct virtchnl2_loopback loopback; - int err; + ssize_t reply_sz; loopback.vport_id = cpu_to_le32(vport->vport_id); loopback.enable = idpf_is_feature_ena(vport, NETIF_F_LOOPBACK); - mutex_lock(&vport->vc_buf_lock); - - err = idpf_send_mb_msg(vport->adapter, VIRTCHNL2_OP_LOOPBACK, - sizeof(loopback), (u8 *)&loopback); - if (err) - goto rel_lock; + xn_params.vc_op = VIRTCHNL2_OP_LOOPBACK; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + xn_params.send_buf.iov_base = &loopback; + xn_params.send_buf.iov_len = sizeof(loopback); + reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); - err = idpf_wait_for_event(vport->adapter, vport, - IDPF_VC_LOOPBACK_STATE, - IDPF_VC_LOOPBACK_STATE_ERR); - -rel_lock: - mutex_unlock(&vport->vc_buf_lock); - - return err; + return reply_sz < 0 ? reply_sz : 0; } /** @@ -2958,7 +2801,7 @@ int idpf_init_dflt_mbx(struct idpf_adapter *adapter) return -ENOENT; } - adapter->state = __IDPF_STARTUP; + adapter->state = __IDPF_VER_CHECK; return 0; } @@ -3055,35 +2898,42 @@ int idpf_vc_core_init(struct idpf_adapter *adapter) u16 num_max_vports; int err = 0; + if (!adapter->vcxn_mngr) { + adapter->vcxn_mngr = kzalloc(sizeof(*adapter->vcxn_mngr), GFP_KERNEL); + if (!adapter->vcxn_mngr) { + err = -ENOMEM; + goto init_failed; + } + } + idpf_vc_xn_init(adapter->vcxn_mngr); + while (adapter->state != __IDPF_INIT_SW) { switch (adapter->state) { - case __IDPF_STARTUP: - if (idpf_send_ver_msg(adapter)) - goto init_failed; - adapter->state = __IDPF_VER_CHECK; - goto restart; case __IDPF_VER_CHECK: - err = idpf_recv_ver_msg(adapter); - if (err == -EIO) { - return err; - } else if (err == -EAGAIN) { - adapter->state = __IDPF_STARTUP; + err = idpf_send_ver_msg(adapter); + switch (err) { + case 0: + /* success, move state machine forward */ + adapter->state = __IDPF_GET_CAPS; + fallthrough; + case -EAGAIN: goto restart; - } else if (err) { + default: + /* Something bad happened, try again but only a + * few times. + */ goto init_failed; } - if (idpf_send_get_caps_msg(adapter)) - goto init_failed; - adapter->state = __IDPF_GET_CAPS; - goto restart; case __IDPF_GET_CAPS: - if (idpf_recv_get_caps_msg(adapter)) + err = idpf_send_get_caps_msg(adapter); + if (err) goto init_failed; adapter->state = __IDPF_INIT_SW; break; default: dev_err(&adapter->pdev->dev, "Device is in bad state: %d\n", adapter->state); + err = -EINVAL; goto init_failed; } break; @@ -3142,7 +2992,9 @@ restart: queue_delayed_work(adapter->init_wq, &adapter->init_task, msecs_to_jiffies(5 * (adapter->pdev->devfn & 0x07))); - goto no_err; + set_bit(IDPF_VC_CORE_INIT, adapter->flags); + + return 0; err_intr_req: cancel_delayed_work_sync(&adapter->serv_task); @@ -3151,7 +3003,6 @@ err_intr_req: err_netdev_alloc: kfree(adapter->vports); adapter->vports = NULL; -no_err: return err; init_failed: @@ -3168,7 +3019,9 @@ init_failed: * register writes might not have taken effect. Retry to initialize * the mailbox again */ - adapter->state = __IDPF_STARTUP; + adapter->state = __IDPF_VER_CHECK; + if (adapter->vcxn_mngr) + idpf_vc_xn_shutdown(adapter->vcxn_mngr); idpf_deinit_dflt_mbx(adapter); set_bit(IDPF_HR_DRV_LOAD, adapter->flags); queue_delayed_work(adapter->vc_event_wq, &adapter->vc_event_task, @@ -3184,29 +3037,22 @@ init_failed: */ void idpf_vc_core_deinit(struct idpf_adapter *adapter) { - int i; + if (!test_bit(IDPF_VC_CORE_INIT, adapter->flags)) + return; + idpf_vc_xn_shutdown(adapter->vcxn_mngr); idpf_deinit_task(adapter); idpf_intr_rel(adapter); - /* Set all bits as we dont know on which vc_state the vhnl_wq is - * waiting on and wakeup the virtchnl workqueue even if it is waiting - * for the response as we are going down - */ - for (i = 0; i < IDPF_VC_NBITS; i++) - set_bit(i, adapter->vc_state); - wake_up(&adapter->vchnl_wq); cancel_delayed_work_sync(&adapter->serv_task); cancel_delayed_work_sync(&adapter->mbx_task); idpf_vport_params_buf_rel(adapter); - /* Clear all the bits */ - for (i = 0; i < IDPF_VC_NBITS; i++) - clear_bit(i, adapter->vc_state); - kfree(adapter->vports); adapter->vports = NULL; + + clear_bit(IDPF_VC_CORE_INIT, adapter->flags); } /** @@ -3622,6 +3468,75 @@ u32 idpf_get_vport_id(struct idpf_vport *vport) } /** + * idpf_mac_filter_async_handler - Async callback for mac filters + * @adapter: private data struct + * @xn: transaction for message + * @ctlq_msg: received message + * + * In some scenarios driver can't sleep and wait for a reply (e.g.: stack is + * holding rtnl_lock) when adding a new mac filter. It puts us in a difficult + * situation to deal with errors returned on the reply. The best we can + * ultimately do is remove it from our list of mac filters and report the + * error. + */ +static int idpf_mac_filter_async_handler(struct idpf_adapter *adapter, + struct idpf_vc_xn *xn, + const struct idpf_ctlq_msg *ctlq_msg) +{ + struct virtchnl2_mac_addr_list *ma_list; + struct idpf_vport_config *vport_config; + struct virtchnl2_mac_addr *mac_addr; + struct idpf_mac_filter *f, *tmp; + struct list_head *ma_list_head; + struct idpf_vport *vport; + u16 num_entries; + int i; + + /* if success we're done, we're only here if something bad happened */ + if (!ctlq_msg->cookie.mbx.chnl_retval) + return 0; + + /* make sure at least struct is there */ + if (xn->reply_sz < sizeof(*ma_list)) + goto invalid_payload; + + ma_list = ctlq_msg->ctx.indirect.payload->va; + mac_addr = ma_list->mac_addr_list; + num_entries = le16_to_cpu(ma_list->num_mac_addr); + /* we should have received a buffer at least this big */ + if (xn->reply_sz < struct_size(ma_list, mac_addr_list, num_entries)) + goto invalid_payload; + + vport = idpf_vid_to_vport(adapter, le32_to_cpu(ma_list->vport_id)); + if (!vport) + goto invalid_payload; + + vport_config = adapter->vport_config[le32_to_cpu(ma_list->vport_id)]; + ma_list_head = &vport_config->user_config.mac_filter_list; + + /* We can't do much to reconcile bad filters at this point, however we + * should at least remove them from our list one way or the other so we + * have some idea what good filters we have. + */ + spin_lock_bh(&vport_config->mac_filter_list_lock); + list_for_each_entry_safe(f, tmp, ma_list_head, list) + for (i = 0; i < num_entries; i++) + if (ether_addr_equal(mac_addr[i].addr, f->macaddr)) + list_del(&f->list); + spin_unlock_bh(&vport_config->mac_filter_list_lock); + dev_err_ratelimited(&adapter->pdev->dev, "Received error sending MAC filter request (op %d)\n", + xn->vc_op); + + return 0; + +invalid_payload: + dev_err_ratelimited(&adapter->pdev->dev, "Received invalid MAC filter payload (op %d) (len %zd)\n", + xn->vc_op, xn->reply_sz); + + return -EINVAL; +} + +/** * idpf_add_del_mac_filters - Add/del mac filters * @vport: Virtual port data structure * @np: Netdev private structure @@ -3634,17 +3549,21 @@ int idpf_add_del_mac_filters(struct idpf_vport *vport, struct idpf_netdev_priv *np, bool add, bool async) { - struct virtchnl2_mac_addr_list *ma_list = NULL; + struct virtchnl2_mac_addr_list *ma_list __free(kfree) = NULL; + struct virtchnl2_mac_addr *mac_addr __free(kfree) = NULL; struct idpf_adapter *adapter = np->adapter; + struct idpf_vc_xn_params xn_params = {}; struct idpf_vport_config *vport_config; - enum idpf_vport_config_flags mac_flag; - struct pci_dev *pdev = adapter->pdev; - enum idpf_vport_vc_state vc, vc_err; - struct virtchnl2_mac_addr *mac_addr; - struct idpf_mac_filter *f, *tmp; u32 num_msgs, total_filters = 0; - int i = 0, k, err = 0; - u32 vop; + struct idpf_mac_filter *f; + ssize_t reply_sz; + int i = 0, k; + + xn_params.vc_op = add ? VIRTCHNL2_OP_ADD_MAC_ADDR : + VIRTCHNL2_OP_DEL_MAC_ADDR; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + xn_params.async = async; + xn_params.async_handler = idpf_mac_filter_async_handler; vport_config = adapter->vport_config[np->vport_idx]; spin_lock_bh(&vport_config->mac_filter_list_lock); @@ -3668,13 +3587,13 @@ int idpf_add_del_mac_filters(struct idpf_vport *vport, mac_addr = kcalloc(total_filters, sizeof(struct virtchnl2_mac_addr), GFP_ATOMIC); if (!mac_addr) { - err = -ENOMEM; spin_unlock_bh(&vport_config->mac_filter_list_lock); - goto error; + + return -ENOMEM; } - list_for_each_entry_safe(f, tmp, &vport_config->user_config.mac_filter_list, - list) { + list_for_each_entry(f, &vport_config->user_config.mac_filter_list, + list) { if (add && f->add) { ether_addr_copy(mac_addr[i].addr, f->macaddr); i++; @@ -3693,26 +3612,11 @@ int idpf_add_del_mac_filters(struct idpf_vport *vport, spin_unlock_bh(&vport_config->mac_filter_list_lock); - if (add) { - vop = VIRTCHNL2_OP_ADD_MAC_ADDR; - vc = IDPF_VC_ADD_MAC_ADDR; - vc_err = IDPF_VC_ADD_MAC_ADDR_ERR; - mac_flag = IDPF_VPORT_ADD_MAC_REQ; - } else { - vop = VIRTCHNL2_OP_DEL_MAC_ADDR; - vc = IDPF_VC_DEL_MAC_ADDR; - vc_err = IDPF_VC_DEL_MAC_ADDR_ERR; - mac_flag = IDPF_VPORT_DEL_MAC_REQ; - } - /* Chunk up the filters into multiple messages to avoid * sending a control queue message buffer that is too large */ num_msgs = DIV_ROUND_UP(total_filters, IDPF_NUM_FILTERS_PER_MSG); - if (!async) - mutex_lock(&vport->vc_buf_lock); - for (i = 0, k = 0; i < num_msgs; i++) { u32 entries_size, buf_size, num_entries; @@ -3724,10 +3628,8 @@ int idpf_add_del_mac_filters(struct idpf_vport *vport, if (!ma_list || num_entries != IDPF_NUM_FILTERS_PER_MSG) { kfree(ma_list); ma_list = kzalloc(buf_size, GFP_ATOMIC); - if (!ma_list) { - err = -ENOMEM; - goto list_prep_error; - } + if (!ma_list) + return -ENOMEM; } else { memset(ma_list, 0, buf_size); } @@ -3736,34 +3638,17 @@ int idpf_add_del_mac_filters(struct idpf_vport *vport, ma_list->num_mac_addr = cpu_to_le16(num_entries); memcpy(ma_list->mac_addr_list, &mac_addr[k], entries_size); - if (async) - set_bit(mac_flag, vport_config->flags); - - err = idpf_send_mb_msg(adapter, vop, buf_size, (u8 *)ma_list); - if (err) - goto mbx_error; - - if (!async) { - err = idpf_wait_for_event(adapter, vport, vc, vc_err); - if (err) - goto mbx_error; - } + xn_params.send_buf.iov_base = ma_list; + xn_params.send_buf.iov_len = buf_size; + reply_sz = idpf_vc_xn_exec(adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; k += num_entries; total_filters -= num_entries; } -mbx_error: - if (!async) - mutex_unlock(&vport->vc_buf_lock); - kfree(ma_list); -list_prep_error: - kfree(mac_addr); -error: - if (err) - dev_err(&pdev->dev, "Failed to add or del mac filters %d", err); - - return err; + return 0; } /** @@ -3780,9 +3665,10 @@ int idpf_set_promiscuous(struct idpf_adapter *adapter, struct idpf_vport_user_config_data *config_data, u32 vport_id) { + struct idpf_vc_xn_params xn_params = {}; struct virtchnl2_promisc_info vpi; + ssize_t reply_sz; u16 flags = 0; - int err; if (test_bit(__IDPF_PROMISC_UC, config_data->user_flags)) flags |= VIRTCHNL2_UNICAST_PROMISC; @@ -3792,9 +3678,13 @@ int idpf_set_promiscuous(struct idpf_adapter *adapter, vpi.vport_id = cpu_to_le32(vport_id); vpi.flags = cpu_to_le16(flags); - err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_CONFIG_PROMISCUOUS_MODE, - sizeof(struct virtchnl2_promisc_info), - (u8 *)&vpi); + xn_params.vc_op = VIRTCHNL2_OP_CONFIG_PROMISCUOUS_MODE; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + xn_params.send_buf.iov_base = &vpi; + xn_params.send_buf.iov_len = sizeof(vpi); + /* setting promiscuous is only ever done asynchronously */ + xn_params.async = true; + reply_sz = idpf_vc_xn_exec(adapter, &xn_params); - return err; + return reply_sz < 0 ? reply_sz : 0; } diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h new file mode 100644 index 000000000000..83da5d8da56b --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2024 Intel Corporation */ + +#ifndef _IDPF_VIRTCHNL_H_ +#define _IDPF_VIRTCHNL_H_ + +struct idpf_adapter; +struct idpf_netdev_priv; +struct idpf_vec_regs; +struct idpf_vport; +struct idpf_vport_max_q; +struct idpf_vport_user_config_data; + +int idpf_init_dflt_mbx(struct idpf_adapter *adapter); +void idpf_deinit_dflt_mbx(struct idpf_adapter *adapter); +int idpf_vc_core_init(struct idpf_adapter *adapter); +void idpf_vc_core_deinit(struct idpf_adapter *adapter); + +int idpf_get_reg_intr_vecs(struct idpf_vport *vport, + struct idpf_vec_regs *reg_vals); +int idpf_queue_reg_init(struct idpf_vport *vport); +int idpf_vport_queue_ids_init(struct idpf_vport *vport); + +int idpf_recv_mb_msg(struct idpf_adapter *adapter); +int idpf_send_mb_msg(struct idpf_adapter *adapter, u32 op, + u16 msg_size, u8 *msg, u16 cookie); + +void idpf_vport_init(struct idpf_vport *vport, struct idpf_vport_max_q *max_q); +u32 idpf_get_vport_id(struct idpf_vport *vport); +int idpf_send_create_vport_msg(struct idpf_adapter *adapter, + struct idpf_vport_max_q *max_q); +int idpf_send_destroy_vport_msg(struct idpf_vport *vport); +int idpf_send_enable_vport_msg(struct idpf_vport *vport); +int idpf_send_disable_vport_msg(struct idpf_vport *vport); + +int idpf_vport_adjust_qs(struct idpf_vport *vport); +int idpf_vport_alloc_max_qs(struct idpf_adapter *adapter, + struct idpf_vport_max_q *max_q); +void idpf_vport_dealloc_max_qs(struct idpf_adapter *adapter, + struct idpf_vport_max_q *max_q); +int idpf_send_add_queues_msg(const struct idpf_vport *vport, u16 num_tx_q, + u16 num_complq, u16 num_rx_q, u16 num_rx_bufq); +int idpf_send_delete_queues_msg(struct idpf_vport *vport); +int idpf_send_enable_queues_msg(struct idpf_vport *vport); +int idpf_send_disable_queues_msg(struct idpf_vport *vport); +int idpf_send_config_queues_msg(struct idpf_vport *vport); + +int idpf_vport_alloc_vec_indexes(struct idpf_vport *vport); +int idpf_get_vec_ids(struct idpf_adapter *adapter, + u16 *vecids, int num_vecids, + struct virtchnl2_vector_chunks *chunks); +int idpf_send_alloc_vectors_msg(struct idpf_adapter *adapter, u16 num_vectors); +int idpf_send_dealloc_vectors_msg(struct idpf_adapter *adapter); +int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map); + +int idpf_add_del_mac_filters(struct idpf_vport *vport, + struct idpf_netdev_priv *np, + bool add, bool async); +int idpf_set_promiscuous(struct idpf_adapter *adapter, + struct idpf_vport_user_config_data *config_data, + u32 vport_id); +int idpf_check_supported_desc_ids(struct idpf_vport *vport); +int idpf_send_get_rx_ptype_msg(struct idpf_vport *vport); +int idpf_send_ena_dis_loopback_msg(struct idpf_vport *vport); +int idpf_send_get_stats_msg(struct idpf_vport *vport); +int idpf_send_set_sriov_vfs_msg(struct idpf_adapter *adapter, u16 num_vfs); +int idpf_send_get_set_rss_key_msg(struct idpf_vport *vport, bool get); +int idpf_send_get_set_rss_lut_msg(struct idpf_vport *vport, bool get); + +#endif /* _IDPF_VIRTCHNL_H_ */ diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index cebb44f51d5f..518298bbdadc 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -202,7 +202,7 @@ static struct notifier_block dca_notifier = { #endif #ifdef CONFIG_PCI_IOV static unsigned int max_vfs; -module_param(max_vfs, uint, 0); +module_param(max_vfs, uint, 0444); MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate per physical function"); #endif /* CONFIG_PCI_IOV */ @@ -2538,7 +2538,7 @@ igb_features_check(struct sk_buff *skb, struct net_device *dev, unsigned int network_hdr_len, mac_hdr_len; /* Make certain the headers can be described by a context descriptor */ - mac_hdr_len = skb_network_header(skb) - skb->data; + mac_hdr_len = skb_network_offset(skb); if (unlikely(mac_hdr_len > IGB_MAX_MAC_HDR_LEN)) return features & ~(NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC | diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 319c544b9f04..f94570556120 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -957,7 +957,7 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) igb_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval); /* adjust timestamp for the TX latency based on link speed */ - if (adapter->hw.mac.type == e1000_i210) { + if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) { switch (adapter->link_speed) { case SPEED_10: adjust = IGB_I210_TX_LATENCY_10; @@ -1003,6 +1003,7 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, ktime_t *timestamp) { struct igb_adapter *adapter = q_vector->adapter; + struct e1000_hw *hw = &adapter->hw; struct skb_shared_hwtstamps ts; __le64 *regval = (__le64 *)va; int adjust = 0; @@ -1022,7 +1023,7 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, igb_ptp_systim_to_hwtstamp(adapter, &ts, le64_to_cpu(regval[1])); /* adjust timestamp for the RX latency based on link speed */ - if (adapter->hw.mac.type == e1000_i210) { + if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) { switch (adapter->link_speed) { case SPEED_10: adjust = IGB_I210_RX_LATENCY_10; diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index a4d4f00e6a87..b0cf310e6f7b 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -2655,7 +2655,7 @@ igbvf_features_check(struct sk_buff *skb, struct net_device *dev, unsigned int network_hdr_len, mac_hdr_len; /* Make certain the headers can be described by a context descriptor */ - mac_hdr_len = skb_network_header(skb) - skb->data; + mac_hdr_len = skb_network_offset(skb); if (unlikely(mac_hdr_len > IGBVF_MAX_MAC_HDR_LEN)) return features & ~(NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC | diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index cfa6baccec55..90316dc58630 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -570,7 +570,6 @@ struct igc_q_vector { struct rcu_head rcu; /* to avoid race with update stats on free */ char name[IFNAMSIZ + 9]; - struct net_device poll_dev; /* for dynamic allocation of rings associated with this q_vector */ struct igc_ring ring[] ____cacheline_internodealigned_in_smp; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 3af52d238f3b..14924dd8a4b9 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -5277,7 +5277,7 @@ igc_features_check(struct sk_buff *skb, struct net_device *dev, unsigned int network_hdr_len, mac_hdr_len; /* Make certain the headers can be described by a context descriptor */ - mac_hdr_len = skb_network_header(skb) - skb->data; + mac_hdr_len = skb_network_offset(skb); if (unlikely(mac_hdr_len > IGC_MAX_MAC_HDR_LEN)) return features & ~(NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC | @@ -6488,7 +6488,7 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, int cpu = smp_processor_id(); struct netdev_queue *nq; struct igc_ring *ring; - int i, drops; + int i, nxmit; if (unlikely(!netif_carrier_ok(dev))) return -ENETDOWN; @@ -6504,16 +6504,15 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, /* Avoid transmit queue timeout since we share it with the slow path */ txq_trans_cond_update(nq); - drops = 0; + nxmit = 0; for (i = 0; i < num_frames; i++) { int err; struct xdp_frame *xdpf = frames[i]; err = igc_xdp_init_tx_descriptor(ring, xdpf); - if (err) { - xdp_return_frame_rx_napi(xdpf); - drops++; - } + if (err) + break; + nxmit++; } if (flags & XDP_XMIT_FLUSH) @@ -6521,7 +6520,7 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, __netif_tx_unlock(nq); - return num_frames - drops; + return nxmit; } static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c index e0c300fe5cee..cdaf087b4e85 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c @@ -334,7 +334,9 @@ static int ixgbe_get_link_capabilities_82599(struct ixgbe_hw *hw, hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 || hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1 || hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 || - hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1) { + hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_bx_core0 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_bx_core1) { *speed = IXGBE_LINK_SPEED_1GB_FULL; *autoneg = true; return 0; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 633bac1543dd..6e6e6f1847b6 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -349,6 +349,8 @@ static int ixgbe_get_link_ksettings(struct net_device *netdev, case ixgbe_sfp_type_1g_sx_core1: case ixgbe_sfp_type_1g_lx_core0: case ixgbe_sfp_type_1g_lx_core1: + case ixgbe_sfp_type_1g_bx_core0: + case ixgbe_sfp_type_1g_bx_core1: ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); ethtool_link_ksettings_add_link_mode(cmd, advertising, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index e23c3614fb10..8fe8107eb854 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -153,7 +153,7 @@ MODULE_PARM_DESC(max_vfs, #endif /* CONFIG_PCI_IOV */ static bool allow_unsupported_sfp; -module_param(allow_unsupported_sfp, bool, 0); +module_param(allow_unsupported_sfp, bool, 0444); MODULE_PARM_DESC(allow_unsupported_sfp, "Allow unsupported and untested SFP+ modules on 82599-based adapters"); @@ -2939,8 +2939,8 @@ static void ixgbe_check_lsc(struct ixgbe_adapter *adapter) static inline void ixgbe_irq_enable_queues(struct ixgbe_adapter *adapter, u64 qmask) { - u32 mask; struct ixgbe_hw *hw = &adapter->hw; + u32 mask; switch (hw->mac.type) { case ixgbe_mac_82598EB: @@ -10205,7 +10205,7 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev, unsigned int network_hdr_len, mac_hdr_len; /* Make certain the headers can be described by a context descriptor */ - mac_hdr_len = skb_network_header(skb) - skb->data; + mac_hdr_len = skb_network_offset(skb); if (unlikely(mac_hdr_len > IXGBE_MAX_MAC_HDR_LEN)) return features & ~(NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC | @@ -10525,6 +10525,44 @@ static void ixgbe_reset_rxr_stats(struct ixgbe_ring *rx_ring) } /** + * ixgbe_irq_disable_single - Disable single IRQ vector + * @adapter: adapter structure + * @ring: ring index + **/ +static void ixgbe_irq_disable_single(struct ixgbe_adapter *adapter, u32 ring) +{ + struct ixgbe_hw *hw = &adapter->hw; + u64 qmask = BIT_ULL(ring); + u32 mask; + + switch (adapter->hw.mac.type) { + case ixgbe_mac_82598EB: + mask = qmask & IXGBE_EIMC_RTX_QUEUE; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, mask); + break; + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + case ixgbe_mac_x550em_a: + mask = (qmask & 0xFFFFFFFF); + if (mask) + IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask); + mask = (qmask >> 32); + if (mask) + IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask); + break; + default: + break; + } + IXGBE_WRITE_FLUSH(&adapter->hw); + if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) + synchronize_irq(adapter->msix_entries[ring].vector); + else + synchronize_irq(adapter->pdev->irq); +} + +/** * ixgbe_txrx_ring_disable - Disable Rx/Tx/XDP Tx rings * @adapter: adapter structure * @ring: ring index @@ -10540,6 +10578,11 @@ void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring) tx_ring = adapter->tx_ring[ring]; xdp_ring = adapter->xdp_ring[ring]; + ixgbe_irq_disable_single(adapter, ring); + + /* Rx/Tx/XDP Tx share the same napi context. */ + napi_disable(&rx_ring->q_vector->napi); + ixgbe_disable_txr(adapter, tx_ring); if (xdp_ring) ixgbe_disable_txr(adapter, xdp_ring); @@ -10548,9 +10591,6 @@ void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring) if (xdp_ring) synchronize_rcu(); - /* Rx/Tx/XDP Tx share the same napi context. */ - napi_disable(&rx_ring->q_vector->napi); - ixgbe_clean_tx_ring(tx_ring); if (xdp_ring) ixgbe_clean_tx_ring(xdp_ring); @@ -10578,9 +10618,6 @@ void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring) tx_ring = adapter->tx_ring[ring]; xdp_ring = adapter->xdp_ring[ring]; - /* Rx/Tx/XDP Tx share the same napi context. */ - napi_enable(&rx_ring->q_vector->napi); - ixgbe_configure_tx_ring(adapter, tx_ring); if (xdp_ring) ixgbe_configure_tx_ring(adapter, xdp_ring); @@ -10589,6 +10626,11 @@ void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring) clear_bit(__IXGBE_TX_DISABLED, &tx_ring->state); if (xdp_ring) clear_bit(__IXGBE_TX_DISABLED, &xdp_ring->state); + + /* Rx/Tx/XDP Tx share the same napi context. */ + napi_enable(&rx_ring->q_vector->napi); + ixgbe_irq_enable_queues(adapter, BIT_ULL(ring)); + IXGBE_WRITE_FLUSH(&adapter->hw); } /** diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index 75e9453331ed..07eaa3c3f4d3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -1532,6 +1532,7 @@ int ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) enum ixgbe_sfp_type stored_sfp_type = hw->phy.sfp_type; struct ixgbe_adapter *adapter = hw->back; u8 oui_bytes[3] = {0, 0, 0}; + u8 bitrate_nominal = 0; u8 comp_codes_10g = 0; u8 comp_codes_1g = 0; u16 enforce_sfp = 0; @@ -1576,7 +1577,12 @@ int ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) status = hw->phy.ops.read_i2c_eeprom(hw, IXGBE_SFF_CABLE_TECHNOLOGY, &cable_tech); + if (status) + goto err_read_i2c_eeprom; + status = hw->phy.ops.read_i2c_eeprom(hw, + IXGBE_SFF_BITRATE_NOMINAL, + &bitrate_nominal); if (status) goto err_read_i2c_eeprom; @@ -1659,6 +1665,18 @@ int ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) else hw->phy.sfp_type = ixgbe_sfp_type_1g_lx_core1; + /* Support only Ethernet 1000BASE-BX10, checking the Bit Rate + * Nominal Value as per SFF-8472 by convention 1.25 Gb/s should + * be rounded up to 0Dh (13 in units of 100 MBd) for 1000BASE-BX + */ + } else if ((comp_codes_1g & IXGBE_SFF_BASEBX10_CAPABLE) && + (bitrate_nominal == 0xD)) { + if (hw->bus.lan_id == 0) + hw->phy.sfp_type = + ixgbe_sfp_type_1g_bx_core0; + else + hw->phy.sfp_type = + ixgbe_sfp_type_1g_bx_core1; } else { hw->phy.sfp_type = ixgbe_sfp_type_unknown; } @@ -1747,7 +1765,9 @@ int ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 || hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1 || hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 || - hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1)) { + hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_bx_core0 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_bx_core1)) { hw->phy.type = ixgbe_phy_sfp_unsupported; return -EOPNOTSUPP; } @@ -1763,7 +1783,9 @@ int ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 || hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1 || hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 || - hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1)) { + hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_bx_core0 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_bx_core1)) { /* Make sure we're a supported PHY type */ if (hw->phy.type == ixgbe_phy_sfp_intel) return 0; @@ -1999,12 +2021,14 @@ int ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, if (sfp_type == ixgbe_sfp_type_da_act_lmt_core0 || sfp_type == ixgbe_sfp_type_1g_lx_core0 || sfp_type == ixgbe_sfp_type_1g_cu_core0 || - sfp_type == ixgbe_sfp_type_1g_sx_core0) + sfp_type == ixgbe_sfp_type_1g_sx_core0 || + sfp_type == ixgbe_sfp_type_1g_bx_core0) sfp_type = ixgbe_sfp_type_srlr_core0; else if (sfp_type == ixgbe_sfp_type_da_act_lmt_core1 || sfp_type == ixgbe_sfp_type_1g_lx_core1 || sfp_type == ixgbe_sfp_type_1g_cu_core1 || - sfp_type == ixgbe_sfp_type_1g_sx_core1) + sfp_type == ixgbe_sfp_type_1g_sx_core1 || + sfp_type == ixgbe_sfp_type_1g_bx_core1) sfp_type = ixgbe_sfp_type_srlr_core1; /* Read offset to PHY init contents */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h index beedcb7bec0d..14aa2ca51f70 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h @@ -17,6 +17,7 @@ #define IXGBE_SFF_1GBE_COMP_CODES 0x6 #define IXGBE_SFF_10GBE_COMP_CODES 0x3 #define IXGBE_SFF_CABLE_TECHNOLOGY 0x8 +#define IXGBE_SFF_BITRATE_NOMINAL 0xC #define IXGBE_SFF_CABLE_SPEC_COMP 0x3C #define IXGBE_SFF_SFF_8472_SWAP 0x5C #define IXGBE_SFF_SFF_8472_COMP 0x5E @@ -39,6 +40,7 @@ #define IXGBE_SFF_1GBASESX_CAPABLE 0x1 #define IXGBE_SFF_1GBASELX_CAPABLE 0x2 #define IXGBE_SFF_1GBASET_CAPABLE 0x8 +#define IXGBE_SFF_BASEBX10_CAPABLE 0x64 #define IXGBE_SFF_10GBASESR_CAPABLE 0x10 #define IXGBE_SFF_10GBASELR_CAPABLE 0x20 #define IXGBE_SFF_SOFT_RS_SELECT_MASK 0x8 diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index d44c58130be2..ed440dd0c4f9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -3210,6 +3210,9 @@ enum ixgbe_sfp_type { ixgbe_sfp_type_1g_sx_core1 = 12, ixgbe_sfp_type_1g_lx_core0 = 13, ixgbe_sfp_type_1g_lx_core1 = 14, + ixgbe_sfp_type_1g_bx_core0 = 15, + ixgbe_sfp_type_1g_bx_core1 = 16, + ixgbe_sfp_type_not_present = 0xFFFE, ixgbe_sfp_type_unknown = 0xFFFF }; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index a44e4bd56142..9c960017a6de 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -4413,7 +4413,7 @@ ixgbevf_features_check(struct sk_buff *skb, struct net_device *dev, unsigned int network_hdr_len, mac_hdr_len; /* Make certain the headers can be described by a context descriptor */ - mac_hdr_len = skb_network_header(skb) - skb->data; + mac_hdr_len = skb_network_offset(skb); if (unlikely(mac_hdr_len > IXGBEVF_MAX_MAC_HDR_LEN)) return features & ~(NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC | diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index edd12d09dc89..07d4859de53a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -817,6 +817,8 @@ static int rvu_fwdata_init(struct rvu *rvu) err = cgx_get_fwdata_base(&fwdbase); if (err) goto fail; + + BUILD_BUG_ON(offsetof(struct rvu_fwdata, cgx_fw_data) > FWDATA_CGX_LMAC_OFFSET); rvu->fwdata = ioremap_wc(fwdbase, sizeof(struct rvu_fwdata)); if (!rvu->fwdata) goto fail; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index de8eba902276..f390525a6217 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -469,11 +469,12 @@ struct rvu_fwdata { u32 ptp_ext_clk_rate; u32 ptp_ext_tstamp; struct channel_fwdata channel_data; -#define FWDATA_RESERVED_MEM 1014 +#define FWDATA_RESERVED_MEM 958 u64 reserved[FWDATA_RESERVED_MEM]; #define CGX_MAX 9 #define CGX_LMACS_MAX 4 #define CGX_LMACS_USX 8 +#define FWDATA_CGX_LMAC_OFFSET 10536 union { struct cgx_lmac_fwdata_s cgx_fw_data[CGX_MAX][CGX_LMACS_MAX]; diff --git a/drivers/net/ethernet/mediatek/mtk_wed_wo.c b/drivers/net/ethernet/mediatek/mtk_wed_wo.c index d58b07e7e123..7063c78bd35f 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed_wo.c +++ b/drivers/net/ethernet/mediatek/mtk_wed_wo.c @@ -286,7 +286,6 @@ mtk_wed_wo_queue_free(struct mtk_wed_wo *wo, struct mtk_wed_wo_queue *q) static void mtk_wed_wo_queue_tx_clean(struct mtk_wed_wo *wo, struct mtk_wed_wo_queue *q) { - struct page *page; int i; for (i = 0; i < q->n_desc; i++) { @@ -301,19 +300,12 @@ mtk_wed_wo_queue_tx_clean(struct mtk_wed_wo *wo, struct mtk_wed_wo_queue *q) entry->buf = NULL; } - if (!q->cache.va) - return; - - page = virt_to_page(q->cache.va); - __page_frag_cache_drain(page, q->cache.pagecnt_bias); - memset(&q->cache, 0, sizeof(q->cache)); + page_frag_cache_drain(&q->cache); } static void mtk_wed_wo_queue_rx_clean(struct mtk_wed_wo *wo, struct mtk_wed_wo_queue *q) { - struct page *page; - for (;;) { void *buf = mtk_wed_wo_dequeue(wo, q, NULL, true); @@ -323,12 +315,7 @@ mtk_wed_wo_queue_rx_clean(struct mtk_wed_wo *wo, struct mtk_wed_wo_queue *q) skb_free_frag(buf); } - if (!q->cache.va) - return; - - page = virt_to_page(q->cache.va); - __page_frag_cache_drain(page, q->cache.pagecnt_bias); - memset(&q->cache, 0, sizeof(q->cache)); + page_frag_cache_drain(&q->cache); } static void diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 3e064234f6fe..98d4306929f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -157,6 +157,12 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, return -EOPNOTSUPP; } + if (action == DEVLINK_RELOAD_ACTION_FW_ACTIVATE && + !dev->priv.fw_reset) { + NL_SET_ERR_MSG_MOD(extack, "FW activate is unsupported for this function"); + return -EOPNOTSUPP; + } + if (mlx5_core_is_pf(dev) && pci_num_vf(pdev)) NL_SET_ERR_MSG_MOD(extack, "reload while VFs are present is unfavorable"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dpll.c b/drivers/net/ethernet/mellanox/mlx5/core/dpll.c index c9c7fddb246f..904e08de852e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dpll.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dpll.c @@ -285,7 +285,7 @@ static void mlx5_dpll_netdev_dpll_pin_set(struct mlx5_dpll *mdpll, { if (mdpll->tracking_netdev) return; - netdev_dpll_pin_set(netdev, mdpll->dpll_pin); + dpll_netdev_pin_set(netdev, mdpll->dpll_pin); mdpll->tracking_netdev = netdev; } @@ -293,7 +293,7 @@ static void mlx5_dpll_netdev_dpll_pin_clear(struct mlx5_dpll *mdpll) { if (!mdpll->tracking_netdev) return; - netdev_dpll_pin_clear(mdpll->tracking_netdev); + dpll_netdev_pin_clear(mdpll->tracking_netdev); mdpll->tracking_netdev = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index fd4ef6431142..a848a7fda66b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -42,9 +42,9 @@ mlx5e_ptp_port_ts_cqe_list_add(struct mlx5e_ptp_port_ts_cqe_list *list, u8 metad WARN_ON_ONCE(tracker->inuse); tracker->inuse = true; - spin_lock(&list->tracker_list_lock); + spin_lock_bh(&list->tracker_list_lock); list_add_tail(&tracker->entry, &list->tracker_list_head); - spin_unlock(&list->tracker_list_lock); + spin_unlock_bh(&list->tracker_list_lock); } static void @@ -54,9 +54,9 @@ mlx5e_ptp_port_ts_cqe_list_remove(struct mlx5e_ptp_port_ts_cqe_list *list, u8 me WARN_ON_ONCE(!tracker->inuse); tracker->inuse = false; - spin_lock(&list->tracker_list_lock); + spin_lock_bh(&list->tracker_list_lock); list_del(&tracker->entry); - spin_unlock(&list->tracker_list_lock); + spin_unlock_bh(&list->tracker_list_lock); } void mlx5e_ptpsq_track_metadata(struct mlx5e_ptpsq *ptpsq, u8 metadata) @@ -155,7 +155,7 @@ static void mlx5e_ptpsq_mark_ts_cqes_undelivered(struct mlx5e_ptpsq *ptpsq, struct mlx5e_ptp_metadata_map *metadata_map = &ptpsq->metadata_map; struct mlx5e_ptp_port_ts_cqe_tracker *pos, *n; - spin_lock(&cqe_list->tracker_list_lock); + spin_lock_bh(&cqe_list->tracker_list_lock); list_for_each_entry_safe(pos, n, &cqe_list->tracker_list_head, entry) { struct sk_buff *skb = mlx5e_ptp_metadata_map_lookup(metadata_map, pos->metadata_id); @@ -170,7 +170,7 @@ static void mlx5e_ptpsq_mark_ts_cqes_undelivered(struct mlx5e_ptpsq *ptpsq, pos->inuse = false; list_del(&pos->entry); } - spin_unlock(&cqe_list->tracker_list_lock); + spin_unlock_bh(&cqe_list->tracker_list_lock); } #define PTP_WQE_CTR2IDX(val) ((val) & ptpsq->ts_cqe_ctr_mask) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c index 86bf007fd05b..b500cc2c9689 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c @@ -37,7 +37,7 @@ mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ignore_flow_level, table_type)) { if (priv->mdev->coredev_type == MLX5_COREDEV_PF) - mlx5_core_warn(priv->mdev, "firmware level support is missing\n"); + mlx5_core_dbg(priv->mdev, "firmware flow level support is missing\n"); err = -EOPNOTSUPP; goto err_check; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index d4ebd8743114..b2cabd6ab86c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -310,9 +310,9 @@ static void mlx5e_macsec_destroy_object(struct mlx5_core_dev *mdev, u32 macsec_o mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); } -static void mlx5e_macsec_cleanup_sa(struct mlx5e_macsec *macsec, - struct mlx5e_macsec_sa *sa, - bool is_tx, struct net_device *netdev, u32 fs_id) +static void mlx5e_macsec_cleanup_sa_fs(struct mlx5e_macsec *macsec, + struct mlx5e_macsec_sa *sa, bool is_tx, + struct net_device *netdev, u32 fs_id) { int action = (is_tx) ? MLX5_ACCEL_MACSEC_ACTION_ENCRYPT : MLX5_ACCEL_MACSEC_ACTION_DECRYPT; @@ -322,20 +322,49 @@ static void mlx5e_macsec_cleanup_sa(struct mlx5e_macsec *macsec, mlx5_macsec_fs_del_rule(macsec->mdev->macsec_fs, sa->macsec_rule, action, netdev, fs_id); - mlx5e_macsec_destroy_object(macsec->mdev, sa->macsec_obj_id); sa->macsec_rule = NULL; } +static void mlx5e_macsec_cleanup_sa(struct mlx5e_macsec *macsec, + struct mlx5e_macsec_sa *sa, bool is_tx, + struct net_device *netdev, u32 fs_id) +{ + mlx5e_macsec_cleanup_sa_fs(macsec, sa, is_tx, netdev, fs_id); + mlx5e_macsec_destroy_object(macsec->mdev, sa->macsec_obj_id); +} + +static int mlx5e_macsec_init_sa_fs(struct macsec_context *ctx, + struct mlx5e_macsec_sa *sa, bool encrypt, + bool is_tx, u32 *fs_id) +{ + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); + struct mlx5_macsec_fs *macsec_fs = priv->mdev->macsec_fs; + struct mlx5_macsec_rule_attrs rule_attrs; + union mlx5_macsec_rule *macsec_rule; + + rule_attrs.macsec_obj_id = sa->macsec_obj_id; + rule_attrs.sci = sa->sci; + rule_attrs.assoc_num = sa->assoc_num; + rule_attrs.action = (is_tx) ? MLX5_ACCEL_MACSEC_ACTION_ENCRYPT : + MLX5_ACCEL_MACSEC_ACTION_DECRYPT; + + macsec_rule = mlx5_macsec_fs_add_rule(macsec_fs, ctx, &rule_attrs, fs_id); + if (!macsec_rule) + return -ENOMEM; + + sa->macsec_rule = macsec_rule; + + return 0; +} + static int mlx5e_macsec_init_sa(struct macsec_context *ctx, struct mlx5e_macsec_sa *sa, bool encrypt, bool is_tx, u32 *fs_id) { struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); struct mlx5e_macsec *macsec = priv->macsec; - struct mlx5_macsec_rule_attrs rule_attrs; struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_macsec_obj_attrs obj_attrs; - union mlx5_macsec_rule *macsec_rule; int err; obj_attrs.next_pn = sa->next_pn; @@ -357,20 +386,12 @@ static int mlx5e_macsec_init_sa(struct macsec_context *ctx, if (err) return err; - rule_attrs.macsec_obj_id = sa->macsec_obj_id; - rule_attrs.sci = sa->sci; - rule_attrs.assoc_num = sa->assoc_num; - rule_attrs.action = (is_tx) ? MLX5_ACCEL_MACSEC_ACTION_ENCRYPT : - MLX5_ACCEL_MACSEC_ACTION_DECRYPT; - - macsec_rule = mlx5_macsec_fs_add_rule(mdev->macsec_fs, ctx, &rule_attrs, fs_id); - if (!macsec_rule) { - err = -ENOMEM; - goto destroy_macsec_object; + if (sa->active) { + err = mlx5e_macsec_init_sa_fs(ctx, sa, encrypt, is_tx, fs_id); + if (err) + goto destroy_macsec_object; } - sa->macsec_rule = macsec_rule; - return 0; destroy_macsec_object: @@ -526,9 +547,7 @@ static int mlx5e_macsec_add_txsa(struct macsec_context *ctx) goto destroy_sa; macsec_device->tx_sa[assoc_num] = tx_sa; - if (!secy->operational || - assoc_num != tx_sc->encoding_sa || - !tx_sa->active) + if (!secy->operational) goto out; err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true, NULL); @@ -595,7 +614,7 @@ static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx) goto out; if (ctx_tx_sa->active) { - err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true, NULL); + err = mlx5e_macsec_init_sa_fs(ctx, tx_sa, tx_sc->encrypt, true, NULL); if (err) goto out; } else { @@ -604,7 +623,7 @@ static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx) goto out; } - mlx5e_macsec_cleanup_sa(macsec, tx_sa, true, ctx->secy->netdev, 0); + mlx5e_macsec_cleanup_sa_fs(macsec, tx_sa, true, ctx->secy->netdev, 0); } out: mutex_unlock(&macsec->lock); @@ -1030,8 +1049,9 @@ static int mlx5e_macsec_del_rxsa(struct macsec_context *ctx) goto out; } - mlx5e_macsec_cleanup_sa(macsec, rx_sa, false, ctx->secy->netdev, - rx_sc->sc_xarray_element->fs_id); + if (rx_sa->active) + mlx5e_macsec_cleanup_sa(macsec, rx_sa, false, ctx->secy->netdev, + rx_sc->sc_xarray_element->fs_id); mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id); kfree(rx_sa); rx_sc->rx_sa[assoc_num] = NULL; @@ -1112,8 +1132,8 @@ static int macsec_upd_secy_hw_address(struct macsec_context *ctx, if (!rx_sa || !rx_sa->macsec_rule) continue; - mlx5e_macsec_cleanup_sa(macsec, rx_sa, false, ctx->secy->netdev, - rx_sc->sc_xarray_element->fs_id); + mlx5e_macsec_cleanup_sa_fs(macsec, rx_sa, false, ctx->secy->netdev, + rx_sc->sc_xarray_element->fs_id); } } @@ -1124,8 +1144,8 @@ static int macsec_upd_secy_hw_address(struct macsec_context *ctx, continue; if (rx_sa->active) { - err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false, - &rx_sc->sc_xarray_element->fs_id); + err = mlx5e_macsec_init_sa_fs(ctx, rx_sa, true, false, + &rx_sc->sc_xarray_element->fs_id); if (err) goto out; } @@ -1178,7 +1198,7 @@ static int mlx5e_macsec_upd_secy(struct macsec_context *ctx) if (!tx_sa) continue; - mlx5e_macsec_cleanup_sa(macsec, tx_sa, true, ctx->secy->netdev, 0); + mlx5e_macsec_cleanup_sa_fs(macsec, tx_sa, true, ctx->secy->netdev, 0); } for (i = 0; i < MACSEC_NUM_AN; ++i) { @@ -1187,7 +1207,7 @@ static int mlx5e_macsec_upd_secy(struct macsec_context *ctx) continue; if (tx_sa->assoc_num == tx_sc->encoding_sa && tx_sa->active) { - err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true, NULL); + err = mlx5e_macsec_init_sa_fs(ctx, tx_sa, tx_sc->encrypt, true, NULL); if (err) goto out; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 5c166d9d2dca..2fa076b23fbe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -401,6 +401,8 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, mlx5e_skb_cb_hwtstamp_init(skb); mlx5e_ptp_metadata_map_put(&sq->ptpsq->metadata_map, skb, metadata_index); + /* ensure skb is put on metadata_map before tracking the index */ + wmb(); mlx5e_ptpsq_track_metadata(sq->ptpsq, metadata_index); if (!netif_tx_queue_stopped(sq->txq) && mlx5e_ptpsq_metadata_freelist_empty(sq->ptpsq)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c index 190f10aba170..5a0047bdcb51 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c @@ -152,7 +152,7 @@ void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev) xa_for_each(&esw->offloads.vport_reps, i, rep) { rpriv = rep->rep_data[REP_ETH].priv; - if (!rpriv || !rpriv->netdev || !atomic_read(&rpriv->tc_ht.nelems)) + if (!rpriv || !rpriv->netdev) continue; rhashtable_walk_enter(&rpriv->tc_ht, &iter); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index b0455134c98e..baaae628b0a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -535,21 +535,26 @@ esw_src_port_rewrite_supported(struct mlx5_eswitch *esw) } static bool -esw_dests_to_vf_pf_vports(struct mlx5_flow_destination *dests, int max_dest) +esw_dests_to_int_external(struct mlx5_flow_destination *dests, int max_dest) { - bool vf_dest = false, pf_dest = false; + bool internal_dest = false, external_dest = false; int i; for (i = 0; i < max_dest; i++) { - if (dests[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT) + if (dests[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT && + dests[i].type != MLX5_FLOW_DESTINATION_TYPE_UPLINK) continue; - if (dests[i].vport.num == MLX5_VPORT_UPLINK) - pf_dest = true; + /* Uplink dest is external, but considered as internal + * if there is reformat because firmware uses LB+hairpin to support it. + */ + if (dests[i].vport.num == MLX5_VPORT_UPLINK && + !(dests[i].vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID)) + external_dest = true; else - vf_dest = true; + internal_dest = true; - if (vf_dest && pf_dest) + if (internal_dest && external_dest) return true; } @@ -695,9 +700,9 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, /* Header rewrite with combined wire+loopback in FDB is not allowed */ if ((flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) && - esw_dests_to_vf_pf_vports(dest, i)) { + esw_dests_to_int_external(dest, i)) { esw_warn(esw->dev, - "FDB: Header rewrite with forwarding to both PF and VF is not allowed\n"); + "FDB: Header rewrite with forwarding to both internal and external dests is not allowed\n"); rule = ERR_PTR(-EINVAL); goto err_esw_get; } @@ -3658,22 +3663,6 @@ static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode) return 0; } -static bool esw_offloads_devlink_ns_eq_netdev_ns(struct devlink *devlink) -{ - struct mlx5_core_dev *dev = devlink_priv(devlink); - struct net *devl_net, *netdev_net; - bool ret = false; - - mutex_lock(&dev->mlx5e_res.uplink_netdev_lock); - if (dev->mlx5e_res.uplink_netdev) { - netdev_net = dev_net(dev->mlx5e_res.uplink_netdev); - devl_net = devlink_net(devlink); - ret = net_eq(devl_net, netdev_net); - } - mutex_unlock(&dev->mlx5e_res.uplink_netdev_lock); - return ret; -} - int mlx5_eswitch_block_mode(struct mlx5_core_dev *dev) { struct mlx5_eswitch *esw = dev->priv.eswitch; @@ -3718,13 +3707,6 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, if (esw_mode_from_devlink(mode, &mlx5_mode)) return -EINVAL; - if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && - !esw_offloads_devlink_ns_eq_netdev_ns(devlink)) { - NL_SET_ERR_MSG_MOD(extack, - "Can't change E-Switch mode to switchdev when netdev net namespace has diverged from the devlink's."); - return -EPERM; - } - mlx5_lag_disable_change(esw->dev); err = mlx5_esw_try_lock(esw); if (err < 0) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index f27eab6e4929..2911aa34a5be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -703,19 +703,30 @@ void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + if (!fw_reset) + return; + MLX5_NB_INIT(&fw_reset->nb, fw_reset_event_notifier, GENERAL_EVENT); mlx5_eq_notifier_register(dev, &fw_reset->nb); } void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev) { - mlx5_eq_notifier_unregister(dev, &dev->priv.fw_reset->nb); + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + if (!fw_reset) + return; + + mlx5_eq_notifier_unregister(dev, &fw_reset->nb); } void mlx5_drain_fw_reset(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + if (!fw_reset) + return; + set_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags); cancel_work_sync(&fw_reset->fw_live_patch_work); cancel_work_sync(&fw_reset->reset_request_work); @@ -733,9 +744,13 @@ static const struct devlink_param mlx5_fw_reset_devlink_params[] = { int mlx5_fw_reset_init(struct mlx5_core_dev *dev) { - struct mlx5_fw_reset *fw_reset = kzalloc(sizeof(*fw_reset), GFP_KERNEL); + struct mlx5_fw_reset *fw_reset; int err; + if (!MLX5_CAP_MCAM_REG(dev, mfrl)) + return 0; + + fw_reset = kzalloc(sizeof(*fw_reset), GFP_KERNEL); if (!fw_reset) return -ENOMEM; fw_reset->wq = create_singlethread_workqueue("mlx5_fw_reset_events"); @@ -771,6 +786,9 @@ void mlx5_fw_reset_cleanup(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + if (!fw_reset) + return; + devl_params_unregister(priv_to_devlink(dev), mlx5_fw_reset_devlink_params, ARRAY_SIZE(mlx5_fw_reset_devlink_params)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 9463ede84d8d..ad38e31822df 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -452,10 +452,10 @@ mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter, struct health_buffer __iomem *h = health->health; u8 synd = ioread8(&h->synd); + devlink_fmsg_u8_pair_put(fmsg, "Syndrome", synd); if (!synd) return 0; - devlink_fmsg_u8_pair_put(fmsg, "Syndrome", synd); devlink_fmsg_string_pair_put(fmsg, "Description", hsynd_str(synd)); return 0; diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_ethtool.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_ethtool.c index 253d7ad9b809..8b63968bbee9 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_ethtool.c @@ -124,6 +124,41 @@ static void mlxbf_gige_get_pauseparam(struct net_device *netdev, pause->tx_pause = 1; } +static bool mlxbf_gige_llu_counters_enabled(struct mlxbf_gige *priv) +{ + u32 data; + + if (priv->hw_version == MLXBF_GIGE_VERSION_BF2) { + data = readl(priv->llu_base + MLXBF_GIGE_BF2_LLU_GENERAL_CONFIG); + if (data & MLXBF_GIGE_BF2_LLU_COUNTERS_EN) + return true; + } else { + data = readl(priv->llu_base + MLXBF_GIGE_BF3_LLU_GENERAL_CONFIG); + if (data & MLXBF_GIGE_BF3_LLU_COUNTERS_EN) + return true; + } + + return false; +} + +static void mlxbf_gige_get_pause_stats(struct net_device *netdev, + struct ethtool_pause_stats *pause_stats) +{ + struct mlxbf_gige *priv = netdev_priv(netdev); + u64 data_lo, data_hi; + + /* Read LLU counters to provide stats only if counters are enabled */ + if (mlxbf_gige_llu_counters_enabled(priv)) { + data_lo = readl(priv->llu_base + MLXBF_GIGE_TX_PAUSE_CNT_LO); + data_hi = readl(priv->llu_base + MLXBF_GIGE_TX_PAUSE_CNT_HI); + pause_stats->tx_pause_frames = (data_hi << 32) | data_lo; + + data_lo = readl(priv->llu_base + MLXBF_GIGE_RX_PAUSE_CNT_LO); + data_hi = readl(priv->llu_base + MLXBF_GIGE_RX_PAUSE_CNT_HI); + pause_stats->rx_pause_frames = (data_hi << 32) | data_lo; + } +} + const struct ethtool_ops mlxbf_gige_ethtool_ops = { .get_link = ethtool_op_get_link, .get_ringparam = mlxbf_gige_get_ringparam, @@ -134,6 +169,7 @@ const struct ethtool_ops mlxbf_gige_ethtool_ops = { .get_ethtool_stats = mlxbf_gige_get_ethtool_stats, .nway_reset = phy_ethtool_nway_reset, .get_pauseparam = mlxbf_gige_get_pauseparam, + .get_pause_stats = mlxbf_gige_get_pause_stats, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, }; diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h index cd0973229c9b..98a8681c21b9 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h @@ -99,4 +99,34 @@ #define MLXBF_GIGE_100M_IPG_SIZE 119 #define MLXBF_GIGE_10M_IPG_SIZE 1199 +/* Offsets into OOB LLU block for pause frame counters */ +#define MLXBF_GIGE_BF2_TX_PAUSE_CNT_HI 0x33d8 +#define MLXBF_GIGE_BF2_TX_PAUSE_CNT_LO 0x33dc +#define MLXBF_GIGE_BF2_RX_PAUSE_CNT_HI 0x3210 +#define MLXBF_GIGE_BF2_RX_PAUSE_CNT_LO 0x3214 + +#define MLXBF_GIGE_BF3_TX_PAUSE_CNT_HI 0x3a88 +#define MLXBF_GIGE_BF3_TX_PAUSE_CNT_LO 0x3a8c +#define MLXBF_GIGE_BF3_RX_PAUSE_CNT_HI 0x38c0 +#define MLXBF_GIGE_BF3_RX_PAUSE_CNT_LO 0x38c4 + +#define MLXBF_GIGE_TX_PAUSE_CNT_HI ((priv->hw_version == MLXBF_GIGE_VERSION_BF2) ? \ + MLXBF_GIGE_BF2_TX_PAUSE_CNT_HI : \ + MLXBF_GIGE_BF3_TX_PAUSE_CNT_HI) +#define MLXBF_GIGE_TX_PAUSE_CNT_LO ((priv->hw_version == MLXBF_GIGE_VERSION_BF2) ? \ + MLXBF_GIGE_BF2_TX_PAUSE_CNT_LO : \ + MLXBF_GIGE_BF3_TX_PAUSE_CNT_LO) +#define MLXBF_GIGE_RX_PAUSE_CNT_HI ((priv->hw_version == MLXBF_GIGE_VERSION_BF2) ? \ + MLXBF_GIGE_BF2_RX_PAUSE_CNT_HI : \ + MLXBF_GIGE_BF3_RX_PAUSE_CNT_HI) +#define MLXBF_GIGE_RX_PAUSE_CNT_LO ((priv->hw_version == MLXBF_GIGE_VERSION_BF2) ? \ + MLXBF_GIGE_BF2_RX_PAUSE_CNT_LO : \ + MLXBF_GIGE_BF3_RX_PAUSE_CNT_LO) + +#define MLXBF_GIGE_BF2_LLU_GENERAL_CONFIG 0x2110 +#define MLXBF_GIGE_BF3_LLU_GENERAL_CONFIG 0x2030 + +#define MLXBF_GIGE_BF2_LLU_COUNTERS_EN BIT(0) +#define MLXBF_GIGE_BF3_LLU_COUNTERS_EN BIT(4) + #endif /* !defined(__MLXBF_GIGE_REGS_H__) */ diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c index 4af285918ea2..75868b3f548e 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c @@ -347,10 +347,10 @@ int sparx5_del_mact_entry(struct sparx5 *sparx5, list) { if ((vid == 0 || mact_entry->vid == vid) && ether_addr_equal(addr, mact_entry->mac)) { + sparx5_mact_forget(sparx5, addr, mact_entry->vid); + list_del(&mact_entry->list); devm_kfree(sparx5->dev, mact_entry); - - sparx5_mact_forget(sparx5, addr, mact_entry->vid); } } mutex_unlock(&sparx5->mact_lock); diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 2b383d92d7f5..2c3f62907958 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -460,7 +460,7 @@ nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun, set_tun->ttl = ip6_dst_hoplimit(dst); dst_release(dst); } else { - set_tun->ttl = net->ipv6.devconf_all->hop_limit; + set_tun->ttl = READ_ONCE(net->ipv6.devconf_all->hop_limit); } #endif } else { diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c index 10a9d80db32c..6ba8d4aca0a0 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c @@ -93,6 +93,7 @@ static void ionic_unmap_bars(struct ionic *ionic) bars[i].len = 0; } } + ionic->num_bars = 0; } void __iomem *ionic_bus_map_dbpage(struct ionic *ionic, int page_num) @@ -215,13 +216,15 @@ out: static void ionic_clear_pci(struct ionic *ionic) { - ionic->idev.dev_info_regs = NULL; - ionic->idev.dev_cmd_regs = NULL; - ionic->idev.intr_status = NULL; - ionic->idev.intr_ctrl = NULL; - - ionic_unmap_bars(ionic); - pci_release_regions(ionic->pdev); + if (ionic->num_bars) { + ionic->idev.dev_info_regs = NULL; + ionic->idev.dev_cmd_regs = NULL; + ionic->idev.intr_status = NULL; + ionic->idev.intr_ctrl = NULL; + + ionic_unmap_bars(ionic); + pci_release_regions(ionic->pdev); + } if (pci_is_enabled(ionic->pdev)) pci_disable_device(ionic->pdev); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c index 91327ef670c7..c3ae11a48024 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c @@ -113,8 +113,8 @@ static const struct debugfs_reg32 intr_ctrl_regs[] = { void ionic_debugfs_add_qcq(struct ionic_lif *lif, struct ionic_qcq *qcq) { struct dentry *qcq_dentry, *q_dentry, *cq_dentry; - struct dentry *intr_dentry, *stats_dentry; struct ionic_dev *idev = &lif->ionic->idev; + struct dentry *intr_dentry, *stats_dentry; struct debugfs_regset32 *intr_ctrl_regset; struct ionic_intr_info *intr = &qcq->intr; struct debugfs_blob_wrapper *desc_blob; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c index 1e7c71f7f081..746072b4dbd0 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c @@ -319,22 +319,32 @@ do_check_time: u8 ionic_dev_cmd_status(struct ionic_dev *idev) { + if (!idev->dev_cmd_regs) + return (u8)PCI_ERROR_RESPONSE; return ioread8(&idev->dev_cmd_regs->comp.comp.status); } bool ionic_dev_cmd_done(struct ionic_dev *idev) { + if (!idev->dev_cmd_regs) + return false; return ioread32(&idev->dev_cmd_regs->done) & IONIC_DEV_CMD_DONE; } void ionic_dev_cmd_comp(struct ionic_dev *idev, union ionic_dev_cmd_comp *comp) { + if (!idev->dev_cmd_regs) + return; memcpy_fromio(comp, &idev->dev_cmd_regs->comp, sizeof(*comp)); } void ionic_dev_cmd_go(struct ionic_dev *idev, union ionic_dev_cmd *cmd) { idev->opcode = cmd->cmd.opcode; + + if (!idev->dev_cmd_regs) + return; + memcpy_toio(&idev->dev_cmd_regs->cmd, cmd, sizeof(*cmd)); iowrite32(0, &idev->dev_cmd_regs->done); iowrite32(1, &idev->dev_cmd_regs->doorbell); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index bfcfc2d7bcbd..516db910e8e8 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -16,9 +16,10 @@ #define IONIC_MAX_TX_DESC 8192 #define IONIC_MAX_RX_DESC 16384 #define IONIC_MIN_TXRX_DESC 64 -#define IONIC_DEF_TXRX_DESC 4096 +#define IONIC_DEF_TXRX_DESC 1024 #define IONIC_RX_FILL_THRESHOLD 16 #define IONIC_RX_FILL_DIV 8 +#define IONIC_TSO_DESCS_NEEDED 44 /* 64K TSO @1500B */ #define IONIC_LIFS_MAX 1024 #define IONIC_WATCHDOG_SECS 5 #define IONIC_ITR_COAL_USEC_DEFAULT 64 @@ -379,6 +380,7 @@ typedef void (*ionic_cq_done_cb)(void *done_arg); unsigned int ionic_cq_service(struct ionic_cq *cq, unsigned int work_to_do, ionic_cq_cb cb, ionic_cq_done_cb done_cb, void *done_arg); +unsigned int ionic_tx_cq_service(struct ionic_cq *cq, unsigned int work_to_do); int ionic_q_init(struct ionic_lif *lif, struct ionic_dev *idev, struct ionic_queue *q, unsigned int index, const char *name, diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index 98df2ee11c51..91183965a6b7 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -90,18 +90,23 @@ static void ionic_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p) { struct ionic_lif *lif = netdev_priv(netdev); + struct ionic_dev *idev; unsigned int offset; unsigned int size; regs->version = IONIC_DEV_CMD_REG_VERSION; + idev = &lif->ionic->idev; + if (!idev->dev_info_regs) + return; + offset = 0; size = IONIC_DEV_INFO_REG_COUNT * sizeof(u32); memcpy_fromio(p + offset, lif->ionic->idev.dev_info_regs->words, size); offset += size; size = IONIC_DEV_CMD_REG_COUNT * sizeof(u32); - memcpy_fromio(p + offset, lif->ionic->idev.dev_cmd_regs->words, size); + memcpy_fromio(p + offset, idev->dev_cmd_regs->words, size); } static void ionic_get_link_ext_stats(struct net_device *netdev, diff --git a/drivers/net/ethernet/pensando/ionic/ionic_fw.c b/drivers/net/ethernet/pensando/ionic/ionic_fw.c index 5f40324cd243..3c209c1a2337 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_fw.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_fw.c @@ -109,6 +109,11 @@ int ionic_firmware_update(struct ionic_lif *lif, const struct firmware *fw, dl = priv_to_devlink(ionic); devlink_flash_update_status_notify(dl, "Preparing to flash", NULL, 0, 0); + if (!idev->dev_cmd_regs) { + err = -ENXIO; + goto err_out; + } + buf_sz = sizeof(idev->dev_cmd_regs->data); netdev_dbg(netdev, diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 5cfc784f1227..33b1691a4ee5 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -52,15 +52,20 @@ static void ionic_xdp_unregister_rxq_info(struct ionic_queue *q); static void ionic_dim_work(struct work_struct *work) { struct dim *dim = container_of(work, struct dim, work); - struct ionic_intr_info *intr; struct dim_cq_moder cur_moder; + struct ionic_intr_info *intr; struct ionic_qcq *qcq; struct ionic_lif *lif; + struct ionic_queue *q; u32 new_coal; - cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); qcq = container_of(dim, struct ionic_qcq, dim); - lif = qcq->q.lif; + q = &qcq->q; + if (q->type == IONIC_QTYPE_RXQ) + cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); + else + cur_moder = net_dim_get_tx_moderation(dim->mode, dim->profile_ix); + lif = q->lif; new_coal = ionic_coal_usec_to_hw(lif->ionic, cur_moder.usec); new_coal = new_coal ? new_coal : 1; @@ -685,7 +690,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, } INIT_WORK(&new->dim.work, ionic_dim_work); - new->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + new->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE; *qcq = new; @@ -1262,8 +1267,7 @@ static int ionic_adminq_napi(struct napi_struct *napi, int budget) ionic_rx_service, NULL, NULL); if (lif->hwstamp_txq) - tx_work = ionic_cq_service(&lif->hwstamp_txq->cq, budget, - ionic_tx_service, NULL, NULL); + tx_work = ionic_tx_cq_service(&lif->hwstamp_txq->cq, budget); work_done = max(max(n_work, a_work), max(rx_work, tx_work)); if (work_done < budget && napi_complete_done(napi, work_done)) { @@ -3746,7 +3750,10 @@ int ionic_lif_init(struct ionic_lif *lif) goto err_out_notifyq_deinit; } - err = ionic_init_nic_features(lif); + if (test_bit(IONIC_LIF_F_FW_RESET, lif->state)) + err = ionic_set_nic_features(lif, lif->netdev->features); + else + err = ionic_init_nic_features(lif); if (err) goto err_out_notifyq_deinit; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index 42006de8069d..b4f8692a3ead 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -327,7 +327,7 @@ static inline u32 ionic_coal_usec_to_hw(struct ionic *ionic, u32 usecs) static inline bool ionic_txq_hwstamp_enabled(struct ionic_queue *q) { - return unlikely(q->features & IONIC_TXQ_F_HWSTAMP); + return q->features & IONIC_TXQ_F_HWSTAMP; } void ionic_link_status_check_request(struct ionic_lif *lif, bool can_sleep); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index 165ab08ad2dd..29b4d039bbce 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -15,7 +15,7 @@ #include "ionic_debugfs.h" MODULE_DESCRIPTION(IONIC_DRV_DESCRIPTION); -MODULE_AUTHOR("Pensando Systems, Inc"); +MODULE_AUTHOR("Shannon Nelson <shannon.nelson@amd.com>"); MODULE_LICENSE("GPL"); static const char *ionic_error_to_str(enum ionic_status_code code) @@ -416,6 +416,9 @@ static void ionic_dev_cmd_clean(struct ionic *ionic) { struct ionic_dev *idev = &ionic->idev; + if (!idev->dev_cmd_regs) + return; + iowrite32(0, &idev->dev_cmd_regs->doorbell); memset_io(&idev->dev_cmd_regs->cmd, 0, sizeof(idev->dev_cmd_regs->cmd)); } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 56a7ad5bff17..6d168ad8c84f 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -5,13 +5,12 @@ #include <linux/ipv6.h> #include <linux/if_vlan.h> #include <net/ip6_checksum.h> +#include <net/netdev_queues.h> #include "ionic.h" #include "ionic_lif.h" #include "ionic_txrx.h" -static int ionic_maybe_stop_tx(struct ionic_queue *q, int ndescs); - static dma_addr_t ionic_tx_map_single(struct ionic_queue *q, void *data, size_t len); @@ -41,8 +40,8 @@ static inline void ionic_rxq_post(struct ionic_queue *q, bool ring_dbell, bool ionic_txq_poke_doorbell(struct ionic_queue *q) { - unsigned long now, then, dif; struct netdev_queue *netdev_txq; + unsigned long now, then, dif; struct net_device *netdev; netdev = q->lif->netdev; @@ -100,9 +99,10 @@ bool ionic_rxq_poke_doorbell(struct ionic_queue *q) return true; } -static inline struct netdev_queue *q_to_ndq(struct ionic_queue *q) +static inline struct netdev_queue *q_to_ndq(struct net_device *netdev, + struct ionic_queue *q) { - return netdev_get_tx_queue(q->lif->netdev, q->index); + return netdev_get_tx_queue(netdev, q->index); } static void *ionic_rx_buf_va(struct ionic_buf_info *buf_info) @@ -123,7 +123,6 @@ static unsigned int ionic_rx_buf_size(struct ionic_buf_info *buf_info) static int ionic_rx_page_alloc(struct ionic_queue *q, struct ionic_buf_info *buf_info) { - struct net_device *netdev = q->lif->netdev; struct ionic_rx_stats *stats; struct device *dev; struct page *page; @@ -133,14 +132,14 @@ static int ionic_rx_page_alloc(struct ionic_queue *q, if (unlikely(!buf_info)) { net_err_ratelimited("%s: %s invalid buf_info in alloc\n", - netdev->name, q->name); + dev_name(dev), q->name); return -EINVAL; } page = alloc_pages(IONIC_PAGE_GFP_MASK, 0); if (unlikely(!page)) { net_err_ratelimited("%s: %s page alloc failed\n", - netdev->name, q->name); + dev_name(dev), q->name); stats->alloc_err++; return -ENOMEM; } @@ -150,7 +149,7 @@ static int ionic_rx_page_alloc(struct ionic_queue *q, if (unlikely(dma_mapping_error(dev, buf_info->dma_addr))) { __free_pages(page, 0); net_err_ratelimited("%s: %s dma map failed\n", - netdev->name, q->name); + dev_name(dev), q->name); stats->dma_map_err++; return -EIO; } @@ -164,12 +163,11 @@ static int ionic_rx_page_alloc(struct ionic_queue *q, static void ionic_rx_page_free(struct ionic_queue *q, struct ionic_buf_info *buf_info) { - struct net_device *netdev = q->lif->netdev; struct device *dev = q->dev; if (unlikely(!buf_info)) { net_err_ratelimited("%s: %s invalid buf_info in free\n", - netdev->name, q->name); + dev_name(dev), q->name); return; } @@ -204,14 +202,14 @@ static bool ionic_rx_buf_recycle(struct ionic_queue *q, return true; } -static struct sk_buff *ionic_rx_frags(struct ionic_queue *q, +static struct sk_buff *ionic_rx_frags(struct net_device *netdev, + struct ionic_queue *q, struct ionic_desc_info *desc_info, unsigned int headroom, unsigned int len, unsigned int num_sg_elems, bool synced) { - struct net_device *netdev = q->lif->netdev; struct ionic_buf_info *buf_info; struct ionic_rx_stats *stats; struct device *dev = q->dev; @@ -228,7 +226,7 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q, skb = napi_get_frags(&q_to_qcq(q)->napi); if (unlikely(!skb)) { net_warn_ratelimited("%s: SKB alloc failed on %s!\n", - netdev->name, q->name); + dev_name(dev), q->name); stats->alloc_err++; return NULL; } @@ -272,13 +270,13 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q, return skb; } -static struct sk_buff *ionic_rx_copybreak(struct ionic_queue *q, +static struct sk_buff *ionic_rx_copybreak(struct net_device *netdev, + struct ionic_queue *q, struct ionic_desc_info *desc_info, unsigned int headroom, unsigned int len, bool synced) { - struct net_device *netdev = q->lif->netdev; struct ionic_buf_info *buf_info; struct ionic_rx_stats *stats; struct device *dev = q->dev; @@ -291,7 +289,7 @@ static struct sk_buff *ionic_rx_copybreak(struct ionic_queue *q, skb = napi_alloc_skb(&q_to_qcq(q)->napi, len); if (unlikely(!skb)) { net_warn_ratelimited("%s: SKB alloc failed on %s!\n", - netdev->name, q->name); + dev_name(dev), q->name); stats->alloc_err++; return NULL; } @@ -309,7 +307,7 @@ static struct sk_buff *ionic_rx_copybreak(struct ionic_queue *q, headroom, len, DMA_FROM_DEVICE); skb_put(skb, len); - skb->protocol = eth_type_trans(skb, q->lif->netdev); + skb->protocol = eth_type_trans(skb, netdev); return skb; } @@ -349,8 +347,7 @@ static void ionic_xdp_tx_desc_clean(struct ionic_queue *q, desc_info->act = 0; } -static int ionic_xdp_post_frame(struct net_device *netdev, - struct ionic_queue *q, struct xdp_frame *frame, +static int ionic_xdp_post_frame(struct ionic_queue *q, struct xdp_frame *frame, enum xdp_action act, struct page *page, int off, bool ring_doorbell) { @@ -458,14 +455,16 @@ int ionic_xdp_xmit(struct net_device *netdev, int n, txq_trans_cond_update(nq); if (netif_tx_queue_stopped(nq) || - unlikely(ionic_maybe_stop_tx(txq, 1))) { + !netif_txq_maybe_stop(q_to_ndq(netdev, txq), + ionic_q_space_avail(txq), + 1, 1)) { __netif_tx_unlock(nq); return -EIO; } space = min_t(int, n, ionic_q_space_avail(txq)); for (nxmit = 0; nxmit < space ; nxmit++) { - if (ionic_xdp_post_frame(netdev, txq, xdp_frames[nxmit], + if (ionic_xdp_post_frame(txq, xdp_frames[nxmit], XDP_REDIRECT, virt_to_page(xdp_frames[nxmit]->data), 0, false)) { @@ -478,7 +477,9 @@ int ionic_xdp_xmit(struct net_device *netdev, int n, ionic_dbell_ring(lif->kern_dbpage, txq->hw_type, txq->dbval | txq->head_idx); - ionic_maybe_stop_tx(txq, 4); + netif_txq_maybe_stop(q_to_ndq(netdev, txq), + ionic_q_space_avail(txq), + 4, 4); __netif_tx_unlock(nq); return nxmit; @@ -571,7 +572,9 @@ static bool ionic_run_xdp(struct ionic_rx_stats *stats, txq_trans_cond_update(nq); if (netif_tx_queue_stopped(nq) || - unlikely(ionic_maybe_stop_tx(txq, 1))) { + !netif_txq_maybe_stop(q_to_ndq(netdev, txq), + ionic_q_space_avail(txq), + 1, 1)) { __netif_tx_unlock(nq); goto out_xdp_abort; } @@ -579,7 +582,7 @@ static bool ionic_run_xdp(struct ionic_rx_stats *stats, dma_unmap_page(rxq->dev, buf_info->dma_addr, IONIC_PAGE_SIZE, DMA_FROM_DEVICE); - err = ionic_xdp_post_frame(netdev, txq, xdpf, XDP_TX, + err = ionic_xdp_post_frame(txq, xdpf, XDP_TX, buf_info->page, buf_info->page_offset, true); @@ -657,9 +660,10 @@ static void ionic_rx_clean(struct ionic_queue *q, headroom = q->xdp_rxq_info ? XDP_PACKET_HEADROOM : 0; if (len <= q->lif->rx_copybreak) - skb = ionic_rx_copybreak(q, desc_info, headroom, len, !!xdp_prog); + skb = ionic_rx_copybreak(netdev, q, desc_info, + headroom, len, !!xdp_prog); else - skb = ionic_rx_frags(q, desc_info, headroom, len, + skb = ionic_rx_frags(netdev, q, desc_info, headroom, len, comp->num_sg_elems, !!xdp_prog); if (unlikely(!skb)) { @@ -946,8 +950,7 @@ int ionic_tx_napi(struct napi_struct *napi, int budget) lif = cq->bound_q->lif; idev = &lif->ionic->idev; - work_done = ionic_cq_service(cq, budget, - ionic_tx_service, NULL, NULL); + work_done = ionic_tx_cq_service(cq, budget); if (unlikely(!budget)) return budget; @@ -1038,8 +1041,7 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget) txqcq = lif->txqcqs[qi]; txcq = &lif->txqcqs[qi]->cq; - tx_work_done = ionic_cq_service(txcq, IONIC_TX_BUDGET_DEFAULT, - ionic_tx_service, NULL, NULL); + tx_work_done = ionic_tx_cq_service(txcq, IONIC_TX_BUDGET_DEFAULT); if (unlikely(!budget)) return budget; @@ -1082,7 +1084,7 @@ static dma_addr_t ionic_tx_map_single(struct ionic_queue *q, dma_addr = dma_map_single(dev, data, len, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma_addr)) { net_warn_ratelimited("%s: DMA single map failed on %s!\n", - q->lif->netdev->name, q->name); + dev_name(dev), q->name); stats->dma_map_err++; return 0; } @@ -1100,7 +1102,7 @@ static dma_addr_t ionic_tx_map_frag(struct ionic_queue *q, dma_addr = skb_frag_dma_map(dev, frag, offset, len, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma_addr)) { net_warn_ratelimited("%s: DMA frag map failed on %s!\n", - q->lif->netdev->name, q->name); + dev_name(dev), q->name); stats->dma_map_err++; } return dma_addr; @@ -1183,7 +1185,6 @@ static void ionic_tx_clean(struct ionic_queue *q, struct ionic_tx_stats *stats = q_to_tx_stats(q); struct ionic_qcq *qcq = q_to_qcq(q); struct sk_buff *skb = cb_arg; - u16 qi; if (desc_info->xdpf) { ionic_xdp_tx_desc_clean(q->partner, desc_info); @@ -1200,9 +1201,7 @@ static void ionic_tx_clean(struct ionic_queue *q, if (!skb) return; - qi = skb_get_queue_mapping(skb); - - if (ionic_txq_hwstamp_enabled(q)) { + if (unlikely(ionic_txq_hwstamp_enabled(q))) { if (cq_info) { struct skb_shared_hwtstamps hwts = {}; __le64 *cq_desc_hwstamp; @@ -1227,24 +1226,22 @@ static void ionic_tx_clean(struct ionic_queue *q, stats->hwstamp_invalid++; } } - - } else if (unlikely(__netif_subqueue_stopped(q->lif->netdev, qi))) { - netif_wake_subqueue(q->lif->netdev, qi); } desc_info->bytes = skb->len; stats->clean++; - dev_consume_skb_any(skb); + napi_consume_skb(skb, 1); } -bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info) +static bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info, + unsigned int *total_pkts, unsigned int *total_bytes) { struct ionic_queue *q = cq->bound_q; struct ionic_desc_info *desc_info; struct ionic_txq_comp *comp; - int bytes = 0; - int pkts = 0; + unsigned int bytes = 0; + unsigned int pkts = 0; u16 index; comp = cq_info->cq_desc + cq->desc_size - sizeof(*comp); @@ -1269,19 +1266,52 @@ bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info) desc_info->cb_arg = NULL; } while (index != le16_to_cpu(comp->comp_index)); - if (pkts && bytes && !ionic_txq_hwstamp_enabled(q)) - netdev_tx_completed_queue(q_to_ndq(q), pkts, bytes); + (*total_pkts) += pkts; + (*total_bytes) += bytes; return true; } +unsigned int ionic_tx_cq_service(struct ionic_cq *cq, unsigned int work_to_do) +{ + struct ionic_cq_info *cq_info; + unsigned int work_done = 0; + unsigned int bytes = 0; + unsigned int pkts = 0; + + if (work_to_do == 0) + return 0; + + cq_info = &cq->info[cq->tail_idx]; + while (ionic_tx_service(cq, cq_info, &pkts, &bytes)) { + if (cq->tail_idx == cq->num_descs - 1) + cq->done_color = !cq->done_color; + cq->tail_idx = (cq->tail_idx + 1) & (cq->num_descs - 1); + cq_info = &cq->info[cq->tail_idx]; + + if (++work_done >= work_to_do) + break; + } + + if (work_done) { + struct ionic_queue *q = cq->bound_q; + + if (likely(!ionic_txq_hwstamp_enabled(q))) + netif_txq_completed_wake(q_to_ndq(q->lif->netdev, q), + pkts, bytes, + ionic_q_space_avail(q), + IONIC_TSO_DESCS_NEEDED); + } + + return work_done; +} + void ionic_tx_flush(struct ionic_cq *cq) { struct ionic_dev *idev = &cq->lif->ionic->idev; u32 work_done; - work_done = ionic_cq_service(cq, cq->num_descs, - ionic_tx_service, NULL, NULL); + work_done = ionic_tx_cq_service(cq, cq->num_descs); if (work_done) ionic_intr_credits(idev->intr_ctrl, cq->bound_intr->index, work_done, IONIC_INTR_CRED_RESET_COALESCE); @@ -1307,8 +1337,12 @@ void ionic_tx_empty(struct ionic_queue *q) desc_info->cb_arg = NULL; } - if (pkts && bytes && !ionic_txq_hwstamp_enabled(q)) - netdev_tx_completed_queue(q_to_ndq(q), pkts, bytes); + if (likely(!ionic_txq_hwstamp_enabled(q))) { + struct netdev_queue *ndq = q_to_ndq(q->lif->netdev, q); + + netdev_tx_completed_queue(ndq, pkts, bytes); + netdev_tx_reset_queue(ndq); + } } static int ionic_tx_tcp_inner_pseudo_csum(struct sk_buff *skb) @@ -1356,7 +1390,7 @@ static int ionic_tx_tcp_pseudo_csum(struct sk_buff *skb) return 0; } -static void ionic_tx_tso_post(struct ionic_queue *q, +static void ionic_tx_tso_post(struct net_device *netdev, struct ionic_queue *q, struct ionic_desc_info *desc_info, struct sk_buff *skb, dma_addr_t addr, u8 nsge, u16 len, @@ -1385,15 +1419,16 @@ static void ionic_tx_tso_post(struct ionic_queue *q, if (start) { skb_tx_timestamp(skb); - if (!ionic_txq_hwstamp_enabled(q)) - netdev_tx_sent_queue(q_to_ndq(q), skb->len); + if (likely(!ionic_txq_hwstamp_enabled(q))) + netdev_tx_sent_queue(q_to_ndq(netdev, q), skb->len); ionic_txq_post(q, false, ionic_tx_clean, skb); } else { ionic_txq_post(q, done, NULL, NULL); } } -static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb) +static int ionic_tx_tso(struct net_device *netdev, struct ionic_queue *q, + struct sk_buff *skb) { struct ionic_tx_stats *stats = q_to_tx_stats(q); struct ionic_desc_info *desc_info; @@ -1501,7 +1536,7 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb) seg_rem = min(tso_rem, mss); done = (tso_rem == 0); /* post descriptor */ - ionic_tx_tso_post(q, desc_info, skb, + ionic_tx_tso_post(netdev, q, desc_info, skb, desc_addr, desc_nsge, desc_len, hdrlen, mss, outer_csum, vlan_tci, has_vlan, start, done); @@ -1611,10 +1646,12 @@ static void ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb, stats->frags += skb_shinfo(skb)->nr_frags; } -static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb) +static int ionic_tx(struct net_device *netdev, struct ionic_queue *q, + struct sk_buff *skb) { struct ionic_desc_info *desc_info = &q->info[q->head_idx]; struct ionic_tx_stats *stats = q_to_tx_stats(q); + bool ring_dbell = true; if (unlikely(ionic_tx_map_skb(q, skb, desc_info))) return -EIO; @@ -1632,16 +1669,22 @@ static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb) stats->pkts++; stats->bytes += skb->len; - if (!ionic_txq_hwstamp_enabled(q)) - netdev_tx_sent_queue(q_to_ndq(q), skb->len); - ionic_txq_post(q, !netdev_xmit_more(), ionic_tx_clean, skb); + if (likely(!ionic_txq_hwstamp_enabled(q))) { + struct netdev_queue *ndq = q_to_ndq(netdev, q); + + if (unlikely(!ionic_q_has_space(q, MAX_SKB_FRAGS + 1))) + netif_tx_stop_queue(ndq); + ring_dbell = __netdev_tx_sent_queue(ndq, skb->len, + netdev_xmit_more()); + } + ionic_txq_post(q, ring_dbell, ionic_tx_clean, skb); return 0; } static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb) { - struct ionic_tx_stats *stats = q_to_tx_stats(q); + int nr_frags = skb_shinfo(skb)->nr_frags; bool too_many_frags = false; skb_frag_t *frag; int desc_bufs; @@ -1657,17 +1700,20 @@ static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb) /* Each desc is mss long max, so a descriptor for each gso_seg */ if (skb_is_gso(skb)) { ndescs = skb_shinfo(skb)->gso_segs; + if (!nr_frags) + return ndescs; } else { ndescs = 1; - if (skb_shinfo(skb)->nr_frags > q->max_sg_elems) { + if (!nr_frags) + return ndescs; + + if (unlikely(nr_frags > q->max_sg_elems)) { too_many_frags = true; goto linearize; } - } - /* If non-TSO, or no frags to check, we're done */ - if (!skb_is_gso(skb) || !skb_shinfo(skb)->nr_frags) return ndescs; + } /* We need to scan the skb to be sure that none of the MTU sized * packets in the TSO will require more sgs per descriptor than we @@ -1715,6 +1761,8 @@ static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb) linearize: if (too_many_frags) { + struct ionic_tx_stats *stats = q_to_tx_stats(q); + err = skb_linearize(skb); if (err) return err; @@ -1724,30 +1772,11 @@ linearize: return ndescs; } -static int ionic_maybe_stop_tx(struct ionic_queue *q, int ndescs) -{ - int stopped = 0; - - if (unlikely(!ionic_q_has_space(q, ndescs))) { - netif_stop_subqueue(q->lif->netdev, q->index); - stopped = 1; - - /* Might race with ionic_tx_clean, check again */ - smp_rmb(); - if (ionic_q_has_space(q, ndescs)) { - netif_wake_subqueue(q->lif->netdev, q->index); - stopped = 0; - } - } - - return stopped; -} - static netdev_tx_t ionic_start_hwstamp_xmit(struct sk_buff *skb, struct net_device *netdev) { struct ionic_lif *lif = netdev_priv(netdev); - struct ionic_queue *q = &lif->hwstamp_txq->q; + struct ionic_queue *q; int err, ndescs; /* Does not stop/start txq, because we post to a separate tx queue @@ -1755,6 +1784,7 @@ static netdev_tx_t ionic_start_hwstamp_xmit(struct sk_buff *skb, * the timestamping queue, it is dropped. */ + q = &lif->hwstamp_txq->q; ndescs = ionic_tx_descs_needed(q, skb); if (unlikely(ndescs < 0)) goto err_out_drop; @@ -1764,9 +1794,9 @@ static netdev_tx_t ionic_start_hwstamp_xmit(struct sk_buff *skb, skb_shinfo(skb)->tx_flags |= SKBTX_HW_TSTAMP; if (skb_is_gso(skb)) - err = ionic_tx_tso(q, skb); + err = ionic_tx_tso(netdev, q, skb); else - err = ionic_tx(q, skb); + err = ionic_tx(netdev, q, skb); if (err) goto err_out_drop; @@ -1804,23 +1834,19 @@ netdev_tx_t ionic_start_xmit(struct sk_buff *skb, struct net_device *netdev) if (ndescs < 0) goto err_out_drop; - if (unlikely(ionic_maybe_stop_tx(q, ndescs))) + if (!netif_txq_maybe_stop(q_to_ndq(netdev, q), + ionic_q_space_avail(q), + ndescs, ndescs)) return NETDEV_TX_BUSY; if (skb_is_gso(skb)) - err = ionic_tx_tso(q, skb); + err = ionic_tx_tso(netdev, q, skb); else - err = ionic_tx(q, skb); + err = ionic_tx(netdev, q, skb); if (err) goto err_out_drop; - /* Stop the queue if there aren't descriptors for the next packet. - * Since our SG lists per descriptor take care of most of the possible - * fragmentation, we don't need to have many descriptors available. - */ - ionic_maybe_stop_tx(q, 4); - return NETDEV_TX_OK; err_out_drop: diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.h b/drivers/net/ethernet/pensando/ionic/ionic_txrx.h index 82fc38e0f573..68228bb8c119 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.h @@ -15,7 +15,6 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget); netdev_tx_t ionic_start_xmit(struct sk_buff *skb, struct net_device *netdev); bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info); -bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info); int ionic_xdp_xmit(struct net_device *netdev, int n, struct xdp_frame **xdp, u32 flags); #endif /* _IONIC_TXRX_H_ */ diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index cb1746bc0e0c..847fa62c80df 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -215,7 +215,7 @@ static void qede_set_params_for_ipv6_ext(struct sk_buff *skb, bd2_bits1 |= (1 << ETH_TX_DATA_2ND_BD_IPV6_EXT_SHIFT); - bd2_bits2 |= ((((u8 *)skb_transport_header(skb) - skb->data) >> 1) & + bd2_bits2 |= ((skb_transport_offset(skb) >> 1) & ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_MASK) << ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_SHIFT; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index 41894d154013..b9dc0071c5de 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -446,8 +446,7 @@ static int qlcnic_tx_encap_pkt(struct qlcnic_adapter *adapter, encap_descr |= skb_network_offset(skb) << 10; first_desc->encap_descr = cpu_to_le16(encap_descr); - first_desc->tcp_hdr_offset = skb_inner_transport_header(skb) - - skb->data; + first_desc->tcp_hdr_offset = skb_inner_transport_offset(skb); first_desc->ip_hdr_offset = skb_inner_network_offset(skb); qlcnic_set_tx_flags_opcode(first_desc, flags, opcode); diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index 35e642fc4b2a..b48935ec7e28 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -1015,11 +1015,6 @@ enum CSR2_BIT { #define NUM_RX_QUEUE 2 #define NUM_TX_QUEUE 2 -#define RX_BUF_SZ (2048 - ETH_FCS_LEN + sizeof(__sum16)) - -#define GBETH_RX_BUFF_MAX 8192 -#define GBETH_RX_DESC_DATA_SIZE 4080 - struct ravb_tstamp_skb { struct list_head list; struct sk_buff *skb; @@ -1044,9 +1039,6 @@ struct ravb_ptp { }; struct ravb_hw_info { - void (*rx_ring_free)(struct net_device *ndev, int q); - void (*rx_ring_format)(struct net_device *ndev, int q); - void *(*alloc_rx_desc)(struct net_device *ndev, int q); bool (*receive)(struct net_device *ndev, int *quota, int q); void (*set_rate)(struct net_device *ndev); int (*set_feature)(struct net_device *ndev, netdev_features_t features); @@ -1057,9 +1049,10 @@ struct ravb_hw_info { netdev_features_t net_hw_features; netdev_features_t net_features; int stats_len; - size_t max_rx_len; u32 tccr_mask; - u32 rx_max_buf_size; + u32 rx_max_frame_size; + u32 rx_max_desc_use; + u32 rx_desc_size; unsigned aligned_tx: 1; /* hardware features */ @@ -1092,8 +1085,11 @@ struct ravb_private { struct ravb_desc *desc_bat; dma_addr_t rx_desc_dma[NUM_RX_QUEUE]; dma_addr_t tx_desc_dma[NUM_TX_QUEUE]; - struct ravb_rx_desc *gbeth_rx_ring; - struct ravb_ex_rx_desc *rx_ring[NUM_RX_QUEUE]; + union { + struct ravb_rx_desc *desc; + struct ravb_ex_rx_desc *ex_desc; + void *raw; + } rx_ring[NUM_RX_QUEUE]; struct ravb_tx_desc *tx_ring[NUM_TX_QUEUE]; void *tx_align[NUM_TX_QUEUE]; struct sk_buff *rx_1st_skb; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index f9fb772b05c7..fa48ff4aba2d 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -113,12 +113,23 @@ static void ravb_set_rate_rcar(struct net_device *ndev) } } -static void ravb_set_buffer_align(struct sk_buff *skb) +static struct sk_buff * +ravb_alloc_skb(struct net_device *ndev, const struct ravb_hw_info *info, + gfp_t gfp_mask) { - u32 reserve = (unsigned long)skb->data & (RAVB_ALIGN - 1); + struct sk_buff *skb; + u32 reserve; + + skb = __netdev_alloc_skb(ndev, info->rx_max_frame_size + RAVB_ALIGN - 1, + gfp_mask); + if (!skb) + return NULL; + reserve = (unsigned long)skb->data & (RAVB_ALIGN - 1); if (reserve) skb_reserve(skb, RAVB_ALIGN - reserve); + + return skb; } /* Get MAC address from the MAC address registers @@ -191,6 +202,13 @@ static const struct mdiobb_ops bb_ops = { .get_mdio_data = ravb_get_mdio_data, }; +static struct ravb_rx_desc * +ravb_rx_get_desc(struct ravb_private *priv, unsigned int q, + unsigned int i) +{ + return priv->rx_ring[q].raw + priv->info->rx_desc_size * i; +} + /* Free TX skb function for AVB-IP */ static int ravb_tx_free(struct net_device *ndev, int q, bool free_txed_only) { @@ -235,67 +253,40 @@ static int ravb_tx_free(struct net_device *ndev, int q, bool free_txed_only) return free_num; } -static void ravb_rx_ring_free_gbeth(struct net_device *ndev, int q) +static void ravb_rx_ring_free(struct net_device *ndev, int q) { struct ravb_private *priv = netdev_priv(ndev); unsigned int ring_size; unsigned int i; - if (!priv->gbeth_rx_ring) + if (!priv->rx_ring[q].raw) return; for (i = 0; i < priv->num_rx_ring[q]; i++) { - struct ravb_rx_desc *desc = &priv->gbeth_rx_ring[i]; + struct ravb_rx_desc *desc = ravb_rx_get_desc(priv, q, i); if (!dma_mapping_error(ndev->dev.parent, le32_to_cpu(desc->dptr))) dma_unmap_single(ndev->dev.parent, le32_to_cpu(desc->dptr), - GBETH_RX_BUFF_MAX, + priv->info->rx_max_frame_size, DMA_FROM_DEVICE); } - ring_size = sizeof(struct ravb_rx_desc) * (priv->num_rx_ring[q] + 1); - dma_free_coherent(ndev->dev.parent, ring_size, priv->gbeth_rx_ring, + ring_size = priv->info->rx_desc_size * (priv->num_rx_ring[q] + 1); + dma_free_coherent(ndev->dev.parent, ring_size, priv->rx_ring[q].raw, priv->rx_desc_dma[q]); - priv->gbeth_rx_ring = NULL; -} - -static void ravb_rx_ring_free_rcar(struct net_device *ndev, int q) -{ - struct ravb_private *priv = netdev_priv(ndev); - unsigned int ring_size; - unsigned int i; - - if (!priv->rx_ring[q]) - return; - - for (i = 0; i < priv->num_rx_ring[q]; i++) { - struct ravb_ex_rx_desc *desc = &priv->rx_ring[q][i]; - - if (!dma_mapping_error(ndev->dev.parent, - le32_to_cpu(desc->dptr))) - dma_unmap_single(ndev->dev.parent, - le32_to_cpu(desc->dptr), - RX_BUF_SZ, - DMA_FROM_DEVICE); - } - ring_size = sizeof(struct ravb_ex_rx_desc) * - (priv->num_rx_ring[q] + 1); - dma_free_coherent(ndev->dev.parent, ring_size, priv->rx_ring[q], - priv->rx_desc_dma[q]); - priv->rx_ring[q] = NULL; + priv->rx_ring[q].raw = NULL; } /* Free skb's and DMA buffers for Ethernet AVB */ static void ravb_ring_free(struct net_device *ndev, int q) { struct ravb_private *priv = netdev_priv(ndev); - const struct ravb_hw_info *info = priv->info; unsigned int num_tx_desc = priv->num_tx_desc; unsigned int ring_size; unsigned int i; - info->rx_ring_free(ndev, q); + ravb_rx_ring_free(ndev, q); if (priv->tx_ring[q]) { ravb_tx_free(ndev, q, false); @@ -326,7 +317,7 @@ static void ravb_ring_free(struct net_device *ndev, int q) priv->tx_skb[q] = NULL; } -static void ravb_rx_ring_format_gbeth(struct net_device *ndev, int q) +static void ravb_rx_ring_format(struct net_device *ndev, int q) { struct ravb_private *priv = netdev_priv(ndev); struct ravb_rx_desc *rx_desc; @@ -334,15 +325,15 @@ static void ravb_rx_ring_format_gbeth(struct net_device *ndev, int q) dma_addr_t dma_addr; unsigned int i; - rx_ring_size = sizeof(*rx_desc) * priv->num_rx_ring[q]; - memset(priv->gbeth_rx_ring, 0, rx_ring_size); + rx_ring_size = priv->info->rx_desc_size * priv->num_rx_ring[q]; + memset(priv->rx_ring[q].raw, 0, rx_ring_size); /* Build RX ring buffer */ for (i = 0; i < priv->num_rx_ring[q]; i++) { /* RX descriptor */ - rx_desc = &priv->gbeth_rx_ring[i]; - rx_desc->ds_cc = cpu_to_le16(GBETH_RX_DESC_DATA_SIZE); + rx_desc = ravb_rx_get_desc(priv, q, i); + rx_desc->ds_cc = cpu_to_le16(priv->info->rx_max_desc_use); dma_addr = dma_map_single(ndev->dev.parent, priv->rx_skb[q][i]->data, - GBETH_RX_BUFF_MAX, + priv->info->rx_max_frame_size, DMA_FROM_DEVICE); /* We just set the data size to 0 for a failed mapping which * should prevent DMA from happening... @@ -352,37 +343,7 @@ static void ravb_rx_ring_format_gbeth(struct net_device *ndev, int q) rx_desc->dptr = cpu_to_le32(dma_addr); rx_desc->die_dt = DT_FEMPTY; } - rx_desc = &priv->gbeth_rx_ring[i]; - rx_desc->dptr = cpu_to_le32((u32)priv->rx_desc_dma[q]); - rx_desc->die_dt = DT_LINKFIX; /* type */ -} - -static void ravb_rx_ring_format_rcar(struct net_device *ndev, int q) -{ - struct ravb_private *priv = netdev_priv(ndev); - struct ravb_ex_rx_desc *rx_desc; - unsigned int rx_ring_size = sizeof(*rx_desc) * priv->num_rx_ring[q]; - dma_addr_t dma_addr; - unsigned int i; - - memset(priv->rx_ring[q], 0, rx_ring_size); - /* Build RX ring buffer */ - for (i = 0; i < priv->num_rx_ring[q]; i++) { - /* RX descriptor */ - rx_desc = &priv->rx_ring[q][i]; - rx_desc->ds_cc = cpu_to_le16(RX_BUF_SZ); - dma_addr = dma_map_single(ndev->dev.parent, priv->rx_skb[q][i]->data, - RX_BUF_SZ, - DMA_FROM_DEVICE); - /* We just set the data size to 0 for a failed mapping which - * should prevent DMA from happening... - */ - if (dma_mapping_error(ndev->dev.parent, dma_addr)) - rx_desc->ds_cc = cpu_to_le16(0); - rx_desc->dptr = cpu_to_le32(dma_addr); - rx_desc->die_dt = DT_FEMPTY; - } - rx_desc = &priv->rx_ring[q][i]; + rx_desc = ravb_rx_get_desc(priv, q, i); rx_desc->dptr = cpu_to_le32((u32)priv->rx_desc_dma[q]); rx_desc->die_dt = DT_LINKFIX; /* type */ } @@ -391,7 +352,6 @@ static void ravb_rx_ring_format_rcar(struct net_device *ndev, int q) static void ravb_ring_format(struct net_device *ndev, int q) { struct ravb_private *priv = netdev_priv(ndev); - const struct ravb_hw_info *info = priv->info; unsigned int num_tx_desc = priv->num_tx_desc; struct ravb_tx_desc *tx_desc; struct ravb_desc *desc; @@ -404,7 +364,7 @@ static void ravb_ring_format(struct net_device *ndev, int q) priv->dirty_rx[q] = 0; priv->dirty_tx[q] = 0; - info->rx_ring_format(ndev, q); + ravb_rx_ring_format(ndev, q); memset(priv->tx_ring[q], 0, tx_ring_size); /* Build TX ring buffer */ @@ -430,30 +390,18 @@ static void ravb_ring_format(struct net_device *ndev, int q) desc->dptr = cpu_to_le32((u32)priv->tx_desc_dma[q]); } -static void *ravb_alloc_rx_desc_gbeth(struct net_device *ndev, int q) +static void *ravb_alloc_rx_desc(struct net_device *ndev, int q) { struct ravb_private *priv = netdev_priv(ndev); unsigned int ring_size; - ring_size = sizeof(struct ravb_rx_desc) * (priv->num_rx_ring[q] + 1); - - priv->gbeth_rx_ring = dma_alloc_coherent(ndev->dev.parent, ring_size, - &priv->rx_desc_dma[q], - GFP_KERNEL); - return priv->gbeth_rx_ring; -} + ring_size = priv->info->rx_desc_size * (priv->num_rx_ring[q] + 1); -static void *ravb_alloc_rx_desc_rcar(struct net_device *ndev, int q) -{ - struct ravb_private *priv = netdev_priv(ndev); - unsigned int ring_size; + priv->rx_ring[q].raw = dma_alloc_coherent(ndev->dev.parent, ring_size, + &priv->rx_desc_dma[q], + GFP_KERNEL); - ring_size = sizeof(struct ravb_ex_rx_desc) * (priv->num_rx_ring[q] + 1); - - priv->rx_ring[q] = dma_alloc_coherent(ndev->dev.parent, ring_size, - &priv->rx_desc_dma[q], - GFP_KERNEL); - return priv->rx_ring[q]; + return priv->rx_ring[q].raw; } /* Init skb and descriptor buffer for Ethernet AVB */ @@ -475,10 +423,9 @@ static int ravb_ring_init(struct net_device *ndev, int q) goto error; for (i = 0; i < priv->num_rx_ring[q]; i++) { - skb = __netdev_alloc_skb(ndev, info->max_rx_len, GFP_KERNEL); + skb = ravb_alloc_skb(ndev, info, GFP_KERNEL); if (!skb) goto error; - ravb_set_buffer_align(skb); priv->rx_skb[q][i] = skb; } @@ -491,7 +438,7 @@ static int ravb_ring_init(struct net_device *ndev, int q) } /* Allocate all RX descriptors. */ - if (!info->alloc_rx_desc(ndev, q)) + if (!ravb_alloc_rx_desc(ndev, q)) goto error; priv->dirty_rx[q] = 0; @@ -557,7 +504,7 @@ static void ravb_emac_init_gbeth(struct net_device *ndev) } /* Receive frame limit set register */ - ravb_write(ndev, GBETH_RX_BUFF_MAX + ETH_FCS_LEN, RFLR); + ravb_write(ndev, priv->info->rx_max_frame_size + ETH_FCS_LEN, RFLR); /* EMAC Mode: PAUSE prohibition; Duplex; TX; RX; CRC Pass Through */ ravb_write(ndev, ECMR_ZPF | ((priv->duplex > 0) ? ECMR_DM : 0) | @@ -618,6 +565,7 @@ static void ravb_emac_init(struct net_device *ndev) static int ravb_dmac_init_gbeth(struct net_device *ndev) { + struct ravb_private *priv = netdev_priv(ndev); int error; error = ravb_ring_init(ndev, RAVB_BE); @@ -631,7 +579,7 @@ static int ravb_dmac_init_gbeth(struct net_device *ndev) ravb_write(ndev, 0x60000000, RCR); /* Set Max Frame Length (RTC) */ - ravb_write(ndev, 0x7ffc0000 | GBETH_RX_BUFF_MAX, RTC); + ravb_write(ndev, 0x7ffc0000 | priv->info->rx_max_frame_size, RTC); /* Set FIFO size */ ravb_write(ndev, 0x00222200, TGC); @@ -804,7 +752,8 @@ static struct sk_buff *ravb_get_skb_gbeth(struct net_device *ndev, int entry, skb = priv->rx_skb[RAVB_BE][entry]; priv->rx_skb[RAVB_BE][entry] = NULL; dma_unmap_single(ndev->dev.parent, le32_to_cpu(desc->dptr), - ALIGN(GBETH_RX_BUFF_MAX, 16), DMA_FROM_DEVICE); + ALIGN(priv->info->rx_max_frame_size, 16), + DMA_FROM_DEVICE); return skb; } @@ -830,7 +779,7 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q) limit = priv->dirty_rx[q] + priv->num_rx_ring[q] - priv->cur_rx[q]; stats = &priv->stats[q]; - desc = &priv->gbeth_rx_ring[entry]; + desc = &priv->rx_ring[q].desc[entry]; for (i = 0; i < limit && rx_packets < *quota && desc->die_dt != DT_FEMPTY; i++) { /* Descriptor type must be checked before all other reads */ dma_rmb(); @@ -901,23 +850,22 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q) } entry = (++priv->cur_rx[q]) % priv->num_rx_ring[q]; - desc = &priv->gbeth_rx_ring[entry]; + desc = &priv->rx_ring[q].desc[entry]; } /* Refill the RX ring buffers. */ for (; priv->cur_rx[q] - priv->dirty_rx[q] > 0; priv->dirty_rx[q]++) { entry = priv->dirty_rx[q] % priv->num_rx_ring[q]; - desc = &priv->gbeth_rx_ring[entry]; - desc->ds_cc = cpu_to_le16(GBETH_RX_DESC_DATA_SIZE); + desc = &priv->rx_ring[q].desc[entry]; + desc->ds_cc = cpu_to_le16(priv->info->rx_max_desc_use); if (!priv->rx_skb[q][entry]) { - skb = netdev_alloc_skb(ndev, info->max_rx_len); + skb = ravb_alloc_skb(ndev, info, GFP_ATOMIC); if (!skb) break; - ravb_set_buffer_align(skb); dma_addr = dma_map_single(ndev->dev.parent, skb->data, - GBETH_RX_BUFF_MAX, + priv->info->rx_max_frame_size, DMA_FROM_DEVICE); skb_checksum_none_assert(skb); /* We just set the data size to 0 for a failed mapping @@ -957,7 +905,7 @@ static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q) boguscnt = min(boguscnt, *quota); limit = boguscnt; - desc = &priv->rx_ring[q][entry]; + desc = &priv->rx_ring[q].ex_desc[entry]; while (desc->die_dt != DT_FEMPTY) { /* Descriptor type must be checked before all other reads */ dma_rmb(); @@ -991,7 +939,7 @@ static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q) skb = priv->rx_skb[q][entry]; priv->rx_skb[q][entry] = NULL; dma_unmap_single(ndev->dev.parent, le32_to_cpu(desc->dptr), - RX_BUF_SZ, + priv->info->rx_max_frame_size, DMA_FROM_DEVICE); get_ts &= (q == RAVB_NC) ? RAVB_RXTSTAMP_TYPE_V2_L2_EVENT : @@ -1017,20 +965,19 @@ static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q) } entry = (++priv->cur_rx[q]) % priv->num_rx_ring[q]; - desc = &priv->rx_ring[q][entry]; + desc = &priv->rx_ring[q].ex_desc[entry]; } /* Refill the RX ring buffers. */ for (; priv->cur_rx[q] - priv->dirty_rx[q] > 0; priv->dirty_rx[q]++) { entry = priv->dirty_rx[q] % priv->num_rx_ring[q]; - desc = &priv->rx_ring[q][entry]; - desc->ds_cc = cpu_to_le16(RX_BUF_SZ); + desc = &priv->rx_ring[q].ex_desc[entry]; + desc->ds_cc = cpu_to_le16(priv->info->rx_max_desc_use); if (!priv->rx_skb[q][entry]) { - skb = netdev_alloc_skb(ndev, info->max_rx_len); + skb = ravb_alloc_skb(ndev, info, GFP_ATOMIC); if (!skb) break; /* Better luck next round. */ - ravb_set_buffer_align(skb); dma_addr = dma_map_single(ndev->dev.parent, skb->data, le16_to_cpu(desc->ds_cc), DMA_FROM_DEVICE); @@ -2668,9 +2615,6 @@ static int ravb_mdio_release(struct ravb_private *priv) } static const struct ravb_hw_info ravb_gen3_hw_info = { - .rx_ring_free = ravb_rx_ring_free_rcar, - .rx_ring_format = ravb_rx_ring_format_rcar, - .alloc_rx_desc = ravb_alloc_rx_desc_rcar, .receive = ravb_rx_rcar, .set_rate = ravb_set_rate_rcar, .set_feature = ravb_set_features_rcar, @@ -2681,9 +2625,10 @@ static const struct ravb_hw_info ravb_gen3_hw_info = { .net_hw_features = NETIF_F_RXCSUM, .net_features = NETIF_F_RXCSUM, .stats_len = ARRAY_SIZE(ravb_gstrings_stats), - .max_rx_len = RX_BUF_SZ + RAVB_ALIGN - 1, .tccr_mask = TCCR_TSRQ0 | TCCR_TSRQ1 | TCCR_TSRQ2 | TCCR_TSRQ3, - .rx_max_buf_size = SZ_2K, + .rx_max_frame_size = SZ_2K, + .rx_max_desc_use = SZ_2K - ETH_FCS_LEN + sizeof(__sum16), + .rx_desc_size = sizeof(struct ravb_ex_rx_desc), .internal_delay = 1, .tx_counters = 1, .multi_irqs = 1, @@ -2694,9 +2639,6 @@ static const struct ravb_hw_info ravb_gen3_hw_info = { }; static const struct ravb_hw_info ravb_gen2_hw_info = { - .rx_ring_free = ravb_rx_ring_free_rcar, - .rx_ring_format = ravb_rx_ring_format_rcar, - .alloc_rx_desc = ravb_alloc_rx_desc_rcar, .receive = ravb_rx_rcar, .set_rate = ravb_set_rate_rcar, .set_feature = ravb_set_features_rcar, @@ -2707,9 +2649,10 @@ static const struct ravb_hw_info ravb_gen2_hw_info = { .net_hw_features = NETIF_F_RXCSUM, .net_features = NETIF_F_RXCSUM, .stats_len = ARRAY_SIZE(ravb_gstrings_stats), - .max_rx_len = RX_BUF_SZ + RAVB_ALIGN - 1, .tccr_mask = TCCR_TSRQ0 | TCCR_TSRQ1 | TCCR_TSRQ2 | TCCR_TSRQ3, - .rx_max_buf_size = SZ_2K, + .rx_max_frame_size = SZ_2K, + .rx_max_desc_use = SZ_2K - ETH_FCS_LEN + sizeof(__sum16), + .rx_desc_size = sizeof(struct ravb_ex_rx_desc), .aligned_tx = 1, .gptp = 1, .nc_queues = 1, @@ -2717,9 +2660,6 @@ static const struct ravb_hw_info ravb_gen2_hw_info = { }; static const struct ravb_hw_info ravb_rzv2m_hw_info = { - .rx_ring_free = ravb_rx_ring_free_rcar, - .rx_ring_format = ravb_rx_ring_format_rcar, - .alloc_rx_desc = ravb_alloc_rx_desc_rcar, .receive = ravb_rx_rcar, .set_rate = ravb_set_rate_rcar, .set_feature = ravb_set_features_rcar, @@ -2730,9 +2670,10 @@ static const struct ravb_hw_info ravb_rzv2m_hw_info = { .net_hw_features = NETIF_F_RXCSUM, .net_features = NETIF_F_RXCSUM, .stats_len = ARRAY_SIZE(ravb_gstrings_stats), - .max_rx_len = RX_BUF_SZ + RAVB_ALIGN - 1, .tccr_mask = TCCR_TSRQ0 | TCCR_TSRQ1 | TCCR_TSRQ2 | TCCR_TSRQ3, - .rx_max_buf_size = SZ_2K, + .rx_max_frame_size = SZ_2K, + .rx_max_desc_use = SZ_2K - ETH_FCS_LEN + sizeof(__sum16), + .rx_desc_size = sizeof(struct ravb_ex_rx_desc), .multi_irqs = 1, .err_mgmt_irqs = 1, .gptp = 1, @@ -2742,9 +2683,6 @@ static const struct ravb_hw_info ravb_rzv2m_hw_info = { }; static const struct ravb_hw_info gbeth_hw_info = { - .rx_ring_free = ravb_rx_ring_free_gbeth, - .rx_ring_format = ravb_rx_ring_format_gbeth, - .alloc_rx_desc = ravb_alloc_rx_desc_gbeth, .receive = ravb_rx_gbeth, .set_rate = ravb_set_rate_gbeth, .set_feature = ravb_set_features_gbeth, @@ -2755,9 +2693,10 @@ static const struct ravb_hw_info gbeth_hw_info = { .net_hw_features = NETIF_F_RXCSUM | NETIF_F_HW_CSUM, .net_features = NETIF_F_RXCSUM | NETIF_F_HW_CSUM, .stats_len = ARRAY_SIZE(ravb_gstrings_stats_gbeth), - .max_rx_len = ALIGN(GBETH_RX_BUFF_MAX, RAVB_ALIGN), .tccr_mask = TCCR_TSRQ0, - .rx_max_buf_size = SZ_8K, + .rx_max_frame_size = SZ_8K, + .rx_max_desc_use = 4080, + .rx_desc_size = sizeof(struct ravb_rx_desc), .aligned_tx = 1, .tx_counters = 1, .carrier_counters = 1, @@ -2966,7 +2905,8 @@ static int ravb_probe(struct platform_device *pdev) priv->avb_link_active_low = of_property_read_bool(np, "renesas,ether-link-active-low"); - ndev->max_mtu = info->rx_max_buf_size - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN); + ndev->max_mtu = info->rx_max_frame_size - + (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN); ndev->min_mtu = ETH_MIN_MTU; /* FIXME: R-Car Gen2 has 4byte alignment restriction for tx buffer diff --git a/drivers/net/ethernet/sfc/siena/tx_common.c b/drivers/net/ethernet/sfc/siena/tx_common.c index a7a9ab304e13..71f9b5ec5ae4 100644 --- a/drivers/net/ethernet/sfc/siena/tx_common.c +++ b/drivers/net/ethernet/sfc/siena/tx_common.c @@ -317,11 +317,10 @@ static int efx_tx_tso_header_length(struct sk_buff *skb) size_t header_len; if (skb->encapsulation) - header_len = skb_inner_transport_header(skb) - - skb->data + + header_len = skb_inner_transport_offset(skb) + (inner_tcp_hdr(skb)->doff << 2u); else - header_len = skb_transport_header(skb) - skb->data + + header_len = skb_transport_offset(skb) + (tcp_hdr(skb)->doff << 2u); return header_len; } diff --git a/drivers/net/ethernet/sfc/tx_common.c b/drivers/net/ethernet/sfc/tx_common.c index 9f2393d34371..2adb132b2f7e 100644 --- a/drivers/net/ethernet/sfc/tx_common.c +++ b/drivers/net/ethernet/sfc/tx_common.c @@ -336,11 +336,10 @@ int efx_tx_tso_header_length(struct sk_buff *skb) size_t header_len; if (skb->encapsulation) - header_len = skb_inner_transport_header(skb) - - skb->data + + header_len = skb_inner_transport_offset(skb) + (inner_tcp_hdr(skb)->doff << 2u); else - header_len = skb_transport_header(skb) - skb->data + + header_len = skb_transport_offset(skb) + (tcp_hdr(skb)->doff << 2u); return header_len; } diff --git a/drivers/net/ethernet/sfc/tx_tso.c b/drivers/net/ethernet/sfc/tx_tso.c index 64a6768f75ea..ddf149db8180 100644 --- a/drivers/net/ethernet/sfc/tx_tso.c +++ b/drivers/net/ethernet/sfc/tx_tso.c @@ -174,8 +174,8 @@ static int tso_start(struct tso_state *st, struct efx_nic *efx, unsigned int header_len, in_len; dma_addr_t dma_addr; - st->ip_off = skb_network_header(skb) - skb->data; - st->tcp_off = skb_transport_header(skb) - skb->data; + st->ip_off = skb_network_offset(skb); + st->tcp_off = skb_transport_offset(skb); header_len = st->tcp_off + (tcp_hdr(skb)->doff << 2u); in_len = skb_headlen(skb) - header_len; st->header_len = header_len; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 01c2e8d27b40..24cd80490d19 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2679,7 +2679,8 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue, } if (skb) { stmmac_get_tx_hwtstamp(priv, p, skb); - } else { + } else if (tx_q->xsk_pool && + xp_tx_metadata_enabled(tx_q->xsk_pool)) { struct stmmac_xsk_tx_complete tx_compl = { .priv = priv, .desc = p, @@ -4048,8 +4049,10 @@ static void stmmac_fpe_stop_wq(struct stmmac_priv *priv) { set_bit(__FPE_REMOVING, &priv->fpe_task_state); - if (priv->fpe_wq) + if (priv->fpe_wq) { destroy_workqueue(priv->fpe_wq); + priv->fpe_wq = NULL; + } netdev_info(priv->dev, "FPE workqueue stop"); } diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c index 3525d5c0d694..351609f4f011 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.c +++ b/drivers/net/ethernet/sun/sunvnet_common.c @@ -1144,9 +1144,9 @@ static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, int ncookies) nskb->protocol = skb->protocol; offset = skb_mac_header(skb) - skb->data; skb_set_mac_header(nskb, offset); - offset = skb_network_header(skb) - skb->data; + offset = skb_network_offset(skb); skb_set_network_header(nskb, offset); - offset = skb_transport_header(skb) - skb->data; + offset = skb_transport_offset(skb); skb_set_transport_header(nskb, offset); offset = 0; diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 9d2f4ac783e4..2939a21ca74f 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -294,7 +294,7 @@ static void am65_cpsw_nuss_ndo_host_tx_timeout(struct net_device *ndev, txqueue, netif_tx_queue_stopped(netif_txq), jiffies_to_msecs(jiffies - trans_start), - dql_avail(&netif_txq->dql), + netdev_queue_dql_avail(netif_txq), k3_cppi_desc_pool_avail(tx_chn->desc_pool)); if (netif_tx_queue_stopped(netif_txq)) { diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index 12b96ca66877..5ee8e8980393 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -384,18 +384,18 @@ static int gelic_descr_prepare_rx(struct gelic_card *card, if (gelic_descr_get_status(descr) != GELIC_DESCR_DMA_NOT_IN_USE) dev_info(ctodev(card), "%s: ERROR status\n", __func__); - descr->skb = netdev_alloc_skb(*card->netdev, rx_skb_size); - if (!descr->skb) { - descr->hw_regs.payload.dev_addr = 0; /* tell DMAC don't touch memory */ - return -ENOMEM; - } descr->hw_regs.dmac_cmd_status = 0; descr->hw_regs.result_size = 0; descr->hw_regs.valid_size = 0; descr->hw_regs.data_error = 0; descr->hw_regs.payload.dev_addr = 0; descr->hw_regs.payload.size = 0; - descr->skb = NULL; + + descr->skb = netdev_alloc_skb(*card->netdev, rx_skb_size); + if (!descr->skb) { + descr->hw_regs.payload.dev_addr = 0; /* tell DMAC don't touch memory */ + return -ENOMEM; + } offset = ((unsigned long)descr->skb->data) & (GELIC_NET_RXBUF_ALIGN - 1); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 7cf02ab6de68..6dff2c85682d 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -1257,7 +1257,7 @@ static int wx_tso(struct wx_ring *tx_ring, struct wx_tx_buffer *first, /* compute header lengths */ l4len = enc ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb); - *hdr_len = enc ? (skb_inner_transport_header(skb) - skb->data) : + *hdr_len = enc ? skb_inner_transport_offset(skb) : skb_transport_offset(skb); *hdr_len += l4len; diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index e67a21294158..bd4624d14ca0 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -81,6 +81,7 @@ static void txgbe_up_complete(struct wx *wx) { struct net_device *netdev = wx->netdev; + txgbe_reinit_gpio_intr(wx); wx_control_hw(wx, true); wx_configure_vectors(wx); diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c index bae0a8ee7014..93295916b1d2 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c @@ -475,8 +475,10 @@ irqreturn_t txgbe_gpio_irq_handler(int irq, void *data) gc = txgbe->gpio; for_each_set_bit(hwirq, &gpioirq, gc->ngpio) { int gpio = irq_find_mapping(gc->irq.domain, hwirq); + struct irq_data *d = irq_get_irq_data(gpio); u32 irq_type = irq_get_trigger_type(gpio); + txgbe_gpio_irq_ack(d); handle_nested_irq(gpio); if ((irq_type & IRQ_TYPE_SENSE_MASK) == IRQ_TYPE_EDGE_BOTH) { @@ -489,6 +491,33 @@ irqreturn_t txgbe_gpio_irq_handler(int irq, void *data) return IRQ_HANDLED; } +void txgbe_reinit_gpio_intr(struct wx *wx) +{ + struct txgbe *txgbe = wx->priv; + irq_hw_number_t hwirq; + unsigned long gpioirq; + struct gpio_chip *gc; + unsigned long flags; + + /* for gpio interrupt pending before irq enable */ + gpioirq = rd32(wx, WX_GPIO_INTSTATUS); + + gc = txgbe->gpio; + for_each_set_bit(hwirq, &gpioirq, gc->ngpio) { + int gpio = irq_find_mapping(gc->irq.domain, hwirq); + struct irq_data *d = irq_get_irq_data(gpio); + u32 irq_type = irq_get_trigger_type(gpio); + + txgbe_gpio_irq_ack(d); + + if ((irq_type & IRQ_TYPE_SENSE_MASK) == IRQ_TYPE_EDGE_BOTH) { + raw_spin_lock_irqsave(&wx->gpio_lock, flags); + txgbe_toggle_trigger(gc, hwirq); + raw_spin_unlock_irqrestore(&wx->gpio_lock, flags); + } + } +} + static int txgbe_gpio_init(struct txgbe *txgbe) { struct gpio_irq_chip *girq; diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.h index 9855d44076cb..8a026d804fe2 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.h @@ -5,6 +5,7 @@ #define _TXGBE_PHY_H_ irqreturn_t txgbe_gpio_irq_handler(int irq, void *data); +void txgbe_reinit_gpio_intr(struct wx *wx); irqreturn_t txgbe_link_irq_handler(int irq, void *data); int txgbe_init_phy(struct txgbe *txgbe); void txgbe_remove_phy(struct txgbe *txgbe); diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 6f3f9b446b1d..2f6739fe78af 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -221,7 +221,7 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, struct genevehdr *gnvh = geneve_hdr(skb); struct metadata_dst *tun_dst = NULL; unsigned int len; - int err = 0; + int nh, err = 0; void *oiph; if (ip_tunnel_collect_metadata() || gs->collect_md) { @@ -272,9 +272,23 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, skb->pkt_type = PACKET_HOST; } - oiph = skb_network_header(skb); + /* Save offset of outer header relative to skb->head, + * because we are going to reset the network header to the inner header + * and might change skb->head. + */ + nh = skb_network_header(skb) - skb->head; + skb_reset_network_header(skb); + if (!pskb_inet_may_pull(skb)) { + DEV_STATS_INC(geneve->dev, rx_length_errors); + DEV_STATS_INC(geneve->dev, rx_errors); + goto drop; + } + + /* Get the outer header. */ + oiph = skb->head + nh; + if (geneve_get_sk_family(gs) == AF_INET) err = IP_ECN_decapsulate(oiph, skb); #if IS_ENABLED(CONFIG_IPV6) @@ -319,19 +333,12 @@ static int geneve_init(struct net_device *dev) struct geneve_dev *geneve = netdev_priv(dev); int err; - dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!dev->tstats) - return -ENOMEM; - err = gro_cells_init(&geneve->gro_cells, dev); - if (err) { - free_percpu(dev->tstats); + if (err) return err; - } err = dst_cache_init(&geneve->cfg.info.dst_cache, GFP_KERNEL); if (err) { - free_percpu(dev->tstats); gro_cells_destroy(&geneve->gro_cells); return err; } @@ -345,7 +352,6 @@ static void geneve_uninit(struct net_device *dev) dst_cache_destroy(&geneve->cfg.info.dst_cache); gro_cells_destroy(&geneve->gro_cells); - free_percpu(dev->tstats); } /* Callback from net/ipv4/udp.c to receive packets */ @@ -508,7 +514,7 @@ static struct sk_buff *geneve_gro_receive(struct sock *sk, gh_len = geneve_hlen(gh); hlen = off_gnv + gh_len; - if (skb_gro_header_hard(skb, hlen)) { + if (!skb_gro_may_pull(skb, hlen)) { gh = skb_gro_header_slow(skb, hlen, off_gnv); if (unlikely(!gh)) goto out; @@ -1122,7 +1128,6 @@ static const struct net_device_ops geneve_netdev_ops = { .ndo_open = geneve_open, .ndo_stop = geneve_stop, .ndo_start_xmit = geneve_xmit, - .ndo_get_stats64 = dev_get_tstats64, .ndo_change_mtu = geneve_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, @@ -1189,6 +1194,7 @@ static void geneve_setup(struct net_device *dev) dev->hw_features |= NETIF_F_RXCSUM; dev->hw_features |= NETIF_F_GSO_SOFTWARE; + dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; /* MTU range: 68 - (something less than 65535) */ dev->min_mtu = ETH_MIN_MTU; /* The max_mtu calculation does not take account of GENEVE diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 45e58fef98e1..ba4704c2c640 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -711,25 +711,11 @@ static int gtp_encap_recv(struct sock *sk, struct sk_buff *skb) return ret; } -static int gtp_dev_init(struct net_device *dev) -{ - struct gtp_dev *gtp = netdev_priv(dev); - - gtp->dev = dev; - - dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!dev->tstats) - return -ENOMEM; - - return 0; -} - static void gtp_dev_uninit(struct net_device *dev) { struct gtp_dev *gtp = netdev_priv(dev); gtp_encap_disable(gtp); - free_percpu(dev->tstats); } static inline void gtp0_push_header(struct sk_buff *skb, struct pdp_ctx *pctx) @@ -942,10 +928,8 @@ tx_err: } static const struct net_device_ops gtp_netdev_ops = { - .ndo_init = gtp_dev_init, .ndo_uninit = gtp_dev_uninit, .ndo_start_xmit = gtp_dev_xmit, - .ndo_get_stats64 = dev_get_tstats64, }; static const struct device_type gtp_type = { @@ -957,6 +941,7 @@ static void gtp_link_setup(struct net_device *dev) unsigned int max_gtp_header_len = sizeof(struct iphdr) + sizeof(struct udphdr) + sizeof(struct gtp0_header); + struct gtp_dev *gtp = netdev_priv(dev); dev->netdev_ops = >p_netdev_ops; dev->needs_free_netdev = true; @@ -970,11 +955,13 @@ static void gtp_link_setup(struct net_device *dev) dev->type = ARPHRD_NONE; dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; + dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; dev->priv_flags |= IFF_NO_QUEUE; dev->features |= NETIF_F_LLTX; netif_keep_dst(dev); dev->needed_headroom = LL_MAX_HEADER + max_gtp_header_len; + gtp->dev = dev; } static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize); @@ -1903,26 +1890,26 @@ static int __init gtp_init(void) get_random_bytes(>p_h_initval, sizeof(gtp_h_initval)); - err = rtnl_link_register(>p_link_ops); + err = register_pernet_subsys(>p_net_ops); if (err < 0) goto error_out; - err = register_pernet_subsys(>p_net_ops); + err = rtnl_link_register(>p_link_ops); if (err < 0) - goto unreg_rtnl_link; + goto unreg_pernet_subsys; err = genl_register_family(>p_genl_family); if (err < 0) - goto unreg_pernet_subsys; + goto unreg_rtnl_link; pr_info("GTP module loaded (pdp ctx size %zd bytes)\n", sizeof(struct pdp_ctx)); return 0; -unreg_pernet_subsys: - unregister_pernet_subsys(>p_net_ops); unreg_rtnl_link: rtnl_link_unregister(>p_link_ops); +unreg_pernet_subsys: + unregister_pernet_subsys(>p_net_ops); error_out: pr_err("error loading GTP module loaded\n"); return err; diff --git a/drivers/net/ipa/ipa.h b/drivers/net/ipa/ipa.h index f3355e040a9e..334cd62cf286 100644 --- a/drivers/net/ipa/ipa.h +++ b/drivers/net/ipa/ipa.h @@ -21,7 +21,6 @@ struct clk; struct icc_path; struct net_device; -struct platform_device; struct ipa_power; struct ipa_smp2p; @@ -31,7 +30,7 @@ struct ipa_interrupt; * struct ipa - IPA information * @gsi: Embedded GSI structure * @version: IPA hardware version - * @pdev: Platform device + * @dev: IPA device pointer * @completion: Used to signal pipeline clear transfer complete * @nb: Notifier block used for remoteproc SSR * @notifier: Remoteproc SSR notifier @@ -79,7 +78,7 @@ struct ipa_interrupt; struct ipa { struct gsi gsi; enum ipa_version version; - struct platform_device *pdev; + struct device *dev; struct completion completion; struct notifier_block nb; void *notifier; diff --git a/drivers/net/ipa/ipa_cmd.c b/drivers/net/ipa/ipa_cmd.c index f1419fbd776c..39219963dbb3 100644 --- a/drivers/net/ipa/ipa_cmd.c +++ b/drivers/net/ipa/ipa_cmd.c @@ -174,7 +174,7 @@ bool ipa_cmd_table_init_valid(struct ipa *ipa, const struct ipa_mem *mem, u32 offset_max = field_max(IP_FLTRT_FLAGS_NHASH_ADDR_FMASK); u32 size_max = field_max(IP_FLTRT_FLAGS_NHASH_SIZE_FMASK); const char *table = route ? "route" : "filter"; - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; u32 size; size = route ? ipa->route_count : ipa->filter_count + 1; @@ -204,7 +204,7 @@ bool ipa_cmd_table_init_valid(struct ipa *ipa, const struct ipa_mem *mem, /* Validate the memory region that holds headers */ static bool ipa_cmd_header_init_local_valid(struct ipa *ipa) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; const struct ipa_mem *mem; u32 offset_max; u32 size_max; @@ -256,7 +256,7 @@ static bool ipa_cmd_register_write_offset_valid(struct ipa *ipa, const char *name, u32 offset) { struct ipa_cmd_register_write *payload; - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; u32 offset_max; u32 bit_count; diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index afa1d56d9095..dd490941615e 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -233,8 +233,8 @@ static bool ipa_endpoint_data_valid_one(struct ipa *ipa, u32 count, const struct ipa_gsi_endpoint_data *data) { const struct ipa_gsi_endpoint_data *other_data; - struct device *dev = &ipa->pdev->dev; enum ipa_endpoint_name other_name; + struct device *dev = ipa->dev; if (ipa_gsi_endpoint_data_empty(data)) return true; @@ -388,7 +388,7 @@ static u32 ipa_endpoint_max(struct ipa *ipa, u32 count, const struct ipa_gsi_endpoint_data *data) { const struct ipa_gsi_endpoint_data *dp = data; - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; enum ipa_endpoint_name name; u32 max; @@ -606,7 +606,7 @@ int ipa_endpoint_modem_exception_reset_all(struct ipa *ipa) count = ipa->modem_tx_count + ipa_cmd_pipeline_clear_count(); trans = ipa_cmd_trans_alloc(ipa, count); if (!trans) { - dev_err(&ipa->pdev->dev, + dev_err(ipa->dev, "no transaction to reset modem exception endpoints\n"); return -EBUSY; } @@ -1498,8 +1498,7 @@ ipa_endpoint_status_tag_valid(struct ipa_endpoint *endpoint, const void *data) if (endpoint_id == command_endpoint->endpoint_id) { complete(&ipa->completion); } else { - dev_err(&ipa->pdev->dev, - "unexpected tagged packet from endpoint %u\n", + dev_err(ipa->dev, "unexpected tagged packet from endpoint %u\n", endpoint_id); } @@ -1536,6 +1535,7 @@ static void ipa_endpoint_status_parse(struct ipa_endpoint *endpoint, void *data = page_address(page) + NET_SKB_PAD; u32 unused = buffer_size - total_len; struct ipa *ipa = endpoint->ipa; + struct device *dev = ipa->dev; u32 resid = total_len; while (resid) { @@ -1544,7 +1544,7 @@ static void ipa_endpoint_status_parse(struct ipa_endpoint *endpoint, u32 len; if (resid < IPA_STATUS_SIZE) { - dev_err(&endpoint->ipa->pdev->dev, + dev_err(dev, "short message (%u bytes < %zu byte status)\n", resid, IPA_STATUS_SIZE); break; @@ -1666,8 +1666,8 @@ void ipa_endpoint_default_route_clear(struct ipa *ipa) */ static int ipa_endpoint_reset_rx_aggr(struct ipa_endpoint *endpoint) { - struct device *dev = &endpoint->ipa->pdev->dev; struct ipa *ipa = endpoint->ipa; + struct device *dev = ipa->dev; struct gsi *gsi = &ipa->gsi; bool suspended = false; dma_addr_t addr; @@ -1769,7 +1769,7 @@ static void ipa_endpoint_reset(struct ipa_endpoint *endpoint) gsi_channel_reset(&ipa->gsi, channel_id, true); if (ret) - dev_err(&ipa->pdev->dev, + dev_err(ipa->dev, "error %d resetting channel %u for endpoint %u\n", ret, endpoint->channel_id, endpoint->endpoint_id); } @@ -1817,7 +1817,7 @@ int ipa_endpoint_enable_one(struct ipa_endpoint *endpoint) ret = gsi_channel_start(gsi, endpoint->channel_id); if (ret) { - dev_err(&ipa->pdev->dev, + dev_err(ipa->dev, "error %d starting %cX channel %u for endpoint %u\n", ret, endpoint->toward_ipa ? 'T' : 'R', endpoint->channel_id, endpoint_id); @@ -1854,14 +1854,13 @@ void ipa_endpoint_disable_one(struct ipa_endpoint *endpoint) /* Note that if stop fails, the channel's state is not well-defined */ ret = gsi_channel_stop(gsi, endpoint->channel_id); if (ret) - dev_err(&ipa->pdev->dev, - "error %d attempting to stop endpoint %u\n", ret, - endpoint_id); + dev_err(ipa->dev, "error %d attempting to stop endpoint %u\n", + ret, endpoint_id); } void ipa_endpoint_suspend_one(struct ipa_endpoint *endpoint) { - struct device *dev = &endpoint->ipa->pdev->dev; + struct device *dev = endpoint->ipa->dev; struct gsi *gsi = &endpoint->ipa->gsi; int ret; @@ -1881,7 +1880,7 @@ void ipa_endpoint_suspend_one(struct ipa_endpoint *endpoint) void ipa_endpoint_resume_one(struct ipa_endpoint *endpoint) { - struct device *dev = &endpoint->ipa->pdev->dev; + struct device *dev = endpoint->ipa->dev; struct gsi *gsi = &endpoint->ipa->gsi; int ret; @@ -1983,7 +1982,7 @@ void ipa_endpoint_deconfig(struct ipa *ipa) int ipa_endpoint_config(struct ipa *ipa) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; const struct reg *reg; u32 endpoint_id; u32 hw_limit; diff --git a/drivers/net/ipa/ipa_interrupt.c b/drivers/net/ipa/ipa_interrupt.c index 4d80bf77a532..c3e8784d51d9 100644 --- a/drivers/net/ipa/ipa_interrupt.c +++ b/drivers/net/ipa/ipa_interrupt.c @@ -19,6 +19,7 @@ * time only these three are supported. */ +#include <linux/platform_device.h> #include <linux/types.h> #include <linux/interrupt.h> #include <linux/pm_runtime.h> @@ -109,14 +110,13 @@ static irqreturn_t ipa_isr_thread(int irq, void *dev_id) struct ipa_interrupt *interrupt = dev_id; struct ipa *ipa = interrupt->ipa; u32 enabled = interrupt->enabled; + struct device *dev = ipa->dev; const struct reg *reg; - struct device *dev; u32 pending; u32 offset; u32 mask; int ret; - dev = &ipa->pdev->dev; ret = pm_runtime_get_sync(dev); if (WARN_ON(ret < 0)) goto out_power_put; @@ -236,29 +236,17 @@ void ipa_interrupt_simulate_suspend(struct ipa_interrupt *interrupt) } /* Configure the IPA interrupt framework */ -struct ipa_interrupt *ipa_interrupt_config(struct ipa *ipa) +int ipa_interrupt_config(struct ipa *ipa) { - struct device *dev = &ipa->pdev->dev; - struct ipa_interrupt *interrupt; + struct ipa_interrupt *interrupt = ipa->interrupt; + unsigned int irq = interrupt->irq; + struct device *dev = ipa->dev; const struct reg *reg; - unsigned int irq; int ret; - ret = platform_get_irq_byname(ipa->pdev, "ipa"); - if (ret <= 0) { - dev_err(dev, "DT error %d getting \"ipa\" IRQ property\n", - ret); - return ERR_PTR(ret ? : -EINVAL); - } - irq = ret; - - interrupt = kzalloc(sizeof(*interrupt), GFP_KERNEL); - if (!interrupt) - return ERR_PTR(-ENOMEM); interrupt->ipa = ipa; - interrupt->irq = irq; - /* Start with all IPA interrupts disabled */ + /* Disable all IPA interrupt types */ reg = ipa_reg(ipa, IPA_IRQ_EN); iowrite32(0, ipa->reg_virt + reg_offset(reg)); @@ -271,26 +259,59 @@ struct ipa_interrupt *ipa_interrupt_config(struct ipa *ipa) ret = dev_pm_set_wake_irq(dev, irq); if (ret) { - dev_err(dev, "error %d registering \"ipa\" IRQ as wakeirq\n", ret); + dev_err(dev, "error %d registering \"ipa\" IRQ as wakeirq\n", + ret); goto err_free_irq; } - return interrupt; + ipa->interrupt = interrupt; + + return 0; err_free_irq: free_irq(interrupt->irq, interrupt); err_kfree: kfree(interrupt); - return ERR_PTR(ret); + return ret; } /* Inverse of ipa_interrupt_config() */ -void ipa_interrupt_deconfig(struct ipa_interrupt *interrupt) +void ipa_interrupt_deconfig(struct ipa *ipa) { - struct device *dev = &interrupt->ipa->pdev->dev; + struct ipa_interrupt *interrupt = ipa->interrupt; + struct device *dev = ipa->dev; + + ipa->interrupt = NULL; dev_pm_clear_wake_irq(dev); free_irq(interrupt->irq, interrupt); +} + +/* Initialize the IPA interrupt structure */ +struct ipa_interrupt *ipa_interrupt_init(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct ipa_interrupt *interrupt; + int irq; + + irq = platform_get_irq_byname(pdev, "ipa"); + if (irq <= 0) { + dev_err(dev, "DT error %d getting \"ipa\" IRQ property\n", irq); + + return ERR_PTR(irq ? : -EINVAL); + } + + interrupt = kzalloc(sizeof(*interrupt), GFP_KERNEL); + if (!interrupt) + return ERR_PTR(-ENOMEM); + interrupt->irq = irq; + + return interrupt; +} + +/* Inverse of ipa_interrupt_init() */ +void ipa_interrupt_exit(struct ipa_interrupt *interrupt) +{ kfree(interrupt); } diff --git a/drivers/net/ipa/ipa_interrupt.h b/drivers/net/ipa/ipa_interrupt.h index 53e1b71685c7..f3f4f4330a59 100644 --- a/drivers/net/ipa/ipa_interrupt.h +++ b/drivers/net/ipa/ipa_interrupt.h @@ -76,17 +76,31 @@ void ipa_interrupt_irq_enable(struct ipa *ipa); void ipa_interrupt_irq_disable(struct ipa *ipa); /** - * ipa_interrupt_config() - Configure the IPA interrupt framework + * ipa_interrupt_config() - Configure IPA interrupts * @ipa: IPA pointer * - * Return: Pointer to IPA SMP2P info, or a pointer-coded error + * Return: 0 if successful, or a negative error code */ -struct ipa_interrupt *ipa_interrupt_config(struct ipa *ipa); +int ipa_interrupt_config(struct ipa *ipa); /** * ipa_interrupt_deconfig() - Inverse of ipa_interrupt_config() + * @ipa: IPA pointer + */ +void ipa_interrupt_deconfig(struct ipa *ipa); + +/** + * ipa_interrupt_init() - Initialize the IPA interrupt structure + * @pdev: IPA platform device pointer + * + * Return: Pointer to an IPA interrupt structure, or a pointer-coded error + */ +struct ipa_interrupt *ipa_interrupt_init(struct platform_device *pdev); + +/** + * ipa_interrupt_exit() - Inverse of ipa_interrupt_init() * @interrupt: IPA interrupt structure */ -void ipa_interrupt_deconfig(struct ipa_interrupt *interrupt); +void ipa_interrupt_exit(struct ipa_interrupt *interrupt); #endif /* _IPA_INTERRUPT_H_ */ diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c index 00475fd7a205..57b241417e8c 100644 --- a/drivers/net/ipa/ipa_main.c +++ b/drivers/net/ipa/ipa_main.c @@ -7,7 +7,6 @@ #include <linux/types.h> #include <linux/atomic.h> #include <linux/bitfield.h> -#include <linux/device.h> #include <linux/bug.h> #include <linux/io.h> #include <linux/firmware.h> @@ -114,7 +113,7 @@ int ipa_setup(struct ipa *ipa) { struct ipa_endpoint *exception_endpoint; struct ipa_endpoint *command_endpoint; - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; int ret; ret = gsi_setup(&ipa->gsi); @@ -542,12 +541,9 @@ static int ipa_config(struct ipa *ipa, const struct ipa_data *data) if (ret) goto err_hardware_deconfig; - ipa->interrupt = ipa_interrupt_config(ipa); - if (IS_ERR(ipa->interrupt)) { - ret = PTR_ERR(ipa->interrupt); - ipa->interrupt = NULL; + ret = ipa_interrupt_config(ipa); + if (ret) goto err_mem_deconfig; - } ipa_uc_config(ipa); @@ -572,8 +568,7 @@ err_endpoint_deconfig: ipa_endpoint_deconfig(ipa); err_uc_deconfig: ipa_uc_deconfig(ipa); - ipa_interrupt_deconfig(ipa->interrupt); - ipa->interrupt = NULL; + ipa_interrupt_deconfig(ipa); err_mem_deconfig: ipa_mem_deconfig(ipa); err_hardware_deconfig: @@ -591,8 +586,7 @@ static void ipa_deconfig(struct ipa *ipa) ipa_modem_deconfig(ipa); ipa_endpoint_deconfig(ipa); ipa_uc_deconfig(ipa); - ipa_interrupt_deconfig(ipa->interrupt); - ipa->interrupt = NULL; + ipa_interrupt_deconfig(ipa); ipa_mem_deconfig(ipa); ipa_hardware_deconfig(ipa); } @@ -808,6 +802,7 @@ out_self: static int ipa_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; + struct ipa_interrupt *interrupt; enum ipa_firmware_loader loader; const struct ipa_data *data; struct ipa_power *power; @@ -839,12 +834,21 @@ static int ipa_probe(struct platform_device *pdev) if (loader == IPA_LOADER_DEFER) return -EPROBE_DEFER; - /* The clock and interconnects might not be ready when we're - * probed, so might return -EPROBE_DEFER. + /* The IPA interrupt might not be ready when we're probed, so this + * might return -EPROBE_DEFER. + */ + interrupt = ipa_interrupt_init(pdev); + if (IS_ERR(interrupt)) + return PTR_ERR(interrupt); + + /* The clock and interconnects might not be ready when we're probed, + * so this might return -EPROBE_DEFER. */ power = ipa_power_init(dev, data->power_data); - if (IS_ERR(power)) - return PTR_ERR(power); + if (IS_ERR(power)) { + ret = PTR_ERR(power); + goto err_interrupt_exit; + } /* No more EPROBE_DEFER. Allocate and initialize the IPA structure */ ipa = kzalloc(sizeof(*ipa), GFP_KERNEL); @@ -853,18 +857,19 @@ static int ipa_probe(struct platform_device *pdev) goto err_power_exit; } - ipa->pdev = pdev; + ipa->dev = dev; dev_set_drvdata(dev, ipa); + ipa->interrupt = interrupt; ipa->power = power; ipa->version = data->version; ipa->modem_route_count = data->modem_route_count; init_completion(&ipa->completion); - ret = ipa_reg_init(ipa); + ret = ipa_reg_init(ipa, pdev); if (ret) goto err_kfree_ipa; - ret = ipa_mem_init(ipa, data->mem_data); + ret = ipa_mem_init(ipa, pdev, data->mem_data); if (ret) goto err_reg_exit; @@ -882,7 +887,7 @@ static int ipa_probe(struct platform_device *pdev) if (ret) goto err_endpoint_exit; - ret = ipa_smp2p_init(ipa, loader == IPA_LOADER_MODEM); + ret = ipa_smp2p_init(ipa, pdev, loader == IPA_LOADER_MODEM); if (ret) goto err_table_exit; @@ -939,17 +944,27 @@ err_kfree_ipa: kfree(ipa); err_power_exit: ipa_power_exit(power); +err_interrupt_exit: + ipa_interrupt_exit(interrupt); return ret; } static void ipa_remove(struct platform_device *pdev) { - struct ipa *ipa = dev_get_drvdata(&pdev->dev); - struct ipa_power *power = ipa->power; - struct device *dev = &pdev->dev; + struct ipa_interrupt *interrupt; + struct ipa_power *power; + struct device *dev; + struct ipa *ipa; int ret; + ipa = dev_get_drvdata(&pdev->dev); + dev = ipa->dev; + WARN_ON(dev != &pdev->dev); + + power = ipa->power; + interrupt = ipa->interrupt; + /* Prevent the modem from triggering a call to ipa_setup(). This * also ensures a modem-initiated setup that's underway completes. */ @@ -991,6 +1006,7 @@ out_power_put: ipa_reg_exit(ipa); kfree(ipa); ipa_power_exit(power); + ipa_interrupt_exit(interrupt); dev_info(dev, "IPA driver removed"); } diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c index 694960537ecd..709f061ede61 100644 --- a/drivers/net/ipa/ipa_mem.c +++ b/drivers/net/ipa/ipa_mem.c @@ -9,6 +9,7 @@ #include <linux/bug.h> #include <linux/dma-mapping.h> #include <linux/iommu.h> +#include <linux/platform_device.h> #include <linux/io.h> #include <linux/soc/qcom/smem.h> @@ -75,9 +76,9 @@ ipa_mem_zero_region_add(struct gsi_trans *trans, enum ipa_mem_id mem_id) int ipa_mem_setup(struct ipa *ipa) { dma_addr_t addr = ipa->zero_addr; - const struct reg *reg; const struct ipa_mem *mem; struct gsi_trans *trans; + const struct reg *reg; u32 offset; u16 size; u32 val; @@ -87,7 +88,7 @@ int ipa_mem_setup(struct ipa *ipa) */ trans = ipa_cmd_trans_alloc(ipa, 4); if (!trans) { - dev_err(&ipa->pdev->dev, "no transaction for memory setup\n"); + dev_err(ipa->dev, "no transaction for memory setup\n"); return -EBUSY; } @@ -217,8 +218,8 @@ static bool ipa_mem_id_required(struct ipa *ipa, enum ipa_mem_id mem_id) static bool ipa_mem_valid_one(struct ipa *ipa, const struct ipa_mem *mem) { - struct device *dev = &ipa->pdev->dev; enum ipa_mem_id mem_id = mem->id; + struct device *dev = ipa->dev; u16 size_multiple; /* Make sure the memory region is valid for this version of IPA */ @@ -254,7 +255,7 @@ static bool ipa_mem_valid_one(struct ipa *ipa, const struct ipa_mem *mem) static bool ipa_mem_valid(struct ipa *ipa, const struct ipa_mem_data *mem_data) { DECLARE_BITMAP(regions, IPA_MEM_COUNT) = { }; - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; enum ipa_mem_id mem_id; u32 i; @@ -290,7 +291,7 @@ static bool ipa_mem_valid(struct ipa *ipa, const struct ipa_mem_data *mem_data) /* Do all memory regions fit within the IPA local memory? */ static bool ipa_mem_size_valid(struct ipa *ipa) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; u32 limit = ipa->mem_size; u32 i; @@ -317,7 +318,7 @@ static bool ipa_mem_size_valid(struct ipa *ipa) */ int ipa_mem_config(struct ipa *ipa) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; const struct ipa_mem *mem; const struct reg *reg; dma_addr_t addr; @@ -393,7 +394,7 @@ err_dma_free: /* Inverse of ipa_mem_config() */ void ipa_mem_deconfig(struct ipa *ipa) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; dma_free_coherent(dev, ipa->zero_size, ipa->zero_virt, ipa->zero_addr); ipa->zero_size = 0; @@ -420,8 +421,7 @@ int ipa_mem_zero_modem(struct ipa *ipa) */ trans = ipa_cmd_trans_alloc(ipa, 3); if (!trans) { - dev_err(&ipa->pdev->dev, - "no transaction to zero modem memory\n"); + dev_err(ipa->dev, "no transaction to zero modem memory\n"); return -EBUSY; } @@ -452,7 +452,7 @@ int ipa_mem_zero_modem(struct ipa *ipa) */ static int ipa_imem_init(struct ipa *ipa, unsigned long addr, size_t size) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; struct iommu_domain *domain; unsigned long iova; phys_addr_t phys; @@ -485,13 +485,12 @@ static int ipa_imem_init(struct ipa *ipa, unsigned long addr, size_t size) static void ipa_imem_exit(struct ipa *ipa) { + struct device *dev = ipa->dev; struct iommu_domain *domain; - struct device *dev; if (!ipa->imem_size) return; - dev = &ipa->pdev->dev; domain = iommu_get_domain_for_dev(dev); if (domain) { size_t size; @@ -527,7 +526,7 @@ static void ipa_imem_exit(struct ipa *ipa) */ static int ipa_smem_init(struct ipa *ipa, u32 item, size_t size) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; struct iommu_domain *domain; unsigned long iova; phys_addr_t phys; @@ -594,7 +593,7 @@ static int ipa_smem_init(struct ipa *ipa, u32 item, size_t size) static void ipa_smem_exit(struct ipa *ipa) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; struct iommu_domain *domain; domain = iommu_get_domain_for_dev(dev); @@ -615,9 +614,10 @@ static void ipa_smem_exit(struct ipa *ipa) } /* Perform memory region-related initialization */ -int ipa_mem_init(struct ipa *ipa, const struct ipa_mem_data *mem_data) +int ipa_mem_init(struct ipa *ipa, struct platform_device *pdev, + const struct ipa_mem_data *mem_data) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = &pdev->dev; struct resource *res; int ret; @@ -634,14 +634,13 @@ int ipa_mem_init(struct ipa *ipa, const struct ipa_mem_data *mem_data) if (!ipa_table_mem_valid(ipa, true)) return -EINVAL; - ret = dma_set_mask_and_coherent(&ipa->pdev->dev, DMA_BIT_MASK(64)); + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); if (ret) { dev_err(dev, "error %d setting DMA mask\n", ret); return ret; } - res = platform_get_resource_byname(ipa->pdev, IORESOURCE_MEM, - "ipa-shared"); + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ipa-shared"); if (!res) { dev_err(dev, "DT error getting \"ipa-shared\" memory property\n"); diff --git a/drivers/net/ipa/ipa_mem.h b/drivers/net/ipa/ipa_mem.h index 868e9c20e8c4..28aad00a151d 100644 --- a/drivers/net/ipa/ipa_mem.h +++ b/drivers/net/ipa/ipa_mem.h @@ -6,6 +6,8 @@ #ifndef _IPA_MEM_H_ #define _IPA_MEM_H_ +struct platform_device; + struct ipa; struct ipa_mem_data; @@ -100,7 +102,8 @@ int ipa_mem_setup(struct ipa *ipa); /* No ipa_mem_teardown() needed */ int ipa_mem_zero_modem(struct ipa *ipa); -int ipa_mem_init(struct ipa *ipa, const struct ipa_mem_data *mem_data); +int ipa_mem_init(struct ipa *ipa, struct platform_device *pdev, + const struct ipa_mem_data *mem_data); void ipa_mem_exit(struct ipa *ipa); #endif /* _IPA_MEM_H_ */ diff --git a/drivers/net/ipa/ipa_modem.c b/drivers/net/ipa/ipa_modem.c index 1d1be92fbebc..c27ca3f27f7d 100644 --- a/drivers/net/ipa/ipa_modem.c +++ b/drivers/net/ipa/ipa_modem.c @@ -58,7 +58,7 @@ static int ipa_open(struct net_device *netdev) struct device *dev; int ret; - dev = &ipa->pdev->dev; + dev = ipa->dev; ret = pm_runtime_get_sync(dev); if (ret < 0) goto err_power_put; @@ -94,7 +94,7 @@ static int ipa_stop(struct net_device *netdev) struct device *dev; int ret; - dev = &ipa->pdev->dev; + dev = ipa->dev; ret = pm_runtime_get_sync(dev); if (ret < 0) goto out_power_put; @@ -158,7 +158,7 @@ ipa_start_xmit(struct sk_buff *skb, struct net_device *netdev) */ netif_stop_queue(netdev); - dev = &ipa->pdev->dev; + dev = ipa->dev; ret = pm_runtime_get(dev); if (ret < 1) { /* If a resume won't happen, just drop the packet */ @@ -322,7 +322,7 @@ int ipa_modem_start(struct ipa *ipa) goto out_set_state; } - SET_NETDEV_DEV(netdev, &ipa->pdev->dev); + SET_NETDEV_DEV(netdev, ipa->dev); priv = netdev_priv(netdev); priv->ipa = ipa; priv->tx = ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]; @@ -396,7 +396,7 @@ int ipa_modem_stop(struct ipa *ipa) /* Treat a "clean" modem stop the same as a crash */ static void ipa_modem_crashed(struct ipa *ipa) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; int ret; /* Prevent the modem from triggering a call to ipa_setup() */ @@ -443,7 +443,7 @@ static int ipa_modem_notify(struct notifier_block *nb, unsigned long action, { struct ipa *ipa = container_of(nb, struct ipa, nb); struct qcom_ssr_notify_data *notify_data = data; - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; switch (action) { case QCOM_SSR_BEFORE_POWERUP: @@ -492,7 +492,7 @@ int ipa_modem_config(struct ipa *ipa) void ipa_modem_deconfig(struct ipa *ipa) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; int ret; ret = qcom_unregister_ssr_notifier(ipa->notifier, &ipa->nb); diff --git a/drivers/net/ipa/ipa_power.c b/drivers/net/ipa/ipa_power.c index 0f635b8356bf..41ca7ef5e20f 100644 --- a/drivers/net/ipa/ipa_power.c +++ b/drivers/net/ipa/ipa_power.c @@ -238,7 +238,7 @@ int ipa_power_setup(struct ipa *ipa) ipa_interrupt_enable(ipa, IPA_IRQ_TX_SUSPEND); - ret = device_init_wakeup(&ipa->pdev->dev, true); + ret = device_init_wakeup(ipa->dev, true); if (ret) ipa_interrupt_disable(ipa, IPA_IRQ_TX_SUSPEND); @@ -247,7 +247,7 @@ int ipa_power_setup(struct ipa *ipa) void ipa_power_teardown(struct ipa *ipa) { - (void)device_init_wakeup(&ipa->pdev->dev, false); + (void)device_init_wakeup(ipa->dev, false); ipa_interrupt_disable(ipa, IPA_IRQ_TX_SUSPEND); } diff --git a/drivers/net/ipa/ipa_qmi.c b/drivers/net/ipa/ipa_qmi.c index f70f0a1d1cda..65c40e207802 100644 --- a/drivers/net/ipa/ipa_qmi.c +++ b/drivers/net/ipa/ipa_qmi.c @@ -96,7 +96,7 @@ static void ipa_server_init_complete(struct ipa_qmi *ipa_qmi) IPA_QMI_INIT_COMPLETE_IND_SZ, ipa_init_complete_ind_ei, &ind); if (ret) - dev_err(&ipa->pdev->dev, + dev_err(ipa->dev, "error %d sending init complete indication\n", ret); else ipa_qmi->indication_sent = true; @@ -148,7 +148,7 @@ static void ipa_qmi_ready(struct ipa_qmi *ipa_qmi) ipa = container_of(ipa_qmi, struct ipa, qmi); ret = ipa_modem_start(ipa); if (ret) - dev_err(&ipa->pdev->dev, "error %d starting modem\n", ret); + dev_err(ipa->dev, "error %d starting modem\n", ret); } /* All QMI clients from the modem node are gone (modem shut down or crashed). */ @@ -199,7 +199,7 @@ static void ipa_server_indication_register(struct qmi_handle *qmi, ipa_qmi->indication_requested = true; ipa_qmi_ready(ipa_qmi); /* We might be ready now */ } else { - dev_err(&ipa->pdev->dev, + dev_err(ipa->dev, "error %d sending register indication response\n", ret); } } @@ -228,7 +228,7 @@ static void ipa_server_driver_init_complete(struct qmi_handle *qmi, ipa_qmi->uc_ready = true; ipa_qmi_ready(ipa_qmi); /* We might be ready now */ } else { - dev_err(&ipa->pdev->dev, + dev_err(ipa->dev, "error %d sending init complete response\n", ret); } } @@ -417,7 +417,7 @@ static void ipa_client_init_driver_work(struct work_struct *work) qmi = &ipa_qmi->client_handle; ipa = container_of(ipa_qmi, struct ipa, qmi); - dev = &ipa->pdev->dev; + dev = ipa->dev; ret = qmi_txn_init(qmi, &txn, NULL, NULL); if (ret < 0) { diff --git a/drivers/net/ipa/ipa_reg.c b/drivers/net/ipa/ipa_reg.c index 6a3203ae6f1e..98625956e0bb 100644 --- a/drivers/net/ipa/ipa_reg.c +++ b/drivers/net/ipa/ipa_reg.c @@ -4,6 +4,7 @@ * Copyright (C) 2019-2023 Linaro Ltd. */ +#include <linux/platform_device.h> #include <linux/io.h> #include "ipa.h" @@ -132,9 +133,9 @@ static const struct regs *ipa_regs(enum ipa_version version) } } -int ipa_reg_init(struct ipa *ipa) +int ipa_reg_init(struct ipa *ipa, struct platform_device *pdev) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = &pdev->dev; const struct regs *regs; struct resource *res; @@ -146,8 +147,7 @@ int ipa_reg_init(struct ipa *ipa) return -EINVAL; /* Setup IPA register memory */ - res = platform_get_resource_byname(ipa->pdev, IORESOURCE_MEM, - "ipa-reg"); + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ipa-reg"); if (!res) { dev_err(dev, "DT error getting \"ipa-reg\" memory property\n"); return -ENODEV; diff --git a/drivers/net/ipa/ipa_reg.h b/drivers/net/ipa/ipa_reg.h index 2998f115f12c..62c62495b796 100644 --- a/drivers/net/ipa/ipa_reg.h +++ b/drivers/net/ipa/ipa_reg.h @@ -12,6 +12,8 @@ #include "ipa_version.h" #include "reg.h" +struct platform_device; + struct ipa; /** @@ -643,7 +645,7 @@ extern const struct regs ipa_regs_v5_5; const struct reg *ipa_reg(struct ipa *ipa, enum ipa_reg_id reg_id); -int ipa_reg_init(struct ipa *ipa); +int ipa_reg_init(struct ipa *ipa, struct platform_device *pdev); void ipa_reg_exit(struct ipa *ipa); #endif /* _IPA_REG_H_ */ diff --git a/drivers/net/ipa/ipa_smp2p.c b/drivers/net/ipa/ipa_smp2p.c index 5620dc271fac..aeccce9fab72 100644 --- a/drivers/net/ipa/ipa_smp2p.c +++ b/drivers/net/ipa/ipa_smp2p.c @@ -5,7 +5,7 @@ */ #include <linux/types.h> -#include <linux/device.h> +#include <linux/platform_device.h> #include <linux/interrupt.h> #include <linux/notifier.h> #include <linux/panic_notifier.h> @@ -84,15 +84,13 @@ struct ipa_smp2p { */ static void ipa_smp2p_notify(struct ipa_smp2p *smp2p) { - struct device *dev; u32 value; u32 mask; if (smp2p->notified) return; - dev = &smp2p->ipa->pdev->dev; - smp2p->power_on = pm_runtime_get_if_active(dev, true) > 0; + smp2p->power_on = pm_runtime_get_if_active(smp2p->ipa->dev, true) > 0; /* Signal whether the IPA power is enabled */ mask = BIT(smp2p->enabled_bit); @@ -152,15 +150,16 @@ static void ipa_smp2p_panic_notifier_unregister(struct ipa_smp2p *smp2p) static irqreturn_t ipa_smp2p_modem_setup_ready_isr(int irq, void *dev_id) { struct ipa_smp2p *smp2p = dev_id; + struct ipa *ipa = smp2p->ipa; struct device *dev; int ret; /* Ignore any (spurious) interrupts received after the first */ - if (smp2p->ipa->setup_complete) + if (ipa->setup_complete) return IRQ_HANDLED; /* Power needs to be active for setup */ - dev = &smp2p->ipa->pdev->dev; + dev = ipa->dev; ret = pm_runtime_get_sync(dev); if (ret < 0) { dev_err(dev, "error %d getting power for setup\n", ret); @@ -168,7 +167,7 @@ static irqreturn_t ipa_smp2p_modem_setup_ready_isr(int irq, void *dev_id) } /* An error here won't cause driver shutdown, so warn if one occurs */ - ret = ipa_setup(smp2p->ipa); + ret = ipa_setup(ipa); WARN(ret != 0, "error %d from ipa_setup()\n", ret); out_power_put: @@ -179,14 +178,15 @@ out_power_put: } /* Initialize SMP2P interrupts */ -static int ipa_smp2p_irq_init(struct ipa_smp2p *smp2p, const char *name, - irq_handler_t handler) +static int ipa_smp2p_irq_init(struct ipa_smp2p *smp2p, + struct platform_device *pdev, + const char *name, irq_handler_t handler) { - struct device *dev = &smp2p->ipa->pdev->dev; + struct device *dev = &pdev->dev; unsigned int irq; int ret; - ret = platform_get_irq_byname(smp2p->ipa->pdev, name); + ret = platform_get_irq_byname(pdev, name); if (ret <= 0) return ret ? : -EINVAL; irq = ret; @@ -208,7 +208,7 @@ static void ipa_smp2p_irq_exit(struct ipa_smp2p *smp2p, u32 irq) /* Drop the power reference if it was taken in ipa_smp2p_notify() */ static void ipa_smp2p_power_release(struct ipa *ipa) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; if (!ipa->smp2p->power_on) return; @@ -219,10 +219,11 @@ static void ipa_smp2p_power_release(struct ipa *ipa) } /* Initialize the IPA SMP2P subsystem */ -int ipa_smp2p_init(struct ipa *ipa, bool modem_init) +int +ipa_smp2p_init(struct ipa *ipa, struct platform_device *pdev, bool modem_init) { struct qcom_smem_state *enabled_state; - struct device *dev = &ipa->pdev->dev; + struct device *dev = &pdev->dev; struct qcom_smem_state *valid_state; struct ipa_smp2p *smp2p; u32 enabled_bit; @@ -261,7 +262,7 @@ int ipa_smp2p_init(struct ipa *ipa, bool modem_init) /* We have enough information saved to handle notifications */ ipa->smp2p = smp2p; - ret = ipa_smp2p_irq_init(smp2p, "ipa-clock-query", + ret = ipa_smp2p_irq_init(smp2p, pdev, "ipa-clock-query", ipa_smp2p_modem_clk_query_isr); if (ret < 0) goto err_null_smp2p; @@ -273,7 +274,7 @@ int ipa_smp2p_init(struct ipa *ipa, bool modem_init) if (modem_init) { /* Result will be non-zero (negative for error) */ - ret = ipa_smp2p_irq_init(smp2p, "ipa-setup-ready", + ret = ipa_smp2p_irq_init(smp2p, pdev, "ipa-setup-ready", ipa_smp2p_modem_setup_ready_isr); if (ret < 0) goto err_notifier_unregister; diff --git a/drivers/net/ipa/ipa_smp2p.h b/drivers/net/ipa/ipa_smp2p.h index 9b969b03d1a4..2a3d8eefb13b 100644 --- a/drivers/net/ipa/ipa_smp2p.h +++ b/drivers/net/ipa/ipa_smp2p.h @@ -8,17 +8,20 @@ #include <linux/types.h> +struct platform_device; + struct ipa; /** * ipa_smp2p_init() - Initialize the IPA SMP2P subsystem * @ipa: IPA pointer + * @pdev: Platform device pointer * @modem_init: Whether the modem is responsible for GSI initialization * * Return: 0 if successful, or a negative error code - * */ -int ipa_smp2p_init(struct ipa *ipa, bool modem_init); +int ipa_smp2p_init(struct ipa *ipa, struct platform_device *pdev, + bool modem_init); /** * ipa_smp2p_exit() - Inverse of ipa_smp2p_init() diff --git a/drivers/net/ipa/ipa_table.c b/drivers/net/ipa/ipa_table.c index 7b637bb8b41c..a24ac11b8893 100644 --- a/drivers/net/ipa/ipa_table.c +++ b/drivers/net/ipa/ipa_table.c @@ -163,7 +163,7 @@ ipa_table_mem(struct ipa *ipa, bool filter, bool hashed, bool ipv6) bool ipa_filtered_valid(struct ipa *ipa, u64 filtered) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; u32 count; if (!filtered) { @@ -236,8 +236,7 @@ ipa_filter_reset_table(struct ipa *ipa, bool hashed, bool ipv6, bool modem) trans = ipa_cmd_trans_alloc(ipa, hweight64(ep_mask)); if (!trans) { - dev_err(&ipa->pdev->dev, - "no transaction for %s filter reset\n", + dev_err(ipa->dev, "no transaction for %s filter reset\n", modem ? "modem" : "AP"); return -EBUSY; } @@ -298,8 +297,7 @@ static int ipa_route_reset(struct ipa *ipa, bool modem) trans = ipa_cmd_trans_alloc(ipa, hash_support ? 4 : 2); if (!trans) { - dev_err(&ipa->pdev->dev, - "no transaction for %s route reset\n", + dev_err(ipa->dev, "no transaction for %s route reset\n", modem ? "modem" : "AP"); return -EBUSY; } @@ -327,7 +325,7 @@ static int ipa_route_reset(struct ipa *ipa, bool modem) void ipa_table_reset(struct ipa *ipa, bool modem) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; const char *ee_name; int ret; @@ -356,7 +354,7 @@ int ipa_table_hash_flush(struct ipa *ipa) trans = ipa_cmd_trans_alloc(ipa, 1); if (!trans) { - dev_err(&ipa->pdev->dev, "no transaction for hash flush\n"); + dev_err(ipa->dev, "no transaction for hash flush\n"); return -EBUSY; } @@ -469,7 +467,7 @@ int ipa_table_setup(struct ipa *ipa) */ trans = ipa_cmd_trans_alloc(ipa, 8); if (!trans) { - dev_err(&ipa->pdev->dev, "no transaction for table setup\n"); + dev_err(ipa->dev, "no transaction for table setup\n"); return -EBUSY; } @@ -713,7 +711,7 @@ bool ipa_table_mem_valid(struct ipa *ipa, bool filter) */ int ipa_table_init(struct ipa *ipa) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; dma_addr_t addr; __le64 le_addr; __le64 *virt; @@ -763,7 +761,7 @@ int ipa_table_init(struct ipa *ipa) void ipa_table_exit(struct ipa *ipa) { u32 count = max_t(u32, 1 + ipa->filter_count, ipa->route_count); - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; size_t size; size = IPA_ZERO_RULE_SIZE + (1 + count) * sizeof(__le64); diff --git a/drivers/net/ipa/ipa_uc.c b/drivers/net/ipa/ipa_uc.c index 7eaa0b4ebed9..bfd5dc6dab43 100644 --- a/drivers/net/ipa/ipa_uc.c +++ b/drivers/net/ipa/ipa_uc.c @@ -127,7 +127,7 @@ static struct ipa_uc_mem_area *ipa_uc_shared(struct ipa *ipa) static void ipa_uc_event_handler(struct ipa *ipa) { struct ipa_uc_mem_area *shared = ipa_uc_shared(ipa); - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; if (shared->event == IPA_UC_EVENT_ERROR) dev_err(dev, "microcontroller error event\n"); @@ -141,7 +141,7 @@ static void ipa_uc_event_handler(struct ipa *ipa) static void ipa_uc_response_hdlr(struct ipa *ipa) { struct ipa_uc_mem_area *shared = ipa_uc_shared(ipa); - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; /* An INIT_COMPLETED response message is sent to the AP by the * microcontroller when it is operational. Other than this, the AP @@ -191,7 +191,7 @@ void ipa_uc_config(struct ipa *ipa) /* Inverse of ipa_uc_config() */ void ipa_uc_deconfig(struct ipa *ipa) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; ipa_interrupt_disable(ipa, IPA_IRQ_UC_1); ipa_interrupt_disable(ipa, IPA_IRQ_UC_0); @@ -208,8 +208,8 @@ void ipa_uc_deconfig(struct ipa *ipa) /* Take a proxy power reference for the microcontroller */ void ipa_uc_power(struct ipa *ipa) { + struct device *dev = ipa->dev; static bool already; - struct device *dev; int ret; if (already) @@ -217,7 +217,6 @@ void ipa_uc_power(struct ipa *ipa) already = true; /* Only do this on first boot */ /* This power reference dropped in ipa_uc_response_hdlr() above */ - dev = &ipa->pdev->dev; ret = pm_runtime_get_sync(dev); if (ret < 0) { pm_runtime_put_noidle(dev); diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 4b5513c9c2be..0206b84284ab 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3519,18 +3519,13 @@ static int macsec_dev_init(struct net_device *dev) struct net_device *real_dev = macsec->real_dev; int err; - dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!dev->tstats) - return -ENOMEM; - err = gro_cells_init(&macsec->gro_cells, dev); - if (err) { - free_percpu(dev->tstats); + if (err) return err; - } dev->features = real_dev->features & MACSEC_FEATURES; dev->features |= NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE; + dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; macsec_set_head_tail_room(dev); @@ -3550,7 +3545,6 @@ static void macsec_dev_uninit(struct net_device *dev) struct macsec_dev *macsec = macsec_priv(dev); gro_cells_destroy(&macsec->gro_cells); - free_percpu(dev->tstats); } static netdev_features_t macsec_fix_features(struct net_device *dev, diff --git a/drivers/net/mdio/mdio-bcm-unimac.c b/drivers/net/mdio/mdio-bcm-unimac.c index 6fe08427fdd4..f40eb50bb978 100644 --- a/drivers/net/mdio/mdio-bcm-unimac.c +++ b/drivers/net/mdio/mdio-bcm-unimac.c @@ -334,6 +334,7 @@ static SIMPLE_DEV_PM_OPS(unimac_mdio_pm_ops, NULL, unimac_mdio_resume); static const struct of_device_id unimac_mdio_ids[] = { + { .compatible = "brcm,asp-v2.2-mdio", }, { .compatible = "brcm,asp-v2.1-mdio", }, { .compatible = "brcm,asp-v2.0-mdio", }, { .compatible = "brcm,genet-mdio-v5", }, diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c index 0c5aff63d242..64c0cdd31bf8 100644 --- a/drivers/net/netdevsim/bus.c +++ b/drivers/net/netdevsim/bus.c @@ -232,9 +232,154 @@ del_device_store(const struct bus_type *bus, const char *buf, size_t count) } static BUS_ATTR_WO(del_device); +static ssize_t link_device_store(const struct bus_type *bus, const char *buf, size_t count) +{ + struct netdevsim *nsim_a, *nsim_b, *peer; + struct net_device *dev_a, *dev_b; + unsigned int ifidx_a, ifidx_b; + int netnsfd_a, netnsfd_b, err; + struct net *ns_a, *ns_b; + + err = sscanf(buf, "%d:%u %d:%u", &netnsfd_a, &ifidx_a, &netnsfd_b, + &ifidx_b); + if (err != 4) { + pr_err("Format for linking two devices is \"netnsfd_a:ifidx_a netnsfd_b:ifidx_b\" (int uint int uint).\n"); + return -EINVAL; + } + + ns_a = get_net_ns_by_fd(netnsfd_a); + if (IS_ERR(ns_a)) { + pr_err("Could not find netns with fd: %d\n", netnsfd_a); + return -EINVAL; + } + + ns_b = get_net_ns_by_fd(netnsfd_b); + if (IS_ERR(ns_b)) { + pr_err("Could not find netns with fd: %d\n", netnsfd_b); + put_net(ns_a); + return -EINVAL; + } + + err = -EINVAL; + rtnl_lock(); + dev_a = __dev_get_by_index(ns_a, ifidx_a); + if (!dev_a) { + pr_err("Could not find device with ifindex %u in netnsfd %d\n", + ifidx_a, netnsfd_a); + goto out_err; + } + + if (!netdev_is_nsim(dev_a)) { + pr_err("Device with ifindex %u in netnsfd %d is not a netdevsim\n", + ifidx_a, netnsfd_a); + goto out_err; + } + + dev_b = __dev_get_by_index(ns_b, ifidx_b); + if (!dev_b) { + pr_err("Could not find device with ifindex %u in netnsfd %d\n", + ifidx_b, netnsfd_b); + goto out_err; + } + + if (!netdev_is_nsim(dev_b)) { + pr_err("Device with ifindex %u in netnsfd %d is not a netdevsim\n", + ifidx_b, netnsfd_b); + goto out_err; + } + + if (dev_a == dev_b) { + pr_err("Cannot link a netdevsim to itself\n"); + goto out_err; + } + + err = -EBUSY; + nsim_a = netdev_priv(dev_a); + peer = rtnl_dereference(nsim_a->peer); + if (peer) { + pr_err("Netdevsim %d:%u is already linked\n", netnsfd_a, + ifidx_a); + goto out_err; + } + + nsim_b = netdev_priv(dev_b); + peer = rtnl_dereference(nsim_b->peer); + if (peer) { + pr_err("Netdevsim %d:%u is already linked\n", netnsfd_b, + ifidx_b); + goto out_err; + } + + err = 0; + rcu_assign_pointer(nsim_a->peer, nsim_b); + rcu_assign_pointer(nsim_b->peer, nsim_a); + +out_err: + put_net(ns_b); + put_net(ns_a); + rtnl_unlock(); + + return !err ? count : err; +} +static BUS_ATTR_WO(link_device); + +static ssize_t unlink_device_store(const struct bus_type *bus, const char *buf, size_t count) +{ + struct netdevsim *nsim, *peer; + struct net_device *dev; + unsigned int ifidx; + int netnsfd, err; + struct net *ns; + + err = sscanf(buf, "%u:%u", &netnsfd, &ifidx); + if (err != 2) { + pr_err("Format for unlinking a device is \"netnsfd:ifidx\" (int uint).\n"); + return -EINVAL; + } + + ns = get_net_ns_by_fd(netnsfd); + if (IS_ERR(ns)) { + pr_err("Could not find netns with fd: %d\n", netnsfd); + return -EINVAL; + } + + err = -EINVAL; + rtnl_lock(); + dev = __dev_get_by_index(ns, ifidx); + if (!dev) { + pr_err("Could not find device with ifindex %u in netnsfd %d\n", + ifidx, netnsfd); + goto out_put_netns; + } + + if (!netdev_is_nsim(dev)) { + pr_err("Device with ifindex %u in netnsfd %d is not a netdevsim\n", + ifidx, netnsfd); + goto out_put_netns; + } + + nsim = netdev_priv(dev); + peer = rtnl_dereference(nsim->peer); + if (!peer) + goto out_put_netns; + + err = 0; + RCU_INIT_POINTER(nsim->peer, NULL); + RCU_INIT_POINTER(peer->peer, NULL); + +out_put_netns: + put_net(ns); + rtnl_unlock(); + + return !err ? count : err; +} +static BUS_ATTR_WO(unlink_device); + static struct attribute *nsim_bus_attrs[] = { &bus_attr_new_device.attr, &bus_attr_del_device.attr, + &bus_attr_link_device.attr, + &bus_attr_unlink_device.attr, NULL }; ATTRIBUTE_GROUPS(nsim_bus); diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 77e8250282a5..8330bc0bcb7e 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -29,18 +29,35 @@ static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct netdevsim *ns = netdev_priv(dev); + unsigned int len = skb->len; + struct netdevsim *peer_ns; + rcu_read_lock(); if (!nsim_ipsec_tx(ns, skb)) - goto out; + goto out_drop_free; + peer_ns = rcu_dereference(ns->peer); + if (!peer_ns) + goto out_drop_free; + + skb_tx_timestamp(skb); + if (unlikely(dev_forward_skb(peer_ns->netdev, skb) == NET_RX_DROP)) + goto out_drop_cnt; + + rcu_read_unlock(); u64_stats_update_begin(&ns->syncp); ns->tx_packets++; - ns->tx_bytes += skb->len; + ns->tx_bytes += len; u64_stats_update_end(&ns->syncp); + return NETDEV_TX_OK; -out: +out_drop_free: dev_kfree_skb(skb); - +out_drop_cnt: + rcu_read_unlock(); + u64_stats_update_begin(&ns->syncp); + ns->tx_dropped++; + u64_stats_update_end(&ns->syncp); return NETDEV_TX_OK; } @@ -70,6 +87,7 @@ nsim_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) start = u64_stats_fetch_begin(&ns->syncp); stats->tx_bytes = ns->tx_bytes; stats->tx_packets = ns->tx_packets; + stats->tx_dropped = ns->tx_dropped; } while (u64_stats_fetch_retry(&ns->syncp, start)); } @@ -265,6 +283,21 @@ nsim_set_features(struct net_device *dev, netdev_features_t features) return 0; } +static int nsim_get_iflink(const struct net_device *dev) +{ + struct netdevsim *nsim, *peer; + int iflink; + + nsim = netdev_priv(dev); + + rcu_read_lock(); + peer = rcu_dereference(nsim->peer); + iflink = peer ? READ_ONCE(peer->netdev->ifindex) : 0; + rcu_read_unlock(); + + return iflink; +} + static const struct net_device_ops nsim_netdev_ops = { .ndo_start_xmit = nsim_start_xmit, .ndo_set_rx_mode = nsim_set_rx_mode, @@ -282,6 +315,7 @@ static const struct net_device_ops nsim_netdev_ops = { .ndo_set_vf_rss_query_en = nsim_set_vf_rss_query_en, .ndo_setup_tc = nsim_setup_tc, .ndo_set_features = nsim_set_features, + .ndo_get_iflink = nsim_get_iflink, .ndo_bpf = nsim_bpf, }; @@ -302,7 +336,6 @@ static void nsim_setup(struct net_device *dev) eth_hw_addr_random(dev); dev->tx_queue_len = 0; - dev->flags |= IFF_NOARP; dev->flags &= ~IFF_MULTICAST; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; @@ -413,8 +446,13 @@ err_free_netdev: void nsim_destroy(struct netdevsim *ns) { struct net_device *dev = ns->netdev; + struct netdevsim *peer; rtnl_lock(); + peer = rtnl_dereference(ns->peer); + if (peer) + RCU_INIT_POINTER(peer->peer, NULL); + RCU_INIT_POINTER(ns->peer, NULL); unregister_netdevice(dev); if (nsim_dev_port_is_pf(ns->nsim_dev_port)) { nsim_macsec_teardown(ns); @@ -427,6 +465,11 @@ void nsim_destroy(struct netdevsim *ns) free_netdev(dev); } +bool netdev_is_nsim(struct net_device *dev) +{ + return dev->netdev_ops == &nsim_netdev_ops; +} + static int nsim_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 028c825b86db..553c4b9b4f63 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -98,6 +98,7 @@ struct netdevsim { u64 tx_packets; u64 tx_bytes; + u64 tx_dropped; struct u64_stats_sync syncp; struct nsim_bus_dev *nsim_bus_dev; @@ -125,11 +126,13 @@ struct netdevsim { } udp_ports; struct nsim_ethtool ethtool; + struct netdevsim __rcu *peer; }; struct netdevsim * nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port); void nsim_destroy(struct netdevsim *ns); +bool netdev_is_nsim(struct net_device *dev); void nsim_ethtool_init(struct netdevsim *ns); diff --git a/drivers/net/nlmon.c b/drivers/net/nlmon.c index 5e19a6839dea..e5a0987a263e 100644 --- a/drivers/net/nlmon.c +++ b/drivers/net/nlmon.c @@ -17,17 +17,6 @@ static netdev_tx_t nlmon_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -static int nlmon_dev_init(struct net_device *dev) -{ - dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats); - return dev->lstats == NULL ? -ENOMEM : 0; -} - -static void nlmon_dev_uninit(struct net_device *dev) -{ - free_percpu(dev->lstats); -} - struct nlmon { struct netlink_tap nt; }; @@ -51,15 +40,7 @@ static int nlmon_close(struct net_device *dev) static void nlmon_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { - u64 packets, bytes; - - dev_lstats_read(dev, &packets, &bytes); - - stats->rx_packets = packets; - stats->tx_packets = 0; - - stats->rx_bytes = bytes; - stats->tx_bytes = 0; + dev_lstats_read(dev, &stats->rx_packets, &stats->rx_bytes); } static u32 always_on(struct net_device *dev) @@ -72,8 +53,6 @@ static const struct ethtool_ops nlmon_ethtool_ops = { }; static const struct net_device_ops nlmon_ops = { - .ndo_init = nlmon_dev_init, - .ndo_uninit = nlmon_dev_uninit, .ndo_open = nlmon_open, .ndo_stop = nlmon_close, .ndo_start_xmit = nlmon_xmit, @@ -92,6 +71,7 @@ static void nlmon_setup(struct net_device *dev) dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_LLTX; dev->flags = IFF_NOARP; + dev->pcpu_stat_type = NETDEV_PCPU_STAT_LSTATS; /* That's rather a softlimit here, which, of course, * can be altered. Not a real MTU, but what is to be diff --git a/drivers/net/pcs/pcs-rzn1-miic.c b/drivers/net/pcs/pcs-rzn1-miic.c index d93f84fbb1fd..4bd66fdde367 100644 --- a/drivers/net/pcs/pcs-rzn1-miic.c +++ b/drivers/net/pcs/pcs-rzn1-miic.c @@ -183,7 +183,7 @@ static void miic_converter_enable(struct miic *miic, int port, int enable) miic_reg_rmw(miic, MIIC_CONVRST, MIIC_CONVRST_PHYIF_RST(port), val); } -static int miic_config(struct phylink_pcs *pcs, unsigned int mode, +static int miic_config(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, const unsigned long *advertising, bool permit) { @@ -234,7 +234,7 @@ static int miic_config(struct phylink_pcs *pcs, unsigned int mode, return 0; } -static void miic_link_up(struct phylink_pcs *pcs, unsigned int mode, +static void miic_link_up(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, int speed, int duplex) { struct miic_port *miic_port = phylink_pcs_to_miic_port(pcs); @@ -333,6 +333,7 @@ struct phylink_pcs *miic_create(struct device *dev, struct device_node *np) miic_port->miic = miic; miic_port->port = port - 1; miic_port->pcs.ops = &miic_phylink_ops; + miic_port->pcs.neg_mode = true; return &miic_port->pcs; } diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 1faa22f58366..42ed013385bf 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -919,7 +919,10 @@ static int m88e1111_config_init_1000basex(struct phy_device *phydev) if (extsr < 0) return extsr; - /* If using copper mode, ensure 1000BaseX auto-negotiation is enabled */ + /* If using copper mode, ensure 1000BaseX auto-negotiation is enabled. + * FIXME: this does not actually enable 1000BaseX auto-negotiation if + * it was previously disabled in the Fiber BMCR! + */ mode = extsr & MII_M1111_HWCFG_MODE_MASK; if (mode == MII_M1111_HWCFG_MODE_COPPER_1000X_NOAN) { err = phy_modify(phydev, MII_M1111_PHY_EXT_SR, diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 9b6973581989..8b8634600c51 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -114,6 +114,13 @@ #define LAN8814_INTR_CTRL_REG_POLARITY BIT(1) #define LAN8814_INTR_CTRL_REG_INTR_ENABLE BIT(0) +#define LAN8814_EEE_STATE 0x38 +#define LAN8814_EEE_STATE_MASK2P5P BIT(10) + +#define LAN8814_PD_CONTROLS 0x9d +#define LAN8814_PD_CONTROLS_PD_MEAS_TIME_MASK GENMASK(3, 0) +#define LAN8814_PD_CONTROLS_PD_MEAS_TIME_VAL 0xb + /* Represents 1ppm adjustment in 2^32 format with * each nsec contains 4 clock cycles. * The value is calculated as following: (1/1000000)/((2^-32)/4) @@ -3288,6 +3295,33 @@ static int lan8814_release_coma_mode(struct phy_device *phydev) return 0; } +static void lan8814_clear_2psp_bit(struct phy_device *phydev) +{ + u16 val; + + /* It was noticed that when traffic is passing through the PHY and the + * cable is removed then the LED was still one even though there is no + * link + */ + val = lanphy_read_page_reg(phydev, 2, LAN8814_EEE_STATE); + val &= ~LAN8814_EEE_STATE_MASK2P5P; + lanphy_write_page_reg(phydev, 2, LAN8814_EEE_STATE, val); +} + +static void lan8814_update_meas_time(struct phy_device *phydev) +{ + u16 val; + + /* By setting the measure time to a value of 0xb this will allow cables + * longer than 100m to be used. This configuration can be used + * regardless of the mode of operation of the PHY + */ + val = lanphy_read_page_reg(phydev, 1, LAN8814_PD_CONTROLS); + val &= ~LAN8814_PD_CONTROLS_PD_MEAS_TIME_MASK; + val |= LAN8814_PD_CONTROLS_PD_MEAS_TIME_VAL; + lanphy_write_page_reg(phydev, 1, LAN8814_PD_CONTROLS, val); +} + static int lan8814_probe(struct phy_device *phydev) { const struct kszphy_type *type = phydev->drv->driver_data; @@ -3324,6 +3358,10 @@ static int lan8814_probe(struct phy_device *phydev) lan8814_ptp_init(phydev); + /* Errata workarounds */ + lan8814_clear_2psp_bit(phydev); + lan8814_update_meas_time(phydev); + return 0; } diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index 3e95b8a15f44..5695935fdce9 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -1550,6 +1550,8 @@ EXPORT_SYMBOL(genphy_c45_ethtool_get_eee); * advertised, but the previously advertised link modes are * retained. This allows EEE to be enabled/disabled in a * non-destructive way. + * Returns either error code, 0 if there was no change, or positive + * value if there was a change which triggered auto-neg. */ int genphy_c45_ethtool_set_eee(struct phy_device *phydev, struct ethtool_keee *data) @@ -1576,8 +1578,16 @@ int genphy_c45_ethtool_set_eee(struct phy_device *phydev, phydev->eee_enabled = data->eee_enabled; ret = genphy_c45_an_config_eee_aneg(phydev); - if (ret > 0) - return phy_restart_aneg(phydev); + if (ret > 0) { + ret = phy_restart_aneg(phydev); + if (ret < 0) + return ret; + + /* explicitly return 1, otherwise (ret > 0) value will be + * overwritten by phy_restart_aneg(). + */ + return 1; + } return ret; } diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 14224e06d69f..c3a0a5ee5f11 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -983,9 +983,17 @@ static int phy_check_link_status(struct phy_device *phydev) if (phydev->link && phydev->state != PHY_RUNNING) { phy_check_downshift(phydev); phydev->state = PHY_RUNNING; + err = genphy_c45_eee_is_active(phydev, + NULL, NULL, NULL); + if (err < 0) + phydev->enable_tx_lpi = false; + else + phydev->enable_tx_lpi = (err & phydev->eee_cfg.tx_lpi_enabled); + phy_link_up(phydev); } else if (!phydev->link && phydev->state != PHY_NOLINK) { phydev->state = PHY_NOLINK; + phydev->enable_tx_lpi = false; phy_link_down(phydev); } @@ -1633,8 +1641,8 @@ EXPORT_SYMBOL(phy_get_eee_err); * @phydev: target phy_device struct * @data: ethtool_keee data * - * Description: it reportes the Supported/Advertisement/LP Advertisement - * capabilities. + * Description: reports the Supported/Advertisement/LP Advertisement + * capabilities, etc. */ int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_keee *data) { @@ -1645,6 +1653,7 @@ int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_keee *data) mutex_lock(&phydev->lock); ret = genphy_c45_ethtool_get_eee(phydev, data); + eeecfg_to_eee(data, &phydev->eee_cfg); mutex_unlock(&phydev->lock); return ret; @@ -1652,6 +1661,36 @@ int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_keee *data) EXPORT_SYMBOL(phy_ethtool_get_eee); /** + * phy_ethtool_set_eee_noneg - Adjusts MAC LPI configuration without PHY + * renegotiation + * @phydev: pointer to the target PHY device structure + * @data: pointer to the ethtool_keee structure containing the new EEE settings + * + * This function updates the Energy Efficient Ethernet (EEE) configuration + * for cases where only the MAC's Low Power Idle (LPI) configuration changes, + * without triggering PHY renegotiation. It ensures that the MAC is properly + * informed of the new LPI settings by cycling the link down and up, which + * is necessary for the MAC to adopt the new configuration. This adjustment + * is done only if there is a change in the tx_lpi_enabled or tx_lpi_timer + * configuration. + */ +static void phy_ethtool_set_eee_noneg(struct phy_device *phydev, + struct ethtool_keee *data) +{ + if (phydev->eee_cfg.tx_lpi_enabled != data->tx_lpi_enabled || + phydev->eee_cfg.tx_lpi_timer != data->tx_lpi_timer) { + eee_to_eeecfg(&phydev->eee_cfg, data); + phydev->enable_tx_lpi = eeecfg_mac_can_tx_lpi(&phydev->eee_cfg); + if (phydev->link) { + phydev->link = false; + phy_link_down(phydev); + phydev->link = true; + phy_link_up(phydev); + } + } +} + +/** * phy_ethtool_set_eee - set EEE supported and status * @phydev: target phy_device struct * @data: ethtool_keee data @@ -1667,9 +1706,14 @@ int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_keee *data) mutex_lock(&phydev->lock); ret = genphy_c45_ethtool_set_eee(phydev, data); + if (ret >= 0) { + if (ret == 0) + phy_ethtool_set_eee_noneg(phydev, data); + eee_to_eeecfg(&phydev->eee_cfg, data); + } mutex_unlock(&phydev->lock); - return ret; + return ret < 0 ? ret : 0; } EXPORT_SYMBOL(phy_ethtool_set_eee); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 2eefee970851..72452e6a478c 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -2911,6 +2911,34 @@ void phy_advertise_eee_all(struct phy_device *phydev) EXPORT_SYMBOL_GPL(phy_advertise_eee_all); /** + * phy_support_eee - Set initial EEE policy configuration + * @phydev: Target phy_device struct + * + * This function configures the initial policy for Energy Efficient Ethernet + * (EEE) on the specified PHY device, influencing that EEE capabilities are + * advertised before the link is established. It should be called during PHY + * registration by the MAC driver and/or the PHY driver (for SmartEEE PHYs) + * if MAC supports LPI or PHY is capable to compensate missing LPI functionality + * of the MAC. + * + * The function sets default EEE policy parameters, including preparing the PHY + * to advertise EEE capabilities based on hardware support. + * + * It also sets the expected configuration for Low Power Idle (LPI) in the MAC + * driver. If the PHY framework determines that both local and remote + * advertisements support EEE, and the negotiated link mode is compatible with + * EEE, it will set enable_tx_lpi = true. The MAC driver is expected to act on + * this setting by enabling the LPI timer if enable_tx_lpi is set. + */ +void phy_support_eee(struct phy_device *phydev) +{ + linkmode_copy(phydev->advertising_eee, phydev->supported_eee); + phydev->eee_cfg.tx_lpi_enabled = true; + phydev->eee_cfg.eee_enabled = true; +} +EXPORT_SYMBOL(phy_support_eee); + +/** * phy_support_sym_pause - Enable support of symmetrical pause * @phydev: target phy_device struct * diff --git a/drivers/net/phy/qcom/qca807x.c b/drivers/net/phy/qcom/qca807x.c index 780c28e2e4aa..672c6929119a 100644 --- a/drivers/net/phy/qcom/qca807x.c +++ b/drivers/net/phy/qcom/qca807x.c @@ -732,24 +732,24 @@ static int qca807x_probe(struct phy_device *phydev) priv->dac_disable_bias_current_tweak = of_property_read_bool(node, "qcom,dac-disable-bias-current-tweak"); - if (IS_ENABLED(CONFIG_GPIOLIB)) { - /* Make sure we don't have mixed leds node and gpio-controller - * to prevent registering leds and having gpio-controller usage - * conflicting with them. - */ - if (of_find_property(node, "leds", NULL) && - of_find_property(node, "gpio-controller", NULL)) { - phydev_err(phydev, "Invalid property detected. LEDs and gpio-controller are mutually exclusive."); - return -EINVAL; - } +#if IS_ENABLED(CONFIG_GPIOLIB) + /* Make sure we don't have mixed leds node and gpio-controller + * to prevent registering leds and having gpio-controller usage + * conflicting with them. + */ + if (of_find_property(node, "leds", NULL) && + of_find_property(node, "gpio-controller", NULL)) { + phydev_err(phydev, "Invalid property detected. LEDs and gpio-controller are mutually exclusive."); + return -EINVAL; + } - /* Do not register a GPIO controller unless flagged for it */ - if (of_property_read_bool(node, "gpio-controller")) { - ret = qca807x_gpio(phydev); - if (ret) - return ret; - } + /* Do not register a GPIO controller unless flagged for it */ + if (of_property_read_bool(node, "gpio-controller")) { + ret = qca807x_gpio(phydev); + if (ret) + return ret; } +#endif /* Attach SFP bus on combo port*/ if (phy_read(phydev, QCA807X_CHIP_CONFIGURATION)) { diff --git a/drivers/net/phy/qcom/qca808x.c b/drivers/net/phy/qcom/qca808x.c index 2acf852fb4de..5048304ccc9e 100644 --- a/drivers/net/phy/qcom/qca808x.c +++ b/drivers/net/phy/qcom/qca808x.c @@ -156,6 +156,27 @@ static bool qca808x_has_fast_retrain_or_slave_seed(struct phy_device *phydev) return linkmode_test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, phydev->supported); } +static bool qca808x_is_1g_only(struct phy_device *phydev) +{ + int ret; + + ret = phy_read_mmd(phydev, MDIO_MMD_AN, QCA808X_PHY_MMD7_CHIP_TYPE); + if (ret < 0) + return true; + + return !!(QCA808X_PHY_CHIP_TYPE_1G & ret); +} + +static void qca808x_fill_possible_interfaces(struct phy_device *phydev) +{ + unsigned long *possible = phydev->possible_interfaces; + + __set_bit(PHY_INTERFACE_MODE_SGMII, possible); + + if (!qca808x_is_1g_only(phydev)) + __set_bit(PHY_INTERFACE_MODE_2500BASEX, possible); +} + static int qca808x_probe(struct phy_device *phydev) { struct device *dev = &phydev->mdio.dev; @@ -220,6 +241,8 @@ static int qca808x_config_init(struct phy_device *phydev) } } + qca808x_fill_possible_interfaces(phydev); + /* Configure adc threshold as 100mv for the link 10M */ return at803x_debug_reg_mask(phydev, QCA808X_PHY_DEBUG_ADC_THRESHOLD, QCA808X_ADC_THRESHOLD_MASK, @@ -350,11 +373,7 @@ static int qca808x_get_features(struct phy_device *phydev) * existed in the bit0 of MMD1.21, we need to remove it manually if * it is the qca8081 1G chip according to the bit0 of MMD7.0x901d. */ - ret = phy_read_mmd(phydev, MDIO_MMD_AN, QCA808X_PHY_MMD7_CHIP_TYPE); - if (ret < 0) - return ret; - - if (QCA808X_PHY_CHIP_TYPE_1G & ret) + if (qca808x_is_1g_only(phydev)) linkmode_clear_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, phydev->supported); return 0; diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index db1d11ae817b..fe380fe196e7 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -295,7 +295,9 @@ static void ppp_setup(struct net_device *dev); static const struct net_device_ops ppp_netdev_ops; -static struct class *ppp_class; +static const struct class ppp_class = { + .name = "ppp", +}; /* per net-namespace data */ static inline struct ppp_net *ppp_pernet(struct net *net) @@ -1394,11 +1396,9 @@ static int __init ppp_init(void) goto out_net; } - ppp_class = class_create("ppp"); - if (IS_ERR(ppp_class)) { - err = PTR_ERR(ppp_class); + err = class_register(&ppp_class); + if (err) goto out_chrdev; - } err = rtnl_link_register(&ppp_link_ops); if (err) { @@ -1407,12 +1407,12 @@ static int __init ppp_init(void) } /* not a big deal if we fail here :-) */ - device_create(ppp_class, NULL, MKDEV(PPP_MAJOR, 0), NULL, "ppp"); + device_create(&ppp_class, NULL, MKDEV(PPP_MAJOR, 0), NULL, "ppp"); return 0; out_class: - class_destroy(ppp_class); + class_unregister(&ppp_class); out_chrdev: unregister_chrdev(PPP_MAJOR, "ppp"); out_net: @@ -3549,8 +3549,8 @@ static void __exit ppp_cleanup(void) pr_err("PPP: removing module but units remain!\n"); rtnl_link_unregister(&ppp_link_ops); unregister_chrdev(PPP_MAJOR, "ppp"); - device_destroy(ppp_class, MKDEV(PPP_MAJOR, 0)); - class_destroy(ppp_class); + device_destroy(&ppp_class, MKDEV(PPP_MAJOR, 0)); + class_unregister(&ppp_class); unregister_pernet_device(&ppp_net_ops); } diff --git a/drivers/net/tun.c b/drivers/net/tun.c index bc80fc1d576e..8d258e263f54 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -652,6 +652,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun->tfiles[tun->numqueues - 1]); ntfile = rtnl_dereference(tun->tfiles[index]); ntfile->queue_index = index; + ntfile->xdp_rxq.queue_index = index; rcu_assign_pointer(tun->tfiles[tun->numqueues - 1], NULL); @@ -976,20 +977,15 @@ static int tun_net_init(struct net_device *dev) struct ifreq *ifr = tun->ifr; int err; - dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!dev->tstats) - return -ENOMEM; - spin_lock_init(&tun->lock); err = security_tun_dev_alloc_security(&tun->security); - if (err < 0) { - free_percpu(dev->tstats); + if (err < 0) return err; - } tun_flow_init(tun); + dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; @@ -1007,7 +1003,6 @@ static int tun_net_init(struct net_device *dev) if (err < 0) { tun_flow_uninit(tun); security_tun_dev_free_security(tun->security); - free_percpu(dev->tstats); return err; } return 0; @@ -1343,7 +1338,6 @@ static const struct net_device_ops tap_netdev_ops = { .ndo_select_queue = tun_select_queue, .ndo_features_check = passthru_features_check, .ndo_set_rx_headroom = tun_set_headroom, - .ndo_get_stats64 = dev_get_tstats64, .ndo_bpf = tun_xdp, .ndo_xdp_xmit = tun_xdp_xmit, .ndo_change_carrier = tun_net_change_carrier, @@ -2316,7 +2310,6 @@ static void tun_free_netdev(struct net_device *dev) BUG_ON(!(list_empty(&tun->disabled))); - free_percpu(dev->tstats); tun_flow_uninit(tun); security_tun_dev_free_security(tun->security); __tun_set_ebpf(tun, &tun->steering_prog, NULL); diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index cd4083e0b3b9..e13e4920ee9b 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -339,7 +339,7 @@ static void do_neigh_solicit(struct usbnet *dev, u8 *buf, u16 tci) in6_dev = in6_dev_get(netdev); if (!in6_dev) goto out; - is_router = !!in6_dev->cnf.forwarding; + is_router = !!READ_ONCE(in6_dev->cnf.forwarding); in6_dev_put(in6_dev); /* ipv6_stub != NULL if in6_dev_get returned an inet6_dev */ diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index 99ec1d4a972d..8b6d6a1b3c2e 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -232,7 +232,7 @@ static int dm9601_mdio_read(struct net_device *netdev, int phy_id, int loc) err = dm_read_shared_word(dev, 1, loc, &res); if (err < 0) { netdev_err(dev->net, "MDIO read error: %d\n", err); - return err; + return 0; } netdev_dbg(dev->net, diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 106282612bc2..80ee4fcdfb36 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1501,7 +1501,9 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) lan78xx_rx_urb_submit_all(dev); + local_bh_disable(); napi_schedule(&dev->napi); + local_bh_enable(); } return 0; @@ -3033,7 +3035,8 @@ static int lan78xx_reset(struct lan78xx_net *dev) if (dev->chipid == ID_REV_CHIP_ID_7801_) buf &= ~MAC_CR_GMII_EN_; - if (dev->chipid == ID_REV_CHIP_ID_7800_) { + if (dev->chipid == ID_REV_CHIP_ID_7800_ || + dev->chipid == ID_REV_CHIP_ID_7850_) { ret = lan78xx_read_raw_eeprom(dev, 0, 1, &sig); if (!ret && sig != EEPROM_INDICATOR) { /* Implies there is no external eeprom. Set mac speed */ @@ -3132,7 +3135,8 @@ static int lan78xx_open(struct net_device *net) done: mutex_unlock(&dev->dev_mutex); - usb_autopm_put_interface(dev->intf); + if (ret < 0) + usb_autopm_put_interface(dev->intf); return ret; } diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index a530f20ee257..2fa46baa589e 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -2105,6 +2105,11 @@ static const struct usb_device_id products[] = { .driver_info = (unsigned long) &smsc95xx_info, }, { + /* SYSTEC USB-SPEmodule1 10BASE-T1L Ethernet Device */ + USB_DEVICE(0x0878, 0x1400), + .driver_info = (unsigned long)&smsc95xx_info, + }, + { /* Microchip's EVB-LAN8670-USB 10BASE-T1S Ethernet Device */ USB_DEVICE(0x184F, 0x0051), .driver_info = (unsigned long)&smsc95xx_info, diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c index 143bd4ab160d..57947a5590cc 100644 --- a/drivers/net/usb/sr9800.c +++ b/drivers/net/usb/sr9800.c @@ -737,7 +737,9 @@ static int sr9800_bind(struct usbnet *dev, struct usb_interface *intf) data->eeprom_len = SR9800_EEPROM_LEN; - usbnet_get_endpoints(dev, intf); + ret = usbnet_get_endpoints(dev, intf); + if (ret) + goto out; /* LED Setting Rule : * AABB:CCDD diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index b21ebe24057f..e84efa661589 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1633,7 +1633,6 @@ void usbnet_disconnect (struct usb_interface *intf) usb_free_urb(dev->interrupt); kfree(dev->padding_pkt); - free_percpu(net->tstats); free_netdev(net); } EXPORT_SYMBOL_GPL(usbnet_disconnect); @@ -1645,7 +1644,6 @@ static const struct net_device_ops usbnet_netdev_ops = { .ndo_tx_timeout = usbnet_tx_timeout, .ndo_set_rx_mode = usbnet_set_rx_mode, .ndo_change_mtu = usbnet_change_mtu, - .ndo_get_stats64 = dev_get_tstats64, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; @@ -1710,10 +1708,6 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) dev->rx_speed = SPEED_UNSET; dev->tx_speed = SPEED_UNSET; - net->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!net->tstats) - goto out0; - dev->msg_enable = netif_msg_init (msg_level, NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK); init_waitqueue_head(&dev->wait); @@ -1743,6 +1737,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) net->netdev_ops = &usbnet_netdev_ops; net->watchdog_timeo = TX_TIMEOUT_JIFFIES; net->ethtool_ops = &usbnet_ethtool_ops; + net->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; // allow device-specific bind/init procedures // NOTE net->name still not usable ... @@ -1861,8 +1856,6 @@ out1: */ cancel_work_sync(&dev->kevent); del_timer_sync(&dev->delay); - free_percpu(net->tstats); -out0: free_netdev(net); out: return status; diff --git a/drivers/net/veth.c b/drivers/net/veth.c index dd5aa8ab65a8..13d902462d8e 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1138,14 +1138,6 @@ static int veth_enable_xdp(struct net_device *dev) veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, true); return err; } - - if (!veth_gro_requested(dev)) { - /* user-space did not require GRO, but adding XDP - * is supposed to get GRO working - */ - dev->features |= NETIF_F_GRO; - netdev_features_change(dev); - } } } @@ -1165,18 +1157,9 @@ static void veth_disable_xdp(struct net_device *dev) for (i = 0; i < dev->real_num_rx_queues; i++) rcu_assign_pointer(priv->rq[i].xdp_prog, NULL); - if (!netif_running(dev) || !veth_gro_requested(dev)) { + if (!netif_running(dev) || !veth_gro_requested(dev)) veth_napi_del(dev); - /* if user-space did not require GRO, since adding XDP - * enabled it, clear it now - */ - if (!veth_gro_requested(dev) && netif_running(dev)) { - dev->features &= ~NETIF_F_GRO; - netdev_features_change(dev); - } - } - veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, false); } @@ -1408,7 +1391,8 @@ static int veth_alloc_queues(struct net_device *dev) struct veth_priv *priv = netdev_priv(dev); int i; - priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL_ACCOUNT); + priv->rq = kvcalloc(dev->num_rx_queues, sizeof(*priv->rq), + GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); if (!priv->rq) return -ENOMEM; @@ -1424,7 +1408,7 @@ static void veth_free_queues(struct net_device *dev) { struct veth_priv *priv = netdev_priv(dev); - kfree(priv->rq); + kvfree(priv->rq); } static int veth_dev_init(struct net_device *dev) @@ -1585,6 +1569,14 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, } if (!old_prog) { + if (!veth_gro_requested(dev)) { + /* user-space did not require GRO, but adding + * XDP is supposed to get GRO working + */ + dev->features |= NETIF_F_GRO; + netdev_features_change(dev); + } + peer->hw_features &= ~NETIF_F_GSO_SOFTWARE; peer->max_mtu = max_mtu; } @@ -1600,6 +1592,14 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, if (dev->flags & IFF_UP) veth_disable_xdp(dev); + /* if user-space did not require GRO, since adding XDP + * enabled it, clear it now + */ + if (!veth_gro_requested(dev)) { + dev->features &= ~NETIF_F_GRO; + netdev_features_change(dev); + } + if (peer) { peer->hw_features |= NETIF_F_GSO_SOFTWARE; peer->max_mtu = ETH_MAX_MTU; diff --git a/drivers/net/wan/framer/framer-core.c b/drivers/net/wan/framer/framer-core.c index 33b358b99f70..f547c22e26ac 100644 --- a/drivers/net/wan/framer/framer-core.c +++ b/drivers/net/wan/framer/framer-core.c @@ -18,7 +18,12 @@ #include <linux/regulator/consumer.h> #include <linux/slab.h> -static struct class *framer_class; +static void framer_release(struct device *dev); +static const struct class framer_class = { + .name = "framer", + .dev_release = framer_release, +}; + static DEFINE_MUTEX(framer_provider_mutex); static LIST_HEAD(framer_provider_list); static DEFINE_IDA(framer_ida); @@ -627,7 +632,7 @@ struct framer *framer_create(struct device *dev, struct device_node *node, INIT_DELAYED_WORK(&framer->polling_work, framer_polling_work); BLOCKING_INIT_NOTIFIER_HEAD(&framer->notifier_list); - framer->dev.class = framer_class; + framer->dev.class = &framer_class; framer->dev.parent = dev; framer->dev.of_node = node ? node : dev->of_node; framer->id = id; @@ -741,7 +746,7 @@ struct framer *framer_provider_simple_of_xlate(struct device *dev, struct class_dev_iter iter; struct framer *framer; - class_dev_iter_init(&iter, framer_class, NULL, NULL); + class_dev_iter_init(&iter, &framer_class, NULL, NULL); while ((dev = class_dev_iter_next(&iter))) { framer = dev_to_framer(dev); if (args->np != framer->dev.of_node) @@ -870,14 +875,6 @@ static void framer_release(struct device *dev) static int __init framer_core_init(void) { - framer_class = class_create("framer"); - if (IS_ERR(framer_class)) { - pr_err("failed to create framer class (%pe)\n", framer_class); - return PTR_ERR(framer_class); - } - - framer_class->dev_release = framer_release; - - return 0; + return class_register(&framer_class); } device_initcall(framer_core_init); diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c index a176653c8861..df275b4fccb6 100644 --- a/drivers/net/wireguard/receive.c +++ b/drivers/net/wireguard/receive.c @@ -263,7 +263,7 @@ static bool decrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair) * call skb_cow_data, so that there's no chance that data is removed * from the skb, so that later we can extract the original endpoint. */ - offset = skb->data - skb_network_header(skb); + offset = -skb_network_offset(skb); skb_push(skb, offset); num_frags = skb_cow_data(skb, 0, &trailer); offset += sizeof(struct message_data); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h b/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h index 9c69d3674384..e6c0f928a6bb 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2005-2014, 2019-2021, 2023 Intel Corporation + * Copyright (C) 2005-2014, 2019-2021, 2023-2024 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -66,6 +66,16 @@ enum iwl_gen2_tx_fifo { IWL_GEN2_TRIG_TX_FIFO_VO, }; +enum iwl_bz_tx_fifo { + IWL_BZ_EDCA_TX_FIFO_BK, + IWL_BZ_EDCA_TX_FIFO_BE, + IWL_BZ_EDCA_TX_FIFO_VI, + IWL_BZ_EDCA_TX_FIFO_VO, + IWL_BZ_TRIG_TX_FIFO_BK, + IWL_BZ_TRIG_TX_FIFO_BE, + IWL_BZ_TRIG_TX_FIFO_VI, + IWL_BZ_TRIG_TX_FIFO_VO, +}; /** * enum iwl_tx_queue_cfg_actions - TXQ config options * @TX_QUEUE_CFG_ENABLE_QUEUE: enable a queue diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 6d5ed79b9fff..70e03a9a937e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -1309,7 +1309,9 @@ static int __iwl_mvm_suspend(struct ieee80211_hw *hw, mvm->net_detect = true; } else { - struct iwl_wowlan_config_cmd wowlan_config_cmd = {}; + struct iwl_wowlan_config_cmd wowlan_config_cmd = { + .offloading_tid = 0, + }; wowlan_config_cmd.sta_id = mvm_link->ap_sta_id; @@ -1321,6 +1323,11 @@ static int __iwl_mvm_suspend(struct ieee80211_hw *hw, goto out_noreset; } + ret = iwl_mvm_sta_ensure_queue( + mvm, ap_sta->txq[wowlan_config_cmd.offloading_tid]); + if (ret) + goto out_noreset; + ret = iwl_mvm_get_wowlan_config(mvm, wowlan, &wowlan_config_cmd, vif, mvmvif, ap_sta); if (ret) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index 123fe9bba982..228ede7b8957 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -31,6 +31,17 @@ const u8 iwl_mvm_ac_to_gen2_tx_fifo[] = { IWL_GEN2_TRIG_TX_FIFO_BK, }; +const u8 iwl_mvm_ac_to_bz_tx_fifo[] = { + IWL_BZ_EDCA_TX_FIFO_VO, + IWL_BZ_EDCA_TX_FIFO_VI, + IWL_BZ_EDCA_TX_FIFO_BE, + IWL_BZ_EDCA_TX_FIFO_BK, + IWL_BZ_TRIG_TX_FIFO_VO, + IWL_BZ_TRIG_TX_FIFO_VI, + IWL_BZ_TRIG_TX_FIFO_BE, + IWL_BZ_TRIG_TX_FIFO_BK, +}; + struct iwl_mvm_mac_iface_iterator_data { struct iwl_mvm *mvm; struct ieee80211_vif *vif; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index c85d9e460ad2..5c316909cab2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1590,12 +1590,16 @@ static inline int iwl_mvm_max_active_links(struct iwl_mvm *mvm, extern const u8 iwl_mvm_ac_to_tx_fifo[]; extern const u8 iwl_mvm_ac_to_gen2_tx_fifo[]; +extern const u8 iwl_mvm_ac_to_bz_tx_fifo[]; static inline u8 iwl_mvm_mac_ac_to_tx_fifo(struct iwl_mvm *mvm, enum ieee80211_ac_numbers ac) { - return iwl_mvm_has_new_tx_api(mvm) ? - iwl_mvm_ac_to_gen2_tx_fifo[ac] : iwl_mvm_ac_to_tx_fifo[ac]; + if (mvm->trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) + return iwl_mvm_ac_to_bz_tx_fifo[ac]; + if (iwl_mvm_has_new_tx_api(mvm)) + return iwl_mvm_ac_to_gen2_tx_fifo[ac]; + return iwl_mvm_ac_to_tx_fifo[ac]; } struct iwl_rate_info { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index f3efbec38253..491c449fd431 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -1502,6 +1502,34 @@ out_err: return ret; } +int iwl_mvm_sta_ensure_queue(struct iwl_mvm *mvm, + struct ieee80211_txq *txq) +{ + struct iwl_mvm_txq *mvmtxq = iwl_mvm_txq_from_mac80211(txq); + int ret = -EINVAL; + + lockdep_assert_held(&mvm->mutex); + + if (likely(test_bit(IWL_MVM_TXQ_STATE_READY, &mvmtxq->state)) || + !txq->sta) { + return 0; + } + + if (!iwl_mvm_sta_alloc_queue(mvm, txq->sta, txq->ac, txq->tid)) { + set_bit(IWL_MVM_TXQ_STATE_READY, &mvmtxq->state); + ret = 0; + } + + local_bh_disable(); + spin_lock(&mvm->add_stream_lock); + if (!list_empty(&mvmtxq->list)) + list_del_init(&mvmtxq->list); + spin_unlock(&mvm->add_stream_lock); + local_bh_enable(); + + return ret; +} + void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk) { struct iwl_mvm *mvm = container_of(wk, struct iwl_mvm, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h index 4668f413abd3..b3450569864e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2012-2014, 2018-2023 Intel Corporation + * Copyright (C) 2012-2014, 2018-2024 Intel Corporation * Copyright (C) 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015-2016 Intel Deutschland GmbH */ @@ -571,6 +571,7 @@ void iwl_mvm_modify_all_sta_disable_tx(struct iwl_mvm *mvm, bool disable); void iwl_mvm_csa_client_absent(struct iwl_mvm *mvm, struct ieee80211_vif *vif); +int iwl_mvm_sta_ensure_queue(struct iwl_mvm *mvm, struct ieee80211_txq *txq); void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk); int iwl_mvm_add_pasn_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct iwl_mvm_int_sta *sta, u8 *addr, u32 cipher, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index e502f4ee9e1f..782ddc8c296b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -1015,8 +1015,7 @@ static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb, netdev_features_t netdev_flags = NETIF_F_CSUM_MASK | NETIF_F_SG; u8 tid; - snap_ip_tcp = 8 + skb_transport_header(skb) - skb_network_header(skb) + - tcp_hdrlen(skb); + snap_ip_tcp = 8 + skb_network_header_len(skb) + tcp_hdrlen(skb); if (!mvmsta->max_amsdu_len || !ieee80211_is_data_qos(hdr->frame_control) || diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 6c2b37e56c78..fa8eba47dc4c 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -1331,7 +1331,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, trans->txqs.tfd.size, &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len, 0); - ip_hdrlen = skb_transport_header(skb) - skb_network_header(skb); + ip_hdrlen = skb_network_header_len(skb); snap_ip_tcp_hdrlen = 8 + ip_hdrlen + tcp_hdrlen(skb); total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len - iv_len; amsdu_pad = 0; diff --git a/drivers/net/wireless/intel/iwlwifi/queue/tx.c b/drivers/net/wireless/intel/iwlwifi/queue/tx.c index d3bde2d010b7..33973a60d0bf 100644 --- a/drivers/net/wireless/intel/iwlwifi/queue/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/queue/tx.c @@ -353,7 +353,7 @@ static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans, trace_iwlwifi_dev_tx(trans->dev, skb, tfd, sizeof(*tfd), &dev_cmd->hdr, start_len, 0); - ip_hdrlen = skb_transport_header(skb) - skb_network_header(skb); + ip_hdrlen = skb_network_header_len(skb); snap_ip_tcp_hdrlen = 8 + ip_hdrlen + tcp_hdrlen(skb); total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len; amsdu_pad = 0; diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c index b0030f3ed0f0..17431f1b1a0c 100644 --- a/drivers/net/wwan/wwan_core.c +++ b/drivers/net/wwan/wwan_core.c @@ -26,7 +26,9 @@ static DEFINE_MUTEX(wwan_register_lock); /* WWAN device create|remove lock */ static DEFINE_IDA(minors); /* minors for WWAN port chardevs */ static DEFINE_IDA(wwan_dev_ids); /* for unique WWAN device IDs */ -static struct class *wwan_class; +static const struct class wwan_class = { + .name = "wwan", +}; static int wwan_major; static struct dentry *wwan_debugfs_dir; @@ -130,7 +132,7 @@ static struct wwan_device *wwan_dev_get_by_parent(struct device *parent) { struct device *dev; - dev = class_find_device(wwan_class, NULL, parent, wwan_dev_parent_match); + dev = class_find_device(&wwan_class, NULL, parent, wwan_dev_parent_match); if (!dev) return ERR_PTR(-ENODEV); @@ -147,7 +149,7 @@ static struct wwan_device *wwan_dev_get_by_name(const char *name) { struct device *dev; - dev = class_find_device(wwan_class, NULL, name, wwan_dev_name_match); + dev = class_find_device(&wwan_class, NULL, name, wwan_dev_name_match); if (!dev) return ERR_PTR(-ENODEV); @@ -183,7 +185,7 @@ static struct wwan_device *wwan_dev_get_by_debugfs(struct dentry *dir) { struct device *dev; - dev = class_find_device(wwan_class, NULL, dir, wwan_dev_debugfs_match); + dev = class_find_device(&wwan_class, NULL, dir, wwan_dev_debugfs_match); if (!dev) return ERR_PTR(-ENODEV); @@ -239,7 +241,7 @@ static struct wwan_device *wwan_create_dev(struct device *parent) } wwandev->dev.parent = parent; - wwandev->dev.class = wwan_class; + wwandev->dev.class = &wwan_class; wwandev->dev.type = &wwan_dev_type; wwandev->id = id; dev_set_name(&wwandev->dev, "wwan%d", wwandev->id); @@ -265,7 +267,7 @@ done_unlock: static int is_wwan_child(struct device *dev, void *data) { - return dev->class == wwan_class; + return dev->class == &wwan_class; } static void wwan_remove_dev(struct wwan_device *wwandev) @@ -375,7 +377,7 @@ static struct wwan_port *wwan_port_get_by_minor(unsigned int minor) { struct device *dev; - dev = class_find_device(wwan_class, NULL, &minor, wwan_port_minor_match); + dev = class_find_device(&wwan_class, NULL, &minor, wwan_port_minor_match); if (!dev) return ERR_PTR(-ENODEV); @@ -405,7 +407,7 @@ static int __wwan_port_dev_assign_name(struct wwan_port *port, const char *fmt) return -ENOMEM; /* Collect ids of same name format ports */ - class_dev_iter_init(&iter, wwan_class, NULL, &wwan_port_dev_type); + class_dev_iter_init(&iter, &wwan_class, NULL, &wwan_port_dev_type); while ((dev = class_dev_iter_next(&iter))) { if (dev->parent != &wwandev->dev) continue; @@ -477,7 +479,7 @@ struct wwan_port *wwan_create_port(struct device *parent, mutex_init(&port->data_lock); port->dev.parent = &wwandev->dev; - port->dev.class = wwan_class; + port->dev.class = &wwan_class; port->dev.type = &wwan_port_dev_type; port->dev.devt = MKDEV(wwan_major, minor); dev_set_drvdata(&port->dev, drvdata); @@ -1212,11 +1214,9 @@ static int __init wwan_init(void) if (err) return err; - wwan_class = class_create("wwan"); - if (IS_ERR(wwan_class)) { - err = PTR_ERR(wwan_class); + err = class_register(&wwan_class); + if (err) goto unregister; - } /* chrdev used for wwan ports */ wwan_major = __register_chrdev(0, 0, WWAN_MAX_MINORS, "wwan_port", @@ -1233,7 +1233,7 @@ static int __init wwan_init(void) return 0; destroy: - class_destroy(wwan_class); + class_unregister(&wwan_class); unregister: rtnl_link_unregister(&wwan_rtnl_link_ops); return err; @@ -1244,7 +1244,7 @@ static void __exit wwan_exit(void) debugfs_remove_recursive(wwan_debugfs_dir); __unregister_chrdev(wwan_major, 0, WWAN_MAX_MINORS, "wwan_port"); rtnl_link_unregister(&wwan_rtnl_link_ops); - class_destroy(wwan_class); + class_unregister(&wwan_class); } module_init(wwan_init); diff --git a/drivers/net/wwan/wwan_hwsim.c b/drivers/net/wwan/wwan_hwsim.c index ff3dd24ddb33..b02befd1b6fb 100644 --- a/drivers/net/wwan/wwan_hwsim.c +++ b/drivers/net/wwan/wwan_hwsim.c @@ -25,7 +25,9 @@ static int wwan_hwsim_devsnum = 2; module_param_named(devices, wwan_hwsim_devsnum, int, 0444); MODULE_PARM_DESC(devices, "Number of simulated devices"); -static struct class *wwan_hwsim_class; +static const struct class wwan_hwsim_class = { + .name = "wwan_hwsim", +}; static struct dentry *wwan_hwsim_debugfs_topdir; static struct dentry *wwan_hwsim_debugfs_devcreate; @@ -277,7 +279,7 @@ static struct wwan_hwsim_dev *wwan_hwsim_dev_new(void) spin_unlock(&wwan_hwsim_devs_lock); dev->dev.release = wwan_hwsim_dev_release; - dev->dev.class = wwan_hwsim_class; + dev->dev.class = &wwan_hwsim_class; dev_set_name(&dev->dev, "hwsim%u", dev->id); spin_lock_init(&dev->ports_lock); @@ -511,11 +513,9 @@ static int __init wwan_hwsim_init(void) if (!wwan_wq) return -ENOMEM; - wwan_hwsim_class = class_create("wwan_hwsim"); - if (IS_ERR(wwan_hwsim_class)) { - err = PTR_ERR(wwan_hwsim_class); + err = class_register(&wwan_hwsim_class); + if (err) goto err_wq_destroy; - } wwan_hwsim_debugfs_topdir = debugfs_create_dir("wwan_hwsim", NULL); wwan_hwsim_debugfs_devcreate = @@ -534,7 +534,7 @@ err_clean_devs: wwan_hwsim_free_devs(); flush_workqueue(wwan_wq); /* Wait deletion works completion */ debugfs_remove(wwan_hwsim_debugfs_topdir); - class_destroy(wwan_hwsim_class); + class_unregister(&wwan_hwsim_class); err_wq_destroy: destroy_workqueue(wwan_wq); @@ -547,7 +547,7 @@ static void __exit wwan_hwsim_exit(void) wwan_hwsim_free_devs(); flush_workqueue(wwan_wq); /* Wait deletion works completion */ debugfs_remove(wwan_hwsim_debugfs_topdir); - class_destroy(wwan_hwsim_class); + class_unregister(&wwan_hwsim_class); destroy_workqueue(wwan_wq); } diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index a6d596e05602..3692b56cb58d 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1344,7 +1344,6 @@ static int nvme_tcp_alloc_async_req(struct nvme_tcp_ctrl *ctrl) static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid) { - struct page *page; struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); struct nvme_tcp_queue *queue = &ctrl->queues[qid]; unsigned int noreclaim_flag; @@ -1355,11 +1354,7 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid) if (queue->hdr_digest || queue->data_digest) nvme_tcp_free_crypto(queue); - if (queue->pf_cache.va) { - page = virt_to_head_page(queue->pf_cache.va); - __page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias); - queue->pf_cache.va = NULL; - } + page_frag_cache_drain(&queue->pf_cache); noreclaim_flag = memalloc_noreclaim_save(); /* ->sock will be released by fput() */ diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index c8655fc5aa5b..2aa5762e9f50 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1591,7 +1591,6 @@ static void nvmet_tcp_free_cmd_data_in_buffers(struct nvmet_tcp_queue *queue) static void nvmet_tcp_release_queue_work(struct work_struct *w) { - struct page *page; struct nvmet_tcp_queue *queue = container_of(w, struct nvmet_tcp_queue, release_work); @@ -1615,8 +1614,7 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w) if (queue->hdr_digest || queue->data_digest) nvmet_tcp_free_crypto(queue); ida_free(&nvmet_tcp_queue_ida, queue->idx); - page = virt_to_head_page(queue->pf_cache.va); - __page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias); + page_frag_cache_drain(&queue->pf_cache); kfree(queue); } diff --git a/drivers/of/property.c b/drivers/of/property.c index b71267c6667c..fa8cd33be131 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -1304,7 +1304,7 @@ static struct device_node *parse_remote_endpoint(struct device_node *np, int index) { /* Return NULL for index > 0 to signify end of remote-endpoints. */ - if (!index || strcmp(prop_name, "remote-endpoint")) + if (index > 0 || strcmp(prop_name, "remote-endpoint")) return NULL; return of_graph_get_remote_port_parent(np); diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c index c8be056c248d..cfd84a899c82 100644 --- a/drivers/pci/msi/irqdomain.c +++ b/drivers/pci/msi/irqdomain.c @@ -61,7 +61,7 @@ static irq_hw_number_t pci_msi_domain_calc_hwirq(struct msi_desc *desc) return (irq_hw_number_t)desc->msi_index | pci_dev_id(dev) << 11 | - (pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 27; + ((irq_hw_number_t)(pci_domain_nr(dev->bus) & 0xFFFFFFFF)) << 27; } static void pci_msi_domain_set_desc(msi_alloc_info_t *arg, diff --git a/drivers/perf/cxl_pmu.c b/drivers/perf/cxl_pmu.c index bc0d414a6aff..308c9969642e 100644 --- a/drivers/perf/cxl_pmu.c +++ b/drivers/perf/cxl_pmu.c @@ -59,7 +59,7 @@ #define CXL_PMU_COUNTER_CFG_EVENT_GRP_ID_IDX_MSK GENMASK_ULL(63, 59) #define CXL_PMU_FILTER_CFG_REG(n, f) (0x400 + 4 * ((f) + (n) * 8)) -#define CXL_PMU_FILTER_CFG_VALUE_MSK GENMASK(15, 0) +#define CXL_PMU_FILTER_CFG_VALUE_MSK GENMASK(31, 0) #define CXL_PMU_COUNTER_REG(n) (0xc00 + 8 * (n)) @@ -314,9 +314,9 @@ static bool cxl_pmu_config1_get_edge(struct perf_event *event) } /* - * CPMU specification allows for 8 filters, each with a 16 bit value... - * So we need to find 8x16bits to store it in. - * As the value used for disable is 0xffff, a separate enable switch + * CPMU specification allows for 8 filters, each with a 32 bit value... + * So we need to find 8x32bits to store it in. + * As the value used for disable is 0xffff_ffff, a separate enable switch * is needed. */ @@ -642,7 +642,7 @@ static void cxl_pmu_event_start(struct perf_event *event, int flags) if (cxl_pmu_config1_hdm_filter_en(event)) cfg = cxl_pmu_config2_get_hdm_decoder(event); else - cfg = GENMASK(15, 0); /* No filtering if 0xFFFF_FFFF */ + cfg = GENMASK(31, 0); /* No filtering if 0xFFFF_FFFF */ writeq(cfg, base + CXL_PMU_FILTER_CFG_REG(hwc->idx, 0)); } diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c index 0dda70e1ef90..c78a6fd6c57f 100644 --- a/drivers/perf/riscv_pmu.c +++ b/drivers/perf/riscv_pmu.c @@ -150,19 +150,11 @@ u64 riscv_pmu_ctr_get_width_mask(struct perf_event *event) struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; - if (!rvpmu->ctr_get_width) - /** - * If the pmu driver doesn't support counter width, set it to default - * maximum allowed by the specification. - */ - cwidth = 63; - else { - if (hwc->idx == -1) - /* Handle init case where idx is not initialized yet */ - cwidth = rvpmu->ctr_get_width(0); - else - cwidth = rvpmu->ctr_get_width(hwc->idx); - } + if (hwc->idx == -1) + /* Handle init case where idx is not initialized yet */ + cwidth = rvpmu->ctr_get_width(0); + else + cwidth = rvpmu->ctr_get_width(hwc->idx); return GENMASK_ULL(cwidth, 0); } diff --git a/drivers/perf/riscv_pmu_legacy.c b/drivers/perf/riscv_pmu_legacy.c index 79fdd667922e..fa0bccf4edf2 100644 --- a/drivers/perf/riscv_pmu_legacy.c +++ b/drivers/perf/riscv_pmu_legacy.c @@ -37,6 +37,12 @@ static int pmu_legacy_event_map(struct perf_event *event, u64 *config) return pmu_legacy_ctr_get_idx(event); } +/* cycle & instret are always 64 bit, one bit less according to SBI spec */ +static int pmu_legacy_ctr_get_width(int idx) +{ + return 63; +} + static u64 pmu_legacy_read_ctr(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -111,12 +117,14 @@ static void pmu_legacy_init(struct riscv_pmu *pmu) pmu->ctr_stop = NULL; pmu->event_map = pmu_legacy_event_map; pmu->ctr_get_idx = pmu_legacy_ctr_get_idx; - pmu->ctr_get_width = NULL; + pmu->ctr_get_width = pmu_legacy_ctr_get_width; pmu->ctr_clear_idx = NULL; pmu->ctr_read = pmu_legacy_read_ctr; pmu->event_mapped = pmu_legacy_event_mapped; pmu->event_unmapped = pmu_legacy_event_unmapped; pmu->csr_index = pmu_legacy_csr_index; + pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE; perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW); } diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 16acd4dcdb96..452aab49db1e 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -512,7 +512,7 @@ static void pmu_sbi_set_scounteren(void *arg) if (event->hw.idx != -1) csr_write(CSR_SCOUNTEREN, - csr_read(CSR_SCOUNTEREN) | (1 << pmu_sbi_csr_index(event))); + csr_read(CSR_SCOUNTEREN) | BIT(pmu_sbi_csr_index(event))); } static void pmu_sbi_reset_scounteren(void *arg) @@ -521,7 +521,7 @@ static void pmu_sbi_reset_scounteren(void *arg) if (event->hw.idx != -1) csr_write(CSR_SCOUNTEREN, - csr_read(CSR_SCOUNTEREN) & ~(1 << pmu_sbi_csr_index(event))); + csr_read(CSR_SCOUNTEREN) & ~BIT(pmu_sbi_csr_index(event))); } static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival) @@ -731,14 +731,14 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev) /* compute hardware counter index */ hidx = info->csr - CSR_CYCLE; /* check if the corresponding bit is set in sscountovf */ - if (!(overflow & (1 << hidx))) + if (!(overflow & BIT(hidx))) continue; /* * Keep a track of overflowed counters so that they can be started * with updated initial value. */ - overflowed_ctrs |= 1 << lidx; + overflowed_ctrs |= BIT(lidx); hw_evt = &event->hw; riscv_pmu_event_update(event); perf_sample_data_init(&data, 0, hw_evt->last_period); diff --git a/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c b/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c index e625b32889bf..0928a526e2ab 100644 --- a/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c +++ b/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c @@ -706,7 +706,7 @@ static int mixel_dphy_probe(struct platform_device *pdev) return ret; } - priv->id = of_alias_get_id(np, "mipi_dphy"); + priv->id = of_alias_get_id(np, "mipi-dphy"); if (priv->id < 0) { dev_err(dev, "Failed to get phy node alias id: %d\n", priv->id); diff --git a/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c b/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c index a623f092b11f..a43e20abb10d 100644 --- a/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c +++ b/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c @@ -37,56 +37,28 @@ #define EUSB2_TUNE_EUSB_EQU 0x5A #define EUSB2_TUNE_EUSB_HS_COMP_CUR 0x5B -#define QCOM_EUSB2_REPEATER_INIT_CFG(r, v) \ - { \ - .reg = r, \ - .val = v, \ - } - -enum reg_fields { - F_TUNE_EUSB_HS_COMP_CUR, - F_TUNE_EUSB_EQU, - F_TUNE_EUSB_SLEW, - F_TUNE_USB2_HS_COMP_CUR, - F_TUNE_USB2_PREEM, - F_TUNE_USB2_EQU, - F_TUNE_USB2_SLEW, - F_TUNE_SQUELCH_U, - F_TUNE_HSDISC, - F_TUNE_RES_FSDIF, - F_TUNE_IUSB2, - F_TUNE_USB2_CROSSOVER, - F_NUM_TUNE_FIELDS, - - F_FORCE_VAL_5 = F_NUM_TUNE_FIELDS, - F_FORCE_EN_5, - - F_EN_CTL1, - - F_RPTR_STATUS, - F_NUM_FIELDS, -}; - -static struct reg_field eusb2_repeater_tune_reg_fields[F_NUM_FIELDS] = { - [F_TUNE_EUSB_HS_COMP_CUR] = REG_FIELD(EUSB2_TUNE_EUSB_HS_COMP_CUR, 0, 1), - [F_TUNE_EUSB_EQU] = REG_FIELD(EUSB2_TUNE_EUSB_EQU, 0, 1), - [F_TUNE_EUSB_SLEW] = REG_FIELD(EUSB2_TUNE_EUSB_SLEW, 0, 1), - [F_TUNE_USB2_HS_COMP_CUR] = REG_FIELD(EUSB2_TUNE_USB2_HS_COMP_CUR, 0, 1), - [F_TUNE_USB2_PREEM] = REG_FIELD(EUSB2_TUNE_USB2_PREEM, 0, 2), - [F_TUNE_USB2_EQU] = REG_FIELD(EUSB2_TUNE_USB2_EQU, 0, 1), - [F_TUNE_USB2_SLEW] = REG_FIELD(EUSB2_TUNE_USB2_SLEW, 0, 1), - [F_TUNE_SQUELCH_U] = REG_FIELD(EUSB2_TUNE_SQUELCH_U, 0, 2), - [F_TUNE_HSDISC] = REG_FIELD(EUSB2_TUNE_HSDISC, 0, 2), - [F_TUNE_RES_FSDIF] = REG_FIELD(EUSB2_TUNE_RES_FSDIF, 0, 2), - [F_TUNE_IUSB2] = REG_FIELD(EUSB2_TUNE_IUSB2, 0, 3), - [F_TUNE_USB2_CROSSOVER] = REG_FIELD(EUSB2_TUNE_USB2_CROSSOVER, 0, 2), - - [F_FORCE_VAL_5] = REG_FIELD(EUSB2_FORCE_VAL_5, 0, 7), - [F_FORCE_EN_5] = REG_FIELD(EUSB2_FORCE_EN_5, 0, 7), - - [F_EN_CTL1] = REG_FIELD(EUSB2_EN_CTL1, 0, 7), - - [F_RPTR_STATUS] = REG_FIELD(EUSB2_RPTR_STATUS, 0, 7), +enum eusb2_reg_layout { + TUNE_EUSB_HS_COMP_CUR, + TUNE_EUSB_EQU, + TUNE_EUSB_SLEW, + TUNE_USB2_HS_COMP_CUR, + TUNE_USB2_PREEM, + TUNE_USB2_EQU, + TUNE_USB2_SLEW, + TUNE_SQUELCH_U, + TUNE_HSDISC, + TUNE_RES_FSDIF, + TUNE_IUSB2, + TUNE_USB2_CROSSOVER, + NUM_TUNE_FIELDS, + + FORCE_VAL_5 = NUM_TUNE_FIELDS, + FORCE_EN_5, + + EN_CTL1, + + RPTR_STATUS, + LAYOUT_SIZE, }; struct eusb2_repeater_cfg { @@ -98,10 +70,11 @@ struct eusb2_repeater_cfg { struct eusb2_repeater { struct device *dev; - struct regmap_field *regs[F_NUM_FIELDS]; + struct regmap *regmap; struct phy *phy; struct regulator_bulk_data *vregs; const struct eusb2_repeater_cfg *cfg; + u32 base; enum phy_mode mode; }; @@ -109,10 +82,10 @@ static const char * const pm8550b_vreg_l[] = { "vdd18", "vdd3", }; -static const u32 pm8550b_init_tbl[F_NUM_TUNE_FIELDS] = { - [F_TUNE_IUSB2] = 0x8, - [F_TUNE_SQUELCH_U] = 0x3, - [F_TUNE_USB2_PREEM] = 0x5, +static const u32 pm8550b_init_tbl[NUM_TUNE_FIELDS] = { + [TUNE_IUSB2] = 0x8, + [TUNE_SQUELCH_U] = 0x3, + [TUNE_USB2_PREEM] = 0x5, }; static const struct eusb2_repeater_cfg pm8550b_eusb2_cfg = { @@ -140,47 +113,42 @@ static int eusb2_repeater_init_vregs(struct eusb2_repeater *rptr) static int eusb2_repeater_init(struct phy *phy) { - struct reg_field *regfields = eusb2_repeater_tune_reg_fields; struct eusb2_repeater *rptr = phy_get_drvdata(phy); struct device_node *np = rptr->dev->of_node; - u32 init_tbl[F_NUM_TUNE_FIELDS] = { 0 }; - u8 override; + struct regmap *regmap = rptr->regmap; + const u32 *init_tbl = rptr->cfg->init_tbl; + u8 tune_usb2_preem = init_tbl[TUNE_USB2_PREEM]; + u8 tune_hsdisc = init_tbl[TUNE_HSDISC]; + u8 tune_iusb2 = init_tbl[TUNE_IUSB2]; + u32 base = rptr->base; u32 val; int ret; - int i; + + of_property_read_u8(np, "qcom,tune-usb2-amplitude", &tune_iusb2); + of_property_read_u8(np, "qcom,tune-usb2-disc-thres", &tune_hsdisc); + of_property_read_u8(np, "qcom,tune-usb2-preem", &tune_usb2_preem); ret = regulator_bulk_enable(rptr->cfg->num_vregs, rptr->vregs); if (ret) return ret; - regmap_field_update_bits(rptr->regs[F_EN_CTL1], EUSB2_RPTR_EN, EUSB2_RPTR_EN); + regmap_write(regmap, base + EUSB2_EN_CTL1, EUSB2_RPTR_EN); - for (i = 0; i < F_NUM_TUNE_FIELDS; i++) { - if (init_tbl[i]) { - regmap_field_update_bits(rptr->regs[i], init_tbl[i], init_tbl[i]); - } else { - /* Write 0 if there's no value set */ - u32 mask = GENMASK(regfields[i].msb, regfields[i].lsb); - - regmap_field_update_bits(rptr->regs[i], mask, 0); - } - } - memcpy(init_tbl, rptr->cfg->init_tbl, sizeof(init_tbl)); + regmap_write(regmap, base + EUSB2_TUNE_EUSB_HS_COMP_CUR, init_tbl[TUNE_EUSB_HS_COMP_CUR]); + regmap_write(regmap, base + EUSB2_TUNE_EUSB_EQU, init_tbl[TUNE_EUSB_EQU]); + regmap_write(regmap, base + EUSB2_TUNE_EUSB_SLEW, init_tbl[TUNE_EUSB_SLEW]); + regmap_write(regmap, base + EUSB2_TUNE_USB2_HS_COMP_CUR, init_tbl[TUNE_USB2_HS_COMP_CUR]); + regmap_write(regmap, base + EUSB2_TUNE_USB2_EQU, init_tbl[TUNE_USB2_EQU]); + regmap_write(regmap, base + EUSB2_TUNE_USB2_SLEW, init_tbl[TUNE_USB2_SLEW]); + regmap_write(regmap, base + EUSB2_TUNE_SQUELCH_U, init_tbl[TUNE_SQUELCH_U]); + regmap_write(regmap, base + EUSB2_TUNE_RES_FSDIF, init_tbl[TUNE_RES_FSDIF]); + regmap_write(regmap, base + EUSB2_TUNE_USB2_CROSSOVER, init_tbl[TUNE_USB2_CROSSOVER]); - if (!of_property_read_u8(np, "qcom,tune-usb2-amplitude", &override)) - init_tbl[F_TUNE_IUSB2] = override; + regmap_write(regmap, base + EUSB2_TUNE_USB2_PREEM, tune_usb2_preem); + regmap_write(regmap, base + EUSB2_TUNE_HSDISC, tune_hsdisc); + regmap_write(regmap, base + EUSB2_TUNE_IUSB2, tune_iusb2); - if (!of_property_read_u8(np, "qcom,tune-usb2-disc-thres", &override)) - init_tbl[F_TUNE_HSDISC] = override; - - if (!of_property_read_u8(np, "qcom,tune-usb2-preem", &override)) - init_tbl[F_TUNE_USB2_PREEM] = override; - - for (i = 0; i < F_NUM_TUNE_FIELDS; i++) - regmap_field_update_bits(rptr->regs[i], init_tbl[i], init_tbl[i]); - - ret = regmap_field_read_poll_timeout(rptr->regs[F_RPTR_STATUS], - val, val & RPTR_OK, 10, 5); + ret = regmap_read_poll_timeout(regmap, base + EUSB2_RPTR_STATUS, val, val & RPTR_OK, 10, 5); if (ret) dev_err(rptr->dev, "initialization timed-out\n"); @@ -191,6 +159,8 @@ static int eusb2_repeater_set_mode(struct phy *phy, enum phy_mode mode, int submode) { struct eusb2_repeater *rptr = phy_get_drvdata(phy); + struct regmap *regmap = rptr->regmap; + u32 base = rptr->base; switch (mode) { case PHY_MODE_USB_HOST: @@ -199,10 +169,8 @@ static int eusb2_repeater_set_mode(struct phy *phy, * per eUSB 1.2 Spec. Below implement software workaround until * PHY and controller is fixing seen observation. */ - regmap_field_update_bits(rptr->regs[F_FORCE_EN_5], - F_CLK_19P2M_EN, F_CLK_19P2M_EN); - regmap_field_update_bits(rptr->regs[F_FORCE_VAL_5], - V_CLK_19P2M_EN, V_CLK_19P2M_EN); + regmap_write(regmap, base + EUSB2_FORCE_EN_5, F_CLK_19P2M_EN); + regmap_write(regmap, base + EUSB2_FORCE_VAL_5, V_CLK_19P2M_EN); break; case PHY_MODE_USB_DEVICE: /* @@ -211,10 +179,8 @@ static int eusb2_repeater_set_mode(struct phy *phy, * repeater doesn't clear previous value due to shared * regulators (say host <-> device mode switch). */ - regmap_field_update_bits(rptr->regs[F_FORCE_EN_5], - F_CLK_19P2M_EN, 0); - regmap_field_update_bits(rptr->regs[F_FORCE_VAL_5], - V_CLK_19P2M_EN, 0); + regmap_write(regmap, base + EUSB2_FORCE_EN_5, 0); + regmap_write(regmap, base + EUSB2_FORCE_VAL_5, 0); break; default: return -EINVAL; @@ -243,9 +209,8 @@ static int eusb2_repeater_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct phy_provider *phy_provider; struct device_node *np = dev->of_node; - struct regmap *regmap; - int i, ret; u32 res; + int ret; rptr = devm_kzalloc(dev, sizeof(*rptr), GFP_KERNEL); if (!rptr) @@ -258,22 +223,15 @@ static int eusb2_repeater_probe(struct platform_device *pdev) if (!rptr->cfg) return -EINVAL; - regmap = dev_get_regmap(dev->parent, NULL); - if (!regmap) + rptr->regmap = dev_get_regmap(dev->parent, NULL); + if (!rptr->regmap) return -ENODEV; ret = of_property_read_u32(np, "reg", &res); if (ret < 0) return ret; - for (i = 0; i < F_NUM_FIELDS; i++) - eusb2_repeater_tune_reg_fields[i].reg += res; - - ret = devm_regmap_field_bulk_alloc(dev, regmap, rptr->regs, - eusb2_repeater_tune_reg_fields, - F_NUM_FIELDS); - if (ret) - return ret; + rptr->base = res; ret = eusb2_repeater_init_vregs(rptr); if (ret < 0) { diff --git a/drivers/phy/qualcomm/phy-qcom-m31.c b/drivers/phy/qualcomm/phy-qcom-m31.c index c2590579190a..03fb0d4b75d7 100644 --- a/drivers/phy/qualcomm/phy-qcom-m31.c +++ b/drivers/phy/qualcomm/phy-qcom-m31.c @@ -299,7 +299,7 @@ static int m31usb_phy_probe(struct platform_device *pdev) qphy->vreg = devm_regulator_get(dev, "vdda-phy"); if (IS_ERR(qphy->vreg)) - return dev_err_probe(dev, PTR_ERR(qphy->phy), + return dev_err_probe(dev, PTR_ERR(qphy->vreg), "failed to get vreg\n"); phy_set_drvdata(qphy->phy, qphy); diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 6621246e4ddf..5c003988c35d 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1556,7 +1556,7 @@ static const char * const qmp_phy_vreg_l[] = { "vdda-phy", "vdda-pll", }; -static const struct qmp_usb_offsets qmp_usb_offsets_ipq8074 = { +static const struct qmp_usb_offsets qmp_usb_offsets_v3 = { .serdes = 0, .pcs = 0x800, .pcs_misc = 0x600, @@ -1572,7 +1572,7 @@ static const struct qmp_usb_offsets qmp_usb_offsets_ipq9574 = { .rx = 0x400, }; -static const struct qmp_usb_offsets qmp_usb_offsets_v3 = { +static const struct qmp_usb_offsets qmp_usb_offsets_v3_msm8996 = { .serdes = 0, .pcs = 0x600, .tx = 0x200, @@ -1624,7 +1624,7 @@ static const struct qmp_usb_offsets qmp_usb_offsets_v7 = { static const struct qmp_phy_cfg ipq6018_usb3phy_cfg = { .lanes = 1, - .offsets = &qmp_usb_offsets_ipq8074, + .offsets = &qmp_usb_offsets_v3, .serdes_tbl = ipq9574_usb3_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(ipq9574_usb3_serdes_tbl), @@ -1642,7 +1642,7 @@ static const struct qmp_phy_cfg ipq6018_usb3phy_cfg = { static const struct qmp_phy_cfg ipq8074_usb3phy_cfg = { .lanes = 1, - .offsets = &qmp_usb_offsets_ipq8074, + .offsets = &qmp_usb_offsets_v3, .serdes_tbl = ipq8074_usb3_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(ipq8074_usb3_serdes_tbl), @@ -1678,7 +1678,7 @@ static const struct qmp_phy_cfg ipq9574_usb3phy_cfg = { static const struct qmp_phy_cfg msm8996_usb3phy_cfg = { .lanes = 1, - .offsets = &qmp_usb_offsets_v3, + .offsets = &qmp_usb_offsets_v3_msm8996, .serdes_tbl = msm8996_usb3_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(msm8996_usb3_serdes_tbl), diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c index feaa09f5b35a..4f734e049f4a 100644 --- a/drivers/platform/x86/amd/pmf/core.c +++ b/drivers/platform/x86/amd/pmf/core.c @@ -296,7 +296,8 @@ static int amd_pmf_suspend_handler(struct device *dev) { struct amd_pmf_dev *pdev = dev_get_drvdata(dev); - kfree(pdev->buf); + if (pdev->smart_pc_enabled) + cancel_delayed_work_sync(&pdev->pb_work); return 0; } @@ -312,6 +313,9 @@ static int amd_pmf_resume_handler(struct device *dev) return ret; } + if (pdev->smart_pc_enabled) + schedule_delayed_work(&pdev->pb_work, msecs_to_jiffies(2000)); + return 0; } @@ -330,9 +334,14 @@ static void amd_pmf_init_features(struct amd_pmf_dev *dev) dev_dbg(dev->dev, "SPS enabled and Platform Profiles registered\n"); } - if (!amd_pmf_init_smart_pc(dev)) { + amd_pmf_init_smart_pc(dev); + if (dev->smart_pc_enabled) { dev_dbg(dev->dev, "Smart PC Solution Enabled\n"); - } else if (is_apmf_func_supported(dev, APMF_FUNC_AUTO_MODE)) { + /* If Smart PC is enabled, no need to check for other features */ + return; + } + + if (is_apmf_func_supported(dev, APMF_FUNC_AUTO_MODE)) { amd_pmf_init_auto_mode(dev); dev_dbg(dev->dev, "Auto Mode Init done\n"); } else if (is_apmf_func_supported(dev, APMF_FUNC_DYN_SLIDER_AC) || @@ -351,7 +360,7 @@ static void amd_pmf_deinit_features(struct amd_pmf_dev *dev) amd_pmf_deinit_sps(dev); } - if (!dev->smart_pc_enabled) { + if (dev->smart_pc_enabled) { amd_pmf_deinit_smart_pc(dev); } else if (is_apmf_func_supported(dev, APMF_FUNC_AUTO_MODE)) { amd_pmf_deinit_auto_mode(dev); diff --git a/drivers/platform/x86/amd/pmf/pmf.h b/drivers/platform/x86/amd/pmf/pmf.h index 16999c5b334f..66cae1cca73c 100644 --- a/drivers/platform/x86/amd/pmf/pmf.h +++ b/drivers/platform/x86/amd/pmf/pmf.h @@ -441,11 +441,6 @@ struct apmf_dyn_slider_output { struct apmf_cnqf_power_set ps[APMF_CNQF_MAX]; } __packed; -enum smart_pc_status { - PMF_SMART_PC_ENABLED, - PMF_SMART_PC_DISABLED, -}; - /* Smart PC - TA internals */ enum system_state { SYSTEM_STATE_S0i3, diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c index f8c0177afb0d..dcbe8f85e122 100644 --- a/drivers/platform/x86/amd/pmf/tee-if.c +++ b/drivers/platform/x86/amd/pmf/tee-if.c @@ -252,15 +252,17 @@ static int amd_pmf_start_policy_engine(struct amd_pmf_dev *dev) cookie = readl(dev->policy_buf + POLICY_COOKIE_OFFSET); length = readl(dev->policy_buf + POLICY_COOKIE_LEN); - if (cookie != POLICY_SIGN_COOKIE || !length) + if (cookie != POLICY_SIGN_COOKIE || !length) { + dev_dbg(dev->dev, "cookie doesn't match\n"); return -EINVAL; + } /* Update the actual length */ dev->policy_sz = length + 512; res = amd_pmf_invoke_cmd_init(dev); if (res == TA_PMF_TYPE_SUCCESS) { /* Now its safe to announce that smart pc is enabled */ - dev->smart_pc_enabled = PMF_SMART_PC_ENABLED; + dev->smart_pc_enabled = true; /* * Start collecting the data from TA FW after a small delay * or else, we might end up getting stale values. @@ -268,7 +270,7 @@ static int amd_pmf_start_policy_engine(struct amd_pmf_dev *dev) schedule_delayed_work(&dev->pb_work, msecs_to_jiffies(pb_actions_ms * 3)); } else { dev_err(dev->dev, "ta invoke cmd init failed err: %x\n", res); - dev->smart_pc_enabled = PMF_SMART_PC_DISABLED; + dev->smart_pc_enabled = false; return res; } @@ -336,25 +338,6 @@ static void amd_pmf_remove_pb(struct amd_pmf_dev *dev) {} static void amd_pmf_hex_dump_pb(struct amd_pmf_dev *dev) {} #endif -static int amd_pmf_get_bios_buffer(struct amd_pmf_dev *dev) -{ - dev->policy_buf = kzalloc(dev->policy_sz, GFP_KERNEL); - if (!dev->policy_buf) - return -ENOMEM; - - dev->policy_base = devm_ioremap(dev->dev, dev->policy_addr, dev->policy_sz); - if (!dev->policy_base) - return -ENOMEM; - - memcpy(dev->policy_buf, dev->policy_base, dev->policy_sz); - - amd_pmf_hex_dump_pb(dev); - if (pb_side_load) - amd_pmf_open_pb(dev, dev->dbgfs_dir); - - return amd_pmf_start_policy_engine(dev); -} - static int amd_pmf_amdtee_ta_match(struct tee_ioctl_version_data *ver, const void *data) { return ver->impl_id == TEE_IMPL_ID_AMDTEE; @@ -453,22 +436,59 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) return ret; INIT_DELAYED_WORK(&dev->pb_work, amd_pmf_invoke_cmd); - amd_pmf_set_dram_addr(dev, true); - amd_pmf_get_bios_buffer(dev); + + ret = amd_pmf_set_dram_addr(dev, true); + if (ret) + goto error; + + dev->policy_base = devm_ioremap(dev->dev, dev->policy_addr, dev->policy_sz); + if (!dev->policy_base) { + ret = -ENOMEM; + goto error; + } + + dev->policy_buf = kzalloc(dev->policy_sz, GFP_KERNEL); + if (!dev->policy_buf) { + ret = -ENOMEM; + goto error; + } + + memcpy(dev->policy_buf, dev->policy_base, dev->policy_sz); + + amd_pmf_hex_dump_pb(dev); + dev->prev_data = kzalloc(sizeof(*dev->prev_data), GFP_KERNEL); - if (!dev->prev_data) - return -ENOMEM; + if (!dev->prev_data) { + ret = -ENOMEM; + goto error; + } + + ret = amd_pmf_start_policy_engine(dev); + if (ret) + goto error; + + if (pb_side_load) + amd_pmf_open_pb(dev, dev->dbgfs_dir); + + return 0; - return dev->smart_pc_enabled; +error: + amd_pmf_deinit_smart_pc(dev); + + return ret; } void amd_pmf_deinit_smart_pc(struct amd_pmf_dev *dev) { - if (pb_side_load) + if (pb_side_load && dev->esbin) amd_pmf_remove_pb(dev); + cancel_delayed_work_sync(&dev->pb_work); kfree(dev->prev_data); + dev->prev_data = NULL; kfree(dev->policy_buf); - cancel_delayed_work_sync(&dev->pb_work); + dev->policy_buf = NULL; + kfree(dev->buf); + dev->buf = NULL; amd_pmf_tee_deinit(dev); } diff --git a/drivers/platform/x86/intel/int0002_vgpio.c b/drivers/platform/x86/intel/int0002_vgpio.c index b6708bab7c53..527d8fbc7cc1 100644 --- a/drivers/platform/x86/intel/int0002_vgpio.c +++ b/drivers/platform/x86/intel/int0002_vgpio.c @@ -196,7 +196,7 @@ static int int0002_probe(struct platform_device *pdev) * IRQs into gpiolib. */ ret = devm_request_irq(dev, irq, int0002_irq, - IRQF_SHARED, "INT0002", chip); + IRQF_ONESHOT | IRQF_SHARED, "INT0002", chip); if (ret) { dev_err(dev, "Error requesting IRQ %d: %d\n", irq, ret); return ret; diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c index 210b0a81b7ec..084c355c86f5 100644 --- a/drivers/platform/x86/intel/vbtn.c +++ b/drivers/platform/x86/intel/vbtn.c @@ -200,9 +200,6 @@ static void notify_handler(acpi_handle handle, u32 event, void *context) autorelease = val && (!ke_rel || ke_rel->type == KE_IGNORE); sparse_keymap_report_event(input_dev, event, val, autorelease); - - /* Some devices need this to report further events */ - acpi_evaluate_object(handle, "VBDL", NULL, NULL); } /* diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c index 6bd14d0132db..3d66e1d4eb1f 100644 --- a/drivers/platform/x86/p2sb.c +++ b/drivers/platform/x86/p2sb.c @@ -20,9 +20,11 @@ #define P2SBC_HIDE BIT(8) #define P2SB_DEVFN_DEFAULT PCI_DEVFN(31, 1) +#define P2SB_DEVFN_GOLDMONT PCI_DEVFN(13, 0) +#define SPI_DEVFN_GOLDMONT PCI_DEVFN(13, 2) static const struct x86_cpu_id p2sb_cpu_ids[] = { - X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, PCI_DEVFN(13, 0)), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, P2SB_DEVFN_GOLDMONT), {} }; @@ -98,21 +100,12 @@ static void p2sb_scan_and_cache_devfn(struct pci_bus *bus, unsigned int devfn) static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn) { - unsigned int slot, fn; - - if (PCI_FUNC(devfn) == 0) { - /* - * When function number of the P2SB device is zero, scan it and - * other function numbers, and if devices are available, cache - * their BAR0s. - */ - slot = PCI_SLOT(devfn); - for (fn = 0; fn < NR_P2SB_RES_CACHE; fn++) - p2sb_scan_and_cache_devfn(bus, PCI_DEVFN(slot, fn)); - } else { - /* Scan the P2SB device and cache its BAR0 */ - p2sb_scan_and_cache_devfn(bus, devfn); - } + /* Scan the P2SB device and cache its BAR0 */ + p2sb_scan_and_cache_devfn(bus, devfn); + + /* On Goldmont p2sb_bar() also gets called for the SPI controller */ + if (devfn == P2SB_DEVFN_GOLDMONT) + p2sb_scan_and_cache_devfn(bus, SPI_DEVFN_GOLDMONT); if (!p2sb_valid_resource(&p2sb_resources[PCI_FUNC(devfn)].res)) return -ENOENT; diff --git a/drivers/platform/x86/serdev_helpers.h b/drivers/platform/x86/serdev_helpers.h new file mode 100644 index 000000000000..bcf3a0c356ea --- /dev/null +++ b/drivers/platform/x86/serdev_helpers.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * In some cases UART attached devices which require an in kernel driver, + * e.g. UART attached Bluetooth HCIs are described in the ACPI tables + * by an ACPI device with a broken or missing UartSerialBusV2() resource. + * + * This causes the kernel to create a /dev/ttyS# char-device for the UART + * instead of creating an in kernel serdev-controller + serdev-device pair + * for the in kernel driver. + * + * The quirk handling in acpi_quirk_skip_serdev_enumeration() makes the kernel + * create a serdev-controller device for these UARTs instead of a /dev/ttyS#. + * + * Instantiating the actual serdev-device to bind to is up to pdx86 code, + * this header provides a helper for getting the serdev-controller device. + */ +#include <linux/acpi.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/printk.h> +#include <linux/sprintf.h> +#include <linux/string.h> + +static inline struct device * +get_serdev_controller(const char *serial_ctrl_hid, + const char *serial_ctrl_uid, + int serial_ctrl_port, + const char *serdev_ctrl_name) +{ + struct device *ctrl_dev, *child; + struct acpi_device *ctrl_adev; + char name[32]; + int i; + + ctrl_adev = acpi_dev_get_first_match_dev(serial_ctrl_hid, serial_ctrl_uid, -1); + if (!ctrl_adev) { + pr_err("error could not get %s/%s serial-ctrl adev\n", + serial_ctrl_hid, serial_ctrl_uid); + return ERR_PTR(-ENODEV); + } + + /* get_first_physical_node() returns a weak ref */ + ctrl_dev = get_device(acpi_get_first_physical_node(ctrl_adev)); + if (!ctrl_dev) { + pr_err("error could not get %s/%s serial-ctrl physical node\n", + serial_ctrl_hid, serial_ctrl_uid); + ctrl_dev = ERR_PTR(-ENODEV); + goto put_ctrl_adev; + } + + /* Walk host -> uart-ctrl -> port -> serdev-ctrl */ + for (i = 0; i < 3; i++) { + switch (i) { + case 0: + snprintf(name, sizeof(name), "%s:0", dev_name(ctrl_dev)); + break; + case 1: + snprintf(name, sizeof(name), "%s.%d", + dev_name(ctrl_dev), serial_ctrl_port); + break; + case 2: + strscpy(name, serdev_ctrl_name, sizeof(name)); + break; + } + + child = device_find_child_by_name(ctrl_dev, name); + put_device(ctrl_dev); + if (!child) { + pr_err("error could not find '%s' device\n", name); + ctrl_dev = ERR_PTR(-ENODEV); + goto put_ctrl_adev; + } + + ctrl_dev = child; + } + +put_ctrl_adev: + acpi_dev_put(ctrl_adev); + return ctrl_dev; +} diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index 3a396b763c49..ce3e08815a8e 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -1009,7 +1009,16 @@ static ssize_t current_value_store(struct kobject *kobj, * Note - this sets the variable and then the password as separate * WMI calls. Function tlmi_save_bios_settings will error if the * password is incorrect. + * Workstation's require the opcode to be set before changing the + * attribute. */ + if (tlmi_priv.pwd_admin->valid && tlmi_priv.pwd_admin->password[0]) { + ret = tlmi_opcode_setting("WmiOpcodePasswordAdmin", + tlmi_priv.pwd_admin->password); + if (ret) + goto out; + } + set_str = kasprintf(GFP_KERNEL, "%s,%s;", setting->display_name, new_setting); if (!set_str) { @@ -1021,17 +1030,10 @@ static ssize_t current_value_store(struct kobject *kobj, if (ret) goto out; - if (tlmi_priv.save_mode == TLMI_SAVE_BULK) { + if (tlmi_priv.save_mode == TLMI_SAVE_BULK) tlmi_priv.save_required = true; - } else { - if (tlmi_priv.pwd_admin->valid && tlmi_priv.pwd_admin->password[0]) { - ret = tlmi_opcode_setting("WmiOpcodePasswordAdmin", - tlmi_priv.pwd_admin->password); - if (ret) - goto out; - } + else ret = tlmi_save_bios_settings(""); - } } else { /* old non-opcode based authentication method (deprecated) */ if (tlmi_priv.pwd_admin->valid && tlmi_priv.pwd_admin->password[0]) { auth_str = kasprintf(GFP_KERNEL, "%s,%s,%s;", diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index c4895e9bc714..5ecd9d33250d 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -10308,6 +10308,7 @@ static int convert_dytc_to_profile(int funcmode, int dytcmode, return 0; default: /* Unknown function */ + pr_debug("unknown function 0x%x\n", funcmode); return -EOPNOTSUPP; } return 0; @@ -10493,8 +10494,8 @@ static void dytc_profile_refresh(void) return; perfmode = (output >> DYTC_GET_MODE_BIT) & 0xF; - convert_dytc_to_profile(funcmode, perfmode, &profile); - if (profile != dytc_current_profile) { + err = convert_dytc_to_profile(funcmode, perfmode, &profile); + if (!err && profile != dytc_current_profile) { dytc_current_profile = profile; platform_profile_notify(); } diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 7aee5e9ff2b8..975cf24ae359 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -81,7 +81,7 @@ static const struct property_entry chuwi_hi8_air_props[] = { }; static const struct ts_dmi_data chuwi_hi8_air_data = { - .acpi_name = "MSSL1680:00", + .acpi_name = "MSSL1680", .properties = chuwi_hi8_air_props, }; @@ -415,18 +415,13 @@ static const struct property_entry gdix1001_upside_down_props[] = { { } }; -static const struct ts_dmi_data gdix1001_00_upside_down_data = { - .acpi_name = "GDIX1001:00", - .properties = gdix1001_upside_down_props, -}; - -static const struct ts_dmi_data gdix1001_01_upside_down_data = { - .acpi_name = "GDIX1001:01", +static const struct ts_dmi_data gdix1001_upside_down_data = { + .acpi_name = "GDIX1001", .properties = gdix1001_upside_down_props, }; -static const struct ts_dmi_data gdix1002_00_upside_down_data = { - .acpi_name = "GDIX1002:00", +static const struct ts_dmi_data gdix1002_upside_down_data = { + .acpi_name = "GDIX1002", .properties = gdix1001_upside_down_props, }; @@ -1412,7 +1407,7 @@ const struct dmi_system_id touchscreen_dmi_table[] = { }, { /* Juno Tablet */ - .driver_data = (void *)&gdix1002_00_upside_down_data, + .driver_data = (void *)&gdix1002_upside_down_data, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Default string"), /* Both product- and board-name being "Default string" is somewhat rare */ @@ -1658,7 +1653,7 @@ const struct dmi_system_id touchscreen_dmi_table[] = { }, { /* Teclast X89 (Android version / BIOS) */ - .driver_data = (void *)&gdix1001_00_upside_down_data, + .driver_data = (void *)&gdix1001_upside_down_data, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "WISKY"), DMI_MATCH(DMI_BOARD_NAME, "3G062i"), @@ -1666,7 +1661,7 @@ const struct dmi_system_id touchscreen_dmi_table[] = { }, { /* Teclast X89 (Windows version / BIOS) */ - .driver_data = (void *)&gdix1001_01_upside_down_data, + .driver_data = (void *)&gdix1001_upside_down_data, .matches = { /* tPAD is too generic, also match on bios date */ DMI_MATCH(DMI_BOARD_VENDOR, "TECLAST"), @@ -1684,7 +1679,7 @@ const struct dmi_system_id touchscreen_dmi_table[] = { }, { /* Teclast X98 Pro */ - .driver_data = (void *)&gdix1001_00_upside_down_data, + .driver_data = (void *)&gdix1001_upside_down_data, .matches = { /* * Only match BIOS date, because the manufacturers @@ -1788,7 +1783,7 @@ const struct dmi_system_id touchscreen_dmi_table[] = { }, { /* "WinBook TW100" */ - .driver_data = (void *)&gdix1001_00_upside_down_data, + .driver_data = (void *)&gdix1001_upside_down_data, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "WinBook"), DMI_MATCH(DMI_PRODUCT_NAME, "TW100") @@ -1796,7 +1791,7 @@ const struct dmi_system_id touchscreen_dmi_table[] = { }, { /* WinBook TW700 */ - .driver_data = (void *)&gdix1001_00_upside_down_data, + .driver_data = (void *)&gdix1001_upside_down_data, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "WinBook"), DMI_MATCH(DMI_PRODUCT_NAME, "TW700") @@ -1821,7 +1816,7 @@ static void ts_dmi_add_props(struct i2c_client *client) int error; if (has_acpi_companion(dev) && - !strncmp(ts_data->acpi_name, client->name, I2C_NAME_SIZE)) { + strstarts(client->name, ts_data->acpi_name)) { error = device_create_managed_software_node(dev, ts_data->properties, NULL); if (error) dev_err(dev, "failed to add properties: %d\n", error); diff --git a/drivers/platform/x86/x86-android-tablets/core.c b/drivers/platform/x86/x86-android-tablets/core.c index f8221a15575b..a3415f1c0b5f 100644 --- a/drivers/platform/x86/x86-android-tablets/core.c +++ b/drivers/platform/x86/x86-android-tablets/core.c @@ -21,6 +21,7 @@ #include <linux/string.h> #include "x86-android-tablets.h" +#include "../serdev_helpers.h" static struct platform_device *x86_android_tablet_device; @@ -113,6 +114,9 @@ int x86_acpi_irq_helper_get(const struct x86_acpi_irq_data *data) if (irq_type != IRQ_TYPE_NONE && irq_type != irq_get_trigger_type(irq)) irq_set_irq_type(irq, irq_type); + if (data->free_gpio) + devm_gpiod_put(&x86_android_tablet_device->dev, gpiod); + return irq; case X86_ACPI_IRQ_TYPE_PMIC: status = acpi_get_handle(NULL, data->chip, &handle); @@ -229,38 +233,20 @@ static __init int x86_instantiate_spi_dev(const struct x86_dev_info *dev_info, i static __init int x86_instantiate_serdev(const struct x86_serdev_info *info, int idx) { - struct acpi_device *ctrl_adev, *serdev_adev; + struct acpi_device *serdev_adev; struct serdev_device *serdev; struct device *ctrl_dev; int ret = -ENODEV; - ctrl_adev = acpi_dev_get_first_match_dev(info->ctrl_hid, info->ctrl_uid, -1); - if (!ctrl_adev) { - pr_err("error could not get %s/%s ctrl adev\n", - info->ctrl_hid, info->ctrl_uid); - return -ENODEV; - } + ctrl_dev = get_serdev_controller(info->ctrl_hid, info->ctrl_uid, 0, + info->ctrl_devname); + if (IS_ERR(ctrl_dev)) + return PTR_ERR(ctrl_dev); serdev_adev = acpi_dev_get_first_match_dev(info->serdev_hid, NULL, -1); if (!serdev_adev) { pr_err("error could not get %s serdev adev\n", info->serdev_hid); - goto put_ctrl_adev; - } - - /* get_first_physical_node() returns a weak ref, no need to put() it */ - ctrl_dev = acpi_get_first_physical_node(ctrl_adev); - if (!ctrl_dev) { - pr_err("error could not get %s/%s ctrl physical dev\n", - info->ctrl_hid, info->ctrl_uid); - goto put_serdev_adev; - } - - /* ctrl_dev now points to the controller's parent, get the controller */ - ctrl_dev = device_find_child_by_name(ctrl_dev, info->ctrl_devname); - if (!ctrl_dev) { - pr_err("error could not get %s/%s %s ctrl dev\n", - info->ctrl_hid, info->ctrl_uid, info->ctrl_devname); - goto put_serdev_adev; + goto put_ctrl_dev; } serdev = serdev_device_alloc(to_serdev_controller(ctrl_dev)); @@ -283,8 +269,8 @@ static __init int x86_instantiate_serdev(const struct x86_serdev_info *info, int put_serdev_adev: acpi_dev_put(serdev_adev); -put_ctrl_adev: - acpi_dev_put(ctrl_adev); +put_ctrl_dev: + put_device(ctrl_dev); return ret; } diff --git a/drivers/platform/x86/x86-android-tablets/lenovo.c b/drivers/platform/x86/x86-android-tablets/lenovo.c index f1c66a61bfc5..c297391955ad 100644 --- a/drivers/platform/x86/x86-android-tablets/lenovo.c +++ b/drivers/platform/x86/x86-android-tablets/lenovo.c @@ -116,6 +116,7 @@ static const struct x86_i2c_client_info lenovo_yb1_x90_i2c_clients[] __initconst .trigger = ACPI_EDGE_SENSITIVE, .polarity = ACPI_ACTIVE_LOW, .con_id = "goodix_ts_irq", + .free_gpio = true, }, }, { /* Wacom Digitizer in keyboard half */ diff --git a/drivers/platform/x86/x86-android-tablets/other.c b/drivers/platform/x86/x86-android-tablets/other.c index bc6bbf7ec6ea..278402dcb808 100644 --- a/drivers/platform/x86/x86-android-tablets/other.c +++ b/drivers/platform/x86/x86-android-tablets/other.c @@ -68,7 +68,7 @@ static const struct x86_i2c_client_info acer_b1_750_i2c_clients[] __initconst = }, }; -static struct gpiod_lookup_table acer_b1_750_goodix_gpios = { +static struct gpiod_lookup_table acer_b1_750_nvt_ts_gpios = { .dev_id = "i2c-NVT-ts", .table = { GPIO_LOOKUP("INT33FC:01", 26, "reset", GPIO_ACTIVE_LOW), @@ -77,7 +77,7 @@ static struct gpiod_lookup_table acer_b1_750_goodix_gpios = { }; static struct gpiod_lookup_table * const acer_b1_750_gpios[] = { - &acer_b1_750_goodix_gpios, + &acer_b1_750_nvt_ts_gpios, &int3496_reference_gpios, NULL }; diff --git a/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h b/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h index 49fed9410adb..468993edfeee 100644 --- a/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h +++ b/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h @@ -39,6 +39,7 @@ struct x86_acpi_irq_data { int index; int trigger; /* ACPI_EDGE_SENSITIVE / ACPI_LEVEL_SENSITIVE */ int polarity; /* ACPI_ACTIVE_HIGH / ACPI_ACTIVE_LOW / ACPI_ACTIVE_BOTH */ + bool free_gpio; /* Release GPIO after getting IRQ (for TYPE_GPIOINT) */ const char *con_id; }; diff --git a/drivers/pmdomain/arm/scmi_perf_domain.c b/drivers/pmdomain/arm/scmi_perf_domain.c index 709bbc448fad..d7ef46ccd9b8 100644 --- a/drivers/pmdomain/arm/scmi_perf_domain.c +++ b/drivers/pmdomain/arm/scmi_perf_domain.c @@ -159,6 +159,9 @@ static void scmi_perf_domain_remove(struct scmi_device *sdev) struct genpd_onecell_data *scmi_pd_data = dev_get_drvdata(dev); int i; + if (!scmi_pd_data) + return; + of_genpd_del_provider(dev->of_node); for (i = 0; i < scmi_pd_data->num_domains; i++) diff --git a/drivers/pmdomain/qcom/rpmhpd.c b/drivers/pmdomain/qcom/rpmhpd.c index 3078896b1300..47df910645f6 100644 --- a/drivers/pmdomain/qcom/rpmhpd.c +++ b/drivers/pmdomain/qcom/rpmhpd.c @@ -692,6 +692,7 @@ static int rpmhpd_aggregate_corner(struct rpmhpd *pd, unsigned int corner) unsigned int active_corner, sleep_corner; unsigned int this_active_corner = 0, this_sleep_corner = 0; unsigned int peer_active_corner = 0, peer_sleep_corner = 0; + unsigned int peer_enabled_corner; if (pd->state_synced) { to_active_sleep(pd, corner, &this_active_corner, &this_sleep_corner); @@ -701,9 +702,11 @@ static int rpmhpd_aggregate_corner(struct rpmhpd *pd, unsigned int corner) this_sleep_corner = pd->level_count - 1; } - if (peer && peer->enabled) - to_active_sleep(peer, peer->corner, &peer_active_corner, + if (peer && peer->enabled) { + peer_enabled_corner = max(peer->corner, peer->enable_corner); + to_active_sleep(peer, peer_enabled_corner, &peer_active_corner, &peer_sleep_corner); + } active_corner = max(this_active_corner, peer_active_corner); diff --git a/drivers/power/supply/Kconfig b/drivers/power/supply/Kconfig index f21cb05815ec..3e31375491d5 100644 --- a/drivers/power/supply/Kconfig +++ b/drivers/power/supply/Kconfig @@ -978,6 +978,7 @@ config CHARGER_QCOM_SMB2 config FUEL_GAUGE_MM8013 tristate "Mitsumi MM8013 fuel gauge driver" depends on I2C + select REGMAP_I2C help Say Y here to enable the Mitsumi MM8013 fuel gauge driver. It enables the monitoring of many battery parameters, including diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c index 3a1798b0c1a7..9910c600743e 100644 --- a/drivers/power/supply/bq27xxx_battery_i2c.c +++ b/drivers/power/supply/bq27xxx_battery_i2c.c @@ -209,7 +209,9 @@ static void bq27xxx_battery_i2c_remove(struct i2c_client *client) { struct bq27xxx_device_info *di = i2c_get_clientdata(client); - free_irq(client->irq, di); + if (client->irq) + free_irq(client->irq, di); + bq27xxx_battery_teardown(di); mutex_lock(&battery_mutex); diff --git a/drivers/ptp/ptp_fc3.c b/drivers/ptp/ptp_fc3.c index 0e2286ba088a..6ef982862e27 100644 --- a/drivers/ptp/ptp_fc3.c +++ b/drivers/ptp/ptp_fc3.c @@ -996,13 +996,11 @@ static int idtfc3_probe(struct platform_device *pdev) return 0; } -static int idtfc3_remove(struct platform_device *pdev) +static void idtfc3_remove(struct platform_device *pdev) { struct idtfc3 *idtfc3 = platform_get_drvdata(pdev); ptp_clock_unregister(idtfc3->ptp_clock); - - return 0; } static struct platform_driver idtfc3_driver = { @@ -1010,7 +1008,7 @@ static struct platform_driver idtfc3_driver = { .name = "rc38xxx-phc", }, .probe = idtfc3_probe, - .remove = idtfc3_remove, + .remove_new = idtfc3_remove, }; module_platform_driver(idtfc3_driver); diff --git a/drivers/regulator/max5970-regulator.c b/drivers/regulator/max5970-regulator.c index 830a1c4cd705..8bbcd983a74a 100644 --- a/drivers/regulator/max5970-regulator.c +++ b/drivers/regulator/max5970-regulator.c @@ -29,8 +29,8 @@ struct max5970_regulator { }; enum max597x_regulator_id { - MAX597X_SW0, - MAX597X_SW1, + MAX597X_sw0, + MAX597X_sw1, }; static int max5970_read_adc(struct regmap *regmap, int reg, long *val) @@ -378,8 +378,8 @@ static int max597x_dt_parse(struct device_node *np, } static const struct regulator_desc regulators[] = { - MAX597X_SWITCH(SW0, MAX5970_REG_CHXEN, 0, "vss1"), - MAX597X_SWITCH(SW1, MAX5970_REG_CHXEN, 1, "vss2"), + MAX597X_SWITCH(sw0, MAX5970_REG_CHXEN, 0, "vss1"), + MAX597X_SWITCH(sw1, MAX5970_REG_CHXEN, 1, "vss2"), }; static int max597x_regmap_read_clear(struct regmap *map, unsigned int reg, diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index c533d1dadc6b..a5dba3829769 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -202,7 +202,8 @@ int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa, return -EINVAL; if (cdev->private->state == DEV_STATE_NOT_OPER) return -ENODEV; - if (cdev->private->state == DEV_STATE_VERIFY) { + if (cdev->private->state == DEV_STATE_VERIFY || + cdev->private->flags.doverify) { /* Remember to fake irb when finished. */ if (!cdev->private->flags.fake_irb) { cdev->private->flags.fake_irb = FAKE_CMD_IRB; @@ -214,8 +215,7 @@ int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa, } if (cdev->private->state != DEV_STATE_ONLINE || ((sch->schib.scsw.cmd.stctl & SCSW_STCTL_PRIM_STATUS) && - !(sch->schib.scsw.cmd.stctl & SCSW_STCTL_SEC_STATUS)) || - cdev->private->flags.doverify) + !(sch->schib.scsw.cmd.stctl & SCSW_STCTL_SEC_STATUS))) return -EBUSY; ret = cio_set_options (sch, flags); if (ret) diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index addac7fbe37b..9ce27092729c 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1270,7 +1270,7 @@ source "drivers/scsi/arm/Kconfig" config JAZZ_ESP bool "MIPS JAZZ FAS216 SCSI support" - depends on MACH_JAZZ && SCSI + depends on MACH_JAZZ && SCSI=y select SCSI_SPI_ATTRS help This is the driver for the onboard SCSI host adapter of MIPS Magnum diff --git a/drivers/scsi/mpi3mr/mpi3mr_transport.c b/drivers/scsi/mpi3mr/mpi3mr_transport.c index c0c8ab586957..d32ad46318cb 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_transport.c +++ b/drivers/scsi/mpi3mr/mpi3mr_transport.c @@ -1671,7 +1671,7 @@ mpi3mr_update_mr_sas_port(struct mpi3mr_ioc *mrioc, struct host_port *h_port, void mpi3mr_refresh_sas_ports(struct mpi3mr_ioc *mrioc) { - struct host_port h_port[64]; + struct host_port *h_port = NULL; int i, j, found, host_port_count = 0, port_idx; u16 sz, attached_handle, ioc_status; struct mpi3_sas_io_unit_page0 *sas_io_unit_pg0 = NULL; @@ -1685,6 +1685,10 @@ mpi3mr_refresh_sas_ports(struct mpi3mr_ioc *mrioc) sas_io_unit_pg0 = kzalloc(sz, GFP_KERNEL); if (!sas_io_unit_pg0) return; + h_port = kcalloc(64, sizeof(struct host_port), GFP_KERNEL); + if (!h_port) + goto out; + if (mpi3mr_cfg_get_sas_io_unit_pg0(mrioc, sas_io_unit_pg0, sz)) { ioc_err(mrioc, "failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); @@ -1814,6 +1818,7 @@ mpi3mr_refresh_sas_ports(struct mpi3mr_ioc *mrioc) } } out: + kfree(h_port); kfree(sas_io_unit_pg0); } diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 8761bc58d965..b8120ca93c79 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -7378,7 +7378,9 @@ _base_wait_for_iocstate(struct MPT3SAS_ADAPTER *ioc, int timeout) return -EFAULT; } - issue_diag_reset: + return 0; + +issue_diag_reset: rc = _base_diag_reset(ioc); return rc; } diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 76d369343c7a..8cad9792a562 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -328,21 +328,39 @@ static int scsi_vpd_inquiry(struct scsi_device *sdev, unsigned char *buffer, return result + 4; } +enum scsi_vpd_parameters { + SCSI_VPD_HEADER_SIZE = 4, + SCSI_VPD_LIST_SIZE = 36, +}; + static int scsi_get_vpd_size(struct scsi_device *sdev, u8 page) { - unsigned char vpd_header[SCSI_VPD_HEADER_SIZE] __aligned(4); + unsigned char vpd[SCSI_VPD_LIST_SIZE] __aligned(4); int result; if (sdev->no_vpd_size) return SCSI_DEFAULT_VPD_LEN; /* + * Fetch the supported pages VPD and validate that the requested page + * number is present. + */ + if (page != 0) { + result = scsi_vpd_inquiry(sdev, vpd, 0, sizeof(vpd)); + if (result < SCSI_VPD_HEADER_SIZE) + return 0; + + result -= SCSI_VPD_HEADER_SIZE; + if (!memchr(&vpd[SCSI_VPD_HEADER_SIZE], page, result)) + return 0; + } + /* * Fetch the VPD page header to find out how big the page * is. This is done to prevent problems on legacy devices * which can not handle allocation lengths as large as * potentially requested by the caller. */ - result = scsi_vpd_inquiry(sdev, vpd_header, page, sizeof(vpd_header)); + result = scsi_vpd_inquiry(sdev, vpd, page, SCSI_VPD_HEADER_SIZE); if (result < 0) return 0; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 0833b3e6aa6e..bdd0acf7fa3c 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3407,6 +3407,24 @@ static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp, return true; } +static void sd_read_block_zero(struct scsi_disk *sdkp) +{ + unsigned int buf_len = sdkp->device->sector_size; + char *buffer, cmd[10] = { }; + + buffer = kmalloc(buf_len, GFP_KERNEL); + if (!buffer) + return; + + cmd[0] = READ_10; + put_unaligned_be32(0, &cmd[2]); /* Logical block address 0 */ + put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */ + + scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN, buffer, buf_len, + SD_TIMEOUT, sdkp->max_retries, NULL); + kfree(buffer); +} + /** * sd_revalidate_disk - called the first time a new disk is seen, * performs disk spin up, read_capacity, etc. @@ -3446,7 +3464,13 @@ static int sd_revalidate_disk(struct gendisk *disk) */ if (sdkp->media_present) { sd_read_capacity(sdkp, buffer); - + /* + * Some USB/UAS devices return generic values for mode pages + * until the media has been accessed. Trigger a READ operation + * to force the device to populate mode pages. + */ + if (sdp->read_before_ms) + sd_read_block_zero(sdkp); /* * set the default to rotational. All non-rotational devices * support the block characteristics VPD page, which will diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index ceff1ec13f9e..385180c98be4 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -6533,8 +6533,11 @@ static void pqi_map_queues(struct Scsi_Host *shost) { struct pqi_ctrl_info *ctrl_info = shost_to_hba(shost); - blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], + if (!ctrl_info->disable_managed_interrupts) + return blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], ctrl_info->pci_dev, 0); + else + return blk_mq_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT]); } static inline bool pqi_is_tape_changer_device(struct pqi_scsi_dev *device) diff --git a/drivers/soc/microchip/Kconfig b/drivers/soc/microchip/Kconfig index 9b0fdd95276e..19f4b576f822 100644 --- a/drivers/soc/microchip/Kconfig +++ b/drivers/soc/microchip/Kconfig @@ -1,5 +1,5 @@ config POLARFIRE_SOC_SYS_CTRL - tristate "POLARFIRE_SOC_SYS_CTRL" + tristate "Microchip PolarFire SoC (MPFS) system controller support" depends on POLARFIRE_SOC_MAILBOX depends on MTD help diff --git a/drivers/soc/qcom/pmic_glink.c b/drivers/soc/qcom/pmic_glink.c index f4bfd24386f1..f913e9bd57ed 100644 --- a/drivers/soc/qcom/pmic_glink.c +++ b/drivers/soc/qcom/pmic_glink.c @@ -265,10 +265,17 @@ static int pmic_glink_probe(struct platform_device *pdev) pg->client_mask = *match_data; + pg->pdr = pdr_handle_alloc(pmic_glink_pdr_callback, pg); + if (IS_ERR(pg->pdr)) { + ret = dev_err_probe(&pdev->dev, PTR_ERR(pg->pdr), + "failed to initialize pdr\n"); + return ret; + } + if (pg->client_mask & BIT(PMIC_GLINK_CLIENT_UCSI)) { ret = pmic_glink_add_aux_device(pg, &pg->ucsi_aux, "ucsi"); if (ret) - return ret; + goto out_release_pdr_handle; } if (pg->client_mask & BIT(PMIC_GLINK_CLIENT_ALTMODE)) { ret = pmic_glink_add_aux_device(pg, &pg->altmode_aux, "altmode"); @@ -281,17 +288,11 @@ static int pmic_glink_probe(struct platform_device *pdev) goto out_release_altmode_aux; } - pg->pdr = pdr_handle_alloc(pmic_glink_pdr_callback, pg); - if (IS_ERR(pg->pdr)) { - ret = dev_err_probe(&pdev->dev, PTR_ERR(pg->pdr), "failed to initialize pdr\n"); - goto out_release_aux_devices; - } - service = pdr_add_lookup(pg->pdr, "tms/servreg", "msm/adsp/charger_pd"); if (IS_ERR(service)) { ret = dev_err_probe(&pdev->dev, PTR_ERR(service), "failed adding pdr lookup for charger_pd\n"); - goto out_release_pdr_handle; + goto out_release_aux_devices; } mutex_lock(&__pmic_glink_lock); @@ -300,8 +301,6 @@ static int pmic_glink_probe(struct platform_device *pdev) return 0; -out_release_pdr_handle: - pdr_handle_release(pg->pdr); out_release_aux_devices: if (pg->client_mask & BIT(PMIC_GLINK_CLIENT_BATT)) pmic_glink_del_aux_device(pg, &pg->ps_aux); @@ -311,6 +310,8 @@ out_release_altmode_aux: out_release_ucsi_aux: if (pg->client_mask & BIT(PMIC_GLINK_CLIENT_UCSI)) pmic_glink_del_aux_device(pg, &pg->ucsi_aux); +out_release_pdr_handle: + pdr_handle_release(pg->pdr); return ret; } diff --git a/drivers/soc/qcom/pmic_glink_altmode.c b/drivers/soc/qcom/pmic_glink_altmode.c index 5fcd0fdd2faa..b3808fc24c69 100644 --- a/drivers/soc/qcom/pmic_glink_altmode.c +++ b/drivers/soc/qcom/pmic_glink_altmode.c @@ -76,7 +76,7 @@ struct pmic_glink_altmode_port { struct work_struct work; - struct device *bridge; + struct auxiliary_device *bridge; enum typec_orientation orientation; u16 svid; @@ -230,7 +230,7 @@ static void pmic_glink_altmode_worker(struct work_struct *work) else pmic_glink_altmode_enable_usb(altmode, alt_port); - drm_aux_hpd_bridge_notify(alt_port->bridge, + drm_aux_hpd_bridge_notify(&alt_port->bridge->dev, alt_port->hpd_state ? connector_status_connected : connector_status_disconnected); @@ -454,7 +454,7 @@ static int pmic_glink_altmode_probe(struct auxiliary_device *adev, alt_port->index = port; INIT_WORK(&alt_port->work, pmic_glink_altmode_worker); - alt_port->bridge = drm_dp_hpd_bridge_register(dev, to_of_node(fwnode)); + alt_port->bridge = devm_drm_dp_hpd_bridge_alloc(dev, to_of_node(fwnode)); if (IS_ERR(alt_port->bridge)) { fwnode_handle_put(fwnode); return PTR_ERR(alt_port->bridge); @@ -510,6 +510,16 @@ static int pmic_glink_altmode_probe(struct auxiliary_device *adev, } } + for (port = 0; port < ARRAY_SIZE(altmode->ports); port++) { + alt_port = &altmode->ports[port]; + if (!alt_port->bridge) + continue; + + ret = devm_drm_dp_hpd_bridge_add(dev, alt_port->bridge); + if (ret) + return ret; + } + altmode->client = devm_pmic_glink_register_client(dev, altmode->owner_id, pmic_glink_altmode_callback, diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index f94e0d370d46..1a8d03958dff 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -1927,24 +1927,18 @@ static void cqspi_remove(struct platform_device *pdev) pm_runtime_disable(&pdev->dev); } -static int cqspi_suspend(struct device *dev) +static int cqspi_runtime_suspend(struct device *dev) { struct cqspi_st *cqspi = dev_get_drvdata(dev); - struct spi_controller *host = dev_get_drvdata(dev); - int ret; - ret = spi_controller_suspend(host); cqspi_controller_enable(cqspi, 0); - clk_disable_unprepare(cqspi->clk); - - return ret; + return 0; } -static int cqspi_resume(struct device *dev) +static int cqspi_runtime_resume(struct device *dev) { struct cqspi_st *cqspi = dev_get_drvdata(dev); - struct spi_controller *host = dev_get_drvdata(dev); clk_prepare_enable(cqspi->clk); cqspi_wait_idle(cqspi); @@ -1952,12 +1946,27 @@ static int cqspi_resume(struct device *dev) cqspi->current_cs = -1; cqspi->sclk = 0; + return 0; +} + +static int cqspi_suspend(struct device *dev) +{ + struct cqspi_st *cqspi = dev_get_drvdata(dev); + + return spi_controller_suspend(cqspi->host); +} - return spi_controller_resume(host); +static int cqspi_resume(struct device *dev) +{ + struct cqspi_st *cqspi = dev_get_drvdata(dev); + + return spi_controller_resume(cqspi->host); } -static DEFINE_RUNTIME_DEV_PM_OPS(cqspi_dev_pm_ops, cqspi_suspend, - cqspi_resume, NULL); +static const struct dev_pm_ops cqspi_dev_pm_ops = { + RUNTIME_PM_OPS(cqspi_runtime_suspend, cqspi_runtime_resume, NULL) + SYSTEM_SLEEP_PM_OPS(cqspi_suspend, cqspi_resume) +}; static const struct cqspi_driver_platdata cdns_qspi = { .quirks = CQSPI_DISABLE_DAC_MODE, diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c index 942c3117ab3a..82d6264841fc 100644 --- a/drivers/spi/spi-ppc4xx.c +++ b/drivers/spi/spi-ppc4xx.c @@ -359,22 +359,22 @@ static int spi_ppc4xx_of_probe(struct platform_device *op) /* Setup the state for the bitbang driver */ bbp = &hw->bitbang; - bbp->ctlr = hw->host; + bbp->master = hw->host; bbp->setup_transfer = spi_ppc4xx_setupxfer; bbp->txrx_bufs = spi_ppc4xx_txrx; bbp->use_dma = 0; - bbp->ctlr->setup = spi_ppc4xx_setup; - bbp->ctlr->cleanup = spi_ppc4xx_cleanup; - bbp->ctlr->bits_per_word_mask = SPI_BPW_MASK(8); - bbp->ctlr->use_gpio_descriptors = true; + bbp->master->setup = spi_ppc4xx_setup; + bbp->master->cleanup = spi_ppc4xx_cleanup; + bbp->master->bits_per_word_mask = SPI_BPW_MASK(8); + bbp->master->use_gpio_descriptors = true; /* * The SPI core will count the number of GPIO descriptors to figure * out the number of chip selects available on the platform. */ - bbp->ctlr->num_chipselect = 0; + bbp->master->num_chipselect = 0; /* the spi->mode bits understood by this driver: */ - bbp->ctlr->mode_bits = + bbp->master->mode_bits = SPI_CPHA | SPI_CPOL | SPI_CS_HIGH | SPI_LSB_FIRST; /* Get the clock for the OPB */ diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 41b7489d37ce..ed4fd22eac6e 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -907,12 +907,15 @@ new_bio: return 0; fail: - if (bio) - bio_put(bio); + if (bio) { + bio_uninit(bio); + kfree(bio); + } while (req->bio) { bio = req->bio; req->bio = bio->bi_next; - bio_put(bio); + bio_uninit(bio); + kfree(bio); } req->biotail = NULL; return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; diff --git a/drivers/tee/optee/device.c b/drivers/tee/optee/device.c index 4b1092127694..1892e49a8e6a 100644 --- a/drivers/tee/optee/device.c +++ b/drivers/tee/optee/device.c @@ -90,13 +90,14 @@ static int optee_register_device(const uuid_t *device_uuid, u32 func) if (rc) { pr_err("device registration failed, err: %d\n", rc); put_device(&optee_device->dev); + return rc; } if (func == PTA_CMD_GET_DEVICES_SUPP) device_create_file(&optee_device->dev, &dev_attr_need_supplicant); - return rc; + return 0; } static int __optee_enumerate_devices(u32 func) diff --git a/drivers/tty/hvc/Kconfig b/drivers/tty/hvc/Kconfig index 6e05c5c7bca1..c2a4e88b328f 100644 --- a/drivers/tty/hvc/Kconfig +++ b/drivers/tty/hvc/Kconfig @@ -108,13 +108,15 @@ config HVC_DCC_SERIALIZE_SMP config HVC_RISCV_SBI bool "RISC-V SBI console support" - depends on RISCV_SBI + depends on RISCV_SBI && NONPORTABLE select HVC_DRIVER help This enables support for console output via RISC-V SBI calls, which - is normally used only during boot to output printk. + is normally used only during boot to output printk. This driver + conflicts with real console drivers and should not be enabled on + systems that directly access the console. - If you don't know what do to here, say Y. + If you don't know what do to here, say N. config HVCS tristate "IBM Hypervisor Virtual Console Server support" diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index fccec1698a54..cf2c890a560f 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -1339,11 +1339,41 @@ static void pl011_start_tx_pio(struct uart_amba_port *uap) } } +static void pl011_rs485_tx_start(struct uart_amba_port *uap) +{ + struct uart_port *port = &uap->port; + u32 cr; + + /* Enable transmitter */ + cr = pl011_read(uap, REG_CR); + cr |= UART011_CR_TXE; + + /* Disable receiver if half-duplex */ + if (!(port->rs485.flags & SER_RS485_RX_DURING_TX)) + cr &= ~UART011_CR_RXE; + + if (port->rs485.flags & SER_RS485_RTS_ON_SEND) + cr &= ~UART011_CR_RTS; + else + cr |= UART011_CR_RTS; + + pl011_write(cr, uap, REG_CR); + + if (port->rs485.delay_rts_before_send) + mdelay(port->rs485.delay_rts_before_send); + + uap->rs485_tx_started = true; +} + static void pl011_start_tx(struct uart_port *port) { struct uart_amba_port *uap = container_of(port, struct uart_amba_port, port); + if ((uap->port.rs485.flags & SER_RS485_ENABLED) && + !uap->rs485_tx_started) + pl011_rs485_tx_start(uap); + if (!pl011_dma_tx_start(uap)) pl011_start_tx_pio(uap); } @@ -1424,42 +1454,12 @@ static bool pl011_tx_char(struct uart_amba_port *uap, unsigned char c, return true; } -static void pl011_rs485_tx_start(struct uart_amba_port *uap) -{ - struct uart_port *port = &uap->port; - u32 cr; - - /* Enable transmitter */ - cr = pl011_read(uap, REG_CR); - cr |= UART011_CR_TXE; - - /* Disable receiver if half-duplex */ - if (!(port->rs485.flags & SER_RS485_RX_DURING_TX)) - cr &= ~UART011_CR_RXE; - - if (port->rs485.flags & SER_RS485_RTS_ON_SEND) - cr &= ~UART011_CR_RTS; - else - cr |= UART011_CR_RTS; - - pl011_write(cr, uap, REG_CR); - - if (port->rs485.delay_rts_before_send) - mdelay(port->rs485.delay_rts_before_send); - - uap->rs485_tx_started = true; -} - /* Returns true if tx interrupts have to be (kept) enabled */ static bool pl011_tx_chars(struct uart_amba_port *uap, bool from_irq) { struct circ_buf *xmit = &uap->port.state->xmit; int count = uap->fifosize >> 1; - if ((uap->port.rs485.flags & SER_RS485_ENABLED) && - !uap->rs485_tx_started) - pl011_rs485_tx_start(uap); - if (uap->port.x_char) { if (!pl011_tx_char(uap, uap->port.x_char, from_irq)) return true; diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index 794b77512740..693e932d6feb 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -251,7 +251,9 @@ static int stm32_usart_config_rs485(struct uart_port *port, struct ktermios *ter writel_relaxed(cr3, port->membase + ofs->cr3); writel_relaxed(cr1, port->membase + ofs->cr1); - rs485conf->flags |= SER_RS485_RX_DURING_TX; + if (!port->rs485_rx_during_tx_gpio) + rs485conf->flags |= SER_RS485_RX_DURING_TX; + } else { stm32_usart_clr_bits(port, ofs->cr3, USART_CR3_DEM | USART_CR3_DEP); diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index d77b25b79ae3..3b89c9d4aa40 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -1469,7 +1469,7 @@ static int ufshcd_devfreq_target(struct device *dev, int ret = 0; struct ufs_hba *hba = dev_get_drvdata(dev); ktime_t start; - bool scale_up, sched_clk_scaling_suspend_work = false; + bool scale_up = false, sched_clk_scaling_suspend_work = false; struct list_head *clk_list = &hba->clk_list_head; struct ufs_clk_info *clki; unsigned long irq_flags; diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index aeca902ab6cc..fd1beb10bba7 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -828,7 +828,11 @@ void cdns3_gadget_giveback(struct cdns3_endpoint *priv_ep, return; } - if (request->complete) { + /* + * zlp request is appended by driver, needn't call usb_gadget_giveback_request() to notify + * gadget composite driver. + */ + if (request->complete && request->buf != priv_dev->zlp_buf) { spin_unlock(&priv_dev->lock); usb_gadget_giveback_request(&priv_ep->endpoint, request); @@ -2540,11 +2544,11 @@ static int cdns3_gadget_ep_disable(struct usb_ep *ep) while (!list_empty(&priv_ep->wa2_descmiss_req_list)) { priv_req = cdns3_next_priv_request(&priv_ep->wa2_descmiss_req_list); + list_del_init(&priv_req->list); kfree(priv_req->request.buf); cdns3_gadget_ep_free_request(&priv_ep->endpoint, &priv_req->request); - list_del_init(&priv_req->list); --priv_ep->wa2_counter; } diff --git a/drivers/usb/cdns3/core.c b/drivers/usb/cdns3/core.c index 33548771a0d3..465e9267b49c 100644 --- a/drivers/usb/cdns3/core.c +++ b/drivers/usb/cdns3/core.c @@ -395,7 +395,6 @@ pm_put: return ret; } - /** * cdns_wakeup_irq - interrupt handler for wakeup events * @irq: irq number for cdns3/cdnsp core device diff --git a/drivers/usb/cdns3/drd.c b/drivers/usb/cdns3/drd.c index 04b6d12f2b9a..ee917f1b091c 100644 --- a/drivers/usb/cdns3/drd.c +++ b/drivers/usb/cdns3/drd.c @@ -156,7 +156,8 @@ bool cdns_is_device(struct cdns *cdns) */ static void cdns_otg_disable_irq(struct cdns *cdns) { - writel(0, &cdns->otg_irq_regs->ien); + if (cdns->version) + writel(0, &cdns->otg_irq_regs->ien); } /** @@ -422,15 +423,20 @@ int cdns_drd_init(struct cdns *cdns) cdns->otg_regs = (void __iomem *)&cdns->otg_v1_regs->cmd; - if (readl(&cdns->otg_cdnsp_regs->did) == OTG_CDNSP_DID) { + state = readl(&cdns->otg_cdnsp_regs->did); + + if (OTG_CDNSP_CHECK_DID(state)) { cdns->otg_irq_regs = (struct cdns_otg_irq_regs __iomem *) &cdns->otg_cdnsp_regs->ien; cdns->version = CDNSP_CONTROLLER_V2; - } else { + } else if (OTG_CDNS3_CHECK_DID(state)) { cdns->otg_irq_regs = (struct cdns_otg_irq_regs __iomem *) &cdns->otg_v1_regs->ien; writel(1, &cdns->otg_v1_regs->simulate); cdns->version = CDNS3_CONTROLLER_V1; + } else { + dev_err(cdns->dev, "not supporte DID=0x%08x\n", state); + return -EINVAL; } dev_dbg(cdns->dev, "DRD version v1 (ID: %08x, rev: %08x)\n", @@ -483,7 +489,6 @@ int cdns_drd_exit(struct cdns *cdns) return 0; } - /* Indicate the cdns3 core was power lost before */ bool cdns_power_is_lost(struct cdns *cdns) { diff --git a/drivers/usb/cdns3/drd.h b/drivers/usb/cdns3/drd.h index cbdf94f73ed9..d72370c321d3 100644 --- a/drivers/usb/cdns3/drd.h +++ b/drivers/usb/cdns3/drd.h @@ -79,7 +79,11 @@ struct cdnsp_otg_regs { __le32 susp_timing_ctrl; }; -#define OTG_CDNSP_DID 0x0004034E +/* CDNSP driver supports 0x000403xx Cadence USB controller family. */ +#define OTG_CDNSP_CHECK_DID(did) (((did) & GENMASK(31, 8)) == 0x00040300) + +/* CDNS3 driver supports 0x000402xx Cadence USB controller family. */ +#define OTG_CDNS3_CHECK_DID(did) (((did) & GENMASK(31, 8)) == 0x00040200) /* * Common registers interface for both CDNS3 and CDNSP version of DRD. diff --git a/drivers/usb/cdns3/host.c b/drivers/usb/cdns3/host.c index 6164fc4c96a4..ceca4d839dfd 100644 --- a/drivers/usb/cdns3/host.c +++ b/drivers/usb/cdns3/host.c @@ -18,6 +18,11 @@ #include "../host/xhci.h" #include "../host/xhci-plat.h" +/* + * The XECP_PORT_CAP_REG and XECP_AUX_CTRL_REG1 exist only + * in Cadence USB3 dual-role controller, so it can't be used + * with Cadence CDNSP dual-role controller. + */ #define XECP_PORT_CAP_REG 0x8000 #define XECP_AUX_CTRL_REG1 0x8120 @@ -57,6 +62,8 @@ static const struct xhci_plat_priv xhci_plat_cdns3_xhci = { .resume_quirk = xhci_cdns3_resume_quirk, }; +static const struct xhci_plat_priv xhci_plat_cdnsp_xhci; + static int __cdns_host_init(struct cdns *cdns) { struct platform_device *xhci; @@ -81,8 +88,13 @@ static int __cdns_host_init(struct cdns *cdns) goto err1; } - cdns->xhci_plat_data = kmemdup(&xhci_plat_cdns3_xhci, - sizeof(struct xhci_plat_priv), GFP_KERNEL); + if (cdns->version < CDNSP_CONTROLLER_V2) + cdns->xhci_plat_data = kmemdup(&xhci_plat_cdns3_xhci, + sizeof(struct xhci_plat_priv), GFP_KERNEL); + else + cdns->xhci_plat_data = kmemdup(&xhci_plat_cdnsp_xhci, + sizeof(struct xhci_plat_priv), GFP_KERNEL); + if (!cdns->xhci_plat_data) { ret = -ENOMEM; goto err1; diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 4c8dd6724678..28f49400f3e8 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2650,6 +2650,11 @@ static int dwc3_gadget_soft_disconnect(struct dwc3 *dwc) int ret; spin_lock_irqsave(&dwc->lock, flags); + if (!dwc->pullups_connected) { + spin_unlock_irqrestore(&dwc->lock, flags); + return 0; + } + dwc->connected = false; /* diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index ca5d5f564998..e2a059cfda2c 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -1338,7 +1338,15 @@ parse_ntb: "Parsed NTB with %d frames\n", dgram_counter); to_process -= block_len; - if (to_process != 0) { + + /* + * Windows NCM driver avoids USB ZLPs by adding a 1-byte + * zero pad as needed. + */ + if (to_process == 1 && + (*(unsigned char *)(ntb_ptr + block_len) == 0x00)) { + to_process--; + } else if (to_process > 0) { ntb_ptr = (unsigned char *)(ntb_ptr + block_len); goto parse_ntb; } diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c index 10c5d7f726a1..f90eeecf27de 100644 --- a/drivers/usb/gadget/udc/omap_udc.c +++ b/drivers/usb/gadget/udc/omap_udc.c @@ -2036,7 +2036,8 @@ static irqreturn_t omap_udc_iso_irq(int irq, void *_dev) static inline int machine_without_vbus_sense(void) { - return machine_is_omap_osk() || machine_is_sx1(); + return machine_is_omap_osk() || machine_is_omap_palmte() || + machine_is_sx1(); } static int omap_udc_start(struct usb_gadget *g, diff --git a/drivers/usb/host/uhci-grlib.c b/drivers/usb/host/uhci-grlib.c index ac3fc5970315..cfebb833668e 100644 --- a/drivers/usb/host/uhci-grlib.c +++ b/drivers/usb/host/uhci-grlib.c @@ -22,6 +22,7 @@ #include <linux/of_irq.h> #include <linux/of_address.h> #include <linux/of_platform.h> +#include <linux/platform_device.h> static int uhci_grlib_init(struct usb_hcd *hcd) { diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c index ae41578bd014..70165dd86b5d 100644 --- a/drivers/usb/roles/class.c +++ b/drivers/usb/roles/class.c @@ -21,7 +21,9 @@ static const struct class role_class = { struct usb_role_switch { struct device dev; struct mutex lock; /* device lock*/ + struct module *module; /* the module this device depends on */ enum usb_role role; + bool registered; /* From descriptor */ struct device *usb2_port; @@ -48,6 +50,9 @@ int usb_role_switch_set_role(struct usb_role_switch *sw, enum usb_role role) if (IS_ERR_OR_NULL(sw)) return 0; + if (!sw->registered) + return -EOPNOTSUPP; + mutex_lock(&sw->lock); ret = sw->set(sw, role); @@ -73,7 +78,7 @@ enum usb_role usb_role_switch_get_role(struct usb_role_switch *sw) { enum usb_role role; - if (IS_ERR_OR_NULL(sw)) + if (IS_ERR_OR_NULL(sw) || !sw->registered) return USB_ROLE_NONE; mutex_lock(&sw->lock); @@ -135,7 +140,7 @@ struct usb_role_switch *usb_role_switch_get(struct device *dev) usb_role_switch_match); if (!IS_ERR_OR_NULL(sw)) - WARN_ON(!try_module_get(sw->dev.parent->driver->owner)); + WARN_ON(!try_module_get(sw->module)); return sw; } @@ -157,7 +162,7 @@ struct usb_role_switch *fwnode_usb_role_switch_get(struct fwnode_handle *fwnode) sw = fwnode_connection_find_match(fwnode, "usb-role-switch", NULL, usb_role_switch_match); if (!IS_ERR_OR_NULL(sw)) - WARN_ON(!try_module_get(sw->dev.parent->driver->owner)); + WARN_ON(!try_module_get(sw->module)); return sw; } @@ -172,7 +177,7 @@ EXPORT_SYMBOL_GPL(fwnode_usb_role_switch_get); void usb_role_switch_put(struct usb_role_switch *sw) { if (!IS_ERR_OR_NULL(sw)) { - module_put(sw->dev.parent->driver->owner); + module_put(sw->module); put_device(&sw->dev); } } @@ -189,15 +194,18 @@ struct usb_role_switch * usb_role_switch_find_by_fwnode(const struct fwnode_handle *fwnode) { struct device *dev; + struct usb_role_switch *sw = NULL; if (!fwnode) return NULL; dev = class_find_device_by_fwnode(&role_class, fwnode); - if (dev) - WARN_ON(!try_module_get(dev->parent->driver->owner)); + if (dev) { + sw = to_role_switch(dev); + WARN_ON(!try_module_get(sw->module)); + } - return dev ? to_role_switch(dev) : NULL; + return sw; } EXPORT_SYMBOL_GPL(usb_role_switch_find_by_fwnode); @@ -338,6 +346,7 @@ usb_role_switch_register(struct device *parent, sw->set = desc->set; sw->get = desc->get; + sw->module = parent->driver->owner; sw->dev.parent = parent; sw->dev.fwnode = desc->fwnode; sw->dev.class = &role_class; @@ -352,6 +361,8 @@ usb_role_switch_register(struct device *parent, return ERR_PTR(ret); } + sw->registered = true; + /* TODO: Symlinks for the host port and the device controller. */ return sw; @@ -366,8 +377,10 @@ EXPORT_SYMBOL_GPL(usb_role_switch_register); */ void usb_role_switch_unregister(struct usb_role_switch *sw) { - if (!IS_ERR_OR_NULL(sw)) + if (!IS_ERR_OR_NULL(sw)) { + sw->registered = false; device_unregister(&sw->dev); + } } EXPORT_SYMBOL_GPL(usb_role_switch_unregister); diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index c54e9805da53..12cf9940e5b6 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -180,6 +180,13 @@ static int slave_configure(struct scsi_device *sdev) sdev->use_192_bytes_for_3f = 1; /* + * Some devices report generic values until the media has been + * accessed. Force a READ(10) prior to querying device + * characteristics. + */ + sdev->read_before_ms = 1; + + /* * Some devices don't like MODE SENSE with page=0x3f, * which is the command used for checking if a device * is write-protected. Now that we tell the sd driver diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 9707f53cfda9..71ace274761f 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -879,6 +879,13 @@ static int uas_slave_configure(struct scsi_device *sdev) sdev->guess_capacity = 1; /* + * Some devices report generic values until the media has been + * accessed. Force a READ(10) prior to querying device + * characteristics. + */ + sdev->read_before_ms = 1; + + /* * Some devices don't like MODE SENSE with page=0x3f, * which is the command used for checking if a device * is write-protected. Now that we tell the sd driver diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index f7d7daa60c8d..66e532edcece 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -3743,9 +3743,6 @@ static void tcpm_detach(struct tcpm_port *port) if (tcpm_port_is_disconnected(port)) port->hard_reset_count = 0; - port->try_src_count = 0; - port->try_snk_count = 0; - if (!port->attached) return; @@ -4876,7 +4873,8 @@ static void run_state_machine(struct tcpm_port *port) break; case PORT_RESET: tcpm_reset_port(port); - tcpm_set_cc(port, TYPEC_CC_OPEN); + tcpm_set_cc(port, tcpm_default_state(port) == SNK_UNATTACHED ? + TYPEC_CC_RD : tcpm_rp_cc(port)); tcpm_set_state(port, PORT_RESET_WAIT_OFF, PD_T_ERROR_RECOVERY); break; diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index f2ed7167c848..4b2fcb228a0a 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -141,10 +141,8 @@ struct vhost_net { unsigned tx_zcopy_err; /* Flush in progress. Protected by tx vq lock. */ bool tx_flush; - /* Private page frag */ - struct page_frag page_frag; - /* Refcount bias of page frag */ - int refcnt_bias; + /* Private page frag cache */ + struct page_frag_cache pf_cache; }; static unsigned vhost_net_zcopy_mask __read_mostly; @@ -655,41 +653,6 @@ static bool tx_can_batch(struct vhost_virtqueue *vq, size_t total_len) !vhost_vq_avail_empty(vq->dev, vq); } -static bool vhost_net_page_frag_refill(struct vhost_net *net, unsigned int sz, - struct page_frag *pfrag, gfp_t gfp) -{ - if (pfrag->page) { - if (pfrag->offset + sz <= pfrag->size) - return true; - __page_frag_cache_drain(pfrag->page, net->refcnt_bias); - } - - pfrag->offset = 0; - net->refcnt_bias = 0; - if (SKB_FRAG_PAGE_ORDER) { - /* Avoid direct reclaim but allow kswapd to wake */ - pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) | - __GFP_COMP | __GFP_NOWARN | - __GFP_NORETRY, - SKB_FRAG_PAGE_ORDER); - if (likely(pfrag->page)) { - pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER; - goto done; - } - } - pfrag->page = alloc_page(gfp); - if (likely(pfrag->page)) { - pfrag->size = PAGE_SIZE; - goto done; - } - return false; - -done: - net->refcnt_bias = USHRT_MAX; - page_ref_add(pfrag->page, USHRT_MAX - 1); - return true; -} - #define VHOST_NET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, @@ -699,7 +662,6 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev); struct socket *sock = vhost_vq_get_backend(vq); - struct page_frag *alloc_frag = &net->page_frag; struct virtio_net_hdr *gso; struct xdp_buff *xdp = &nvq->xdp[nvq->batched_xdp]; struct tun_xdp_hdr *hdr; @@ -710,6 +672,7 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, int sock_hlen = nvq->sock_hlen; void *buf; int copied; + int ret; if (unlikely(len < nvq->sock_hlen)) return -EFAULT; @@ -719,18 +682,17 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, return -ENOSPC; buflen += SKB_DATA_ALIGN(len + pad); - alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES); - if (unlikely(!vhost_net_page_frag_refill(net, buflen, - alloc_frag, GFP_KERNEL))) + buf = page_frag_alloc_align(&net->pf_cache, buflen, GFP_KERNEL, + SMP_CACHE_BYTES); + if (unlikely(!buf)) return -ENOMEM; - buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset; - copied = copy_page_from_iter(alloc_frag->page, - alloc_frag->offset + - offsetof(struct tun_xdp_hdr, gso), - sock_hlen, from); - if (copied != sock_hlen) - return -EFAULT; + copied = copy_from_iter(buf + offsetof(struct tun_xdp_hdr, gso), + sock_hlen, from); + if (copied != sock_hlen) { + ret = -EFAULT; + goto err; + } hdr = buf; gso = &hdr->gso; @@ -743,27 +705,30 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, vhost16_to_cpu(vq, gso->csum_start) + vhost16_to_cpu(vq, gso->csum_offset) + 2); - if (vhost16_to_cpu(vq, gso->hdr_len) > len) - return -EINVAL; + if (vhost16_to_cpu(vq, gso->hdr_len) > len) { + ret = -EINVAL; + goto err; + } } len -= sock_hlen; - copied = copy_page_from_iter(alloc_frag->page, - alloc_frag->offset + pad, - len, from); - if (copied != len) - return -EFAULT; + copied = copy_from_iter(buf + pad, len, from); + if (copied != len) { + ret = -EFAULT; + goto err; + } xdp_init_buff(xdp, buflen, NULL); xdp_prepare_buff(xdp, buf, pad, len, true); hdr->buflen = buflen; - --net->refcnt_bias; - alloc_frag->offset += buflen; - ++nvq->batched_xdp; return 0; + +err: + page_frag_free(buf); + return ret; } static void handle_tx_copy(struct vhost_net *net, struct socket *sock) @@ -1353,8 +1318,7 @@ static int vhost_net_open(struct inode *inode, struct file *f) vqs[VHOST_NET_VQ_RX]); f->private_data = n; - n->page_frag.page = NULL; - n->refcnt_bias = 0; + n->pf_cache.va = NULL; return 0; } @@ -1422,8 +1386,7 @@ static int vhost_net_release(struct inode *inode, struct file *f) kfree(n->vqs[VHOST_NET_VQ_RX].rxq.queue); kfree(n->vqs[VHOST_NET_VQ_TX].xdp); kfree(n->dev.vqs); - if (n->page_frag.page) - __page_frag_cache_drain(n->page_frag.page, n->refcnt_bias); + page_frag_cache_drain(&n->pf_cache); kvfree(n); return 0; } diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 1183e7a871f8..46823c2e2ba1 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -2399,11 +2399,9 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, struct fbcon_ops *ops = info->fbcon_par; struct fbcon_display *p = &fb_display[vc->vc_num]; int resize, ret, old_userfont, old_width, old_height, old_charcount; - char *old_data = NULL; + u8 *old_data = vc->vc_font.data; resize = (w != vc->vc_font.width) || (h != vc->vc_font.height); - if (p->userfont) - old_data = vc->vc_font.data; vc->vc_font.data = (void *)(p->fontdata = data); old_userfont = p->userfont; if ((p->userfont = userfont)) @@ -2437,13 +2435,13 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, update_screen(vc); } - if (old_data && (--REFCOUNT(old_data) == 0)) + if (old_userfont && (--REFCOUNT(old_data) == 0)) kfree(old_data - FONT_EXTRA_WORDS * sizeof(int)); return 0; err_out: p->fontdata = old_data; - vc->vc_font.data = (void *)old_data; + vc->vc_font.data = old_data; if (userfont) { p->userfont = old_userfont; diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index c26ee6fd73c9..8fdccf033b2d 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -1010,8 +1010,6 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) goto getmem_done; } pr_info("Unable to allocate enough contiguous physical memory on Gen 1 VM. Using MMIO instead.\n"); - } else { - goto err1; } /* diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 60685ec76d98..2e612834329a 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -105,6 +105,7 @@ struct affs_sb_info { int work_queued; /* non-zero delayed work is queued */ struct delayed_work sb_work; /* superblock flush delayed work */ spinlock_t work_lock; /* protects sb_work and work_queued */ + struct rcu_head rcu; }; #define AFFS_MOUNT_SF_INTL 0x0001 /* International filesystem. */ diff --git a/fs/affs/super.c b/fs/affs/super.c index 58b391446ae1..b56a95cf414a 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -640,7 +640,7 @@ static void affs_kill_sb(struct super_block *sb) affs_brelse(sbi->s_root_bh); kfree(sbi->s_prefix); mutex_destroy(&sbi->s_bmlock); - kfree(sbi); + kfree_rcu(sbi, rcu); } } diff --git a/fs/afs/dir.c b/fs/afs/dir.c index b5b8de521f99..8a67fc427e74 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -479,8 +479,10 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode, dire->u.name[0] == '.' && ctx->actor != afs_lookup_filldir && ctx->actor != afs_lookup_one_filldir && - memcmp(dire->u.name, ".__afs", 6) == 0) + memcmp(dire->u.name, ".__afs", 6) == 0) { + ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent); continue; + } /* found the next entry */ if (!dir_emit(ctx, dire->u.name, nlen, diff --git a/fs/afs/file.c b/fs/afs/file.c index 3d33b221d9ca..ef2cc8f565d2 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -417,13 +417,17 @@ static void afs_add_open_mmap(struct afs_vnode *vnode) static void afs_drop_open_mmap(struct afs_vnode *vnode) { - if (!atomic_dec_and_test(&vnode->cb_nr_mmap)) + if (atomic_add_unless(&vnode->cb_nr_mmap, -1, 1)) return; down_write(&vnode->volume->open_mmaps_lock); - if (atomic_read(&vnode->cb_nr_mmap) == 0) + read_seqlock_excl(&vnode->cb_lock); + // the only place where ->cb_nr_mmap may hit 0 + // see __afs_break_callback() for the other side... + if (atomic_dec_and_test(&vnode->cb_nr_mmap)) list_del_init(&vnode->cb_mmap_link); + read_sequnlock_excl(&vnode->cb_lock); up_write(&vnode->volume->open_mmaps_lock); flush_work(&vnode->cb_work); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 9c03fcf7ffaa..6ce5a612937c 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -321,8 +321,7 @@ struct afs_net { struct list_head fs_probe_slow; /* List of afs_server to probe at 5m intervals */ struct hlist_head fs_proc; /* procfs servers list */ - struct hlist_head fs_addresses4; /* afs_server (by lowest IPv4 addr) */ - struct hlist_head fs_addresses6; /* afs_server (by lowest IPv6 addr) */ + struct hlist_head fs_addresses; /* afs_server (by lowest IPv6 addr) */ seqlock_t fs_addr_lock; /* For fs_addresses[46] */ struct work_struct fs_manager; @@ -561,8 +560,7 @@ struct afs_server { struct afs_server __rcu *uuid_next; /* Next server with same UUID */ struct afs_server *uuid_prev; /* Previous server with same UUID */ struct list_head probe_link; /* Link in net->fs_probe_list */ - struct hlist_node addr4_link; /* Link in net->fs_addresses4 */ - struct hlist_node addr6_link; /* Link in net->fs_addresses6 */ + struct hlist_node addr_link; /* Link in net->fs_addresses6 */ struct hlist_node proc_link; /* Link in net->fs_proc */ struct list_head volumes; /* RCU list of afs_server_entry objects */ struct afs_server *gc_next; /* Next server in manager's list */ diff --git a/fs/afs/main.c b/fs/afs/main.c index 1b3bd21c168a..a14f6013e316 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -90,8 +90,7 @@ static int __net_init afs_net_init(struct net *net_ns) INIT_LIST_HEAD(&net->fs_probe_slow); INIT_HLIST_HEAD(&net->fs_proc); - INIT_HLIST_HEAD(&net->fs_addresses4); - INIT_HLIST_HEAD(&net->fs_addresses6); + INIT_HLIST_HEAD(&net->fs_addresses); seqlock_init(&net->fs_addr_lock); INIT_WORK(&net->fs_manager, afs_manage_servers); diff --git a/fs/afs/server.c b/fs/afs/server.c index e169121f603e..038f9d0ae3af 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -38,7 +38,7 @@ struct afs_server *afs_find_server(struct afs_net *net, const struct rxrpc_peer seq++; /* 2 on the 1st/lockless path, otherwise odd */ read_seqbegin_or_lock(&net->fs_addr_lock, &seq); - hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) { + hlist_for_each_entry_rcu(server, &net->fs_addresses, addr_link) { estate = rcu_dereference(server->endpoint_state); alist = estate->addresses; for (i = 0; i < alist->nr_addrs; i++) @@ -177,10 +177,8 @@ added_dup: * bit, but anything we might want to do gets messy and memory * intensive. */ - if (alist->nr_ipv4 > 0) - hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4); - if (alist->nr_addrs > alist->nr_ipv4) - hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6); + if (alist->nr_addrs > 0) + hlist_add_head_rcu(&server->addr_link, &net->fs_addresses); write_sequnlock(&net->fs_addr_lock); @@ -511,10 +509,8 @@ static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list) list_del(&server->probe_link); hlist_del_rcu(&server->proc_link); - if (!hlist_unhashed(&server->addr4_link)) - hlist_del_rcu(&server->addr4_link); - if (!hlist_unhashed(&server->addr6_link)) - hlist_del_rcu(&server->addr6_link); + if (!hlist_unhashed(&server->addr_link)) + hlist_del_rcu(&server->addr_link); } write_sequnlock(&net->fs_lock); diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 020ecd45e476..af3a3f57c1b3 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -353,7 +353,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key) { struct afs_server_list *new, *old, *discard; struct afs_vldb_entry *vldb; - char idbuf[16]; + char idbuf[24]; int ret, idsz; _enter(""); @@ -361,7 +361,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key) /* We look up an ID by passing it as a decimal string in the * operation's name parameter. */ - idsz = sprintf(idbuf, "%llu", volume->vid); + idsz = snprintf(idbuf, sizeof(idbuf), "%llu", volume->vid); vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz); if (IS_ERR(vldb)) { @@ -589,13 +589,24 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events) void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel) { - struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw); - struct kioctx *ctx = req->ki_ctx; + struct aio_kiocb *req; + struct kioctx *ctx; unsigned long flags; + /* + * kiocb didn't come from aio or is neither a read nor a write, hence + * ignore it. + */ + if (!(iocb->ki_flags & IOCB_AIO_RW)) + return; + + req = container_of(iocb, struct aio_kiocb, rw); + if (WARN_ON_ONCE(!list_empty(&req->ki_list))) return; + ctx = req->ki_ctx; + spin_lock_irqsave(&ctx->ctx_lock, flags); list_add_tail(&req->ki_list, &ctx->active_reqs); req->ki_cancel = cancel; @@ -1509,7 +1520,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb) req->ki_complete = aio_complete_rw; req->private = NULL; req->ki_pos = iocb->aio_offset; - req->ki_flags = req->ki_filp->f_iocb_flags; + req->ki_flags = req->ki_filp->f_iocb_flags | IOCB_AIO_RW; if (iocb->aio_flags & IOCB_FLAG_RESFD) req->ki_flags |= IOCB_EVENTFD; if (iocb->aio_flags & IOCB_FLAG_IOPRIO) { diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index b4dc319bcb2b..569b97904da4 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -68,9 +68,11 @@ void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { - prt_str(out, "bucket="); - bch2_bpos_to_text(out, bp_pos_to_bucket(c, k.k->p)); - prt_str(out, " "); + if (bch2_dev_exists2(c, k.k->p.inode)) { + prt_str(out, "bucket="); + bch2_bpos_to_text(out, bp_pos_to_bucket(c, k.k->p)); + prt_str(out, " "); + } bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v); } diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 5467a8635be1..3ef338df82f5 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2156,7 +2156,9 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e * isn't monotonically increasing before FILTER_SNAPSHOTS, and * that's what we check against in extents mode: */ - if (k.k->p.inode > end.inode) + if (unlikely(!(iter->flags & BTREE_ITER_IS_EXTENTS) + ? bkey_gt(k.k->p, end) + : k.k->p.inode > end.inode)) goto end; if (iter->update_path && diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index 73c12e565af5..27710cdd5710 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -303,18 +303,6 @@ void bch2_readahead(struct readahead_control *ractl) darray_exit(&readpages_iter.folios); } -static void __bchfs_readfolio(struct bch_fs *c, struct bch_read_bio *rbio, - subvol_inum inum, struct folio *folio) -{ - bch2_folio_create(folio, __GFP_NOFAIL); - - rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC; - rbio->bio.bi_iter.bi_sector = folio_sector(folio); - BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0)); - - bch2_trans_run(c, (bchfs_read(trans, rbio, inum, NULL), 0)); -} - static void bch2_read_single_folio_end_io(struct bio *bio) { complete(bio->bi_private); @@ -329,6 +317,9 @@ int bch2_read_single_folio(struct folio *folio, struct address_space *mapping) int ret; DECLARE_COMPLETION_ONSTACK(done); + if (!bch2_folio_create(folio, GFP_KERNEL)) + return -ENOMEM; + bch2_inode_opts_get(&opts, c, &inode->ei_inode); rbio = rbio_init(bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_KERNEL, &c->bio_read), @@ -336,7 +327,11 @@ int bch2_read_single_folio(struct folio *folio, struct address_space *mapping) rbio->bio.bi_private = &done; rbio->bio.bi_end_io = bch2_read_single_folio_end_io; - __bchfs_readfolio(c, rbio, inode_inum(inode), folio); + rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC; + rbio->bio.bi_iter.bi_sector = folio_sector(folio); + BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0)); + + bch2_trans_run(c, (bchfs_read(trans, rbio, inode_inum(inode), NULL), 0)); wait_for_completion(&done); ret = blk_status_to_errno(rbio->bio.bi_status); diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c index e3b219e19e10..33cb6da3a5ad 100644 --- a/fs/bcachefs/fs-io-direct.c +++ b/fs/bcachefs/fs-io-direct.c @@ -88,6 +88,8 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) return ret; shorten = iov_iter_count(iter) - round_up(ret, block_bytes(c)); + if (shorten >= iter->count) + shorten = 0; iter->count -= shorten; bio = bio_alloc_bioset(NULL, diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index 2cf626315652..c33dca641575 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -892,9 +892,11 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx) journal_seq_pin(j, seq)->devs); seq++; - spin_unlock(&j->lock); - ret = bch2_mark_replicas(c, &replicas.e); - spin_lock(&j->lock); + if (replicas.e.nr_devs) { + spin_unlock(&j->lock); + ret = bch2_mark_replicas(c, &replicas.e); + spin_lock(&j->lock); + } } spin_unlock(&j->lock); err: diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 45f67e8b29eb..ac6ba04d5521 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -728,7 +728,7 @@ static int check_snapshot(struct btree_trans *trans, return 0; memset(&s, 0, sizeof(s)); - memcpy(&s, k.v, bkey_val_bytes(k.k)); + memcpy(&s, k.v, min(sizeof(s), bkey_val_bytes(k.k))); id = le32_to_cpu(s.parent); if (id) { diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 231003b405ef..3a32faa86b5c 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -289,7 +289,7 @@ int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigne do { nr_entries = stack_trace_save_tsk(task, stack->data, stack->size, skipnr + 1); } while (nr_entries == stack->size && - !(ret = darray_make_room(stack, stack->size * 2))); + !(ret = darray_make_room_gfp(stack, stack->size * 2, gfp))); stack->nr = nr_entries; up_read(&task->signal->exec_update_lock); diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c index ceb5f586a2d5..1043a8142351 100644 --- a/fs/btrfs/block-rsv.c +++ b/fs/btrfs/block-rsv.c @@ -494,7 +494,7 @@ struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans, block_rsv = get_block_rsv(trans, root); - if (unlikely(block_rsv->size == 0)) + if (unlikely(btrfs_block_rsv_size(block_rsv) == 0)) goto try_reserve; again: ret = btrfs_block_rsv_use_bytes(block_rsv, blocksize); diff --git a/fs/btrfs/block-rsv.h b/fs/btrfs/block-rsv.h index b0bd12b8652f..43a9a6b5a79f 100644 --- a/fs/btrfs/block-rsv.h +++ b/fs/btrfs/block-rsv.h @@ -101,4 +101,36 @@ static inline bool btrfs_block_rsv_full(const struct btrfs_block_rsv *rsv) return data_race(rsv->full); } +/* + * Get the reserved mount of a block reserve in a context where getting a stale + * value is acceptable, instead of accessing it directly and trigger data race + * warning from KCSAN. + */ +static inline u64 btrfs_block_rsv_reserved(struct btrfs_block_rsv *rsv) +{ + u64 ret; + + spin_lock(&rsv->lock); + ret = rsv->reserved; + spin_unlock(&rsv->lock); + + return ret; +} + +/* + * Get the size of a block reserve in a context where getting a stale value is + * acceptable, instead of accessing it directly and trigger data race warning + * from KCSAN. + */ +static inline u64 btrfs_block_rsv_size(struct btrfs_block_rsv *rsv) +{ + u64 ret; + + spin_lock(&rsv->lock); + ret = rsv->size; + spin_unlock(&rsv->lock); + + return ret; +} + #endif /* BTRFS_BLOCK_RSV_H */ diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 1502d664c892..79c4293ddf37 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -725,6 +725,23 @@ leave: return ret; } +static int btrfs_check_replace_dev_names(struct btrfs_ioctl_dev_replace_args *args) +{ + if (args->start.srcdevid == 0) { + if (memchr(args->start.srcdev_name, 0, + sizeof(args->start.srcdev_name)) == NULL) + return -ENAMETOOLONG; + } else { + args->start.srcdev_name[0] = 0; + } + + if (memchr(args->start.tgtdev_name, 0, + sizeof(args->start.tgtdev_name)) == NULL) + return -ENAMETOOLONG; + + return 0; +} + int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_dev_replace_args *args) { @@ -737,10 +754,9 @@ int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info, default: return -EINVAL; } - - if ((args->start.srcdevid == 0 && args->start.srcdev_name[0] == '\0') || - args->start.tgtdev_name[0] == '\0') - return -EINVAL; + ret = btrfs_check_replace_dev_names(args); + if (ret < 0) + return ret; ret = btrfs_dev_replace_start(fs_info, args->start.tgtdev_name, args->start.srcdevid, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e71ef97d0a7c..c843563914ca 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1307,12 +1307,12 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info) * * @objectid: root id * @anon_dev: preallocated anonymous block device number for new roots, - * pass 0 for new allocation. + * pass NULL for a new allocation. * @check_ref: whether to check root item references, If true, return -ENOENT * for orphan roots */ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, - u64 objectid, dev_t anon_dev, + u64 objectid, dev_t *anon_dev, bool check_ref) { struct btrfs_root *root; @@ -1342,9 +1342,9 @@ again: * that common but still possible. In that case, we just need * to free the anon_dev. */ - if (unlikely(anon_dev)) { - free_anon_bdev(anon_dev); - anon_dev = 0; + if (unlikely(anon_dev && *anon_dev)) { + free_anon_bdev(*anon_dev); + *anon_dev = 0; } if (check_ref && btrfs_root_refs(&root->root_item) == 0) { @@ -1366,7 +1366,7 @@ again: goto fail; } - ret = btrfs_init_fs_root(root, anon_dev); + ret = btrfs_init_fs_root(root, anon_dev ? *anon_dev : 0); if (ret) goto fail; @@ -1402,7 +1402,7 @@ fail: * root's anon_dev to 0 to avoid a double free, once by btrfs_put_root() * and once again by our caller. */ - if (anon_dev) + if (anon_dev && *anon_dev) root->anon_dev = 0; btrfs_put_root(root); return ERR_PTR(ret); @@ -1418,7 +1418,7 @@ fail: struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, u64 objectid, bool check_ref) { - return btrfs_get_root_ref(fs_info, objectid, 0, check_ref); + return btrfs_get_root_ref(fs_info, objectid, NULL, check_ref); } /* @@ -1426,11 +1426,11 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, * the anonymous block device id * * @objectid: tree objectid - * @anon_dev: if zero, allocate a new anonymous block device or use the - * parameter value + * @anon_dev: if NULL, allocate a new anonymous block device or use the + * parameter value if not NULL */ struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info, - u64 objectid, dev_t anon_dev) + u64 objectid, dev_t *anon_dev) { return btrfs_get_root_ref(fs_info, objectid, anon_dev, true); } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 9413726b329b..eb3473d1c1ac 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -61,7 +61,7 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info); struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, u64 objectid, bool check_ref); struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info, - u64 objectid, dev_t anon_dev); + u64 objectid, dev_t *anon_dev); struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 objectid); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3e19a2475ab3..8b4bef05e222 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2480,6 +2480,7 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo, struct fiemap_cache *cache, u64 offset, u64 phys, u64 len, u32 flags) { + u64 cache_end; int ret = 0; /* Set at the end of extent_fiemap(). */ @@ -2489,15 +2490,102 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo, goto assign; /* - * Sanity check, extent_fiemap() should have ensured that new - * fiemap extent won't overlap with cached one. - * Not recoverable. + * When iterating the extents of the inode, at extent_fiemap(), we may + * find an extent that starts at an offset behind the end offset of the + * previous extent we processed. This happens if fiemap is called + * without FIEMAP_FLAG_SYNC and there are ordered extents completing + * while we call btrfs_next_leaf() (through fiemap_next_leaf_item()). * - * NOTE: Physical address can overlap, due to compression + * For example we are in leaf X processing its last item, which is the + * file extent item for file range [512K, 1M[, and after + * btrfs_next_leaf() releases the path, there's an ordered extent that + * completes for the file range [768K, 2M[, and that results in trimming + * the file extent item so that it now corresponds to the file range + * [512K, 768K[ and a new file extent item is inserted for the file + * range [768K, 2M[, which may end up as the last item of leaf X or as + * the first item of the next leaf - in either case btrfs_next_leaf() + * will leave us with a path pointing to the new extent item, for the + * file range [768K, 2M[, since that's the first key that follows the + * last one we processed. So in order not to report overlapping extents + * to user space, we trim the length of the previously cached extent and + * emit it. + * + * Upon calling btrfs_next_leaf() we may also find an extent with an + * offset smaller than or equals to cache->offset, and this happens + * when we had a hole or prealloc extent with several delalloc ranges in + * it, but after btrfs_next_leaf() released the path, delalloc was + * flushed and the resulting ordered extents were completed, so we can + * now have found a file extent item for an offset that is smaller than + * or equals to what we have in cache->offset. We deal with this as + * described below. */ - if (cache->offset + cache->len > offset) { - WARN_ON(1); - return -EINVAL; + cache_end = cache->offset + cache->len; + if (cache_end > offset) { + if (offset == cache->offset) { + /* + * We cached a dealloc range (found in the io tree) for + * a hole or prealloc extent and we have now found a + * file extent item for the same offset. What we have + * now is more recent and up to date, so discard what + * we had in the cache and use what we have just found. + */ + goto assign; + } else if (offset > cache->offset) { + /* + * The extent range we previously found ends after the + * offset of the file extent item we found and that + * offset falls somewhere in the middle of that previous + * extent range. So adjust the range we previously found + * to end at the offset of the file extent item we have + * just found, since this extent is more up to date. + * Emit that adjusted range and cache the file extent + * item we have just found. This corresponds to the case + * where a previously found file extent item was split + * due to an ordered extent completing. + */ + cache->len = offset - cache->offset; + goto emit; + } else { + const u64 range_end = offset + len; + + /* + * The offset of the file extent item we have just found + * is behind the cached offset. This means we were + * processing a hole or prealloc extent for which we + * have found delalloc ranges (in the io tree), so what + * we have in the cache is the last delalloc range we + * found while the file extent item we found can be + * either for a whole delalloc range we previously + * emmitted or only a part of that range. + * + * We have two cases here: + * + * 1) The file extent item's range ends at or behind the + * cached extent's end. In this case just ignore the + * current file extent item because we don't want to + * overlap with previous ranges that may have been + * emmitted already; + * + * 2) The file extent item starts behind the currently + * cached extent but its end offset goes beyond the + * end offset of the cached extent. We don't want to + * overlap with a previous range that may have been + * emmitted already, so we emit the currently cached + * extent and then partially store the current file + * extent item's range in the cache, for the subrange + * going the cached extent's end to the end of the + * file extent item. + */ + if (range_end <= cache_end) + return 0; + + if (!(flags & (FIEMAP_EXTENT_ENCODED | FIEMAP_EXTENT_DELALLOC))) + phys += cache_end - offset; + + offset = cache_end; + len = range_end - cache_end; + goto emit; + } } /* @@ -2517,6 +2605,7 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo, return 0; } +emit: /* Not mergeable, need to submit cached one */ ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys, cache->len, cache->flags); @@ -2907,17 +2996,15 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, range_end = round_up(start + len, sectorsize); prev_extent_end = range_start; - btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED); - ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end); if (ret < 0) - goto out_unlock; + goto out; btrfs_release_path(path); path->reada = READA_FORWARD; ret = fiemap_search_slot(inode, path, range_start); if (ret < 0) { - goto out_unlock; + goto out; } else if (ret > 0) { /* * No file extent item found, but we may have delalloc between @@ -2964,7 +3051,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, backref_ctx, 0, 0, 0, prev_extent_end, hole_end); if (ret < 0) { - goto out_unlock; + goto out; } else if (ret > 0) { /* fiemap_fill_next_extent() told us to stop. */ stopped = true; @@ -3020,7 +3107,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, extent_gen, backref_ctx); if (ret < 0) - goto out_unlock; + goto out; else if (ret > 0) flags |= FIEMAP_EXTENT_SHARED; } @@ -3031,7 +3118,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, } if (ret < 0) { - goto out_unlock; + goto out; } else if (ret > 0) { /* fiemap_fill_next_extent() told us to stop. */ stopped = true; @@ -3042,12 +3129,12 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, next_item: if (fatal_signal_pending(current)) { ret = -EINTR; - goto out_unlock; + goto out; } ret = fiemap_next_leaf_item(inode, path); if (ret < 0) { - goto out_unlock; + goto out; } else if (ret > 0) { /* No more file extent items for this inode. */ break; @@ -3071,7 +3158,7 @@ check_eof_delalloc: &delalloc_cached_state, backref_ctx, 0, 0, 0, prev_extent_end, range_end - 1); if (ret < 0) - goto out_unlock; + goto out; prev_extent_end = range_end; } @@ -3109,9 +3196,6 @@ check_eof_delalloc: } ret = emit_last_fiemap_cache(fieinfo, &cache); - -out_unlock: - btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); out: free_extent_state(delalloc_cached_state); btrfs_free_backref_share_ctx(backref_ctx); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f88e0ca8331d..4795738d5785 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7835,6 +7835,7 @@ struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter, static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { + struct btrfs_inode *btrfs_inode = BTRFS_I(inode); int ret; ret = fiemap_prep(inode, fieinfo, start, &len, 0); @@ -7860,7 +7861,26 @@ static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, return ret; } - return extent_fiemap(BTRFS_I(inode), fieinfo, start, len); + btrfs_inode_lock(btrfs_inode, BTRFS_ILOCK_SHARED); + + /* + * We did an initial flush to avoid holding the inode's lock while + * triggering writeback and waiting for the completion of IO and ordered + * extents. Now after we locked the inode we do it again, because it's + * possible a new write may have happened in between those two steps. + */ + if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) { + ret = btrfs_wait_ordered_range(inode, 0, LLONG_MAX); + if (ret) { + btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED); + return ret; + } + } + + ret = extent_fiemap(btrfs_inode, fieinfo, start, len); + btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED); + + return ret; } static int btrfs_writepages(struct address_space *mapping, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ac3316e0d11c..9d1eac15e09e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -721,7 +721,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap, free_extent_buffer(leaf); leaf = NULL; - new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev); + new_root = btrfs_get_new_fs_root(fs_info, objectid, &anon_dev); if (IS_ERR(new_root)) { ret = PTR_ERR(new_root); btrfs_abort_transaction(trans, ret); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 7902298c1f25..e48a063ef085 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -6705,11 +6705,20 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) if (ret) goto out; } - if (sctx->cur_inode_last_extent < - sctx->cur_inode_size) { - ret = send_hole(sctx, sctx->cur_inode_size); - if (ret) + if (sctx->cur_inode_last_extent < sctx->cur_inode_size) { + ret = range_is_hole_in_parent(sctx, + sctx->cur_inode_last_extent, + sctx->cur_inode_size); + if (ret < 0) { goto out; + } else if (ret == 0) { + ret = send_hole(sctx, sctx->cur_inode_size); + if (ret < 0) + goto out; + } else { + /* Range is already a hole, skip. */ + ret = 0; + } } } if (need_truncate) { diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 571bb13587d5..3b54eb583474 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -856,7 +856,7 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info, struct btrfs_space_info *space_info) { - u64 global_rsv_size = fs_info->global_block_rsv.reserved; + const u64 global_rsv_size = btrfs_block_rsv_reserved(&fs_info->global_block_rsv); u64 ordered, delalloc; u64 thresh; u64 used; @@ -956,8 +956,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info, ordered = percpu_counter_read_positive(&fs_info->ordered_bytes) >> 1; delalloc = percpu_counter_read_positive(&fs_info->delalloc_bytes); if (ordered >= delalloc) - used += fs_info->delayed_refs_rsv.reserved + - fs_info->delayed_block_rsv.reserved; + used += btrfs_block_rsv_reserved(&fs_info->delayed_refs_rsv) + + btrfs_block_rsv_reserved(&fs_info->delayed_block_rsv); else used += space_info->bytes_may_use - global_rsv_size; @@ -1173,7 +1173,7 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work) enum btrfs_flush_state flush; u64 delalloc_size = 0; u64 to_reclaim, block_rsv_size; - u64 global_rsv_size = global_rsv->reserved; + const u64 global_rsv_size = btrfs_block_rsv_reserved(global_rsv); loops++; @@ -1185,9 +1185,9 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work) * assume it's tied up in delalloc reservations. */ block_rsv_size = global_rsv_size + - delayed_block_rsv->reserved + - delayed_refs_rsv->reserved + - trans_rsv->reserved; + btrfs_block_rsv_reserved(delayed_block_rsv) + + btrfs_block_rsv_reserved(delayed_refs_rsv) + + btrfs_block_rsv_reserved(trans_rsv); if (block_rsv_size < space_info->bytes_may_use) delalloc_size = space_info->bytes_may_use - block_rsv_size; @@ -1207,16 +1207,16 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work) to_reclaim = delalloc_size; flush = FLUSH_DELALLOC; } else if (space_info->bytes_pinned > - (delayed_block_rsv->reserved + - delayed_refs_rsv->reserved)) { + (btrfs_block_rsv_reserved(delayed_block_rsv) + + btrfs_block_rsv_reserved(delayed_refs_rsv))) { to_reclaim = space_info->bytes_pinned; flush = COMMIT_TRANS; - } else if (delayed_block_rsv->reserved > - delayed_refs_rsv->reserved) { - to_reclaim = delayed_block_rsv->reserved; + } else if (btrfs_block_rsv_reserved(delayed_block_rsv) > + btrfs_block_rsv_reserved(delayed_refs_rsv)) { + to_reclaim = btrfs_block_rsv_reserved(delayed_block_rsv); flush = FLUSH_DELAYED_ITEMS_NR; } else { - to_reclaim = delayed_refs_rsv->reserved; + to_reclaim = btrfs_block_rsv_reserved(delayed_refs_rsv); flush = FLUSH_DELAYED_REFS_NR; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index c52807d97efa..bf8e64c766b6 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1834,7 +1834,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, } key.offset = (u64)-1; - pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev); + pending->snap = btrfs_get_new_fs_root(fs_info, objectid, &pending->anon_dev); if (IS_ERR(pending->snap)) { ret = PTR_ERR(pending->snap); pending->snap = NULL; diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 3a5d69ff25fc..5f750fa53a2b 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1639,6 +1639,15 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) } out: + /* Reject non SINGLE data profiles without RST */ + if ((map->type & BTRFS_BLOCK_GROUP_DATA) && + (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) && + !fs_info->stripe_root) { + btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", + btrfs_bg_type_to_raid_name(map->type)); + return -EINVAL; + } + if (cache->alloc_offset > cache->zone_capacity) { btrfs_err(fs_info, "zoned: invalid write pointer %llu (larger than zone capacity %llu) in block group %llu", diff --git a/fs/cachefiles/cache.c b/fs/cachefiles/cache.c index 7077f72e6f47..f449f7340aad 100644 --- a/fs/cachefiles/cache.c +++ b/fs/cachefiles/cache.c @@ -168,6 +168,8 @@ error_unsupported: dput(root); error_open_root: cachefiles_end_secure(cache, saved_cred); + put_cred(cache->cache_cred); + cache->cache_cred = NULL; error_getsec: fscache_relinquish_cache(cache_cookie); cache->cache = NULL; diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index 3f24905f4066..6465e2574230 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c @@ -816,6 +816,7 @@ static void cachefiles_daemon_unbind(struct cachefiles_cache *cache) cachefiles_put_directory(cache->graveyard); cachefiles_put_directory(cache->store); mntput(cache->mnt); + put_cred(cache->cache_cred); kfree(cache->rootdirname); kfree(cache->secctx); diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index fae97c25ce58..8109aba66e02 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -380,10 +380,11 @@ struct ceph_mdsmap *ceph_mdsmap_decode(struct ceph_mds_client *mdsc, void **p, ceph_decode_skip_8(p, end, bad_ext); /* required_client_features */ ceph_decode_skip_set(p, end, 64, bad_ext); + /* bal_rank_mask */ + ceph_decode_skip_string(p, end, bad_ext); + } + if (mdsmap_ev >= 18) { ceph_decode_64_safe(p, end, m->m_max_xattr_size, bad_ext); - } else { - /* This forces the usage of the (sync) SETXATTR Op */ - m->m_max_xattr_size = 0; } bad_ext: doutc(cl, "m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n", diff --git a/fs/ceph/mdsmap.h b/fs/ceph/mdsmap.h index 89f1931f1ba6..1f2171dd01bf 100644 --- a/fs/ceph/mdsmap.h +++ b/fs/ceph/mdsmap.h @@ -27,7 +27,11 @@ struct ceph_mdsmap { u32 m_session_timeout; /* seconds */ u32 m_session_autoclose; /* seconds */ u64 m_max_file_size; - u64 m_max_xattr_size; /* maximum size for xattrs blob */ + /* + * maximum size for xattrs blob. + * Zeroed by default to force the usage of the (sync) SETXATTR Op. + */ + u64 m_max_xattr_size; u32 m_max_mds; /* expected up:active mds number */ u32 m_num_active_mds; /* actual up:active mds number */ u32 possible_max_rank; /* possible max rank index */ diff --git a/fs/coredump.c b/fs/coredump.c index f258c17c1841..be6403b4b14b 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -872,6 +872,9 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page) loff_t pos; ssize_t n; + if (!page) + return 0; + if (cprm->to_skip) { if (!__dump_skip(cprm, cprm->to_skip)) return 0; @@ -884,7 +887,6 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page) pos = file->f_pos; bvec_set_page(&bvec, page, PAGE_SIZE, 0); iov_iter_bvec(&iter, ITER_SOURCE, &bvec, 1, PAGE_SIZE); - iov_iter_set_copy_mc(&iter); n = __kernel_write_iter(cprm->file, &iter, &pos); if (n != PAGE_SIZE) return 0; @@ -895,10 +897,44 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page) return 1; } +/* + * If we might get machine checks from kernel accesses during the + * core dump, let's get those errors early rather than during the + * IO. This is not performance-critical enough to warrant having + * all the machine check logic in the iovec paths. + */ +#ifdef copy_mc_to_kernel + +#define dump_page_alloc() alloc_page(GFP_KERNEL) +#define dump_page_free(x) __free_page(x) +static struct page *dump_page_copy(struct page *src, struct page *dst) +{ + void *buf = kmap_local_page(src); + size_t left = copy_mc_to_kernel(page_address(dst), buf, PAGE_SIZE); + kunmap_local(buf); + return left ? NULL : dst; +} + +#else + +/* We just want to return non-NULL; it's never used. */ +#define dump_page_alloc() ERR_PTR(-EINVAL) +#define dump_page_free(x) ((void)(x)) +static inline struct page *dump_page_copy(struct page *src, struct page *dst) +{ + return src; +} +#endif + int dump_user_range(struct coredump_params *cprm, unsigned long start, unsigned long len) { unsigned long addr; + struct page *dump_page; + + dump_page = dump_page_alloc(); + if (!dump_page) + return 0; for (addr = start; addr < start + len; addr += PAGE_SIZE) { struct page *page; @@ -912,14 +948,17 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start, */ page = get_dump_page(addr); if (page) { - int stop = !dump_emit_page(cprm, page); + int stop = !dump_emit_page(cprm, dump_page_copy(page, dump_page)); put_page(page); - if (stop) + if (stop) { + dump_page_free(dump_page); return 0; + } } else { dump_skip(cprm, PAGE_SIZE); } } + dump_page_free(dump_page); return 1; } #endif diff --git a/fs/dcache.c b/fs/dcache.c index b813528fb147..6ebccba33336 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -3061,7 +3061,10 @@ static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry) if (d_unhashed(dentry) || !dentry->d_inode) return D_WALK_SKIP; - dentry->d_lockref.count--; + if (!(dentry->d_flags & DCACHE_GENOCIDE)) { + dentry->d_flags |= DCACHE_GENOCIDE; + dentry->d_lockref.count--; + } } return D_WALK_CONTINUE; } diff --git a/fs/efivarfs/internal.h b/fs/efivarfs/internal.h index 169252e6dc46..f7206158ee81 100644 --- a/fs/efivarfs/internal.h +++ b/fs/efivarfs/internal.h @@ -38,7 +38,7 @@ struct efivar_entry { int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *, struct list_head *), - void *data, bool duplicates, struct list_head *head); + void *data, struct list_head *head); int efivar_entry_add(struct efivar_entry *entry, struct list_head *head); void __efivar_entry_add(struct efivar_entry *entry, struct list_head *head); diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 6038dd39367a..bb14462f6d99 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -343,12 +343,7 @@ static int efivarfs_fill_super(struct super_block *sb, struct fs_context *fc) if (err) return err; - err = efivar_init(efivarfs_callback, (void *)sb, true, - &sfi->efivarfs_list); - if (err) - efivar_entry_iter(efivarfs_destroy, &sfi->efivarfs_list, NULL); - - return err; + return efivar_init(efivarfs_callback, sb, &sfi->efivarfs_list); } static int efivarfs_get_tree(struct fs_context *fc) diff --git a/fs/efivarfs/vars.c b/fs/efivarfs/vars.c index 114ff0fd4e55..4d722af1014f 100644 --- a/fs/efivarfs/vars.c +++ b/fs/efivarfs/vars.c @@ -361,7 +361,6 @@ static void dup_variable_bug(efi_char16_t *str16, efi_guid_t *vendor_guid, * efivar_init - build the initial list of EFI variables * @func: callback function to invoke for every variable * @data: function-specific data to pass to @func - * @duplicates: error if we encounter duplicates on @head? * @head: initialised head of variable list * * Get every EFI variable from the firmware and invoke @func. @func @@ -371,9 +370,9 @@ static void dup_variable_bug(efi_char16_t *str16, efi_guid_t *vendor_guid, */ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *, struct list_head *), - void *data, bool duplicates, struct list_head *head) + void *data, struct list_head *head) { - unsigned long variable_name_size = 1024; + unsigned long variable_name_size = 512; efi_char16_t *variable_name; efi_status_t status; efi_guid_t vendor_guid; @@ -390,12 +389,13 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *, goto free; /* - * Per EFI spec, the maximum storage allocated for both - * the variable name and variable data is 1024 bytes. + * A small set of old UEFI implementations reject sizes + * above a certain threshold, the lowest seen in the wild + * is 512. */ do { - variable_name_size = 1024; + variable_name_size = 512; status = efivar_get_next_variable(&variable_name_size, variable_name, @@ -413,8 +413,7 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *, * we'll ever see a different variable name, * and may end up looping here forever. */ - if (duplicates && - variable_is_present(variable_name, &vendor_guid, + if (variable_is_present(variable_name, &vendor_guid, head)) { dup_variable_bug(variable_name, &vendor_guid, variable_name_size); @@ -432,9 +431,13 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *, break; case EFI_NOT_FOUND: break; + case EFI_BUFFER_TOO_SMALL: + pr_warn("efivars: Variable name size exceeds maximum (%lu > 512)\n", + variable_name_size); + status = EFI_NOT_FOUND; + break; default: - printk(KERN_WARNING "efivars: get_next_variable: status=%lx\n", - status); + pr_warn("efivars: get_next_variable: status=%lx\n", status); status = EFI_NOT_FOUND; break; } diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index 5ff90026fd43..89a7c2453aae 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -381,11 +381,12 @@ static int erofs_fscache_init_domain(struct super_block *sb) goto out; if (!erofs_pseudo_mnt) { - erofs_pseudo_mnt = kern_mount(&erofs_fs_type); - if (IS_ERR(erofs_pseudo_mnt)) { - err = PTR_ERR(erofs_pseudo_mnt); + struct vfsmount *mnt = kern_mount(&erofs_fs_type); + if (IS_ERR(mnt)) { + err = PTR_ERR(mnt); goto out; } + erofs_pseudo_mnt = mnt; } domain->volume = sbi->volume; diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c index d4f631d39f0f..f0110a78acb2 100644 --- a/fs/erofs/namei.c +++ b/fs/erofs/namei.c @@ -130,24 +130,24 @@ static void *erofs_find_target_block(struct erofs_buf *target, /* string comparison without already matched prefix */ diff = erofs_dirnamecmp(name, &dname, &matched); - if (!diff) { - *_ndirents = 0; - goto out; - } else if (diff > 0) { - head = mid + 1; - startprfx = matched; - - if (!IS_ERR(candidate)) - erofs_put_metabuf(target); - *target = buf; - candidate = de; - *_ndirents = ndirents; - } else { + if (diff < 0) { erofs_put_metabuf(&buf); - back = mid - 1; endprfx = matched; + continue; + } + + if (!IS_ERR(candidate)) + erofs_put_metabuf(target); + *target = buf; + if (!diff) { + *_ndirents = 0; + return de; } + head = mid + 1; + startprfx = matched; + candidate = de; + *_ndirents = ndirents; continue; } out: /* free if the candidate is valid */ diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 9474cd50da6d..361595433480 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -275,6 +275,7 @@ struct exfat_sb_info { spinlock_t inode_hash_lock; struct hlist_head inode_hashtable[EXFAT_HASH_SIZE]; + struct rcu_head rcu; }; #define EXFAT_CACHE_VALID 0 diff --git a/fs/exfat/file.c b/fs/exfat/file.c index d25a96a148af..cc00f1a7a1e1 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -35,13 +35,18 @@ static int exfat_cont_expand(struct inode *inode, loff_t size) if (new_num_clusters == num_clusters) goto out; - exfat_chain_set(&clu, ei->start_clu, num_clusters, ei->flags); - ret = exfat_find_last_cluster(sb, &clu, &last_clu); - if (ret) - return ret; + if (num_clusters) { + exfat_chain_set(&clu, ei->start_clu, num_clusters, ei->flags); + ret = exfat_find_last_cluster(sb, &clu, &last_clu); + if (ret) + return ret; + + clu.dir = last_clu + 1; + } else { + last_clu = EXFAT_EOF_CLUSTER; + clu.dir = EXFAT_EOF_CLUSTER; + } - clu.dir = (last_clu == EXFAT_EOF_CLUSTER) ? - EXFAT_EOF_CLUSTER : last_clu + 1; clu.size = 0; clu.flags = ei->flags; @@ -51,17 +56,19 @@ static int exfat_cont_expand(struct inode *inode, loff_t size) return ret; /* Append new clusters to chain */ - if (clu.flags != ei->flags) { - exfat_chain_cont_cluster(sb, ei->start_clu, num_clusters); - ei->flags = ALLOC_FAT_CHAIN; - } - if (clu.flags == ALLOC_FAT_CHAIN) - if (exfat_ent_set(sb, last_clu, clu.dir)) - goto free_clu; - - if (num_clusters == 0) + if (num_clusters) { + if (clu.flags != ei->flags) + if (exfat_chain_cont_cluster(sb, ei->start_clu, num_clusters)) + goto free_clu; + + if (clu.flags == ALLOC_FAT_CHAIN) + if (exfat_ent_set(sb, last_clu, clu.dir)) + goto free_clu; + } else ei->start_clu = clu.dir; + ei->flags = clu.flags; + out: inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); /* Expanded range not zeroed, do not update valid_size */ diff --git a/fs/exfat/nls.c b/fs/exfat/nls.c index 705710f93e2d..afdf13c34ff5 100644 --- a/fs/exfat/nls.c +++ b/fs/exfat/nls.c @@ -655,7 +655,6 @@ static int exfat_load_upcase_table(struct super_block *sb, unsigned int sect_size = sb->s_blocksize; unsigned int i, index = 0; u32 chksum = 0; - int ret; unsigned char skip = false; unsigned short *upcase_table; @@ -673,8 +672,7 @@ static int exfat_load_upcase_table(struct super_block *sb, if (!bh) { exfat_err(sb, "failed to read sector(0x%llx)", (unsigned long long)sector); - ret = -EIO; - goto free_table; + return -EIO; } sector++; for (i = 0; i < sect_size && index <= 0xFFFF; i += 2) { @@ -701,15 +699,12 @@ static int exfat_load_upcase_table(struct super_block *sb, exfat_err(sb, "failed to load upcase table (idx : 0x%08x, chksum : 0x%08x, utbl_chksum : 0x%08x)", index, chksum, utbl_checksum); - ret = -EINVAL; -free_table: - exfat_free_upcase_table(sbi); - return ret; + return -EINVAL; } static int exfat_load_default_upcase_table(struct super_block *sb) { - int i, ret = -EIO; + int i; struct exfat_sb_info *sbi = EXFAT_SB(sb); unsigned char skip = false; unsigned short uni = 0, *upcase_table; @@ -740,8 +735,7 @@ static int exfat_load_default_upcase_table(struct super_block *sb) return 0; /* FATAL error: default upcase table has error */ - exfat_free_upcase_table(sbi); - return ret; + return -EIO; } int exfat_create_upcase_table(struct super_block *sb) diff --git a/fs/exfat/super.c b/fs/exfat/super.c index d9d4fa91010b..fcb658267765 100644 --- a/fs/exfat/super.c +++ b/fs/exfat/super.c @@ -39,9 +39,6 @@ static void exfat_put_super(struct super_block *sb) exfat_free_bitmap(sbi); brelse(sbi->boot_bh); mutex_unlock(&sbi->s_lock); - - unload_nls(sbi->nls_io); - exfat_free_upcase_table(sbi); } static int exfat_sync_fs(struct super_block *sb, int wait) @@ -600,7 +597,7 @@ static int __exfat_fill_super(struct super_block *sb) ret = exfat_load_bitmap(sb); if (ret) { exfat_err(sb, "failed to load alloc-bitmap"); - goto free_upcase_table; + goto free_bh; } ret = exfat_count_used_clusters(sb, &sbi->used_clusters); @@ -613,8 +610,6 @@ static int __exfat_fill_super(struct super_block *sb) free_alloc_bitmap: exfat_free_bitmap(sbi); -free_upcase_table: - exfat_free_upcase_table(sbi); free_bh: brelse(sbi->boot_bh); return ret; @@ -701,12 +696,10 @@ put_inode: sb->s_root = NULL; free_table: - exfat_free_upcase_table(sbi); exfat_free_bitmap(sbi); brelse(sbi->boot_bh); check_nls_io: - unload_nls(sbi->nls_io); return err; } @@ -771,13 +764,22 @@ static int exfat_init_fs_context(struct fs_context *fc) return 0; } +static void delayed_free(struct rcu_head *p) +{ + struct exfat_sb_info *sbi = container_of(p, struct exfat_sb_info, rcu); + + unload_nls(sbi->nls_io); + exfat_free_upcase_table(sbi); + exfat_free_sbi(sbi); +} + static void exfat_kill_sb(struct super_block *sb) { struct exfat_sb_info *sbi = sb->s_fs_info; kill_block_super(sb); if (sbi) - exfat_free_sbi(sbi); + call_rcu(&sbi->rcu, delayed_free); } static struct file_system_type exfat_fs_type = { diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index 75bf1f88843c..645240cc0229 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c @@ -92,10 +92,12 @@ static const char *ext4_get_link(struct dentry *dentry, struct inode *inode, if (!dentry) { bh = ext4_getblk(NULL, inode, 0, EXT4_GET_BLOCKS_CACHED_NOWAIT); - if (IS_ERR(bh)) - return ERR_CAST(bh); - if (!bh || !ext4_buffer_uptodate(bh)) + if (IS_ERR(bh) || !bh) return ERR_PTR(-ECHILD); + if (!ext4_buffer_uptodate(bh)) { + brelse(bh); + return ERR_PTR(-ECHILD); + } } else { bh = ext4_bread(NULL, inode, 0, 0); if (IS_ERR(bh)) diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 91e89e68177e..b6cad106c37e 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -474,8 +474,7 @@ err: static void cuse_fc_release(struct fuse_conn *fc) { - struct cuse_conn *cc = fc_to_cc(fc); - kfree_rcu(cc, fc.rcu); + kfree(fc_to_cc(fc)); } /** diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 1df83eebda92..bcbe34488862 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -888,6 +888,7 @@ struct fuse_mount { /* Entry on fc->mounts */ struct list_head fc_entry; + struct rcu_head rcu; }; static inline struct fuse_mount *get_fuse_mount_super(struct super_block *sb) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 2a6d44f91729..516ea2979a90 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -930,6 +930,14 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, } EXPORT_SYMBOL_GPL(fuse_conn_init); +static void delayed_release(struct rcu_head *p) +{ + struct fuse_conn *fc = container_of(p, struct fuse_conn, rcu); + + put_user_ns(fc->user_ns); + fc->release(fc); +} + void fuse_conn_put(struct fuse_conn *fc) { if (refcount_dec_and_test(&fc->count)) { @@ -941,13 +949,12 @@ void fuse_conn_put(struct fuse_conn *fc) if (fiq->ops->release) fiq->ops->release(fiq); put_pid_ns(fc->pid_ns); - put_user_ns(fc->user_ns); bucket = rcu_dereference_protected(fc->curr_bucket, 1); if (bucket) { WARN_ON(atomic_read(&bucket->count) != 1); kfree(bucket); } - fc->release(fc); + call_rcu(&fc->rcu, delayed_release); } } EXPORT_SYMBOL_GPL(fuse_conn_put); @@ -1366,7 +1373,7 @@ EXPORT_SYMBOL_GPL(fuse_send_init); void fuse_free_conn(struct fuse_conn *fc) { WARN_ON(!list_empty(&fc->devices)); - kfree_rcu(fc, rcu); + kfree(fc); } EXPORT_SYMBOL_GPL(fuse_free_conn); @@ -1902,7 +1909,7 @@ static void fuse_sb_destroy(struct super_block *sb) void fuse_mount_destroy(struct fuse_mount *fm) { fuse_conn_put(fm->fc); - kfree(fm); + kfree_rcu(fm, rcu); } EXPORT_SYMBOL(fuse_mount_destroy); diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 7ededcb720c1..012a3d003fbe 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -190,6 +190,7 @@ struct hfsplus_sb_info { int work_queued; /* non-zero delayed work is queued */ struct delayed_work sync_work; /* FS sync delayed work */ spinlock_t work_lock; /* protects sync_work and work_queued */ + struct rcu_head rcu; }; #define HFSPLUS_SB_WRITEBACKUP 0 diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 1986b4f18a90..97920202790f 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -277,6 +277,14 @@ void hfsplus_mark_mdb_dirty(struct super_block *sb) spin_unlock(&sbi->work_lock); } +static void delayed_free(struct rcu_head *p) +{ + struct hfsplus_sb_info *sbi = container_of(p, struct hfsplus_sb_info, rcu); + + unload_nls(sbi->nls); + kfree(sbi); +} + static void hfsplus_put_super(struct super_block *sb) { struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); @@ -302,9 +310,7 @@ static void hfsplus_put_super(struct super_block *sb) hfs_btree_close(sbi->ext_tree); kfree(sbi->s_vhdr_buf); kfree(sbi->s_backup_vhdr_buf); - unload_nls(sbi->nls); - kfree(sb->s_fs_info); - sb->s_fs_info = NULL; + call_rcu(&sbi->rcu, delayed_free); } static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) diff --git a/fs/namei.c b/fs/namei.c index 4e0de939fea1..9342fa6a38c2 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1717,7 +1717,11 @@ static inline int may_lookup(struct mnt_idmap *idmap, { if (nd->flags & LOOKUP_RCU) { int err = inode_permission(idmap, nd->inode, MAY_EXEC|MAY_NOT_BLOCK); - if (err != -ECHILD || !try_to_unlazy(nd)) + if (!err) // success, keep going + return 0; + if (!try_to_unlazy(nd)) + return -ECHILD; // redo it all non-lazy + if (err != -ECHILD) // hard error return err; } return inode_permission(idmap, nd->inode, MAY_EXEC); diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index a3059b3168fd..9a0d32e4b422 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -477,6 +477,9 @@ ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) _enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode)); + if (!iov_iter_count(from)) + return 0; + if ((iocb->ki_flags & IOCB_DIRECT) || test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags)) return netfs_unbuffered_write_iter(iocb, from); diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c index 60a40d293c87..bee047e20f5d 100644 --- a/fs/netfs/direct_write.c +++ b/fs/netfs/direct_write.c @@ -139,6 +139,9 @@ ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from) _enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode)); + if (!iov_iter_count(from)) + return 0; + trace_netfs_write_iter(iocb, from); netfs_stat(&netfs_n_rh_dio_write); @@ -146,7 +149,7 @@ ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from) if (ret < 0) return ret; ret = generic_write_checks(iocb, from); - if (ret < 0) + if (ret <= 0) goto out; ret = file_remove_privs(file); if (ret < 0) diff --git a/fs/netfs/io.c b/fs/netfs/io.c index e8ff1e61ce79..4261ad6c55b6 100644 --- a/fs/netfs/io.c +++ b/fs/netfs/io.c @@ -748,6 +748,8 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync) if (!rreq->submitted) { netfs_put_request(rreq, false, netfs_rreq_trace_put_no_submit); + if (rreq->origin == NETFS_DIO_READ) + inode_dio_end(rreq->inode); ret = 0; goto out; } diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 44eca51b2808..fbdc9ca80f71 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -246,7 +246,7 @@ void nfs_free_client(struct nfs_client *clp) put_nfs_version(clp->cl_nfs_mod); kfree(clp->cl_hostname); kfree(clp->cl_acceptor); - kfree(clp); + kfree_rcu(clp, rcu); } EXPORT_SYMBOL_GPL(nfs_free_client); @@ -1006,6 +1006,14 @@ struct nfs_server *nfs_alloc_server(void) } EXPORT_SYMBOL_GPL(nfs_alloc_server); +static void delayed_free(struct rcu_head *p) +{ + struct nfs_server *server = container_of(p, struct nfs_server, rcu); + + nfs_free_iostats(server->io_stats); + kfree(server); +} + /* * Free up a server record */ @@ -1031,10 +1039,9 @@ void nfs_free_server(struct nfs_server *server) ida_destroy(&server->lockowner_id); ida_destroy(&server->openowner_id); - nfs_free_iostats(server->io_stats); put_cred(server->cred); - kfree(server); nfs_release_automount_timer(); + call_rcu(&server->rcu, delayed_free); } EXPORT_SYMBOL_GPL(nfs_free_server); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index c8ecbe999059..ac505671efbd 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1431,9 +1431,9 @@ static bool nfs_verifier_is_delegated(struct dentry *dentry) static void nfs_set_verifier_locked(struct dentry *dentry, unsigned long verf) { struct inode *inode = d_inode(dentry); - struct inode *dir = d_inode(dentry->d_parent); + struct inode *dir = d_inode_rcu(dentry->d_parent); - if (!nfs_verify_change_attribute(dir, verf)) + if (!dir || !nfs_verify_change_attribute(dir, verf)) return; if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) nfs_set_verifier_delegated(&verf); diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 3b42938a9d3b..7f27382e0ce2 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -2457,7 +2457,6 @@ int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages, struct ATTR_LIST_ENTRY *le = NULL; struct runs_tree *run = &ni->file.run; u64 valid_size = ni->i_valid; - loff_t i_size = i_size_read(&ni->vfs_inode); u64 vbo_disk; size_t unc_size; u32 frame_size, i, npages_disk, ondisk_size; @@ -2509,6 +2508,7 @@ int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages, err = -EOPNOTSUPP; goto out1; #else + loff_t i_size = i_size_read(&ni->vfs_inode); u32 frame_bits = ni_ext_compress_bits(ni); u64 frame64 = frame_vbo >> frame_bits; u64 frames, vbo_data; diff --git a/fs/proc/base.c b/fs/proc/base.c index 98a031ac2648..18550c071d71 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1878,8 +1878,6 @@ void proc_pid_evict_inode(struct proc_inode *ei) hlist_del_init_rcu(&ei->sibling_inodes); spin_unlock(&pid->lock); } - - put_pid(pid); } struct inode *proc_pid_make_inode(struct super_block *sb, diff --git a/fs/proc/inode.c b/fs/proc/inode.c index b33e490e3fd9..05350f3c2812 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -30,7 +30,6 @@ static void proc_evict_inode(struct inode *inode) { - struct proc_dir_entry *de; struct ctl_table_header *head; struct proc_inode *ei = PROC_I(inode); @@ -38,17 +37,8 @@ static void proc_evict_inode(struct inode *inode) clear_inode(inode); /* Stop tracking associated processes */ - if (ei->pid) { + if (ei->pid) proc_pid_evict_inode(ei); - ei->pid = NULL; - } - - /* Let go of any associated proc directory entry */ - de = ei->pde; - if (de) { - pde_put(de); - ei->pde = NULL; - } head = ei->sysctl; if (head) { @@ -80,6 +70,13 @@ static struct inode *proc_alloc_inode(struct super_block *sb) static void proc_free_inode(struct inode *inode) { + struct proc_inode *ei = PROC_I(inode); + + if (ei->pid) + put_pid(ei->pid); + /* Let go of any associated proc directory entry */ + if (ei->pde) + pde_put(ei->pde); kmem_cache_free(proc_inode_cachep, PROC_I(inode)); } diff --git a/fs/proc/root.c b/fs/proc/root.c index b55dbc70287b..06a297a27ba3 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -271,7 +271,7 @@ static void proc_kill_sb(struct super_block *sb) kill_anon_super(sb); put_pid_ns(fs_info->pid_ns); - kfree(fs_info); + kfree_rcu(fs_info, rcu); } static struct file_system_type proc_fs_type = { diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 2a4a4e3a8751..0c269396ae15 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -1172,6 +1172,9 @@ const char *cifs_get_link(struct dentry *dentry, struct inode *inode, { char *target_path; + if (!dentry) + return ERR_PTR(-ECHILD); + target_path = kmalloc(PATH_MAX, GFP_KERNEL); if (!target_path) return ERR_PTR(-ENOMEM); diff --git a/fs/super.c b/fs/super.c index d35e85295489..d6efeba0d0ce 100644 --- a/fs/super.c +++ b/fs/super.c @@ -274,9 +274,10 @@ static void destroy_super_work(struct work_struct *work) { struct super_block *s = container_of(work, struct super_block, destroy_work); - int i; - - for (i = 0; i < SB_FREEZE_LEVELS; i++) + security_sb_free(s); + put_user_ns(s->s_user_ns); + kfree(s->s_subtype); + for (int i = 0; i < SB_FREEZE_LEVELS; i++) percpu_free_rwsem(&s->s_writers.rw_sem[i]); kfree(s); } @@ -296,9 +297,6 @@ static void destroy_unused_super(struct super_block *s) super_unlock_excl(s); list_lru_destroy(&s->s_dentry_lru); list_lru_destroy(&s->s_inode_lru); - security_sb_free(s); - put_user_ns(s->s_user_ns); - kfree(s->s_subtype); shrinker_free(s->s_shrink); /* no delays needed */ destroy_super_work(&s->destroy_work); @@ -409,9 +407,6 @@ static void __put_super(struct super_block *s) WARN_ON(s->s_dentry_lru.node); WARN_ON(s->s_inode_lru.node); WARN_ON(!list_empty(&s->s_mounts)); - security_sb_free(s); - put_user_ns(s->s_user_ns); - kfree(s->s_subtype); call_rcu(&s->rcu, destroy_super_rcu); } } diff --git a/fs/verity/measure.c b/fs/verity/measure.c index bf7a5f4cccaf..3969d54158d1 100644 --- a/fs/verity/measure.c +++ b/fs/verity/measure.c @@ -159,9 +159,9 @@ __bpf_kfunc int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr_ker __bpf_kfunc_end_defs(); -BTF_SET8_START(fsverity_set_ids) +BTF_KFUNCS_START(fsverity_set_ids) BTF_ID_FLAGS(func, bpf_get_fsverity_digest, KF_TRUSTED_ARGS) -BTF_SET8_END(fsverity_set_ids) +BTF_KFUNCS_END(fsverity_set_ids) static int bpf_get_fsverity_digest_filter(const struct bpf_prog *prog, u32 kfunc_id) { diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 5a2512d20bd0..98401de832ee 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -350,7 +350,6 @@ xfs_setup_dax_always( return -EINVAL; } - xfs_warn(mp, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); return 0; disable_dax: diff --git a/include/drm/bridge/aux-bridge.h b/include/drm/bridge/aux-bridge.h index c4c423e97f06..4453906105ca 100644 --- a/include/drm/bridge/aux-bridge.h +++ b/include/drm/bridge/aux-bridge.h @@ -9,6 +9,8 @@ #include <drm/drm_connector.h> +struct auxiliary_device; + #if IS_ENABLED(CONFIG_DRM_AUX_BRIDGE) int drm_aux_bridge_register(struct device *parent); #else @@ -19,10 +21,23 @@ static inline int drm_aux_bridge_register(struct device *parent) #endif #if IS_ENABLED(CONFIG_DRM_AUX_HPD_BRIDGE) +struct auxiliary_device *devm_drm_dp_hpd_bridge_alloc(struct device *parent, struct device_node *np); +int devm_drm_dp_hpd_bridge_add(struct device *dev, struct auxiliary_device *adev); struct device *drm_dp_hpd_bridge_register(struct device *parent, struct device_node *np); void drm_aux_hpd_bridge_notify(struct device *dev, enum drm_connector_status status); #else +static inline struct auxiliary_device *devm_drm_dp_hpd_bridge_alloc(struct device *parent, + struct device_node *np) +{ + return NULL; +} + +static inline int devm_drm_dp_hpd_bridge_add(struct auxiliary_device *adev) +{ + return 0; +} + static inline struct device *drm_dp_hpd_bridge_register(struct device *parent, struct device_node *np) { diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index a789266feac3..fb3c3e7181e6 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -196,7 +196,8 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(CGROUP_INET_INGRESS) && \ - cgroup_bpf_sock_enabled(sk, CGROUP_INET_INGRESS)) \ + cgroup_bpf_sock_enabled(sk, CGROUP_INET_INGRESS) && sk && \ + sk_fullsock(sk)) \ __ret = __cgroup_bpf_run_filter_skb(sk, skb, \ CGROUP_INET_INGRESS); \ \ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b86bd15a051d..814dc913a968 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -251,10 +251,7 @@ struct bpf_list_node_kern { } __attribute__((aligned(8))); struct bpf_map { - /* The first two cachelines with read-mostly members of which some - * are also accessed in fast-path (e.g. ops, max_entries). - */ - const struct bpf_map_ops *ops ____cacheline_aligned; + const struct bpf_map_ops *ops; struct bpf_map *inner_map_meta; #ifdef CONFIG_SECURITY void *security; @@ -276,17 +273,14 @@ struct bpf_map { struct obj_cgroup *objcg; #endif char name[BPF_OBJ_NAME_LEN]; - /* The 3rd and 4th cacheline with misc members to avoid false sharing - * particularly with refcounting. - */ - atomic64_t refcnt ____cacheline_aligned; + struct mutex freeze_mutex; + atomic64_t refcnt; atomic64_t usercnt; /* rcu is used before freeing and work is only used during freeing */ union { struct work_struct work; struct rcu_head rcu; }; - struct mutex freeze_mutex; atomic64_t writecnt; /* 'Ownership' of program-containing map is claimed by the first program * that is going to use this map or by the first program which FD is @@ -1189,7 +1183,6 @@ struct bpf_trampoline { int progs_cnt[BPF_TRAMP_MAX]; /* Executable image of trampoline */ struct bpf_tramp_image *cur_image; - struct module *mod; }; struct bpf_attach_target_info { @@ -1416,6 +1409,7 @@ struct bpf_jit_poke_descriptor { struct bpf_ctx_arg_aux { u32 offset; enum bpf_reg_type reg_type; + struct btf *btf; u32 btf_id; }; @@ -1709,6 +1703,19 @@ struct bpf_struct_ops { struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS]; }; +/* Every member of a struct_ops type has an instance even a member is not + * an operator (function pointer). The "info" field will be assigned to + * prog->aux->ctx_arg_info of BPF struct_ops programs to provide the + * argument information required by the verifier to verify the program. + * + * btf_ctx_access() will lookup prog->aux->ctx_arg_info to find the + * corresponding entry for an given argument. + */ +struct bpf_struct_ops_arg_info { + struct bpf_ctx_arg_aux *info; + u32 cnt; +}; + struct bpf_struct_ops_desc { struct bpf_struct_ops *st_ops; @@ -1716,6 +1723,9 @@ struct bpf_struct_ops_desc { const struct btf_type *value_type; u32 type_id; u32 value_id; + + /* Collection of argument information for each member */ + struct bpf_struct_ops_arg_info *arg_info; }; enum bpf_struct_ops_state { @@ -1790,6 +1800,7 @@ int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc, struct btf *btf, struct bpf_verifier_log *log); void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map); +void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_ops_desc); #else #define register_bpf_struct_ops(st_ops, type) ({ (void *)(st_ops); 0; }) static inline bool bpf_try_module_get(const void *data, struct module *owner) @@ -1814,6 +1825,10 @@ static inline void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struc { } +static inline void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_ops_desc) +{ +} + #endif #if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM) diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 173ec7f43ed1..dcddb0aef7d8 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -129,10 +129,36 @@ bpf_local_storage_map_alloc(union bpf_attr *attr, struct bpf_local_storage_cache *cache, bool bpf_ma); -struct bpf_local_storage_data * +void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage, + struct bpf_local_storage_map *smap, + struct bpf_local_storage_elem *selem); +/* If cacheit_lockit is false, this lookup function is lockless */ +static inline struct bpf_local_storage_data * bpf_local_storage_lookup(struct bpf_local_storage *local_storage, struct bpf_local_storage_map *smap, - bool cacheit_lockit); + bool cacheit_lockit) +{ + struct bpf_local_storage_data *sdata; + struct bpf_local_storage_elem *selem; + + /* Fast path (cache hit) */ + sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx], + bpf_rcu_lock_held()); + if (sdata && rcu_access_pointer(sdata->smap) == smap) + return sdata; + + /* Slow path (cache miss) */ + hlist_for_each_entry_rcu(selem, &local_storage->list, snode, + rcu_read_lock_trace_held()) + if (rcu_access_pointer(SDATA(selem)->smap) == smap) + break; + + if (!selem) + return NULL; + if (cacheit_lockit) + __bpf_local_storage_insert_cache(local_storage, smap, selem); + return SDATA(selem); +} void bpf_local_storage_destroy(struct bpf_local_storage *local_storage); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7f5816482a10..84365e6dd85d 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -610,6 +610,7 @@ struct bpf_subprog_arg_info { enum bpf_arg_type arg_type; union { u32 mem_size; + u32 btf_id; }; }; @@ -918,6 +919,15 @@ static inline void mark_verifier_state_scratched(struct bpf_verifier_env *env) env->scratched_stack_slots = ~0ULL; } +static inline bool bpf_stack_narrow_access_ok(int off, int fill_size, int spill_size) +{ +#ifdef __BIG_ENDIAN + off -= spill_size - fill_size; +#endif + + return !(off % BPF_REG_SIZE); +} + const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type); const char *dynptr_type_str(enum bpf_dynptr_type type); const char *iter_type_str(const struct btf *btf, u32 btf_id); diff --git a/include/linux/btf.h b/include/linux/btf.h index 1ee8977b8c95..f9e56fd12a9f 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -495,6 +495,12 @@ static inline void *btf_id_set8_contains(const struct btf_id_set8 *set, u32 id) return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func); } +bool btf_param_match_suffix(const struct btf *btf, + const struct btf_param *arg, + const char *suffix); +int btf_ctx_arg_offset(const struct btf *btf, const struct btf_type *func_proto, + u32 arg_no); + struct bpf_verifier_log; #if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) @@ -525,10 +531,9 @@ s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id); int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt, struct module *owner); struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id); -const struct btf_type * -btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, - const struct btf_type *t, enum bpf_prog_type prog_type, - int arg); +bool btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, enum bpf_prog_type prog_type, + int arg); int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type); bool btf_types_are_same(const struct btf *btf1, u32 id1, const struct btf *btf2, u32 id2); @@ -568,12 +573,12 @@ static inline struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf { return NULL; } -static inline const struct btf_member * -btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, - const struct btf_type *t, enum bpf_prog_type prog_type, - int arg) +static inline bool +btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, enum bpf_prog_type prog_type, + int arg) { - return NULL; + return false; } static inline int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type) { diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index a9cb10b0e2e9..e24aabfe8ecc 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -8,6 +8,9 @@ struct btf_id_set { u32 ids[]; }; +/* This flag implies BTF_SET8 holds kfunc(s) */ +#define BTF_SET8_KFUNCS (1 << 0) + struct btf_id_set8 { u32 cnt; u32 flags; @@ -21,6 +24,7 @@ struct btf_id_set8 { #include <linux/compiler.h> /* for __PASTE */ #include <linux/compiler_attributes.h> /* for __maybe_unused */ +#include <linux/stringify.h> /* * Following macros help to define lists of BTF IDs placed @@ -183,17 +187,18 @@ extern struct btf_id_set name; * .word (1 << 3) | (1 << 1) | (1 << 2) * */ -#define __BTF_SET8_START(name, scope) \ +#define __BTF_SET8_START(name, scope, flags) \ +__BTF_ID_LIST(name, local) \ asm( \ ".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ "." #scope " __BTF_ID__set8__" #name "; \n" \ "__BTF_ID__set8__" #name ":; \n" \ -".zero 8 \n" \ +".zero 4 \n" \ +".long " __stringify(flags) "\n" \ ".popsection; \n"); #define BTF_SET8_START(name) \ -__BTF_ID_LIST(name, local) \ -__BTF_SET8_START(name, local) +__BTF_SET8_START(name, local, 0) #define BTF_SET8_END(name) \ asm( \ @@ -202,6 +207,12 @@ asm( \ ".popsection; \n"); \ extern struct btf_id_set8 name; +#define BTF_KFUNCS_START(name) \ +__BTF_SET8_START(name, local, BTF_SET8_KFUNCS) + +#define BTF_KFUNCS_END(name) \ +BTF_SET8_END(name) + #else #define BTF_ID_LIST(name) static u32 __maybe_unused name[64]; @@ -216,6 +227,8 @@ extern struct btf_id_set8 name; #define BTF_SET_END(name) #define BTF_SET8_START(name) static struct btf_id_set8 __maybe_unused name = { 0 }; #define BTF_SET8_END(name) +#define BTF_KFUNCS_START(name) static struct btf_id_set8 __maybe_unused name = { .flags = BTF_SET8_KFUNCS }; +#define BTF_KFUNCS_END(name) #endif /* CONFIG_DEBUG_INFO_BTF */ diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 555aae5448ae..bd1e361b351c 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -83,7 +83,7 @@ struct bvec_iter { unsigned int bi_bvec_done; /* number of bytes completed in current bvec */ -} __packed; +} __packed __aligned(4); struct bvec_iter_all { struct bio_vec bv; diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h index 91125eca4c8a..03fa6d50d46f 100644 --- a/include/linux/cxl-event.h +++ b/include/linux/cxl-event.h @@ -140,22 +140,4 @@ struct cxl_cper_event_rec { union cxl_event event; } __packed; -typedef void (*cxl_cper_callback)(enum cxl_event_type type, - struct cxl_cper_event_rec *rec); - -#ifdef CONFIG_ACPI_APEI_GHES -int cxl_cper_register_callback(cxl_cper_callback callback); -int cxl_cper_unregister_callback(cxl_cper_callback callback); -#else -static inline int cxl_cper_register_callback(cxl_cper_callback callback) -{ - return 0; -} - -static inline int cxl_cper_unregister_callback(cxl_cper_callback callback) -{ - return 0; -} -#endif - #endif /* _LINUX_CXL_EVENT_H */ diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 1666c387861f..d07cf2f1bb7d 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -173,6 +173,7 @@ struct dentry_operations { #define DCACHE_DONTCACHE BIT(7) /* Purge from memory on final dput() */ #define DCACHE_CANT_MOUNT BIT(8) +#define DCACHE_GENOCIDE BIT(9) #define DCACHE_SHRINK_LIST BIT(10) #define DCACHE_OP_WEAK_REVALIDATE BIT(11) diff --git a/include/linux/dpll.h b/include/linux/dpll.h index 9cb02ad73d51..d275736230b3 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -10,6 +10,8 @@ #include <uapi/linux/dpll.h> #include <linux/device.h> #include <linux/netlink.h> +#include <linux/netdevice.h> +#include <linux/rtnetlink.h> struct dpll_device; struct dpll_pin; @@ -121,15 +123,24 @@ struct dpll_pin_properties { }; #if IS_ENABLED(CONFIG_DPLL) -size_t dpll_msg_pin_handle_size(struct dpll_pin *pin); -int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin); +void dpll_netdev_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin); +void dpll_netdev_pin_clear(struct net_device *dev); + +size_t dpll_netdev_pin_handle_size(const struct net_device *dev); +int dpll_netdev_add_pin_handle(struct sk_buff *msg, + const struct net_device *dev); #else -static inline size_t dpll_msg_pin_handle_size(struct dpll_pin *pin) +static inline void +dpll_netdev_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin) { } +static inline void dpll_netdev_pin_clear(struct net_device *dev) { } + +static inline size_t dpll_netdev_pin_handle_size(const struct net_device *dev) { return 0; } -static inline int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin) +static inline int +dpll_netdev_add_pin_handle(struct sk_buff *msg, const struct net_device *dev) { return 0; } diff --git a/include/linux/filter.h b/include/linux/filter.h index fee070b9826e..36cc29a2934c 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -547,24 +547,27 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) __BPF_MAP(n, __BPF_DECL_ARGS, __BPF_N, u64, __ur_1, u64, __ur_2, \ u64, __ur_3, u64, __ur_4, u64, __ur_5) -#define BPF_CALL_x(x, name, ...) \ +#define BPF_CALL_x(x, attr, name, ...) \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ typedef u64 (*btf_##name)(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ - u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ - u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ + attr u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ + attr u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ { \ return ((btf_##name)____##name)(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\ } \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)) -#define BPF_CALL_0(name, ...) BPF_CALL_x(0, name, __VA_ARGS__) -#define BPF_CALL_1(name, ...) BPF_CALL_x(1, name, __VA_ARGS__) -#define BPF_CALL_2(name, ...) BPF_CALL_x(2, name, __VA_ARGS__) -#define BPF_CALL_3(name, ...) BPF_CALL_x(3, name, __VA_ARGS__) -#define BPF_CALL_4(name, ...) BPF_CALL_x(4, name, __VA_ARGS__) -#define BPF_CALL_5(name, ...) BPF_CALL_x(5, name, __VA_ARGS__) +#define __NOATTR +#define BPF_CALL_0(name, ...) BPF_CALL_x(0, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_1(name, ...) BPF_CALL_x(1, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_2(name, ...) BPF_CALL_x(2, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_3(name, ...) BPF_CALL_x(3, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_4(name, ...) BPF_CALL_x(4, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_5(name, ...) BPF_CALL_x(5, __NOATTR, name, __VA_ARGS__) + +#define NOTRACE_BPF_CALL_1(name, ...) BPF_CALL_x(1, notrace, name, __VA_ARGS__) #define bpf_ctx_range(TYPE, MEMBER) \ offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1 diff --git a/include/linux/fs.h b/include/linux/fs.h index 023f37c60709..1fbc72c5f112 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -352,6 +352,8 @@ enum rw_hint { * unrelated IO (like cache flushing, new IO generation, etc). */ #define IOCB_DIO_CALLER_COMP (1 << 22) +/* kiocb is a read or write operation submitted by fs/aio.c. */ +#define IOCB_AIO_RW (1 << 23) /* for use in trace events */ #define TRACE_IOCB_STRINGS \ diff --git a/include/linux/gfp.h b/include/linux/gfp.h index de292a007138..6cef1c241180 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -311,15 +311,23 @@ extern void __free_pages(struct page *page, unsigned int order); extern void free_pages(unsigned long addr, unsigned int order); struct page_frag_cache; +void page_frag_cache_drain(struct page_frag_cache *nc); extern void __page_frag_cache_drain(struct page *page, unsigned int count); -extern void *page_frag_alloc_align(struct page_frag_cache *nc, - unsigned int fragsz, gfp_t gfp_mask, - unsigned int align_mask); +void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz, + gfp_t gfp_mask, unsigned int align_mask); + +static inline void *page_frag_alloc_align(struct page_frag_cache *nc, + unsigned int fragsz, gfp_t gfp_mask, + unsigned int align) +{ + WARN_ON_ONCE(!is_power_of_2(align)); + return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align); +} static inline void *page_frag_alloc(struct page_frag_cache *nc, unsigned int fragsz, gfp_t gfp_mask) { - return page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u); + return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u); } extern void page_frag_free(void *addr); diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 2b00faf98017..6ef0557b4bff 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -164,8 +164,28 @@ struct hv_ring_buffer { u8 buffer[]; } __packed; + +/* + * If the requested ring buffer size is at least 8 times the size of the + * header, steal space from the ring buffer for the header. Otherwise, add + * space for the header so that is doesn't take too much of the ring buffer + * space. + * + * The factor of 8 is somewhat arbitrary. The goal is to prevent adding a + * relatively small header (4 Kbytes on x86) to a large-ish power-of-2 ring + * buffer size (such as 128 Kbytes) and so end up making a nearly twice as + * large allocation that will be almost half wasted. As a contrasting example, + * on ARM64 with 64 Kbyte page size, we don't want to take 64 Kbytes for the + * header from a 128 Kbyte allocation, leaving only 64 Kbytes for the ring. + * In this latter case, we must add 64 Kbytes for the header and not worry + * about what's wasted. + */ +#define VMBUS_HEADER_ADJ(payload_sz) \ + ((payload_sz) >= 8 * sizeof(struct hv_ring_buffer) ? \ + 0 : sizeof(struct hv_ring_buffer)) + /* Calculate the proper size of a ringbuffer, it must be page-aligned */ -#define VMBUS_RING_SIZE(payload_sz) PAGE_ALIGN(sizeof(struct hv_ring_buffer) + \ +#define VMBUS_RING_SIZE(payload_sz) PAGE_ALIGN(VMBUS_HEADER_ADJ(payload_sz) + \ (payload_sz)) struct hv_ring_buffer_info { diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 1ea2a820e1eb..5e27cb3a3be9 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -892,11 +892,14 @@ struct iommu_fwspec { struct iommu_sva { struct device *dev; struct iommu_domain *domain; + struct list_head handle_item; + refcount_t users; }; struct iommu_mm_data { u32 pasid; struct list_head sva_domains; + struct list_head sva_handles; }; int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index ef3aa060a289..383a0ea2ab91 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -3,6 +3,7 @@ #define _IPV6_H #include <uapi/linux/ipv6.h> +#include <linux/cache.h> #define ipv6_optlen(p) (((p)->hdrlen+1) << 3) #define ipv6_authlen(p) (((p)->hdrlen+2) << 2) @@ -10,9 +11,16 @@ * This structure contains configuration options per IPv6 link. */ struct ipv6_devconf { - __s32 forwarding; + /* RX & TX fastpath fields. */ + __cacheline_group_begin(ipv6_devconf_read_txrx); + __s32 disable_ipv6; __s32 hop_limit; __s32 mtu6; + __s32 forwarding; + __s32 disable_policy; + __s32 proxy_ndp; + __cacheline_group_end(ipv6_devconf_read_txrx); + __s32 accept_ra; __s32 accept_redirects; __s32 autoconf; @@ -45,7 +53,6 @@ struct ipv6_devconf { __s32 accept_ra_rt_info_max_plen; #endif #endif - __s32 proxy_ndp; __s32 accept_source_route; __s32 accept_ra_from_local; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD @@ -55,7 +62,6 @@ struct ipv6_devconf { #ifdef CONFIG_IPV6_MROUTE atomic_t mc_forwarding; #endif - __s32 disable_ipv6; __s32 drop_unicast_in_l2_multicast; __s32 accept_dad; __s32 force_tllao; @@ -76,7 +82,6 @@ struct ipv6_devconf { #endif __u32 enhanced_dad; __u32 addr_gen_mode; - __s32 disable_policy; __s32 ndisc_tclass; __s32 rpl_seg_enabled; __u32 ioam6_id; diff --git a/include/linux/memblock.h b/include/linux/memblock.h index b695f9e946da..e2082240586d 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -121,6 +121,8 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size); int memblock_physmem_add(phys_addr_t base, phys_addr_t size); #endif void memblock_trim_memory(phys_addr_t align); +unsigned long memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, + phys_addr_t base2, phys_addr_t size2); bool memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size); bool memblock_validate_numa_coverage(unsigned long threshold_bytes); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 628a3aa7a7e0..3a8a9e57a51d 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10261,7 +10261,9 @@ struct mlx5_ifc_mcam_access_reg_bits { u8 regs_63_to_46[0x12]; u8 mrtc[0x1]; - u8 regs_44_to_32[0xd]; + u8 regs_44_to_41[0x4]; + u8 mfrl[0x1]; + u8 regs_39_to_32[0x8]; u8 regs_31_to_10[0x16]; u8 mtmp[0x1]; diff --git a/include/linux/net.h b/include/linux/net.h index c9b4a63791a4..15df6d5f27a7 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -299,10 +299,7 @@ do { \ net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__) #else #define net_dbg_ratelimited(fmt, ...) \ - do { \ - if (0) \ - no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ - } while (0) + no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif #define net_get_random_once(buf, nbytes) \ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 09023e44db4e..2767467138a0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -79,8 +79,6 @@ struct xdp_buff; struct xdp_frame; struct xdp_metadata_ops; struct xdp_md; -/* DPLL specific */ -struct dpll_pin; typedef u32 xdp_features_t; @@ -2472,7 +2470,7 @@ struct net_device { struct devlink_port *devlink_port; #if IS_ENABLED(CONFIG_DPLL) - struct dpll_pin *dpll_pin; + struct dpll_pin __rcu *dpll_pin; #endif #if IS_ENABLED(CONFIG_PAGE_POOL) /** @page_pools: page pools created for this netdevice */ @@ -3500,6 +3498,16 @@ static inline void netdev_queue_set_dql_min_limit(struct netdev_queue *dev_queue #endif } +static inline int netdev_queue_dql_avail(const struct netdev_queue *txq) +{ +#ifdef CONFIG_BQL + /* Non-BQL migrated drivers will return 0, too. */ + return dql_avail(&txq->dql); +#else + return 0; +#endif +} + /** * netdev_txq_bql_enqueue_prefetchw - prefetch bql data for write * @dev_queue: pointer to transmit queue @@ -4033,17 +4041,6 @@ int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); int dev_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid, bool recurse); bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); -void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin); -void netdev_dpll_pin_clear(struct net_device *dev); - -static inline struct dpll_pin *netdev_dpll_pin(const struct net_device *dev) -{ -#if IS_ENABLED(CONFIG_DPLL) - return dev->dpll_pin; -#else - return NULL; -#endif -} struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index ffb5e0297eb5..2683b2b77612 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -473,6 +473,7 @@ struct nf_ct_hook { const struct sk_buff *); void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb); void (*set_closing)(struct nf_conntrack *nfct); + int (*confirm)(struct sk_buff *skb); }; extern const struct nf_ct_hook __rcu *nf_ct_hook; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index cd797e00fe35..92de074e63b9 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -124,6 +124,7 @@ struct nfs_client { char cl_ipaddr[48]; struct net *cl_net; struct list_head pending_cb_stateids; + struct rcu_head rcu; }; /* @@ -265,6 +266,7 @@ struct nfs_server { const struct cred *cred; bool has_sec_mnt_opts; struct kobject kobj; + struct rcu_head rcu; }; /* Server capabilities */ diff --git a/include/linux/phy.h b/include/linux/phy.h index e3ab2c347a59..3f68b8239bb1 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -30,6 +30,7 @@ #include <linux/refcount.h> #include <linux/atomic.h> +#include <net/eee.h> #define PHY_DEFAULT_FEATURES (SUPPORTED_Autoneg | \ SUPPORTED_TP | \ @@ -594,6 +595,8 @@ struct macsec_ops; * @supported_eee: supported PHY EEE linkmodes * @advertising_eee: Currently advertised EEE linkmodes * @eee_enabled: Flag indicating whether the EEE feature is enabled + * @enable_tx_lpi: When True, MAC should transmit LPI to PHY + * @eee_cfg: User configuration of EEE * @lp_advertising: Current link partner advertised linkmodes * @host_interfaces: PHY interface modes supported by host * @eee_broken_modes: Energy efficient ethernet modes which should be prohibited @@ -703,7 +706,7 @@ struct phy_device { __ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising); /* used with phy_speed_down */ __ETHTOOL_DECLARE_LINK_MODE_MASK(adv_old); - /* used for eee validation */ + /* used for eee validation and configuration*/ __ETHTOOL_DECLARE_LINK_MODE_MASK(supported_eee); __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising_eee); bool eee_enabled; @@ -713,6 +716,8 @@ struct phy_device { /* Energy efficient ethernet modes which should be prohibited */ u32 eee_broken_modes; + bool enable_tx_lpi; + struct eee_config eee_cfg; #ifdef CONFIG_LED_TRIGGER_PHY struct phy_led_trigger *phy_led_triggers; @@ -1968,6 +1973,7 @@ void phy_advertise_supported(struct phy_device *phydev); void phy_advertise_eee_all(struct phy_device *phydev); void phy_support_sym_pause(struct phy_device *phydev); void phy_support_asym_pause(struct phy_device *phydev); +void phy_support_eee(struct phy_device *phydev); void phy_set_sym_pause(struct phy_device *phydev, bool rx, bool tx, bool autoneg); void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx); diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 6ba411732a0d..9a57deefcb07 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -480,9 +480,6 @@ void pcs_disable(struct phylink_pcs *pcs); * negotiation completion state in @state->an_complete, and link up state * in @state->link. If possible, @state->lp_advertising should also be * populated. - * - * When present, this overrides pcs_get_state() in &struct - * phylink_pcs_ops. */ void pcs_get_state(struct phylink_pcs *pcs, struct phylink_link_state *state); diff --git a/include/linux/poison.h b/include/linux/poison.h index 27a7dad17eef..1f0ee2459f2a 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -92,4 +92,7 @@ /********** VFS **********/ #define VFS_PTR_POISON ((void *)(0xF5 + POISON_POINTER_DELTA)) +/********** lib/stackdepot.c **********/ +#define STACK_DEPOT_POISON ((void *)(0xD390 + POISON_POINTER_DELTA)) + #endif diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index de407e7c3b55..0b2a89854440 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -65,6 +65,7 @@ struct proc_fs_info { kgid_t pid_gid; enum proc_hidepid hide_pid; enum proc_pidonly pidonly; + struct rcu_head rcu; }; static inline struct proc_fs_info *proc_sb_info(struct super_block *sb) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1470b74fb6d2..3013355b63f5 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -822,9 +822,9 @@ typedef unsigned char *sk_buff_data_t; * @decrypted: Decrypted SKB * @slow_gro: state present at GRO time, slower prepare step required * @mono_delivery_time: When set, skb->tstamp has the - * delivery_time in mono clock base (i.e. EDT). Otherwise, the - * skb->tstamp has the (rcv) timestamp at ingress and - * delivery_time at egress. + * delivery_time in mono clock base (i.e., EDT) or a clock base chosen + * by SO_TXTIME. If zero, skb->tstamp has the (rcv) timestamp at + * ingress. * @napi_id: id of the NAPI struct this skb came from * @sender_cpu: (aka @napi_id) source CPU in XPS * @alloc_cpu: CPU which did the skb allocation. @@ -3036,6 +3036,7 @@ static inline int skb_transport_offset(const struct sk_buff *skb) static inline u32 skb_network_header_len(const struct sk_buff *skb) { + DEBUG_NET_WARN_ON_ONCE(!skb_transport_header_was_set(skb)); return skb->transport_header - skb->network_header; } diff --git a/include/linux/swap.h b/include/linux/swap.h index 4db00ddad261..8d28f6091a32 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -549,6 +549,11 @@ static inline int swap_duplicate(swp_entry_t swp) return 0; } +static inline int swapcache_prepare(swp_entry_t swp) +{ + return 0; +} + static inline void swap_free(swp_entry_t swp) { } diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 988a30ef6bfe..55399ee2a57e 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -304,7 +304,7 @@ struct tcp_sock { __cacheline_group_end(tcp_sock_write_txrx); /* RX read-write hotpath cache lines */ - __cacheline_group_begin(tcp_sock_write_rx); + __cacheline_group_begin(tcp_sock_write_rx) __aligned(8); u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived * sum(delta(rcv_nxt)), or how many bytes diff --git a/include/linux/uio.h b/include/linux/uio.h index bea9c89922d9..00cebe2b70de 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -40,7 +40,6 @@ struct iov_iter_state { struct iov_iter { u8 iter_type; - bool copy_mc; bool nofault; bool data_source; size_t iov_offset; @@ -248,22 +247,8 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i); #ifdef CONFIG_ARCH_HAS_COPY_MC size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i); -static inline void iov_iter_set_copy_mc(struct iov_iter *i) -{ - i->copy_mc = true; -} - -static inline bool iov_iter_is_copy_mc(const struct iov_iter *i) -{ - return i->copy_mc; -} #else #define _copy_mc_to_iter _copy_to_iter -static inline void iov_iter_set_copy_mc(struct iov_iter *i) { } -static inline bool iov_iter_is_copy_mc(const struct iov_iter *i) -{ - return false; -} #endif size_t iov_iter_zero(size_t bytes, struct iov_iter *); @@ -355,7 +340,6 @@ static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction, WARN_ON(direction & ~(READ | WRITE)); *i = (struct iov_iter) { .iter_type = ITER_UBUF, - .copy_mc = false, .data_source = direction, .ubuf = buf, .count = count, diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 30d6f1e84e46..9d06eb945509 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -417,7 +417,7 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev) if (unlikely(!idev)) return true; - return !!idev->cnf.ignore_routes_with_linkdown; + return !!READ_ONCE(idev->cnf.ignore_routes_with_linkdown); } void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp); diff --git a/include/net/eee.h b/include/net/eee.h new file mode 100644 index 000000000000..84837aba3cd9 --- /dev/null +++ b/include/net/eee.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _EEE_H +#define _EEE_H + +#include <linux/types.h> + +struct eee_config { + u32 tx_lpi_timer; + bool tx_lpi_enabled; + bool eee_enabled; +}; + +static inline bool eeecfg_mac_can_tx_lpi(const struct eee_config *eeecfg) +{ + /* eee_enabled is the master on/off */ + if (!eeecfg->eee_enabled || !eeecfg->tx_lpi_enabled) + return false; + + return true; +} + +static inline void eeecfg_to_eee(struct ethtool_keee *eee, + const struct eee_config *eeecfg) +{ + eee->tx_lpi_timer = eeecfg->tx_lpi_timer; + eee->tx_lpi_enabled = eeecfg->tx_lpi_enabled; + eee->eee_enabled = eeecfg->eee_enabled; +} + +static inline void eee_to_eeecfg(struct eee_config *eeecfg, + const struct ethtool_keee *eee) +{ + eeecfg->tx_lpi_timer = eee->tx_lpi_timer; + eeecfg->tx_lpi_enabled = eee->tx_lpi_enabled; + eeecfg->eee_enabled = eee->eee_enabled; +} + +#endif diff --git a/include/net/gro.h b/include/net/gro.h index b435f0ddbf64..2b58671a6549 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -139,21 +139,16 @@ static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len) NAPI_GRO_CB(skb)->data_offset += len; } -static inline void *skb_gro_header_fast(struct sk_buff *skb, +static inline void *skb_gro_header_fast(const struct sk_buff *skb, unsigned int offset) { return NAPI_GRO_CB(skb)->frag0 + offset; } -static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen) +static inline bool skb_gro_may_pull(const struct sk_buff *skb, + unsigned int hlen) { - return NAPI_GRO_CB(skb)->frag0_len < hlen; -} - -static inline void skb_gro_frag0_invalidate(struct sk_buff *skb) -{ - NAPI_GRO_CB(skb)->frag0 = NULL; - NAPI_GRO_CB(skb)->frag0_len = 0; + return likely(hlen <= NAPI_GRO_CB(skb)->frag0_len); } static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, @@ -162,28 +157,30 @@ static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, if (!pskb_may_pull(skb, hlen)) return NULL; - skb_gro_frag0_invalidate(skb); return skb->data + offset; } -static inline void *skb_gro_header(struct sk_buff *skb, - unsigned int hlen, unsigned int offset) +static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen, + unsigned int offset) { void *ptr; ptr = skb_gro_header_fast(skb, offset); - if (skb_gro_header_hard(skb, hlen)) + if (!skb_gro_may_pull(skb, hlen)) ptr = skb_gro_header_slow(skb, hlen, offset); return ptr; } -static inline void *skb_gro_network_header(struct sk_buff *skb) +static inline void *skb_gro_network_header(const struct sk_buff *skb) { - return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) + - skb_network_offset(skb); + if (skb_gro_may_pull(skb, skb_gro_offset(skb))) + return skb_gro_header_fast(skb, skb_network_offset(skb)); + + return skb_network_header(skb); } -static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto) +static inline __wsum inet_gro_compute_pseudo(const struct sk_buff *skb, + int proto) { const struct iphdr *iph = skb_gro_network_header(skb); @@ -421,7 +418,8 @@ static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) return uh; } -static inline __wsum ip6_gro_compute_pseudo(struct sk_buff *skb, int proto) +static inline __wsum ip6_gro_compute_pseudo(const struct sk_buff *skb, + int proto) { const struct ipv6hdr *iph = skb_gro_network_header(skb); diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index d94c242eb3ed..f9ddd47dc4f8 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -274,6 +274,7 @@ enum { INET_FLAGS_REPFLOW = 27, INET_FLAGS_RTALERT_ISOLATE = 28, INET_FLAGS_SNDFLOW = 29, + INET_FLAGS_RTALERT = 30, }; /* cmsg flags for inet */ diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 52a51c69aa9d..a30c6aa9e5cf 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -332,7 +332,7 @@ static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst rcu_read_lock(); idev = __in6_dev_get(dst->dev); if (idev) - mtu = idev->cnf.mtu6; + mtu = READ_ONCE(idev->cnf.mtu6); rcu_read_unlock(); out: diff --git a/include/net/ipv6.h b/include/net/ipv6.h index cf25ea21d770..88a8e554f7a1 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -534,13 +534,15 @@ static inline int ipv6_hopopt_jumbo_remove(struct sk_buff *skb) return 0; } -static inline bool ipv6_accept_ra(struct inet6_dev *idev) +static inline bool ipv6_accept_ra(const struct inet6_dev *idev) { + s32 accept_ra = READ_ONCE(idev->cnf.accept_ra); + /* If forwarding is enabled, RA are not accepted unless the special * hybrid mode (accept_ra=2) is enabled. */ - return idev->cnf.forwarding ? idev->cnf.accept_ra == 2 : - idev->cnf.accept_ra; + return READ_ONCE(idev->cnf.forwarding) ? accept_ra == 2 : + accept_ra; } #define IPV6_FRAG_HIGH_THRESH (4 * 1024*1024) /* 4194304 */ diff --git a/include/net/mctp.h b/include/net/mctp.h index 0dfae6f51a32..7b17c52e8ce2 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -250,6 +250,7 @@ struct mctp_route { struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet, mctp_eid_t daddr); +/* always takes ownership of skb */ int mctp_local_output(struct sock *sk, struct mctp_route *rt, struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag); diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 5dee575fbe86..3d07abacf08b 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -196,7 +196,7 @@ struct nfc_dev { }; #define to_nfc_dev(_dev) container_of(_dev, struct nfc_dev, dev) -extern struct class nfc_class; +extern const struct class nfc_class; struct nfc_dev *nfc_allocate_device(const struct nfc_ops *ops, u32 supported_protocols, diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 934fdb977551..cefe0c4bdae3 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -238,12 +238,7 @@ static inline bool qdisc_may_bulk(const struct Qdisc *qdisc) static inline int qdisc_avail_bulklimit(const struct netdev_queue *txq) { -#ifdef CONFIG_BQL - /* Non-BQL migrated drivers will return 0, too. */ - return dql_avail(&txq->dql); -#else - return 0; -#endif + return netdev_queue_dql_avail(txq); } struct Qdisc_class_ops { diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 5ec1e71a09de..c38f4fe5e64c 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -100,10 +100,6 @@ struct scsi_vpd { unsigned char data[]; }; -enum scsi_vpd_parameters { - SCSI_VPD_HEADER_SIZE = 4, -}; - struct scsi_device { struct Scsi_Host *host; struct request_queue *request_queue; @@ -208,6 +204,7 @@ struct scsi_device { unsigned use_10_for_rw:1; /* first try 10-byte read / write */ unsigned use_10_for_ms:1; /* first try 10-byte mode sense/select */ unsigned set_dbd_for_ms:1; /* Set "DBD" field in mode sense */ + unsigned read_before_ms:1; /* perform a READ before MODE SENSE */ unsigned no_report_opcodes:1; /* no REPORT SUPPORTED OPERATION CODES */ unsigned no_write_same:1; /* no WRITE SAME command */ unsigned use_16_for_rw:1; /* Use read/write(16) over read/write(10) */ diff --git a/include/sound/soc-card.h b/include/sound/soc-card.h index ecc02e955279..1f4c39922d82 100644 --- a/include/sound/soc-card.h +++ b/include/sound/soc-card.h @@ -30,6 +30,8 @@ static inline void snd_soc_card_mutex_unlock(struct snd_soc_card *card) struct snd_kcontrol *snd_soc_card_get_kcontrol(struct snd_soc_card *soc_card, const char *name); +struct snd_kcontrol *snd_soc_card_get_kcontrol_locked(struct snd_soc_card *soc_card, + const char *name); int snd_soc_card_jack_new(struct snd_soc_card *card, const char *id, int type, struct snd_soc_jack *jack); int snd_soc_card_jack_new_pins(struct snd_soc_card *card, const char *id, diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h index a3995925cb05..1f4258308b96 100644 --- a/include/trace/events/qdisc.h +++ b/include/trace/events/qdisc.h @@ -81,14 +81,14 @@ TRACE_EVENT(qdisc_reset, TP_ARGS(q), TP_STRUCT__entry( - __string( dev, qdisc_dev(q) ) - __string( kind, q->ops->id ) - __field( u32, parent ) - __field( u32, handle ) + __string( dev, qdisc_dev(q)->name ) + __string( kind, q->ops->id ) + __field( u32, parent ) + __field( u32, handle ) ), TP_fast_assign( - __assign_str(dev, qdisc_dev(q)); + __assign_str(dev, qdisc_dev(q)->name); __assign_str(kind, q->ops->id); __entry->parent = q->parent; __entry->handle = q->handle; @@ -106,14 +106,14 @@ TRACE_EVENT(qdisc_destroy, TP_ARGS(q), TP_STRUCT__entry( - __string( dev, qdisc_dev(q) ) - __string( kind, q->ops->id ) - __field( u32, parent ) - __field( u32, handle ) + __string( dev, qdisc_dev(q)->name ) + __string( kind, q->ops->id ) + __field( u32, parent ) + __field( u32, handle ) ), TP_fast_assign( - __assign_str(dev, qdisc_dev(q)); + __assign_str(dev, qdisc_dev(q)->name); __assign_str(kind, q->ops->id); __entry->parent = q->parent; __entry->handle = q->handle; diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 7b1ddffa3dfc..6ca3e0343666 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -88,7 +88,8 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb, sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); ), - TP_printk("family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s", + TP_printk("skbaddr=%p skaddr=%p family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s", + __entry->skbaddr, __entry->skaddr, show_family_name(__entry->family), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6, @@ -361,7 +362,8 @@ DECLARE_EVENT_CLASS(tcp_event_skb, TP_STORE_ADDR_PORTS_SKB(__entry, skb); ), - TP_printk("src=%pISpc dest=%pISpc", __entry->saddr, __entry->daddr) + TP_printk("skbaddr=%p src=%pISpc dest=%pISpc", + __entry->skbaddr, __entry->saddr, __entry->daddr) ); DEFINE_EVENT(tcp_event_skb, tcp_bad_csum, diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h index 0bade1592f34..77d7ff0d5b11 100644 --- a/include/uapi/drm/nouveau_drm.h +++ b/include/uapi/drm/nouveau_drm.h @@ -54,6 +54,20 @@ extern "C" { */ #define NOUVEAU_GETPARAM_EXEC_PUSH_MAX 17 +/* + * NOUVEAU_GETPARAM_VRAM_BAR_SIZE - query bar size + * + * Query the VRAM BAR size. + */ +#define NOUVEAU_GETPARAM_VRAM_BAR_SIZE 18 + +/* + * NOUVEAU_GETPARAM_VRAM_USED + * + * Get remaining VRAM size. + */ +#define NOUVEAU_GETPARAM_VRAM_USED 19 + struct drm_nouveau_getparam { __u64 param; __u64 value; diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 9fa3ae324731..bb0c8a994116 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -831,11 +831,6 @@ struct drm_xe_vm_destroy { * - %DRM_XE_VM_BIND_OP_PREFETCH * * and the @flags can be: - * - %DRM_XE_VM_BIND_FLAG_READONLY - * - %DRM_XE_VM_BIND_FLAG_ASYNC - * - %DRM_XE_VM_BIND_FLAG_IMMEDIATE - Valid on a faulting VM only, do the - * MAP operation immediately rather than deferring the MAP to the page - * fault handler. * - %DRM_XE_VM_BIND_FLAG_NULL - When the NULL flag is set, the page * tables are setup with a special bit which indicates writes are * dropped and all reads return zero. In the future, the NULL flags @@ -928,9 +923,8 @@ struct drm_xe_vm_bind_op { /** @op: Bind operation to perform */ __u32 op; -#define DRM_XE_VM_BIND_FLAG_READONLY (1 << 0) -#define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 1) #define DRM_XE_VM_BIND_FLAG_NULL (1 << 2) +#define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3) /** @flags: Bind flags */ __u32 flags; @@ -1045,20 +1039,6 @@ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY 0 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT 2 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE 3 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT 4 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER 5 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY 6 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY 7 -/* Monitor 128KB contiguous region with 4K sub-granularity */ -#define DRM_XE_ACC_GRANULARITY_128K 0 -/* Monitor 2MB contiguous region with 64KB sub-granularity */ -#define DRM_XE_ACC_GRANULARITY_2M 1 -/* Monitor 16MB contiguous region with 512KB sub-granularity */ -#define DRM_XE_ACC_GRANULARITY_16M 2 -/* Monitor 64MB contiguous region with 2M sub-granularity */ -#define DRM_XE_ACC_GRANULARITY_64M 3 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d96708380e52..a241f407c234 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -77,12 +77,29 @@ struct bpf_insn { __s32 imm; /* signed immediate constant */ }; -/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ +/* Deprecated: use struct bpf_lpm_trie_key_u8 (when the "data" member is needed for + * byte access) or struct bpf_lpm_trie_key_hdr (when using an alternative type for + * the trailing flexible array member) instead. + */ struct bpf_lpm_trie_key { __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ __u8 data[0]; /* Arbitrary size */ }; +/* Header for bpf_lpm_trie_key structs */ +struct bpf_lpm_trie_key_hdr { + __u32 prefixlen; +}; + +/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry, with trailing byte array. */ +struct bpf_lpm_trie_key_u8 { + union { + struct bpf_lpm_trie_key_hdr hdr; + __u32 prefixlen; + }; + __u8 data[]; /* Arbitrary size */ +}; + struct bpf_cgroup_storage_key { __u64 cgroup_inode_id; /* cgroup inode id */ __u32 attach_type; /* program attach type (enum bpf_attach_type) */ @@ -617,7 +634,11 @@ union bpf_iter_link_info { * to NULL to begin the batched operation. After each subsequent * **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant * *out_batch* as the *in_batch* for the next operation to - * continue iteration from the current point. + * continue iteration from the current point. Both *in_batch* and + * *out_batch* must point to memory large enough to hold a key, + * except for maps of type **BPF_MAP_TYPE_{HASH, PERCPU_HASH, + * LRU_HASH, LRU_PERCPU_HASH}**, for which batch parameters + * must be at least 4 bytes wide regardless of key size. * * The *keys* and *values* are output parameters which must point * to memory large enough to hold *count* items based on the key diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index c4c53a9ab959..ff8d21f9e95b 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -145,7 +145,7 @@ struct in6_flowlabel_req { #define IPV6_TLV_PADN 1 #define IPV6_TLV_ROUTERALERT 5 #define IPV6_TLV_CALIPSO 7 /* RFC 5570 */ -#define IPV6_TLV_IOAM 49 /* TEMPORARY IANA allocation for IOAM */ +#define IPV6_TLV_IOAM 49 /* RFC 9486 */ #define IPV6_TLV_JUMBO 194 #define IPV6_TLV_HAO 201 /* home address option */ diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index d5b9cfbd9cea..628d46a0da92 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -142,7 +142,7 @@ struct snd_hwdep_dsp_image { * * *****************************************************************************/ -#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 16) +#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 17) typedef unsigned long snd_pcm_uframes_t; typedef signed long snd_pcm_sframes_t; @@ -416,7 +416,7 @@ struct snd_pcm_hw_params { unsigned int rmask; /* W: requested masks */ unsigned int cmask; /* R: changed masks */ unsigned int info; /* R: Info flags for returned setup */ - unsigned int msbits; /* R: used most significant bits */ + unsigned int msbits; /* R: used most significant bits (in sample bit-width) */ unsigned int rate_num; /* R: rate numerator */ unsigned int rate_den; /* R: rate denominator */ snd_pcm_uframes_t fifo_size; /* R: chip FIFO size in frames */ diff --git a/init/Kconfig b/init/Kconfig index 8426d59cc634..6acb3febeb8e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1466,11 +1466,6 @@ config SYSCTL_ARCH_UNALIGN_ALLOW config HAVE_PCSPKR_PLATFORM bool -# interpreter that classic socket filters depend on -config BPF - bool - select CRYPTO_LIB_SHA1 - menuconfig EXPERT bool "Configure standard kernel features (expert users)" # Unhide debug options, to make the on-by-default options visible diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig index 6a906ff93006..bc25f5098a25 100644 --- a/kernel/bpf/Kconfig +++ b/kernel/bpf/Kconfig @@ -3,6 +3,7 @@ # BPF interpreter that, for example, classic socket filters depend on. config BPF bool + select CRYPTO_LIB_SHA1 # Used by archs to tell that they support BPF JIT compiler plus which # flavour. Only one of the two can be selected for a specific arch since diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 146824cc9689..bdea1a459153 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -414,47 +414,21 @@ void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now) bpf_selem_unlink_storage(selem, reuse_now); } -/* If cacheit_lockit is false, this lookup function is lockless */ -struct bpf_local_storage_data * -bpf_local_storage_lookup(struct bpf_local_storage *local_storage, - struct bpf_local_storage_map *smap, - bool cacheit_lockit) +void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage, + struct bpf_local_storage_map *smap, + struct bpf_local_storage_elem *selem) { - struct bpf_local_storage_data *sdata; - struct bpf_local_storage_elem *selem; - - /* Fast path (cache hit) */ - sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx], - bpf_rcu_lock_held()); - if (sdata && rcu_access_pointer(sdata->smap) == smap) - return sdata; - - /* Slow path (cache miss) */ - hlist_for_each_entry_rcu(selem, &local_storage->list, snode, - rcu_read_lock_trace_held()) - if (rcu_access_pointer(SDATA(selem)->smap) == smap) - break; - - if (!selem) - return NULL; - - sdata = SDATA(selem); - if (cacheit_lockit) { - unsigned long flags; - - /* spinlock is needed to avoid racing with the - * parallel delete. Otherwise, publishing an already - * deleted sdata to the cache will become a use-after-free - * problem in the next bpf_local_storage_lookup(). - */ - raw_spin_lock_irqsave(&local_storage->lock, flags); - if (selem_linked_to_storage(selem)) - rcu_assign_pointer(local_storage->cache[smap->cache_idx], - sdata); - raw_spin_unlock_irqrestore(&local_storage->lock, flags); - } + unsigned long flags; - return sdata; + /* spinlock is needed to avoid racing with the + * parallel delete. Otherwise, publishing an already + * deleted sdata to the cache will become a use-after-free + * problem in the next bpf_local_storage_lookup(). + */ + raw_spin_lock_irqsave(&local_storage->lock, flags); + if (selem_linked_to_storage(selem)) + rcu_assign_pointer(local_storage->cache[smap->cache_idx], SDATA(selem)); + raw_spin_unlock_irqrestore(&local_storage->lock, flags); } static int check_flags(const struct bpf_local_storage_data *old_sdata, diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 63b4dc495125..68240c3c6e7d 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -282,10 +282,6 @@ BTF_ID(func, bpf_lsm_file_lock) BTF_ID(func, bpf_lsm_file_open) BTF_ID(func, bpf_lsm_file_receive) -#ifdef CONFIG_SECURITY_NETWORK -BTF_ID(func, bpf_lsm_inet_conn_established) -#endif /* CONFIG_SECURITY_NETWORK */ - BTF_ID(func, bpf_lsm_inode_create) BTF_ID(func, bpf_lsm_inode_free_security) BTF_ID(func, bpf_lsm_inode_getattr) @@ -336,6 +332,8 @@ BTF_ID(func, bpf_lsm_sb_umount) BTF_ID(func, bpf_lsm_settime) #ifdef CONFIG_SECURITY_NETWORK +BTF_ID(func, bpf_lsm_inet_conn_established) + BTF_ID(func, bpf_lsm_socket_accept) BTF_ID(func, bpf_lsm_socket_bind) BTF_ID(func, bpf_lsm_socket_connect) diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 0decd862dfe0..a6019087b467 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -116,17 +116,183 @@ static bool is_valid_value_type(struct btf *btf, s32 value_id, return true; } +#define MAYBE_NULL_SUFFIX "__nullable" +#define MAX_STUB_NAME 128 + +/* Return the type info of a stub function, if it exists. + * + * The name of a stub function is made up of the name of the struct_ops and + * the name of the function pointer member, separated by "__". For example, + * if the struct_ops type is named "foo_ops" and the function pointer + * member is named "bar", the stub function name would be "foo_ops__bar". + */ +static const struct btf_type * +find_stub_func_proto(const struct btf *btf, const char *st_op_name, + const char *member_name) +{ + char stub_func_name[MAX_STUB_NAME]; + const struct btf_type *func_type; + s32 btf_id; + int cp; + + cp = snprintf(stub_func_name, MAX_STUB_NAME, "%s__%s", + st_op_name, member_name); + if (cp >= MAX_STUB_NAME) { + pr_warn("Stub function name too long\n"); + return NULL; + } + btf_id = btf_find_by_name_kind(btf, stub_func_name, BTF_KIND_FUNC); + if (btf_id < 0) + return NULL; + func_type = btf_type_by_id(btf, btf_id); + if (!func_type) + return NULL; + + return btf_type_by_id(btf, func_type->type); /* FUNC_PROTO */ +} + +/* Prepare argument info for every nullable argument of a member of a + * struct_ops type. + * + * Initialize a struct bpf_struct_ops_arg_info according to type info of + * the arguments of a stub function. (Check kCFI for more information about + * stub functions.) + * + * Each member in the struct_ops type has a struct bpf_struct_ops_arg_info + * to provide an array of struct bpf_ctx_arg_aux, which in turn provides + * the information that used by the verifier to check the arguments of the + * BPF struct_ops program assigned to the member. Here, we only care about + * the arguments that are marked as __nullable. + * + * The array of struct bpf_ctx_arg_aux is eventually assigned to + * prog->aux->ctx_arg_info of BPF struct_ops programs and passed to the + * verifier. (See check_struct_ops_btf_id()) + * + * arg_info->info will be the list of struct bpf_ctx_arg_aux if success. If + * fails, it will be kept untouched. + */ +static int prepare_arg_info(struct btf *btf, + const char *st_ops_name, + const char *member_name, + const struct btf_type *func_proto, + struct bpf_struct_ops_arg_info *arg_info) +{ + const struct btf_type *stub_func_proto, *pointed_type; + const struct btf_param *stub_args, *args; + struct bpf_ctx_arg_aux *info, *info_buf; + u32 nargs, arg_no, info_cnt = 0; + u32 arg_btf_id; + int offset; + + stub_func_proto = find_stub_func_proto(btf, st_ops_name, member_name); + if (!stub_func_proto) + return 0; + + /* Check if the number of arguments of the stub function is the same + * as the number of arguments of the function pointer. + */ + nargs = btf_type_vlen(func_proto); + if (nargs != btf_type_vlen(stub_func_proto)) { + pr_warn("the number of arguments of the stub function %s__%s does not match the number of arguments of the member %s of struct %s\n", + st_ops_name, member_name, member_name, st_ops_name); + return -EINVAL; + } + + if (!nargs) + return 0; + + args = btf_params(func_proto); + stub_args = btf_params(stub_func_proto); + + info_buf = kcalloc(nargs, sizeof(*info_buf), GFP_KERNEL); + if (!info_buf) + return -ENOMEM; + + /* Prepare info for every nullable argument */ + info = info_buf; + for (arg_no = 0; arg_no < nargs; arg_no++) { + /* Skip arguments that is not suffixed with + * "__nullable". + */ + if (!btf_param_match_suffix(btf, &stub_args[arg_no], + MAYBE_NULL_SUFFIX)) + continue; + + /* Should be a pointer to struct */ + pointed_type = btf_type_resolve_ptr(btf, + args[arg_no].type, + &arg_btf_id); + if (!pointed_type || + !btf_type_is_struct(pointed_type)) { + pr_warn("stub function %s__%s has %s tagging to an unsupported type\n", + st_ops_name, member_name, MAYBE_NULL_SUFFIX); + goto err_out; + } + + offset = btf_ctx_arg_offset(btf, func_proto, arg_no); + if (offset < 0) { + pr_warn("stub function %s__%s has an invalid trampoline ctx offset for arg#%u\n", + st_ops_name, member_name, arg_no); + goto err_out; + } + + if (args[arg_no].type != stub_args[arg_no].type) { + pr_warn("arg#%u type in stub function %s__%s does not match with its original func_proto\n", + arg_no, st_ops_name, member_name); + goto err_out; + } + + /* Fill the information of the new argument */ + info->reg_type = + PTR_TRUSTED | PTR_TO_BTF_ID | PTR_MAYBE_NULL; + info->btf_id = arg_btf_id; + info->btf = btf; + info->offset = offset; + + info++; + info_cnt++; + } + + if (info_cnt) { + arg_info->info = info_buf; + arg_info->cnt = info_cnt; + } else { + kfree(info_buf); + } + + return 0; + +err_out: + kfree(info_buf); + + return -EINVAL; +} + +/* Clean up the arg_info in a struct bpf_struct_ops_desc. */ +void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_ops_desc) +{ + struct bpf_struct_ops_arg_info *arg_info; + int i; + + arg_info = st_ops_desc->arg_info; + for (i = 0; i < btf_type_vlen(st_ops_desc->type); i++) + kfree(arg_info[i].info); + + kfree(arg_info); +} + int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc, struct btf *btf, struct bpf_verifier_log *log) { struct bpf_struct_ops *st_ops = st_ops_desc->st_ops; + struct bpf_struct_ops_arg_info *arg_info; const struct btf_member *member; const struct btf_type *t; s32 type_id, value_id; char value_name[128]; const char *mname; - int i; + int i, err; if (strlen(st_ops->name) + VALUE_PREFIX_LEN >= sizeof(value_name)) { @@ -136,6 +302,11 @@ int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc, } sprintf(value_name, "%s%s", VALUE_PREFIX, st_ops->name); + if (!st_ops->cfi_stubs) { + pr_warn("struct_ops for %s has no cfi_stubs\n", st_ops->name); + return -EINVAL; + } + type_id = btf_find_by_name_kind(btf, st_ops->name, BTF_KIND_STRUCT); if (type_id < 0) { @@ -160,6 +331,17 @@ int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc, if (!is_valid_value_type(btf, value_id, t, value_name)) return -EINVAL; + arg_info = kcalloc(btf_type_vlen(t), sizeof(*arg_info), + GFP_KERNEL); + if (!arg_info) + return -ENOMEM; + + st_ops_desc->arg_info = arg_info; + st_ops_desc->type = t; + st_ops_desc->type_id = type_id; + st_ops_desc->value_id = value_id; + st_ops_desc->value_type = btf_type_by_id(btf, value_id); + for_each_member(i, t, member) { const struct btf_type *func_proto; @@ -167,43 +349,52 @@ int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc, if (!*mname) { pr_warn("anon member in struct %s is not supported\n", st_ops->name); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto errout; } if (__btf_member_bitfield_size(t, member)) { pr_warn("bit field member %s in struct %s is not supported\n", mname, st_ops->name); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto errout; } func_proto = btf_type_resolve_func_ptr(btf, member->type, NULL); - if (func_proto && - btf_distill_func_proto(log, btf, + if (!func_proto) + continue; + + if (btf_distill_func_proto(log, btf, func_proto, mname, &st_ops->func_models[i])) { pr_warn("Error in parsing func ptr %s in struct %s\n", mname, st_ops->name); - return -EINVAL; + err = -EINVAL; + goto errout; } + + err = prepare_arg_info(btf, st_ops->name, mname, + func_proto, + arg_info + i); + if (err) + goto errout; } - if (i == btf_type_vlen(t)) { - if (st_ops->init(btf)) { - pr_warn("Error in init bpf_struct_ops %s\n", - st_ops->name); - return -EINVAL; - } else { - st_ops_desc->type_id = type_id; - st_ops_desc->type = t; - st_ops_desc->value_id = value_id; - st_ops_desc->value_type = btf_type_by_id(btf, - value_id); - } + if (st_ops->init(btf)) { + pr_warn("Error in init bpf_struct_ops %s\n", + st_ops->name); + err = -EINVAL; + goto errout; } return 0; + +errout: + bpf_struct_ops_desc_release(st_ops_desc); + + return err; } static int bpf_struct_ops_map_get_next_key(struct bpf_map *map, void *key, diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index edef96ceffa3..6ff0bd1a91d5 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -1699,6 +1699,13 @@ static void btf_free_struct_meta_tab(struct btf *btf) static void btf_free_struct_ops_tab(struct btf *btf) { struct btf_struct_ops_tab *tab = btf->struct_ops_tab; + u32 i; + + if (!tab) + return; + + for (i = 0; i < tab->cnt; i++) + bpf_struct_ops_desc_release(&tab->ops[i]); kfree(tab); btf->struct_ops_tab = NULL; @@ -5687,15 +5694,29 @@ static int find_kern_ctx_type_id(enum bpf_prog_type prog_type) return ctx_type->type; } -const struct btf_type * -btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, - const struct btf_type *t, enum bpf_prog_type prog_type, - int arg) +bool btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, enum bpf_prog_type prog_type, + int arg) { const struct btf_type *ctx_type; const char *tname, *ctx_tname; t = btf_type_by_id(btf, t->type); + + /* KPROBE programs allow bpf_user_pt_regs_t typedef, which we need to + * check before we skip all the typedef below. + */ + if (prog_type == BPF_PROG_TYPE_KPROBE) { + while (btf_type_is_modifier(t) && !btf_type_is_typedef(t)) + t = btf_type_by_id(btf, t->type); + + if (btf_type_is_typedef(t)) { + tname = btf_name_by_offset(btf, t->name_off); + if (tname && strcmp(tname, "bpf_user_pt_regs_t") == 0) + return true; + } + } + while (btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); if (!btf_type_is_struct(t)) { @@ -5704,27 +5725,30 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, * is not supported yet. * BPF_PROG_TYPE_RAW_TRACEPOINT is fine. */ - return NULL; + return false; } tname = btf_name_by_offset(btf, t->name_off); if (!tname) { bpf_log(log, "arg#%d struct doesn't have a name\n", arg); - return NULL; + return false; } ctx_type = find_canonical_prog_ctx_type(prog_type); if (!ctx_type) { bpf_log(log, "btf_vmlinux is malformed\n"); /* should not happen */ - return NULL; + return false; } again: ctx_tname = btf_name_by_offset(btf_vmlinux, ctx_type->name_off); if (!ctx_tname) { /* should not happen */ bpf_log(log, "Please fix kernel include/linux/bpf_types.h\n"); - return NULL; + return false; } + /* program types without named context types work only with arg:ctx tag */ + if (ctx_tname[0] == '\0') + return false; /* only compare that prog's ctx type name is the same as * kernel expects. No need to compare field by field. * It's ok for bpf prog to do: @@ -5733,20 +5757,20 @@ again: * { // no fields of skb are ever used } */ if (strcmp(ctx_tname, "__sk_buff") == 0 && strcmp(tname, "sk_buff") == 0) - return ctx_type; + return true; if (strcmp(ctx_tname, "xdp_md") == 0 && strcmp(tname, "xdp_buff") == 0) - return ctx_type; + return true; if (strcmp(ctx_tname, tname)) { /* bpf_user_pt_regs_t is a typedef, so resolve it to * underlying struct and check name again */ if (!btf_type_is_modifier(ctx_type)) - return NULL; + return false; while (btf_type_is_modifier(ctx_type)) ctx_type = btf_type_by_id(btf_vmlinux, ctx_type->type); goto again; } - return ctx_type; + return true; } /* forward declarations for arch-specific underlying types of @@ -5898,7 +5922,7 @@ static int btf_translate_to_vmlinux(struct bpf_verifier_log *log, enum bpf_prog_type prog_type, int arg) { - if (!btf_get_prog_ctx_type(log, btf, t, prog_type, arg)) + if (!btf_is_prog_ctx_type(log, btf, t, prog_type, arg)) return -ENOENT; return find_kern_ctx_type_id(prog_type); } @@ -6130,6 +6154,26 @@ static bool prog_args_trusted(const struct bpf_prog *prog) } } +int btf_ctx_arg_offset(const struct btf *btf, const struct btf_type *func_proto, + u32 arg_no) +{ + const struct btf_param *args; + const struct btf_type *t; + int off = 0, i; + u32 sz; + + args = btf_params(func_proto); + for (i = 0; i < arg_no; i++) { + t = btf_type_by_id(btf, args[i].type); + t = btf_resolve_size(btf, t, &sz); + if (IS_ERR(t)) + return PTR_ERR(t); + off += roundup(sz, 8); + } + + return off; +} + bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) @@ -6266,7 +6310,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, } info->reg_type = ctx_arg_info->reg_type; - info->btf = btf_vmlinux; + info->btf = ctx_arg_info->btf ? : btf_vmlinux; info->btf_id = ctx_arg_info->btf_id; return true; } @@ -6985,9 +7029,78 @@ static bool btf_is_dynptr_ptr(const struct btf *btf, const struct btf_type *t) return false; } +struct bpf_cand_cache { + const char *name; + u32 name_len; + u16 kind; + u16 cnt; + struct { + const struct btf *btf; + u32 id; + } cands[]; +}; + +static DEFINE_MUTEX(cand_cache_mutex); + +static struct bpf_cand_cache * +bpf_core_find_cands(struct bpf_core_ctx *ctx, u32 local_type_id); + +static int btf_get_ptr_to_btf_id(struct bpf_verifier_log *log, int arg_idx, + const struct btf *btf, const struct btf_type *t) +{ + struct bpf_cand_cache *cc; + struct bpf_core_ctx ctx = { + .btf = btf, + .log = log, + }; + u32 kern_type_id, type_id; + int err = 0; + + /* skip PTR and modifiers */ + type_id = t->type; + t = btf_type_by_id(btf, t->type); + while (btf_type_is_modifier(t)) { + type_id = t->type; + t = btf_type_by_id(btf, t->type); + } + + mutex_lock(&cand_cache_mutex); + cc = bpf_core_find_cands(&ctx, type_id); + if (IS_ERR(cc)) { + err = PTR_ERR(cc); + bpf_log(log, "arg#%d reference type('%s %s') candidate matching error: %d\n", + arg_idx, btf_type_str(t), __btf_name_by_offset(btf, t->name_off), + err); + goto cand_cache_unlock; + } + if (cc->cnt != 1) { + bpf_log(log, "arg#%d reference type('%s %s') %s\n", + arg_idx, btf_type_str(t), __btf_name_by_offset(btf, t->name_off), + cc->cnt == 0 ? "has no matches" : "is ambiguous"); + err = cc->cnt == 0 ? -ENOENT : -ESRCH; + goto cand_cache_unlock; + } + if (btf_is_module(cc->cands[0].btf)) { + bpf_log(log, "arg#%d reference type('%s %s') points to kernel module type (unsupported)\n", + arg_idx, btf_type_str(t), __btf_name_by_offset(btf, t->name_off)); + err = -EOPNOTSUPP; + goto cand_cache_unlock; + } + kern_type_id = cc->cands[0].id; + +cand_cache_unlock: + mutex_unlock(&cand_cache_mutex); + if (err) + return err; + + return kern_type_id; +} + enum btf_arg_tag { ARG_TAG_CTX = 0x1, ARG_TAG_NONNULL = 0x2, + ARG_TAG_TRUSTED = 0x4, + ARG_TAG_NULLABLE = 0x8, }; /* Process BTF of a function to produce high-level expectation of function @@ -7053,6 +7166,8 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog) args = (const struct btf_param *)(t + 1); nargs = btf_type_vlen(t); if (nargs > MAX_BPF_FUNC_REG_ARGS) { + if (!is_global) + return -EINVAL; bpf_log(log, "Global function %s() with %d > %d args. Buggy compiler.\n", tname, nargs, MAX_BPF_FUNC_REG_ARGS); return -EINVAL; @@ -7062,6 +7177,8 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog) while (btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); if (!btf_type_is_int(t) && !btf_is_any_enum(t)) { + if (!is_global) + return -EINVAL; bpf_log(log, "Global function %s() doesn't return scalar. Only those are supported.\n", tname); @@ -7089,8 +7206,12 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog) if (strcmp(tag, "ctx") == 0) { tags |= ARG_TAG_CTX; + } else if (strcmp(tag, "trusted") == 0) { + tags |= ARG_TAG_TRUSTED; } else if (strcmp(tag, "nonnull") == 0) { tags |= ARG_TAG_NONNULL; + } else if (strcmp(tag, "nullable") == 0) { + tags |= ARG_TAG_NULLABLE; } else { bpf_log(log, "arg#%d has unsupported set of tags\n", i); return -EOPNOTSUPP; @@ -7107,11 +7228,15 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog) if (!btf_type_is_ptr(t)) goto skip_pointer; - if ((tags & ARG_TAG_CTX) || btf_get_prog_ctx_type(log, btf, t, prog_type, i)) { + if ((tags & ARG_TAG_CTX) || btf_is_prog_ctx_type(log, btf, t, prog_type, i)) { if (tags & ~ARG_TAG_CTX) { bpf_log(log, "arg#%d has invalid combination of tags\n", i); return -EINVAL; } + if ((tags & ARG_TAG_CTX) && + btf_validate_prog_ctx_type(log, btf, t, i, prog_type, + prog->expected_attach_type)) + return -EINVAL; sub->args[i].arg_type = ARG_PTR_TO_CTX; continue; } @@ -7123,9 +7248,32 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog) sub->args[i].arg_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY; continue; } + if (tags & ARG_TAG_TRUSTED) { + int kern_type_id; + + if (tags & ARG_TAG_NONNULL) { + bpf_log(log, "arg#%d has invalid combination of tags\n", i); + return -EINVAL; + } + + kern_type_id = btf_get_ptr_to_btf_id(log, i, btf, t); + if (kern_type_id < 0) + return kern_type_id; + + sub->args[i].arg_type = ARG_PTR_TO_BTF_ID | PTR_TRUSTED; + if (tags & ARG_TAG_NULLABLE) + sub->args[i].arg_type |= PTR_MAYBE_NULL; + sub->args[i].btf_id = kern_type_id; + continue; + } if (is_global) { /* generic user data pointer */ u32 mem_size; + if (tags & ARG_TAG_NULLABLE) { + bpf_log(log, "arg#%d has invalid combination of tags\n", i); + return -EINVAL; + } + t = btf_type_skip_modifiers(btf, t->type, NULL); ref_t = btf_resolve_size(btf, t, &mem_size); if (IS_ERR(ref_t)) { @@ -7151,28 +7299,13 @@ skip_pointer: sub->args[i].arg_type = ARG_ANYTHING; continue; } + if (!is_global) + return -EINVAL; bpf_log(log, "Arg#%d type %s in %s() is not supported yet.\n", i, btf_type_str(t), tname); return -EINVAL; } - for (i = 0; i < nargs; i++) { - const char *tag; - - if (sub->args[i].arg_type != ARG_PTR_TO_CTX) - continue; - - /* check if arg has "arg:ctx" tag */ - t = btf_type_by_id(btf, args[i].type); - tag = btf_find_decl_tag_value(btf, fn_t, i, "arg:"); - if (IS_ERR_OR_NULL(tag) || strcmp(tag, "ctx") != 0) - continue; - - if (btf_validate_prog_ctx_type(log, btf, t, i, prog_type, - prog->expected_attach_type)) - return -EINVAL; - } - sub->arg_cnt = nargs; sub->args_cached = true; @@ -7649,6 +7782,17 @@ static struct btf *btf_get_module_btf(const struct module *module) return btf; } +static int check_btf_kconfigs(const struct module *module, const char *feature) +{ + if (!module && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) { + pr_err("missing vmlinux BTF, cannot register %s\n", feature); + return -ENOENT; + } + if (module && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) + pr_warn("missing module BTF, cannot register %s\n", feature); + return 0; +} + BPF_CALL_4(bpf_btf_find_by_name_kind, char *, name, int, name_sz, u32, kind, int, flags) { struct btf *btf = NULL; @@ -8009,15 +8153,8 @@ static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook, int ret, i; btf = btf_get_module_btf(kset->owner); - if (!btf) { - if (!kset->owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) { - pr_err("missing vmlinux BTF, cannot register kfuncs\n"); - return -ENOENT; - } - if (kset->owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) - pr_warn("missing module BTF, cannot register kfuncs\n"); - return 0; - } + if (!btf) + return check_btf_kconfigs(kset->owner, "kfunc"); if (IS_ERR(btf)) return PTR_ERR(btf); @@ -8041,6 +8178,14 @@ int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, { enum btf_kfunc_hook hook; + /* All kfuncs need to be tagged as such in BTF. + * WARN() for initcall registrations that do not check errors. + */ + if (!(kset->set->flags & BTF_SET8_KFUNCS)) { + WARN_ON(!kset->owner); + return -EINVAL; + } + hook = bpf_prog_type_to_kfunc_hook(prog_type); return __register_btf_kfunc_id_set(hook, kset); } @@ -8117,17 +8262,8 @@ int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_c int ret; btf = btf_get_module_btf(owner); - if (!btf) { - if (!owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) { - pr_err("missing vmlinux BTF, cannot register dtor kfuncs\n"); - return -ENOENT; - } - if (owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) { - pr_err("missing module BTF, cannot register dtor kfuncs\n"); - return -ENOENT; - } - return 0; - } + if (!btf) + return check_btf_kconfigs(owner, "dtor kfuncs"); if (IS_ERR(btf)) return PTR_ERR(btf); @@ -8242,17 +8378,6 @@ size_t bpf_core_essential_name_len(const char *name) return n; } -struct bpf_cand_cache { - const char *name; - u32 name_len; - u16 kind; - u16 cnt; - struct { - const struct btf *btf; - u32 id; - } cands[]; -}; - static void bpf_free_cands(struct bpf_cand_cache *cands) { if (!cands->cnt) @@ -8273,8 +8398,6 @@ static struct bpf_cand_cache *vmlinux_cand_cache[VMLINUX_CAND_CACHE_SIZE]; #define MODULE_CAND_CACHE_SIZE 31 static struct bpf_cand_cache *module_cand_cache[MODULE_CAND_CACHE_SIZE]; -static DEFINE_MUTEX(cand_cache_mutex); - static void __print_cand_cache(struct bpf_verifier_log *log, struct bpf_cand_cache **cache, int cache_size) @@ -8803,7 +8926,9 @@ int __register_bpf_struct_ops(struct bpf_struct_ops *st_ops) btf = btf_get_module_btf(st_ops->owner); if (!btf) - return -EINVAL; + return check_btf_kconfigs(st_ops->owner, "struct_ops"); + if (IS_ERR(btf)) + return PTR_ERR(btf); log = kzalloc(sizeof(*log), GFP_KERNEL | __GFP_NOWARN); if (!log) { @@ -8823,3 +8948,21 @@ errout: } EXPORT_SYMBOL_GPL(__register_bpf_struct_ops); #endif + +bool btf_param_match_suffix(const struct btf *btf, + const struct btf_param *arg, + const char *suffix) +{ + int suffix_len = strlen(suffix), len; + const char *param_name; + + /* In the future, this can be ported to use BTF tagging */ + param_name = btf_name_by_offset(btf, arg->name_off); + if (str_is_empty(param_name)) + return false; + len = strlen(param_name); + if (len <= suffix_len) + return false; + param_name += len - suffix_len; + return !strncmp(param_name, suffix, suffix_len); +} diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 98e0e3835b28..82243cb6c54d 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1358,15 +1358,12 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, struct sk_buff *skb, enum cgroup_bpf_attach_type atype) { - unsigned int offset = skb->data - skb_network_header(skb); + unsigned int offset = -skb_network_offset(skb); struct sock *save_sk; void *saved_data_end; struct cgroup *cgrp; int ret; - if (!sk || !sk_fullsock(sk)) - return 0; - if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6) return 0; diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 8a0bb80fe48a..ef82ffc90cbe 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -178,7 +178,7 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu, void **frames, int n, struct xdp_cpumap_stats *stats) { - struct xdp_rxq_info rxq; + struct xdp_rxq_info rxq = {}; struct xdp_buff xdp; int i, nframes = 0; diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c index 2e73533a3811..dad0fb1c8e87 100644 --- a/kernel/bpf/cpumask.c +++ b/kernel/bpf/cpumask.c @@ -424,7 +424,7 @@ __bpf_kfunc u32 bpf_cpumask_weight(const struct cpumask *cpumask) __bpf_kfunc_end_defs(); -BTF_SET8_START(cpumask_kfunc_btf_ids) +BTF_KFUNCS_START(cpumask_kfunc_btf_ids) BTF_ID_FLAGS(func, bpf_cpumask_create, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cpumask_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_cpumask_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS) @@ -450,7 +450,7 @@ BTF_ID_FLAGS(func, bpf_cpumask_copy, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_any_distribute, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_any_and_distribute, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_weight, KF_RCU) -BTF_SET8_END(cpumask_kfunc_btf_ids) +BTF_KFUNCS_END(cpumask_kfunc_btf_ids) static const struct btf_kfunc_id_set cpumask_kfunc_set = { .owner = THIS_MODULE, diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 3da54d0b3469..a89587859571 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -334,7 +334,7 @@ static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock) __this_cpu_write(irqsave_flags, flags); } -notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock) +NOTRACE_BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock) { __bpf_spin_lock_irqsave(lock); return 0; @@ -357,7 +357,7 @@ static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock) local_irq_restore(flags); } -notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock) +NOTRACE_BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock) { __bpf_spin_unlock_irqrestore(lock); return 0; @@ -2487,9 +2487,9 @@ __bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj) return obj; } -__bpf_kfunc void *bpf_rdonly_cast(void *obj__ign, u32 btf_id__k) +__bpf_kfunc void *bpf_rdonly_cast(const void *obj__ign, u32 btf_id__k) { - return obj__ign; + return (void *)obj__ign; } __bpf_kfunc void bpf_rcu_read_lock(void) @@ -2547,7 +2547,7 @@ __bpf_kfunc void bpf_throw(u64 cookie) __bpf_kfunc_end_defs(); -BTF_SET8_START(generic_btf_ids) +BTF_KFUNCS_START(generic_btf_ids) #ifdef CONFIG_KEXEC_CORE BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE) #endif @@ -2576,7 +2576,7 @@ BTF_ID_FLAGS(func, bpf_task_get_cgroup1, KF_ACQUIRE | KF_RCU | KF_RET_NULL) #endif BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_throw) -BTF_SET8_END(generic_btf_ids) +BTF_KFUNCS_END(generic_btf_ids) static const struct btf_kfunc_id_set generic_kfunc_set = { .owner = THIS_MODULE, @@ -2592,7 +2592,7 @@ BTF_ID(struct, cgroup) BTF_ID(func, bpf_cgroup_release_dtor) #endif -BTF_SET8_START(common_btf_ids) +BTF_KFUNCS_START(common_btf_ids) BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx) BTF_ID_FLAGS(func, bpf_rdonly_cast) BTF_ID_FLAGS(func, bpf_rcu_read_lock) @@ -2621,7 +2621,7 @@ BTF_ID_FLAGS(func, bpf_dynptr_is_null) BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly) BTF_ID_FLAGS(func, bpf_dynptr_size) BTF_ID_FLAGS(func, bpf_dynptr_clone) -BTF_SET8_END(common_btf_ids) +BTF_KFUNCS_END(common_btf_ids) static const struct btf_kfunc_id_set common_kfunc_set = { .owner = THIS_MODULE, diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 594a234f122b..63c34e7b0715 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -9,6 +9,7 @@ #include <linux/bpf.h> #include <linux/bpf_verifier.h> #include <linux/math64.h> +#include <linux/string.h> #define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args) @@ -333,7 +334,8 @@ find_linfo(const struct bpf_verifier_env *env, u32 insn_off) { const struct bpf_line_info *linfo; const struct bpf_prog *prog; - u32 i, nr_linfo; + u32 nr_linfo; + int l, r, m; prog = env->prog; nr_linfo = prog->aux->nr_linfo; @@ -342,11 +344,30 @@ find_linfo(const struct bpf_verifier_env *env, u32 insn_off) return NULL; linfo = prog->aux->linfo; - for (i = 1; i < nr_linfo; i++) - if (insn_off < linfo[i].insn_off) - break; + /* Loop invariant: linfo[l].insn_off <= insns_off. + * linfo[0].insn_off == 0 which always satisfies above condition. + * Binary search is searching for rightmost linfo entry that satisfies + * the above invariant, giving us the desired record that covers given + * instruction offset. + */ + l = 0; + r = nr_linfo - 1; + while (l < r) { + /* (r - l + 1) / 2 means we break a tie to the right, so if: + * l=1, r=2, linfo[l].insn_off <= insn_off, linfo[r].insn_off > insn_off, + * then m=2, we see that linfo[m].insn_off > insn_off, and so + * r becomes 1 and we exit the loop with correct l==1. + * If the tie was broken to the left, m=1 would end us up in + * an endless loop where l and m stay at 1 and r stays at 2. + */ + m = l + (r - l + 1) / 2; + if (linfo[m].insn_off <= insn_off) + l = m; + else + r = m - 1; + } - return &linfo[i - 1]; + return &linfo[l]; } static const char *ltrim(const char *s) @@ -361,13 +382,28 @@ __printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env, u32 insn_off, const char *prefix_fmt, ...) { - const struct bpf_line_info *linfo; + const struct bpf_line_info *linfo, *prev_linfo; + const struct btf *btf; + const char *s, *fname; if (!bpf_verifier_log_needed(&env->log)) return; + prev_linfo = env->prev_linfo; linfo = find_linfo(env, insn_off); - if (!linfo || linfo == env->prev_linfo) + if (!linfo || linfo == prev_linfo) + return; + + /* It often happens that two separate linfo records point to the same + * source code line, but have differing column numbers. Given verifier + * log doesn't emit column information, from user perspective we just + * end up emitting the same source code line twice unnecessarily. + * So instead check that previous and current linfo record point to + * the same file (file_name_offs match) and the same line number, and + * avoid emitting duplicated source code line in such case. + */ + if (prev_linfo && linfo->file_name_off == prev_linfo->file_name_off && + BPF_LINE_INFO_LINE_NUM(linfo->line_col) == BPF_LINE_INFO_LINE_NUM(prev_linfo->line_col)) return; if (prefix_fmt) { @@ -378,9 +414,15 @@ __printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env, va_end(args); } - verbose(env, "%s\n", - ltrim(btf_name_by_offset(env->prog->aux->btf, - linfo->line_off))); + btf = env->prog->aux->btf; + s = ltrim(btf_name_by_offset(btf, linfo->line_off)); + verbose(env, "%s", s); /* source code line */ + + s = btf_name_by_offset(btf, linfo->file_name_off); + /* leave only file name */ + fname = strrchr(s, '/'); + fname = fname ? fname + 1 : s; + verbose(env, " @ %s:%u\n", fname, BPF_LINE_INFO_LINE_NUM(linfo->line_col)); env->prev_linfo = linfo; } diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index b32be680da6c..050fe1ebf0f7 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -164,13 +164,13 @@ static inline int extract_bit(const u8 *data, size_t index) */ static size_t longest_prefix_match(const struct lpm_trie *trie, const struct lpm_trie_node *node, - const struct bpf_lpm_trie_key *key) + const struct bpf_lpm_trie_key_u8 *key) { u32 limit = min(node->prefixlen, key->prefixlen); u32 prefixlen = 0, i = 0; BUILD_BUG_ON(offsetof(struct lpm_trie_node, data) % sizeof(u32)); - BUILD_BUG_ON(offsetof(struct bpf_lpm_trie_key, data) % sizeof(u32)); + BUILD_BUG_ON(offsetof(struct bpf_lpm_trie_key_u8, data) % sizeof(u32)); #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(CONFIG_64BIT) @@ -229,7 +229,7 @@ static void *trie_lookup_elem(struct bpf_map *map, void *_key) { struct lpm_trie *trie = container_of(map, struct lpm_trie, map); struct lpm_trie_node *node, *found = NULL; - struct bpf_lpm_trie_key *key = _key; + struct bpf_lpm_trie_key_u8 *key = _key; if (key->prefixlen > trie->max_prefixlen) return NULL; @@ -309,7 +309,7 @@ static long trie_update_elem(struct bpf_map *map, struct lpm_trie *trie = container_of(map, struct lpm_trie, map); struct lpm_trie_node *node, *im_node = NULL, *new_node = NULL; struct lpm_trie_node __rcu **slot; - struct bpf_lpm_trie_key *key = _key; + struct bpf_lpm_trie_key_u8 *key = _key; unsigned long irq_flags; unsigned int next_bit; size_t matchlen = 0; @@ -437,7 +437,7 @@ out: static long trie_delete_elem(struct bpf_map *map, void *_key) { struct lpm_trie *trie = container_of(map, struct lpm_trie, map); - struct bpf_lpm_trie_key *key = _key; + struct bpf_lpm_trie_key_u8 *key = _key; struct lpm_trie_node __rcu **trim, **trim2; struct lpm_trie_node *node, *parent; unsigned long irq_flags; @@ -536,7 +536,7 @@ out: sizeof(struct lpm_trie_node)) #define LPM_VAL_SIZE_MIN 1 -#define LPM_KEY_SIZE(X) (sizeof(struct bpf_lpm_trie_key) + (X)) +#define LPM_KEY_SIZE(X) (sizeof(struct bpf_lpm_trie_key_u8) + (X)) #define LPM_KEY_SIZE_MAX LPM_KEY_SIZE(LPM_DATA_SIZE_MAX) #define LPM_KEY_SIZE_MIN LPM_KEY_SIZE(LPM_DATA_SIZE_MIN) @@ -565,7 +565,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr) /* copy mandatory map attributes */ bpf_map_init_from_attr(&trie->map, attr); trie->data_size = attr->key_size - - offsetof(struct bpf_lpm_trie_key, data); + offsetof(struct bpf_lpm_trie_key_u8, data); trie->max_prefixlen = trie->data_size * 8; spin_lock_init(&trie->lock); @@ -616,7 +616,7 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key) { struct lpm_trie_node *node, *next_node = NULL, *parent, *search_root; struct lpm_trie *trie = container_of(map, struct lpm_trie, map); - struct bpf_lpm_trie_key *key = _key, *next_key = _next_key; + struct bpf_lpm_trie_key_u8 *key = _key, *next_key = _next_key; struct lpm_trie_node **node_stack = NULL; int err = 0, stack_ptr = -1; unsigned int next_bit; @@ -703,7 +703,7 @@ find_leftmost: } do_copy: next_key->prefixlen = next_node->prefixlen; - memcpy((void *)next_key + offsetof(struct bpf_lpm_trie_key, data), + memcpy((void *)next_key + offsetof(struct bpf_lpm_trie_key_u8, data), next_node->data, trie->data_size); free_stack: kfree(node_stack); @@ -715,7 +715,7 @@ static int trie_check_btf(const struct bpf_map *map, const struct btf_type *key_type, const struct btf_type *value_type) { - /* Keys must have struct bpf_lpm_trie_key embedded. */ + /* Keys must have struct bpf_lpm_trie_key_u8 embedded. */ return BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT ? -EINVAL : 0; } diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c index 6abd7c5df4b3..9575314f40a6 100644 --- a/kernel/bpf/map_iter.c +++ b/kernel/bpf/map_iter.c @@ -213,9 +213,9 @@ __bpf_kfunc s64 bpf_map_sum_elem_count(const struct bpf_map *map) __bpf_kfunc_end_defs(); -BTF_SET8_START(bpf_map_iter_kfunc_ids) +BTF_KFUNCS_START(bpf_map_iter_kfunc_ids) BTF_ID_FLAGS(func, bpf_map_sum_elem_count, KF_TRUSTED_ARGS) -BTF_SET8_END(bpf_map_iter_kfunc_ids) +BTF_KFUNCS_END(bpf_map_iter_kfunc_ids) static const struct btf_kfunc_id_set bpf_map_iter_kfunc_set = { .owner = THIS_MODULE, diff --git a/kernel/bpf/token.c b/kernel/bpf/token.c index 0bca93b60c43..d6ccf8d00eab 100644 --- a/kernel/bpf/token.c +++ b/kernel/bpf/token.c @@ -72,28 +72,28 @@ static void bpf_token_show_fdinfo(struct seq_file *m, struct file *filp) u64 mask; BUILD_BUG_ON(__MAX_BPF_CMD >= 64); - mask = (1ULL << __MAX_BPF_CMD) - 1; + mask = BIT_ULL(__MAX_BPF_CMD) - 1; if ((token->allowed_cmds & mask) == mask) seq_printf(m, "allowed_cmds:\tany\n"); else seq_printf(m, "allowed_cmds:\t0x%llx\n", token->allowed_cmds); BUILD_BUG_ON(__MAX_BPF_MAP_TYPE >= 64); - mask = (1ULL << __MAX_BPF_MAP_TYPE) - 1; + mask = BIT_ULL(__MAX_BPF_MAP_TYPE) - 1; if ((token->allowed_maps & mask) == mask) seq_printf(m, "allowed_maps:\tany\n"); else seq_printf(m, "allowed_maps:\t0x%llx\n", token->allowed_maps); BUILD_BUG_ON(__MAX_BPF_PROG_TYPE >= 64); - mask = (1ULL << __MAX_BPF_PROG_TYPE) - 1; + mask = BIT_ULL(__MAX_BPF_PROG_TYPE) - 1; if ((token->allowed_progs & mask) == mask) seq_printf(m, "allowed_progs:\tany\n"); else seq_printf(m, "allowed_progs:\t0x%llx\n", token->allowed_progs); BUILD_BUG_ON(__MAX_BPF_ATTACH_TYPE >= 64); - mask = (1ULL << __MAX_BPF_ATTACH_TYPE) - 1; + mask = BIT_ULL(__MAX_BPF_ATTACH_TYPE) - 1; if ((token->allowed_attachs & mask) == mask) seq_printf(m, "allowed_attachs:\tany\n"); else @@ -253,7 +253,7 @@ bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd) { if (!token) return false; - if (!(token->allowed_cmds & (1ULL << cmd))) + if (!(token->allowed_cmds & BIT_ULL(cmd))) return false; return security_bpf_token_cmd(token, cmd) == 0; } @@ -263,7 +263,7 @@ bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type t if (!token || type >= __MAX_BPF_MAP_TYPE) return false; - return token->allowed_maps & (1ULL << type); + return token->allowed_maps & BIT_ULL(type); } bool bpf_token_allow_prog_type(const struct bpf_token *token, @@ -273,6 +273,6 @@ bool bpf_token_allow_prog_type(const struct bpf_token *token, if (!token || prog_type >= __MAX_BPF_PROG_TYPE || attach_type >= __MAX_BPF_ATTACH_TYPE) return false; - return (token->allowed_progs & (1ULL << prog_type)) && - (token->allowed_attachs & (1ULL << attach_type)); + return (token->allowed_progs & BIT_ULL(prog_type)) && + (token->allowed_attachs & BIT_ULL(attach_type)); } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 174be4d0e21a..21f80383f8b2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -528,6 +528,11 @@ static bool is_sync_callback_calling_insn(struct bpf_insn *insn) (bpf_pseudo_kfunc_call(insn) && is_sync_callback_calling_kfunc(insn->imm)); } +static bool is_async_callback_calling_insn(struct bpf_insn *insn) +{ + return bpf_helper_call(insn) && is_async_callback_calling_function(insn->imm); +} + static bool is_storage_get_function(enum bpf_func_id func_id) { return func_id == BPF_FUNC_sk_storage_get || @@ -1155,6 +1160,12 @@ static bool is_spilled_scalar_reg(const struct bpf_stack_state *stack) stack->spilled_ptr.type == SCALAR_VALUE; } +static bool is_spilled_scalar_reg64(const struct bpf_stack_state *stack) +{ + return stack->slot_type[0] == STACK_SPILL && + stack->spilled_ptr.type == SCALAR_VALUE; +} + /* Mark stack slot as STACK_MISC, unless it is already STACK_INVALID, in which * case they are equivalent, or it's STACK_ZERO, in which case we preserve * more precise STACK_ZERO. @@ -2264,8 +2275,7 @@ static void __reg_assign_32_into_64(struct bpf_reg_state *reg) } /* Mark a register as having a completely unknown (scalar) value. */ -static void __mark_reg_unknown(const struct bpf_verifier_env *env, - struct bpf_reg_state *reg) +static void __mark_reg_unknown_imprecise(struct bpf_reg_state *reg) { /* * Clear type, off, and union(map_ptr, range) and @@ -2277,10 +2287,20 @@ static void __mark_reg_unknown(const struct bpf_verifier_env *env, reg->ref_obj_id = 0; reg->var_off = tnum_unknown; reg->frameno = 0; - reg->precise = !env->bpf_capable; + reg->precise = false; __mark_reg_unbounded(reg); } +/* Mark a register as having a completely unknown (scalar) value, + * initialize .precise as true when not bpf capable. + */ +static void __mark_reg_unknown(const struct bpf_verifier_env *env, + struct bpf_reg_state *reg) +{ + __mark_reg_unknown_imprecise(reg); + reg->precise = !env->bpf_capable; +} + static void mark_reg_unknown(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno) { @@ -4380,20 +4400,6 @@ static u64 reg_const_value(struct bpf_reg_state *reg, bool subreg32) return subreg32 ? tnum_subreg(reg->var_off).value : reg->var_off.value; } -static bool __is_scalar_unbounded(struct bpf_reg_state *reg) -{ - return tnum_is_unknown(reg->var_off) && - reg->smin_value == S64_MIN && reg->smax_value == S64_MAX && - reg->umin_value == 0 && reg->umax_value == U64_MAX && - reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX && - reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX; -} - -static bool register_is_bounded(struct bpf_reg_state *reg) -{ - return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg); -} - static bool __is_pointer_value(bool allow_ptr_leaks, const struct bpf_reg_state *reg) { @@ -4504,7 +4510,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, return err; mark_stack_slot_scratched(env, spi); - if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) && env->bpf_capable) { + if (reg && !(off % BPF_REG_SIZE) && reg->type == SCALAR_VALUE && env->bpf_capable) { bool reg_value_fits; reg_value_fits = get_reg_width(reg) <= BITS_PER_BYTE * size; @@ -4792,7 +4798,8 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, if (dst_regno < 0) return 0; - if (!(off % BPF_REG_SIZE) && size == spill_size) { + if (size <= spill_size && + bpf_stack_narrow_access_ok(off, size, spill_size)) { /* The earlier check_reg_arg() has decided the * subreg_def for this insn. Save it first. */ @@ -4800,6 +4807,12 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, copy_register_state(&state->regs[dst_regno], reg); state->regs[dst_regno].subreg_def = subreg_def; + + /* Break the relation on a narrowing fill. + * coerce_reg_to_size will adjust the boundaries. + */ + if (get_reg_width(reg) > size * BITS_PER_BYTE) + state->regs[dst_regno].id = 0; } else { int spill_cnt = 0, zero_cnt = 0; @@ -5247,6 +5260,11 @@ bad_type: return -EINVAL; } +static bool in_sleepable(struct bpf_verifier_env *env) +{ + return env->prog->aux->sleepable; +} + /* The non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock() * can dereference RCU protected pointers and result is PTR_TRUSTED. */ @@ -5254,7 +5272,7 @@ static bool in_rcu_cs(struct bpf_verifier_env *env) { return env->cur_state->active_rcu_lock || env->cur_state->active_lock.ptr || - !env->prog->aux->sleepable; + !in_sleepable(env); } /* Once GCC supports btf_type_tag the following mechanism will be replaced with tag check */ @@ -5806,6 +5824,17 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, strict); } +static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth) +{ + if (env->prog->jit_requested) + return round_up(stack_depth, 16); + + /* round up to 32-bytes, since this is granularity + * of interpreter stack size + */ + return round_up(max_t(u32, stack_depth, 1), 32); +} + /* starting from main bpf function walk all instructions of the function * and recursively walk all callees that given function can call. * Ignore jump and exit insns. @@ -5849,10 +5878,7 @@ process_func: depth); return -EACCES; } - /* round up to 32-bytes, since this is granularity - * of interpreter stack size - */ - depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32); + depth += round_up_stack_depth(env, subprog[idx].stack_depth); if (depth > MAX_BPF_STACK) { verbose(env, "combined stack size of %d calls is %d. Too large\n", frame + 1, depth); @@ -5946,7 +5972,7 @@ continue_func: */ if (frame == 0) return 0; - depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32); + depth -= round_up_stack_depth(env, subprog[idx].stack_depth); frame--; i = ret_insn[frame]; idx = ret_prog[frame]; @@ -6077,10 +6103,10 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) * values are also truncated so we push 64-bit bounds into * 32-bit bounds. Above were truncated < 32-bits already. */ - if (size < 4) { + if (size < 4) __mark_reg32_unbounded(reg); - reg_bounds_sync(reg); - } + + reg_bounds_sync(reg); } static void set_sext64_default_val(struct bpf_reg_state *reg, int size) @@ -8236,6 +8262,7 @@ found: switch ((int)reg->type) { case PTR_TO_BTF_ID: case PTR_TO_BTF_ID | PTR_TRUSTED: + case PTR_TO_BTF_ID | PTR_TRUSTED | PTR_MAYBE_NULL: case PTR_TO_BTF_ID | MEM_RCU: case PTR_TO_BTF_ID | PTR_MAYBE_NULL: case PTR_TO_BTF_ID | PTR_MAYBE_NULL | MEM_RCU: @@ -9338,6 +9365,18 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog, ret = process_dynptr_func(env, regno, -1, arg->arg_type, 0); if (ret) return ret; + } else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) { + struct bpf_call_arg_meta meta; + int err; + + if (register_is_null(reg) && type_may_be_null(arg->arg_type)) + continue; + + memset(&meta, 0, sizeof(meta)); /* leave func_id as zero */ + err = check_reg_type(env, regno, arg->arg_type, &arg->btf_id, &meta); + err = err ?: check_func_arg_reg_off(env, reg, regno, arg->arg_type); + if (err) + return err; } else { bpf_log(log, "verifier bug: unrecognized arg#%d type %d\n", i, arg->arg_type); @@ -9413,9 +9452,7 @@ static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *ins return -EFAULT; } - if (insn->code == (BPF_JMP | BPF_CALL) && - insn->src_reg == 0 && - insn->imm == BPF_FUNC_timer_set_callback) { + if (is_async_callback_calling_insn(insn)) { struct bpf_verifier_state *async_cb; /* there is no real recursion here. timer callbacks are async */ @@ -9474,6 +9511,13 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, if (subprog_is_global(env, subprog)) { const char *sub_name = subprog_name(env, subprog); + /* Only global subprogs cannot be called with a lock held. */ + if (env->cur_state->active_lock.ptr) { + verbose(env, "global function calls are not allowed while holding a lock,\n" + "use static function instead\n"); + return -EINVAL; + } + if (err) { verbose(env, "Caller passes invalid args into func#%d ('%s')\n", subprog, sub_name); @@ -10130,7 +10174,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn return -EINVAL; } - if (!env->prog->aux->sleepable && fn->might_sleep) { + if (!in_sleepable(env) && fn->might_sleep) { verbose(env, "helper call might sleep in a non-sleepable prog\n"); return -EINVAL; } @@ -10160,7 +10204,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn return -EINVAL; } - if (env->prog->aux->sleepable && is_storage_get_function(func_id)) + if (in_sleepable(env) && is_storage_get_function(func_id)) env->insn_aux_data[insn_idx].storage_get_func_atomic = true; } @@ -10656,24 +10700,6 @@ static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta) return meta->kfunc_flags & KF_RCU_PROTECTED; } -static bool __kfunc_param_match_suffix(const struct btf *btf, - const struct btf_param *arg, - const char *suffix) -{ - int suffix_len = strlen(suffix), len; - const char *param_name; - - /* In the future, this can be ported to use BTF tagging */ - param_name = btf_name_by_offset(btf, arg->name_off); - if (str_is_empty(param_name)) - return false; - len = strlen(param_name); - if (len < suffix_len) - return false; - param_name += len - suffix_len; - return !strncmp(param_name, suffix, suffix_len); -} - static bool is_kfunc_arg_mem_size(const struct btf *btf, const struct btf_param *arg, const struct bpf_reg_state *reg) @@ -10684,7 +10710,7 @@ static bool is_kfunc_arg_mem_size(const struct btf *btf, if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE) return false; - return __kfunc_param_match_suffix(btf, arg, "__sz"); + return btf_param_match_suffix(btf, arg, "__sz"); } static bool is_kfunc_arg_const_mem_size(const struct btf *btf, @@ -10697,47 +10723,47 @@ static bool is_kfunc_arg_const_mem_size(const struct btf *btf, if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE) return false; - return __kfunc_param_match_suffix(btf, arg, "__szk"); + return btf_param_match_suffix(btf, arg, "__szk"); } static bool is_kfunc_arg_optional(const struct btf *btf, const struct btf_param *arg) { - return __kfunc_param_match_suffix(btf, arg, "__opt"); + return btf_param_match_suffix(btf, arg, "__opt"); } static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg) { - return __kfunc_param_match_suffix(btf, arg, "__k"); + return btf_param_match_suffix(btf, arg, "__k"); } static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *arg) { - return __kfunc_param_match_suffix(btf, arg, "__ign"); + return btf_param_match_suffix(btf, arg, "__ign"); } static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg) { - return __kfunc_param_match_suffix(btf, arg, "__alloc"); + return btf_param_match_suffix(btf, arg, "__alloc"); } static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *arg) { - return __kfunc_param_match_suffix(btf, arg, "__uninit"); + return btf_param_match_suffix(btf, arg, "__uninit"); } static bool is_kfunc_arg_refcounted_kptr(const struct btf *btf, const struct btf_param *arg) { - return __kfunc_param_match_suffix(btf, arg, "__refcounted_kptr"); + return btf_param_match_suffix(btf, arg, "__refcounted_kptr"); } static bool is_kfunc_arg_nullable(const struct btf *btf, const struct btf_param *arg) { - return __kfunc_param_match_suffix(btf, arg, "__nullable"); + return btf_param_match_suffix(btf, arg, "__nullable"); } static bool is_kfunc_arg_const_str(const struct btf *btf, const struct btf_param *arg) { - return __kfunc_param_match_suffix(btf, arg, "__str"); + return btf_param_match_suffix(btf, arg, "__str"); } static bool is_kfunc_arg_scalar_with_name(const struct btf *btf, @@ -11007,7 +11033,7 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, * type to our caller. When a set of conditions hold in the BTF type of * arguments, we resolve it to a known kfunc_ptr_arg_type. */ - if (btf_get_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno)) + if (btf_is_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno)) return KF_ARG_PTR_TO_CTX; if (is_kfunc_arg_alloc_obj(meta->btf, &args[argno])) @@ -11519,7 +11545,7 @@ static bool check_css_task_iter_allowlist(struct bpf_verifier_env *env) return true; fallthrough; default: - return env->prog->aux->sleepable; + return in_sleepable(env); } } @@ -12040,7 +12066,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, } sleepable = is_kfunc_sleepable(&meta); - if (sleepable && !env->prog->aux->sleepable) { + if (sleepable && !in_sleepable(env)) { verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name); return -EACCES; } @@ -15567,7 +15593,7 @@ static int visit_insn(int t, struct bpf_verifier_env *env) return DONE_EXPLORING; case BPF_CALL: - if (insn->src_reg == 0 && insn->imm == BPF_FUNC_timer_set_callback) + if (is_async_callback_calling_insn(insn)) /* Mark this call insn as a prune point to trigger * is_state_visited() check before call itself is * processed by __check_func_call(). Otherwise new @@ -16483,6 +16509,43 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, } } +static struct bpf_reg_state unbound_reg; + +static __init int unbound_reg_init(void) +{ + __mark_reg_unknown_imprecise(&unbound_reg); + unbound_reg.live |= REG_LIVE_READ; + return 0; +} +late_initcall(unbound_reg_init); + +static bool is_stack_all_misc(struct bpf_verifier_env *env, + struct bpf_stack_state *stack) +{ + u32 i; + + for (i = 0; i < ARRAY_SIZE(stack->slot_type); ++i) { + if ((stack->slot_type[i] == STACK_MISC) || + (stack->slot_type[i] == STACK_INVALID && env->allow_uninit_stack)) + continue; + return false; + } + + return true; +} + +static struct bpf_reg_state *scalar_reg_for_stack(struct bpf_verifier_env *env, + struct bpf_stack_state *stack) +{ + if (is_spilled_scalar_reg64(stack)) + return &stack->spilled_ptr; + + if (is_stack_all_misc(env, stack)) + return &unbound_reg; + + return NULL; +} + static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, struct bpf_func_state *cur, struct bpf_idmap *idmap, bool exact) { @@ -16521,6 +16584,20 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, if (i >= cur->allocated_stack) return false; + /* 64-bit scalar spill vs all slots MISC and vice versa. + * Load from all slots MISC produces unbound scalar. + * Construct a fake register for such stack and call + * regsafe() to ensure scalar ids are compared. + */ + old_reg = scalar_reg_for_stack(env, &old->stack[spi]); + cur_reg = scalar_reg_for_stack(env, &cur->stack[spi]); + if (old_reg && cur_reg) { + if (!regsafe(env, old_reg, cur_reg, idmap, exact)) + return false; + i += BPF_REG_SIZE - 1; + continue; + } + /* if old state was safe with misc data in the stack * it will be safe with zero-initialized stack. * The opposite is not true @@ -16638,6 +16715,9 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat { int i; + if (old->callback_depth > cur->callback_depth) + return false; + for (i = 0; i < MAX_BPF_REG; i++) if (!regsafe(env, &old->regs[i], &cur->regs[i], &env->idmap_scratch, exact)) @@ -17574,7 +17654,6 @@ static int do_check(struct bpf_verifier_env *env) if (env->cur_state->active_lock.ptr) { if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) || - (insn->src_reg == BPF_PSEUDO_CALL) || (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && (insn->off != 0 || !is_bpf_graph_api_kfunc(insn->imm)))) { verbose(env, "function calls are not allowed while holding a lock\n"); @@ -17622,14 +17701,12 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } process_bpf_exit_full: - if (env->cur_state->active_lock.ptr && - !in_rbtree_lock_required_cb(env)) { + if (env->cur_state->active_lock.ptr && !env->cur_state->curframe) { verbose(env, "bpf_spin_unlock is missing\n"); return -EINVAL; } - if (env->cur_state->active_rcu_lock && - !in_rbtree_lock_required_cb(env)) { + if (env->cur_state->active_rcu_lock && !env->cur_state->curframe) { verbose(env, "bpf_rcu_read_unlock is missing\n"); return -EINVAL; } @@ -17958,6 +18035,8 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, case BPF_MAP_TYPE_SK_STORAGE: case BPF_MAP_TYPE_TASK_STORAGE: case BPF_MAP_TYPE_CGRP_STORAGE: + case BPF_MAP_TYPE_QUEUE: + case BPF_MAP_TYPE_STACK: break; default: verbose(env, @@ -19603,7 +19682,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) } if (is_storage_get_function(insn->imm)) { - if (!env->prog->aux->sleepable || + if (!in_sleepable(env) || env->insn_aux_data[i + delta].storage_get_func_atomic) insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC); else @@ -20139,6 +20218,18 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog) mark_reg_known_zero(env, regs, i); reg->mem_size = arg->mem_size; reg->id = ++env->id_gen; + } else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) { + reg->type = PTR_TO_BTF_ID; + if (arg->arg_type & PTR_MAYBE_NULL) + reg->type |= PTR_MAYBE_NULL; + if (arg->arg_type & PTR_UNTRUSTED) + reg->type |= PTR_UNTRUSTED; + if (arg->arg_type & PTR_TRUSTED) + reg->type |= PTR_TRUSTED; + mark_reg_known_zero(env, regs, i); + reg->btf = bpf_get_btf_vmlinux(); /* can't fail at this point */ + reg->btf_id = arg->btf_id; + reg->id = ++env->id_gen; } else { WARN_ONCE(1, "BUG: unhandled arg#%d type %d\n", i - BPF_REG_1, arg->arg_type); @@ -20351,6 +20442,12 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env) } } + /* btf_ctx_access() used this to provide argument type info */ + prog->aux->ctx_arg_info = + st_ops_desc->arg_info[member_idx].info; + prog->aux->ctx_arg_info_size = + st_ops_desc->arg_info[member_idx].cnt; + prog->aux->attach_func_proto = func_proto; prog->aux->attach_func_name = mname; env->ops = st_ops->verifier_ops; diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index ba36c073304a..927bef3a598a 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -2562,7 +2562,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, update_partition_sd_lb(cs, old_prs); out_free: free_cpumasks(NULL, &tmp); - return 0; + return retval; } /** @@ -2598,9 +2598,6 @@ static int update_exclusive_cpumask(struct cpuset *cs, struct cpuset *trialcs, if (cpumask_equal(cs->exclusive_cpus, trialcs->exclusive_cpus)) return 0; - if (alloc_cpumasks(NULL, &tmp)) - return -ENOMEM; - if (*buf) compute_effective_exclusive_cpumask(trialcs, NULL); @@ -2615,6 +2612,9 @@ static int update_exclusive_cpumask(struct cpuset *cs, struct cpuset *trialcs, if (retval) return retval; + if (alloc_cpumasks(NULL, &tmp)) + return -ENOMEM; + if (old_prs) { if (cpumask_empty(trialcs->effective_xcpus)) { invalidate = true; diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index a8350d2d63e6..07e2284bb499 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -562,10 +562,10 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq) } /* Add bpf kfuncs for cgroup_rstat_updated() and cgroup_rstat_flush() */ -BTF_SET8_START(bpf_rstat_kfunc_ids) +BTF_KFUNCS_START(bpf_rstat_kfunc_ids) BTF_ID_FLAGS(func, cgroup_rstat_updated) BTF_ID_FLAGS(func, cgroup_rstat_flush, KF_SLEEPABLE) -BTF_SET8_END(bpf_rstat_kfunc_ids) +BTF_KFUNCS_END(bpf_rstat_kfunc_ids) static const struct btf_kfunc_id_set bpf_rstat_kfunc_set = { .owner = THIS_MODULE, diff --git a/kernel/events/core.c b/kernel/events/core.c index f0f0f71213a1..5ecfa57e3b97 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9302,10 +9302,6 @@ void perf_event_bpf_event(struct bpf_prog *prog, { struct perf_bpf_event bpf_event; - if (type <= PERF_BPF_EVENT_UNKNOWN || - type >= PERF_BPF_EVENT_MAX) - return; - switch (type) { case PERF_BPF_EVENT_PROG_LOAD: case PERF_BPF_EVENT_PROG_UNLOAD: @@ -9313,7 +9309,7 @@ void perf_event_bpf_event(struct bpf_prog *prog, perf_event_bpf_emit_ksymbols(prog, type); break; default: - break; + return; } if (!atomic_read(&nr_bpf_events)) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 64fdaf79d113..241ddf5e3895 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1412,14 +1412,14 @@ __bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr, __bpf_kfunc_end_defs(); -BTF_SET8_START(key_sig_kfunc_set) +BTF_KFUNCS_START(key_sig_kfunc_set) BTF_ID_FLAGS(func, bpf_lookup_user_key, KF_ACQUIRE | KF_RET_NULL | KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_lookup_system_key, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_key_put, KF_RELEASE) #ifdef CONFIG_SYSTEM_DATA_VERIFICATION BTF_ID_FLAGS(func, bpf_verify_pkcs7_signature, KF_SLEEPABLE) #endif -BTF_SET8_END(key_sig_kfunc_set) +BTF_KFUNCS_END(key_sig_kfunc_set) static const struct btf_kfunc_id_set bpf_key_sig_kfunc_set = { .owner = THIS_MODULE, @@ -1475,9 +1475,9 @@ __bpf_kfunc int bpf_get_file_xattr(struct file *file, const char *name__str, __bpf_kfunc_end_defs(); -BTF_SET8_START(fs_kfunc_set_ids) +BTF_KFUNCS_START(fs_kfunc_set_ids) BTF_ID_FLAGS(func, bpf_get_file_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS) -BTF_SET8_END(fs_kfunc_set_ids) +BTF_KFUNCS_END(fs_kfunc_set_ids) static int bpf_get_file_xattr_filter(const struct bpf_prog *prog, u32 kfunc_id) { diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index 6cd2a4e3afb8..9ff018245840 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -189,9 +189,6 @@ static int fprobe_init_rethook(struct fprobe *fp, int num) { int size; - if (num <= 0) - return -EINVAL; - if (!fp->exit_handler) { fp->rethook = NULL; return 0; @@ -199,15 +196,16 @@ static int fprobe_init_rethook(struct fprobe *fp, int num) /* Initialize rethook if needed */ if (fp->nr_maxactive) - size = fp->nr_maxactive; + num = fp->nr_maxactive; else - size = num * num_possible_cpus() * 2; - if (size <= 0) + num *= num_possible_cpus() * 2; + if (num <= 0) return -EINVAL; + size = sizeof(struct fprobe_rethook_node) + fp->entry_data_size; + /* Initialize rethook */ - fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler, - sizeof(struct fprobe_rethook_node), size); + fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler, size, num); if (IS_ERR(fp->rethook)) return PTR_ERR(fp->rethook); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 975a07f9f1cc..ef36b829ae1f 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2235,6 +2235,7 @@ config TEST_DIV64 config TEST_IOV_ITER tristate "Test iov_iter operation" if !KUNIT_ALL_TESTS depends on KUNIT + depends on MMU default KUNIT_ALL_TESTS help Enable this to turn on testing of the operation of the I/O iterator diff --git a/lib/checksum_kunit.c b/lib/checksum_kunit.c index 225bb7701460..bf70850035c7 100644 --- a/lib/checksum_kunit.c +++ b/lib/checksum_kunit.c @@ -215,7 +215,7 @@ static const u32 init_sums_no_overflow[] = { 0xffff0000, 0xfffffffb, }; -static const __sum16 expected_csum_ipv6_magic[] = { +static const u16 expected_csum_ipv6_magic[] = { 0x18d4, 0x3085, 0x2e4b, 0xd9f4, 0xbdc8, 0x78f, 0x1034, 0x8422, 0x6fc0, 0xd2f6, 0xbeb5, 0x9d3, 0x7e2a, 0x312e, 0x778e, 0xc1bb, 0x7cf2, 0x9d1e, 0xca21, 0xf3ff, 0x7569, 0xb02e, 0xca86, 0x7e76, 0x4539, 0x45e3, 0xf28d, @@ -241,7 +241,7 @@ static const __sum16 expected_csum_ipv6_magic[] = { 0x3845, 0x1014 }; -static const __sum16 expected_fast_csum[] = { +static const u16 expected_fast_csum[] = { 0xda83, 0x45da, 0x4f46, 0x4e4f, 0x34e, 0xe902, 0xa5e9, 0x87a5, 0x7187, 0x5671, 0xf556, 0x6df5, 0x816d, 0x8f81, 0xbb8f, 0xfbba, 0x5afb, 0xbe5a, 0xedbe, 0xabee, 0x6aac, 0xe6b, 0xea0d, 0x67ea, 0x7e68, 0x8a7e, 0x6f8a, @@ -577,7 +577,8 @@ static void test_csum_no_carry_inputs(struct kunit *test) static void test_ip_fast_csum(struct kunit *test) { - __sum16 csum_result, expected; + __sum16 csum_result; + u16 expected; for (int len = IPv4_MIN_WORDS; len < IPv4_MAX_WORDS; len++) { for (int index = 0; index < NUM_IP_FAST_CSUM_TESTS; index++) { @@ -586,7 +587,7 @@ static void test_ip_fast_csum(struct kunit *test) expected_fast_csum[(len - IPv4_MIN_WORDS) * NUM_IP_FAST_CSUM_TESTS + index]; - CHECK_EQ(expected, csum_result); + CHECK_EQ(to_sum16(expected), csum_result); } } } @@ -598,7 +599,7 @@ static void test_csum_ipv6_magic(struct kunit *test) const struct in6_addr *daddr; unsigned int len; unsigned char proto; - unsigned int csum; + __wsum csum; const int daddr_offset = sizeof(struct in6_addr); const int len_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr); @@ -611,10 +612,10 @@ static void test_csum_ipv6_magic(struct kunit *test) saddr = (const struct in6_addr *)(random_buf + i); daddr = (const struct in6_addr *)(random_buf + i + daddr_offset); - len = *(unsigned int *)(random_buf + i + len_offset); + len = le32_to_cpu(*(__le32 *)(random_buf + i + len_offset)); proto = *(random_buf + i + proto_offset); - csum = *(unsigned int *)(random_buf + i + csum_offset); - CHECK_EQ(expected_csum_ipv6_magic[i], + csum = *(__wsum *)(random_buf + i + csum_offset); + CHECK_EQ(to_sum16(expected_csum_ipv6_magic[i]), csum_ipv6_magic(saddr, daddr, len, proto, csum)); } #endif /* !CONFIG_NET */ diff --git a/lib/iov_iter.c b/lib/iov_iter.c index e0aa6b440ca5..cf2eb2b2f983 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -166,7 +166,6 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction, WARN_ON(direction & ~(READ | WRITE)); *i = (struct iov_iter) { .iter_type = ITER_IOVEC, - .copy_mc = false, .nofault = false, .data_source = direction, .__iov = iov, @@ -245,26 +244,8 @@ EXPORT_SYMBOL_GPL(_copy_mc_to_iter); #endif /* CONFIG_ARCH_HAS_COPY_MC */ static __always_inline -size_t memcpy_from_iter_mc(void *iter_from, size_t progress, - size_t len, void *to, void *priv2) -{ - return copy_mc_to_kernel(to + progress, iter_from, len); -} - -static size_t __copy_from_iter_mc(void *addr, size_t bytes, struct iov_iter *i) -{ - if (unlikely(i->count < bytes)) - bytes = i->count; - if (unlikely(!bytes)) - return 0; - return iterate_bvec(i, bytes, addr, NULL, memcpy_from_iter_mc); -} - -static __always_inline size_t __copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) { - if (unlikely(iov_iter_is_copy_mc(i))) - return __copy_from_iter_mc(addr, bytes, i); return iterate_and_advance(i, bytes, addr, copy_from_user_iter, memcpy_from_iter); } @@ -633,7 +614,6 @@ void iov_iter_kvec(struct iov_iter *i, unsigned int direction, WARN_ON(direction & ~(READ | WRITE)); *i = (struct iov_iter){ .iter_type = ITER_KVEC, - .copy_mc = false, .data_source = direction, .kvec = kvec, .nr_segs = nr_segs, @@ -650,7 +630,6 @@ void iov_iter_bvec(struct iov_iter *i, unsigned int direction, WARN_ON(direction & ~(READ | WRITE)); *i = (struct iov_iter){ .iter_type = ITER_BVEC, - .copy_mc = false, .data_source = direction, .bvec = bvec, .nr_segs = nr_segs, @@ -679,7 +658,6 @@ void iov_iter_xarray(struct iov_iter *i, unsigned int direction, BUG_ON(direction & ~1); *i = (struct iov_iter) { .iter_type = ITER_XARRAY, - .copy_mc = false, .data_source = direction, .xarray = xarray, .xarray_start = start, @@ -703,7 +681,6 @@ void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count) BUG_ON(direction != READ); *i = (struct iov_iter){ .iter_type = ITER_DISCARD, - .copy_mc = false, .data_source = false, .count = count, .iov_offset = 0 diff --git a/lib/nlattr.c b/lib/nlattr.c index ed2ab43e1b22..be9c576b6e2d 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -30,6 +30,8 @@ static const u8 nla_attr_len[NLA_TYPE_MAX+1] = { [NLA_S16] = sizeof(s16), [NLA_S32] = sizeof(s32), [NLA_S64] = sizeof(s64), + [NLA_BE16] = sizeof(__be16), + [NLA_BE32] = sizeof(__be32), }; static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = { @@ -43,6 +45,8 @@ static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = { [NLA_S16] = sizeof(s16), [NLA_S32] = sizeof(s32), [NLA_S64] = sizeof(s64), + [NLA_BE16] = sizeof(__be16), + [NLA_BE32] = sizeof(__be32), }; /* diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 5caa1f566553..4a7055a63d9f 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -22,6 +22,7 @@ #include <linux/list.h> #include <linux/mm.h> #include <linux/mutex.h> +#include <linux/poison.h> #include <linux/printk.h> #include <linux/rculist.h> #include <linux/rcupdate.h> @@ -43,17 +44,7 @@ #define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGE_SHIFT - DEPOT_STACK_ALIGN) #define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \ STACK_DEPOT_EXTRA_BITS) -#if IS_ENABLED(CONFIG_KMSAN) && CONFIG_STACKDEPOT_MAX_FRAMES >= 32 -/* - * KMSAN is frequently used in fuzzing scenarios and thus saves a lot of stack - * traces. As KMSAN does not support evicting stack traces from the stack - * depot, the stack depot capacity might be reached quickly with large stack - * records. Adjust the maximum number of stack depot pools for this case. - */ -#define DEPOT_POOLS_CAP (8192 * (CONFIG_STACKDEPOT_MAX_FRAMES / 16)) -#else #define DEPOT_POOLS_CAP 8192 -#endif #define DEPOT_MAX_POOLS \ (((1LL << (DEPOT_POOL_INDEX_BITS)) < DEPOT_POOLS_CAP) ? \ (1LL << (DEPOT_POOL_INDEX_BITS)) : DEPOT_POOLS_CAP) @@ -93,9 +84,6 @@ struct stack_record { }; }; -#define DEPOT_STACK_RECORD_SIZE \ - ALIGN(sizeof(struct stack_record), 1 << DEPOT_STACK_ALIGN) - static bool stack_depot_disabled; static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT); static bool __stack_depot_early_init_passed __initdata; @@ -121,32 +109,31 @@ static void *stack_pools[DEPOT_MAX_POOLS]; static void *new_pool; /* Number of pools in stack_pools. */ static int pools_num; +/* Offset to the unused space in the currently used pool. */ +static size_t pool_offset = DEPOT_POOL_SIZE; /* Freelist of stack records within stack_pools. */ static LIST_HEAD(free_stacks); -/* - * Stack depot tries to keep an extra pool allocated even before it runs out - * of space in the currently used pool. This flag marks whether this extra pool - * needs to be allocated. It has the value 0 when either an extra pool is not - * yet allocated or if the limit on the number of pools is reached. - */ -static bool new_pool_required = true; /* The lock must be held when performing pool or freelist modifications. */ static DEFINE_RAW_SPINLOCK(pool_lock); /* Statistics counters for debugfs. */ enum depot_counter_id { - DEPOT_COUNTER_ALLOCS, - DEPOT_COUNTER_FREES, - DEPOT_COUNTER_INUSE, + DEPOT_COUNTER_REFD_ALLOCS, + DEPOT_COUNTER_REFD_FREES, + DEPOT_COUNTER_REFD_INUSE, DEPOT_COUNTER_FREELIST_SIZE, + DEPOT_COUNTER_PERSIST_COUNT, + DEPOT_COUNTER_PERSIST_BYTES, DEPOT_COUNTER_COUNT, }; static long counters[DEPOT_COUNTER_COUNT]; static const char *const counter_names[] = { - [DEPOT_COUNTER_ALLOCS] = "allocations", - [DEPOT_COUNTER_FREES] = "frees", - [DEPOT_COUNTER_INUSE] = "in_use", + [DEPOT_COUNTER_REFD_ALLOCS] = "refcounted_allocations", + [DEPOT_COUNTER_REFD_FREES] = "refcounted_frees", + [DEPOT_COUNTER_REFD_INUSE] = "refcounted_in_use", [DEPOT_COUNTER_FREELIST_SIZE] = "freelist_size", + [DEPOT_COUNTER_PERSIST_COUNT] = "persistent_count", + [DEPOT_COUNTER_PERSIST_BYTES] = "persistent_bytes", }; static_assert(ARRAY_SIZE(counter_names) == DEPOT_COUNTER_COUNT); @@ -294,48 +281,52 @@ out_unlock: EXPORT_SYMBOL_GPL(stack_depot_init); /* - * Initializes new stack depot @pool, release all its entries to the freelist, - * and update the list of pools. + * Initializes new stack pool, and updates the list of pools. */ -static void depot_init_pool(void *pool) +static bool depot_init_pool(void **prealloc) { - int offset; - lockdep_assert_held(&pool_lock); - /* Initialize handles and link stack records into the freelist. */ - for (offset = 0; offset <= DEPOT_POOL_SIZE - DEPOT_STACK_RECORD_SIZE; - offset += DEPOT_STACK_RECORD_SIZE) { - struct stack_record *stack = pool + offset; - - stack->handle.pool_index = pools_num; - stack->handle.offset = offset >> DEPOT_STACK_ALIGN; - stack->handle.extra = 0; - - /* - * Stack traces of size 0 are never saved, and we can simply use - * the size field as an indicator if this is a new unused stack - * record in the freelist. - */ - stack->size = 0; + if (unlikely(pools_num >= DEPOT_MAX_POOLS)) { + /* Bail out if we reached the pool limit. */ + WARN_ON_ONCE(pools_num > DEPOT_MAX_POOLS); /* should never happen */ + WARN_ON_ONCE(!new_pool); /* to avoid unnecessary pre-allocation */ + WARN_ONCE(1, "Stack depot reached limit capacity"); + return false; + } - INIT_LIST_HEAD(&stack->hash_list); - /* - * Add to the freelist front to prioritize never-used entries: - * required in case there are entries in the freelist, but their - * RCU cookie still belongs to the current RCU grace period - * (there can still be concurrent readers). - */ - list_add(&stack->free_list, &free_stacks); - counters[DEPOT_COUNTER_FREELIST_SIZE]++; + if (!new_pool && *prealloc) { + /* We have preallocated memory, use it. */ + WRITE_ONCE(new_pool, *prealloc); + *prealloc = NULL; } + if (!new_pool) + return false; /* new_pool and *prealloc are NULL */ + /* Save reference to the pool to be used by depot_fetch_stack(). */ - stack_pools[pools_num] = pool; + stack_pools[pools_num] = new_pool; + + /* + * Stack depot tries to keep an extra pool allocated even before it runs + * out of space in the currently used pool. + * + * To indicate that a new preallocation is needed new_pool is reset to + * NULL; do not reset to NULL if we have reached the maximum number of + * pools. + */ + if (pools_num < DEPOT_MAX_POOLS) + WRITE_ONCE(new_pool, NULL); + else + WRITE_ONCE(new_pool, STACK_DEPOT_POISON); /* Pairs with concurrent READ_ONCE() in depot_fetch_stack(). */ WRITE_ONCE(pools_num, pools_num + 1); ASSERT_EXCLUSIVE_WRITER(pools_num); + + pool_offset = 0; + + return true; } /* Keeps the preallocated memory to be used for a new stack depot pool. */ @@ -347,63 +338,51 @@ static void depot_keep_new_pool(void **prealloc) * If a new pool is already saved or the maximum number of * pools is reached, do not use the preallocated memory. */ - if (!new_pool_required) + if (new_pool) return; - /* - * Use the preallocated memory for the new pool - * as long as we do not exceed the maximum number of pools. - */ - if (pools_num < DEPOT_MAX_POOLS) { - new_pool = *prealloc; - *prealloc = NULL; - } - - /* - * At this point, either a new pool is kept or the maximum - * number of pools is reached. In either case, take note that - * keeping another pool is not required. - */ - WRITE_ONCE(new_pool_required, false); + WRITE_ONCE(new_pool, *prealloc); + *prealloc = NULL; } /* - * Try to initialize a new stack depot pool from either a previous or the - * current pre-allocation, and release all its entries to the freelist. + * Try to initialize a new stack record from the current pool, a cached pool, or + * the current pre-allocation. */ -static bool depot_try_init_pool(void **prealloc) +static struct stack_record *depot_pop_free_pool(void **prealloc, size_t size) { + struct stack_record *stack; + void *current_pool; + u32 pool_index; + lockdep_assert_held(&pool_lock); - /* Check if we have a new pool saved and use it. */ - if (new_pool) { - depot_init_pool(new_pool); - new_pool = NULL; + if (pool_offset + size > DEPOT_POOL_SIZE) { + if (!depot_init_pool(prealloc)) + return NULL; + } - /* Take note that we might need a new new_pool. */ - if (pools_num < DEPOT_MAX_POOLS) - WRITE_ONCE(new_pool_required, true); + if (WARN_ON_ONCE(pools_num < 1)) + return NULL; + pool_index = pools_num - 1; + current_pool = stack_pools[pool_index]; + if (WARN_ON_ONCE(!current_pool)) + return NULL; - return true; - } + stack = current_pool + pool_offset; - /* Bail out if we reached the pool limit. */ - if (unlikely(pools_num >= DEPOT_MAX_POOLS)) { - WARN_ONCE(1, "Stack depot reached limit capacity"); - return false; - } + /* Pre-initialize handle once. */ + stack->handle.pool_index = pool_index; + stack->handle.offset = pool_offset >> DEPOT_STACK_ALIGN; + stack->handle.extra = 0; + INIT_LIST_HEAD(&stack->hash_list); - /* Check if we have preallocated memory and use it. */ - if (*prealloc) { - depot_init_pool(*prealloc); - *prealloc = NULL; - return true; - } + pool_offset += size; - return false; + return stack; } -/* Try to find next free usable entry. */ +/* Try to find next free usable entry from the freelist. */ static struct stack_record *depot_pop_free(void) { struct stack_record *stack; @@ -420,7 +399,7 @@ static struct stack_record *depot_pop_free(void) * check the first entry. */ stack = list_first_entry(&free_stacks, struct stack_record, free_list); - if (stack->size && !poll_state_synchronize_rcu(stack->rcu_state)) + if (!poll_state_synchronize_rcu(stack->rcu_state)) return NULL; list_del(&stack->free_list); @@ -429,48 +408,73 @@ static struct stack_record *depot_pop_free(void) return stack; } +static inline size_t depot_stack_record_size(struct stack_record *s, unsigned int nr_entries) +{ + const size_t used = flex_array_size(s, entries, nr_entries); + const size_t unused = sizeof(s->entries) - used; + + WARN_ON_ONCE(sizeof(s->entries) < used); + + return ALIGN(sizeof(struct stack_record) - unused, 1 << DEPOT_STACK_ALIGN); +} + /* Allocates a new stack in a stack depot pool. */ static struct stack_record * -depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) +depot_alloc_stack(unsigned long *entries, unsigned int nr_entries, u32 hash, depot_flags_t flags, void **prealloc) { - struct stack_record *stack; + struct stack_record *stack = NULL; + size_t record_size; lockdep_assert_held(&pool_lock); /* This should already be checked by public API entry points. */ - if (WARN_ON_ONCE(!size)) + if (WARN_ON_ONCE(!nr_entries)) return NULL; - /* Check if we have a stack record to save the stack trace. */ - stack = depot_pop_free(); - if (!stack) { - /* No usable entries on the freelist - try to refill the freelist. */ - if (!depot_try_init_pool(prealloc)) - return NULL; + /* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */ + if (nr_entries > CONFIG_STACKDEPOT_MAX_FRAMES) + nr_entries = CONFIG_STACKDEPOT_MAX_FRAMES; + + if (flags & STACK_DEPOT_FLAG_GET) { + /* + * Evictable entries have to allocate the max. size so they may + * safely be re-used by differently sized allocations. + */ + record_size = depot_stack_record_size(stack, CONFIG_STACKDEPOT_MAX_FRAMES); stack = depot_pop_free(); - if (WARN_ON(!stack)) - return NULL; + } else { + record_size = depot_stack_record_size(stack, nr_entries); } - /* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */ - if (size > CONFIG_STACKDEPOT_MAX_FRAMES) - size = CONFIG_STACKDEPOT_MAX_FRAMES; + if (!stack) { + stack = depot_pop_free_pool(prealloc, record_size); + if (!stack) + return NULL; + } /* Save the stack trace. */ stack->hash = hash; - stack->size = size; - /* stack->handle is already filled in by depot_init_pool(). */ - refcount_set(&stack->count, 1); - memcpy(stack->entries, entries, flex_array_size(stack, entries, size)); + stack->size = nr_entries; + /* stack->handle is already filled in by depot_pop_free_pool(). */ + memcpy(stack->entries, entries, flex_array_size(stack, entries, nr_entries)); + + if (flags & STACK_DEPOT_FLAG_GET) { + refcount_set(&stack->count, 1); + counters[DEPOT_COUNTER_REFD_ALLOCS]++; + counters[DEPOT_COUNTER_REFD_INUSE]++; + } else { + /* Warn on attempts to switch to refcounting this entry. */ + refcount_set(&stack->count, REFCOUNT_SATURATED); + counters[DEPOT_COUNTER_PERSIST_COUNT]++; + counters[DEPOT_COUNTER_PERSIST_BYTES] += record_size; + } /* * Let KMSAN know the stored stack record is initialized. This shall * prevent false positive reports if instrumented code accesses it. */ - kmsan_unpoison_memory(stack, DEPOT_STACK_RECORD_SIZE); + kmsan_unpoison_memory(stack, record_size); - counters[DEPOT_COUNTER_ALLOCS]++; - counters[DEPOT_COUNTER_INUSE]++; return stack; } @@ -538,8 +542,8 @@ static void depot_free_stack(struct stack_record *stack) list_add_tail(&stack->free_list, &free_stacks); counters[DEPOT_COUNTER_FREELIST_SIZE]++; - counters[DEPOT_COUNTER_FREES]++; - counters[DEPOT_COUNTER_INUSE]--; + counters[DEPOT_COUNTER_REFD_FREES]++; + counters[DEPOT_COUNTER_REFD_INUSE]--; printk_deferred_exit(); raw_spin_unlock_irqrestore(&pool_lock, flags); @@ -660,7 +664,7 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, * Allocate memory for a new pool if required now: * we won't be able to do that under the lock. */ - if (unlikely(can_alloc && READ_ONCE(new_pool_required))) { + if (unlikely(can_alloc && !READ_ONCE(new_pool))) { /* * Zero out zone modifiers, as we don't have specific zone * requirements. Keep the flags related to allocation in atomic @@ -681,7 +685,7 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, found = find_stack(bucket, entries, nr_entries, hash, depot_flags); if (!found) { struct stack_record *new = - depot_alloc_stack(entries, nr_entries, hash, &prealloc); + depot_alloc_stack(entries, nr_entries, hash, depot_flags, &prealloc); if (new) { /* diff --git a/mm/damon/core.c b/mm/damon/core.c index 36f6f1d21ff0..5b325749fc12 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1026,6 +1026,9 @@ static void damon_do_apply_schemes(struct damon_ctx *c, damon_for_each_scheme(s, c) { struct damos_quota *quota = &s->quota; + if (c->passed_sample_intervals != s->next_apply_sis) + continue; + if (!s->wmarks.activated) continue; @@ -1176,10 +1179,6 @@ static void kdamond_apply_schemes(struct damon_ctx *c) if (c->passed_sample_intervals != s->next_apply_sis) continue; - s->next_apply_sis += - (s->apply_interval_us ? s->apply_interval_us : - c->attrs.aggr_interval) / sample_interval; - if (!s->wmarks.activated) continue; @@ -1195,6 +1194,14 @@ static void kdamond_apply_schemes(struct damon_ctx *c) damon_for_each_region_safe(r, next_r, t) damon_do_apply_schemes(c, t, r); } + + damon_for_each_scheme(s, c) { + if (c->passed_sample_intervals != s->next_apply_sis) + continue; + s->next_apply_sis += + (s->apply_interval_us ? s->apply_interval_us : + c->attrs.aggr_interval) / sample_interval; + } } /* diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c index f2e5f9431892..3de2916a65c3 100644 --- a/mm/damon/lru_sort.c +++ b/mm/damon/lru_sort.c @@ -185,9 +185,21 @@ static struct damos *damon_lru_sort_new_cold_scheme(unsigned int cold_thres) return damon_lru_sort_new_scheme(&pattern, DAMOS_LRU_DEPRIO); } +static void damon_lru_sort_copy_quota_status(struct damos_quota *dst, + struct damos_quota *src) +{ + dst->total_charged_sz = src->total_charged_sz; + dst->total_charged_ns = src->total_charged_ns; + dst->charged_sz = src->charged_sz; + dst->charged_from = src->charged_from; + dst->charge_target_from = src->charge_target_from; + dst->charge_addr_from = src->charge_addr_from; +} + static int damon_lru_sort_apply_parameters(void) { - struct damos *scheme; + struct damos *scheme, *hot_scheme, *cold_scheme; + struct damos *old_hot_scheme = NULL, *old_cold_scheme = NULL; unsigned int hot_thres, cold_thres; int err = 0; @@ -195,18 +207,35 @@ static int damon_lru_sort_apply_parameters(void) if (err) return err; + damon_for_each_scheme(scheme, ctx) { + if (!old_hot_scheme) { + old_hot_scheme = scheme; + continue; + } + old_cold_scheme = scheme; + } + hot_thres = damon_max_nr_accesses(&damon_lru_sort_mon_attrs) * hot_thres_access_freq / 1000; - scheme = damon_lru_sort_new_hot_scheme(hot_thres); - if (!scheme) + hot_scheme = damon_lru_sort_new_hot_scheme(hot_thres); + if (!hot_scheme) return -ENOMEM; - damon_set_schemes(ctx, &scheme, 1); + if (old_hot_scheme) + damon_lru_sort_copy_quota_status(&hot_scheme->quota, + &old_hot_scheme->quota); cold_thres = cold_min_age / damon_lru_sort_mon_attrs.aggr_interval; - scheme = damon_lru_sort_new_cold_scheme(cold_thres); - if (!scheme) + cold_scheme = damon_lru_sort_new_cold_scheme(cold_thres); + if (!cold_scheme) { + damon_destroy_scheme(hot_scheme); return -ENOMEM; - damon_add_scheme(ctx, scheme); + } + if (old_cold_scheme) + damon_lru_sort_copy_quota_status(&cold_scheme->quota, + &old_cold_scheme->quota); + + damon_set_schemes(ctx, &hot_scheme, 1); + damon_add_scheme(ctx, cold_scheme); return damon_set_region_biggest_system_ram_default(target, &monitor_region_start, diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c index ab974e477d2f..66e190f0374a 100644 --- a/mm/damon/reclaim.c +++ b/mm/damon/reclaim.c @@ -150,9 +150,20 @@ static struct damos *damon_reclaim_new_scheme(void) &damon_reclaim_wmarks); } +static void damon_reclaim_copy_quota_status(struct damos_quota *dst, + struct damos_quota *src) +{ + dst->total_charged_sz = src->total_charged_sz; + dst->total_charged_ns = src->total_charged_ns; + dst->charged_sz = src->charged_sz; + dst->charged_from = src->charged_from; + dst->charge_target_from = src->charge_target_from; + dst->charge_addr_from = src->charge_addr_from; +} + static int damon_reclaim_apply_parameters(void) { - struct damos *scheme; + struct damos *scheme, *old_scheme; struct damos_filter *filter; int err = 0; @@ -164,6 +175,11 @@ static int damon_reclaim_apply_parameters(void) scheme = damon_reclaim_new_scheme(); if (!scheme) return -ENOMEM; + if (!list_empty(&ctx->schemes)) { + damon_for_each_scheme(old_scheme, ctx) + damon_reclaim_copy_quota_status(&scheme->quota, + &old_scheme->quota); + } if (skip_anon) { filter = damos_new_filter(DAMOS_FILTER_TYPE_ANON, true); if (!filter) { diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index dd2fb5127009..ae0f0b314f3a 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -1905,6 +1905,10 @@ void damos_sysfs_set_quota_scores(struct damon_sysfs_schemes *sysfs_schemes, damon_for_each_scheme(scheme, ctx) { struct damon_sysfs_scheme *sysfs_scheme; + /* user could have removed the scheme sysfs dir */ + if (i >= sysfs_schemes->nr) + break; + sysfs_scheme = sysfs_schemes->schemes_arr[i]; damos_sysfs_set_quota_score(sysfs_scheme->quotas->goals, &scheme->quota); diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index 5662e29fe253..65c19025da3d 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -362,6 +362,12 @@ static void __init pud_advanced_tests(struct pgtable_debug_args *args) vaddr &= HPAGE_PUD_MASK; pud = pfn_pud(args->pud_pfn, args->page_prot); + /* + * Some architectures have debug checks to make sure + * huge pud mapping are only found with devmap entries + * For now test with only devmap entries. + */ + pud = pud_mkdevmap(pud); set_pud_at(args->mm, vaddr, args->pudp, pud); flush_dcache_page(page); pudp_set_wrprotect(args->mm, vaddr, args->pudp); @@ -374,6 +380,7 @@ static void __init pud_advanced_tests(struct pgtable_debug_args *args) WARN_ON(!pud_none(pud)); #endif /* __PAGETABLE_PMD_FOLDED */ pud = pfn_pud(args->pud_pfn, args->page_prot); + pud = pud_mkdevmap(pud); pud = pud_wrprotect(pud); pud = pud_mkclean(pud); set_pud_at(args->mm, vaddr, args->pudp, pud); @@ -391,6 +398,7 @@ static void __init pud_advanced_tests(struct pgtable_debug_args *args) #endif /* __PAGETABLE_PMD_FOLDED */ pud = pfn_pud(args->pud_pfn, args->page_prot); + pud = pud_mkdevmap(pud); pud = pud_mkyoung(pud); set_pud_at(args->mm, vaddr, args->pudp, pud); flush_dcache_page(page); diff --git a/mm/filemap.c b/mm/filemap.c index 750e779c23db..4a30de98a8c7 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -4111,28 +4111,40 @@ static void filemap_cachestat(struct address_space *mapping, rcu_read_lock(); xas_for_each(&xas, folio, last_index) { + int order; unsigned long nr_pages; pgoff_t folio_first_index, folio_last_index; + /* + * Don't deref the folio. It is not pinned, and might + * get freed (and reused) underneath us. + * + * We *could* pin it, but that would be expensive for + * what should be a fast and lightweight syscall. + * + * Instead, derive all information of interest from + * the rcu-protected xarray. + */ + if (xas_retry(&xas, folio)) continue; + order = xa_get_order(xas.xa, xas.xa_index); + nr_pages = 1 << order; + folio_first_index = round_down(xas.xa_index, 1 << order); + folio_last_index = folio_first_index + nr_pages - 1; + + /* Folios might straddle the range boundaries, only count covered pages */ + if (folio_first_index < first_index) + nr_pages -= first_index - folio_first_index; + + if (folio_last_index > last_index) + nr_pages -= folio_last_index - last_index; + if (xa_is_value(folio)) { /* page is evicted */ void *shadow = (void *)folio; bool workingset; /* not used */ - int order = xa_get_order(xas.xa, xas.xa_index); - - nr_pages = 1 << order; - folio_first_index = round_down(xas.xa_index, 1 << order); - folio_last_index = folio_first_index + nr_pages - 1; - - /* Folios might straddle the range boundaries, only count covered pages */ - if (folio_first_index < first_index) - nr_pages -= first_index - folio_first_index; - - if (folio_last_index > last_index) - nr_pages -= folio_last_index - last_index; cs->nr_evicted += nr_pages; @@ -4150,24 +4162,13 @@ static void filemap_cachestat(struct address_space *mapping, goto resched; } - nr_pages = folio_nr_pages(folio); - folio_first_index = folio_pgoff(folio); - folio_last_index = folio_first_index + nr_pages - 1; - - /* Folios might straddle the range boundaries, only count covered pages */ - if (folio_first_index < first_index) - nr_pages -= first_index - folio_first_index; - - if (folio_last_index > last_index) - nr_pages -= folio_last_index - last_index; - /* page is in cache */ cs->nr_cache += nr_pages; - if (folio_test_dirty(folio)) + if (xas_get_mark(&xas, PAGECACHE_TAG_DIRTY)) cs->nr_dirty += nr_pages; - if (folio_test_writeback(folio)) + if (xas_get_mark(&xas, PAGECACHE_TAG_WRITEBACK)) cs->nr_writeback += nr_pages; resched: diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 610efae91220..6ca63e8dda74 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -65,8 +65,7 @@ void kasan_save_track(struct kasan_track *track, gfp_t flags) { depot_stack_handle_t stack; - stack = kasan_save_stack(flags, - STACK_DEPOT_FLAG_CAN_ALLOC | STACK_DEPOT_FLAG_GET); + stack = kasan_save_stack(flags, STACK_DEPOT_FLAG_CAN_ALLOC); kasan_set_track(track, stack); } @@ -266,10 +265,9 @@ bool __kasan_slab_free(struct kmem_cache *cache, void *object, return true; /* - * If the object is not put into quarantine, it will likely be quickly - * reallocated. Thus, release its metadata now. + * Note: Keep per-object metadata to allow KASAN print stack traces for + * use-after-free-before-realloc bugs. */ - kasan_release_object_meta(cache, object); /* Let slab put the object onto the freelist. */ return false; diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c index df6627f62402..1900f8576034 100644 --- a/mm/kasan/generic.c +++ b/mm/kasan/generic.c @@ -485,16 +485,6 @@ void kasan_init_object_meta(struct kmem_cache *cache, const void *object) if (alloc_meta) { /* Zero out alloc meta to mark it as invalid. */ __memset(alloc_meta, 0, sizeof(*alloc_meta)); - - /* - * Prepare the lock for saving auxiliary stack traces. - * Temporarily disable KASAN bug reporting to allow instrumented - * raw_spin_lock_init to access aux_lock, which resides inside - * of a redzone. - */ - kasan_disable_current(); - raw_spin_lock_init(&alloc_meta->aux_lock); - kasan_enable_current(); } /* @@ -506,47 +496,23 @@ void kasan_init_object_meta(struct kmem_cache *cache, const void *object) static void release_alloc_meta(struct kasan_alloc_meta *meta) { - /* Evict the stack traces from stack depot. */ - stack_depot_put(meta->alloc_track.stack); - stack_depot_put(meta->aux_stack[0]); - stack_depot_put(meta->aux_stack[1]); - - /* - * Zero out alloc meta to mark it as invalid but keep aux_lock - * initialized to avoid having to reinitialize it when another object - * is allocated in the same slot. - */ - __memset(&meta->alloc_track, 0, sizeof(meta->alloc_track)); - __memset(meta->aux_stack, 0, sizeof(meta->aux_stack)); + /* Zero out alloc meta to mark it as invalid. */ + __memset(meta, 0, sizeof(*meta)); } static void release_free_meta(const void *object, struct kasan_free_meta *meta) { + if (!kasan_arch_is_ready()) + return; + /* Check if free meta is valid. */ if (*(u8 *)kasan_mem_to_shadow(object) != KASAN_SLAB_FREE_META) return; - /* Evict the stack trace from the stack depot. */ - stack_depot_put(meta->free_track.stack); - /* Mark free meta as invalid. */ *(u8 *)kasan_mem_to_shadow(object) = KASAN_SLAB_FREE; } -void kasan_release_object_meta(struct kmem_cache *cache, const void *object) -{ - struct kasan_alloc_meta *alloc_meta; - struct kasan_free_meta *free_meta; - - alloc_meta = kasan_get_alloc_meta(cache, object); - if (alloc_meta) - release_alloc_meta(alloc_meta); - - free_meta = kasan_get_free_meta(cache, object); - if (free_meta) - release_free_meta(object, free_meta); -} - size_t kasan_metadata_size(struct kmem_cache *cache, bool in_object) { struct kasan_cache *info = &cache->kasan_info; @@ -571,8 +537,6 @@ static void __kasan_record_aux_stack(void *addr, depot_flags_t depot_flags) struct kmem_cache *cache; struct kasan_alloc_meta *alloc_meta; void *object; - depot_stack_handle_t new_handle, old_handle; - unsigned long flags; if (is_kfence_address(addr) || !slab) return; @@ -583,33 +547,18 @@ static void __kasan_record_aux_stack(void *addr, depot_flags_t depot_flags) if (!alloc_meta) return; - new_handle = kasan_save_stack(0, depot_flags); - - /* - * Temporarily disable KASAN bug reporting to allow instrumented - * spinlock functions to access aux_lock, which resides inside of a - * redzone. - */ - kasan_disable_current(); - raw_spin_lock_irqsave(&alloc_meta->aux_lock, flags); - old_handle = alloc_meta->aux_stack[1]; alloc_meta->aux_stack[1] = alloc_meta->aux_stack[0]; - alloc_meta->aux_stack[0] = new_handle; - raw_spin_unlock_irqrestore(&alloc_meta->aux_lock, flags); - kasan_enable_current(); - - stack_depot_put(old_handle); + alloc_meta->aux_stack[0] = kasan_save_stack(0, depot_flags); } void kasan_record_aux_stack(void *addr) { - return __kasan_record_aux_stack(addr, - STACK_DEPOT_FLAG_CAN_ALLOC | STACK_DEPOT_FLAG_GET); + return __kasan_record_aux_stack(addr, STACK_DEPOT_FLAG_CAN_ALLOC); } void kasan_record_aux_stack_noalloc(void *addr) { - return __kasan_record_aux_stack(addr, STACK_DEPOT_FLAG_GET); + return __kasan_record_aux_stack(addr, 0); } void kasan_save_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags) @@ -620,7 +569,7 @@ void kasan_save_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags) if (!alloc_meta) return; - /* Evict previous stack traces (might exist for krealloc or mempool). */ + /* Invalidate previous stack traces (might exist for krealloc or mempool). */ release_alloc_meta(alloc_meta); kasan_save_track(&alloc_meta->alloc_track, flags); @@ -634,7 +583,7 @@ void kasan_save_free_info(struct kmem_cache *cache, void *object) if (!free_meta) return; - /* Evict previous stack trace (might exist for mempool). */ + /* Invalidate previous stack trace (might exist for mempool). */ release_free_meta(object, free_meta); kasan_save_track(&free_meta->free_track, 0); diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index d0f172f2b978..fb2b9ac0659a 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -6,7 +6,6 @@ #include <linux/kasan.h> #include <linux/kasan-tags.h> #include <linux/kfence.h> -#include <linux/spinlock.h> #include <linux/stackdepot.h> #if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) @@ -265,13 +264,6 @@ struct kasan_global { struct kasan_alloc_meta { struct kasan_track alloc_track; /* Free track is stored in kasan_free_meta. */ - /* - * aux_lock protects aux_stack from accesses from concurrent - * kasan_record_aux_stack calls. It is a raw spinlock to avoid sleeping - * on RT kernels, as kasan_record_aux_stack_noalloc can be called from - * non-sleepable contexts. - */ - raw_spinlock_t aux_lock; depot_stack_handle_t aux_stack[2]; }; @@ -398,10 +390,8 @@ struct kasan_alloc_meta *kasan_get_alloc_meta(struct kmem_cache *cache, struct kasan_free_meta *kasan_get_free_meta(struct kmem_cache *cache, const void *object); void kasan_init_object_meta(struct kmem_cache *cache, const void *object); -void kasan_release_object_meta(struct kmem_cache *cache, const void *object); #else static inline void kasan_init_object_meta(struct kmem_cache *cache, const void *object) { } -static inline void kasan_release_object_meta(struct kmem_cache *cache, const void *object) { } #endif depot_stack_handle_t kasan_save_stack(gfp_t flags, depot_flags_t depot_flags); diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c index 3ba02efb952a..6958aa713c67 100644 --- a/mm/kasan/quarantine.c +++ b/mm/kasan/quarantine.c @@ -145,7 +145,10 @@ static void qlink_free(struct qlist_node *qlink, struct kmem_cache *cache) void *object = qlink_to_object(qlink, cache); struct kasan_free_meta *free_meta = kasan_get_free_meta(cache, object); - kasan_release_object_meta(cache, object); + /* + * Note: Keep per-object metadata to allow KASAN print stack traces for + * use-after-free-before-realloc bugs. + */ /* * If init_on_free is enabled and KASAN's free metadata is stored in diff --git a/mm/memblock.c b/mm/memblock.c index 4dcb2ee35eca..d09136e040d3 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -180,8 +180,9 @@ static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size) /* * Address comparison utilities */ -static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, - phys_addr_t base2, phys_addr_t size2) +unsigned long __init_memblock +memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2, + phys_addr_t size2) { return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); } @@ -2249,6 +2250,7 @@ static const char * const flagname[] = { [ilog2(MEMBLOCK_MIRROR)] = "MIRROR", [ilog2(MEMBLOCK_NOMAP)] = "NOMAP", [ilog2(MEMBLOCK_DRIVER_MANAGED)] = "DRV_MNG", + [ilog2(MEMBLOCK_RSRV_NOINIT)] = "RSV_NIT", }; static int memblock_debug_show(struct seq_file *m, void *private) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 1ed40f9d3a27..61932c9215e7 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -7971,9 +7971,13 @@ bool mem_cgroup_swap_full(struct folio *folio) static int __init setup_swap_account(char *s) { - pr_warn_once("The swapaccount= commandline option is deprecated. " - "Please report your usecase to linux-mm@kvack.org if you " - "depend on this functionality.\n"); + bool res; + + if (!kstrtobool(s, &res) && !res) + pr_warn_once("The swapaccount=0 commandline option is deprecated " + "in favor of configuring swap control via cgroupfs. " + "Please report your usecase to linux-mm@kvack.org if you " + "depend on this functionality.\n"); return 1; } __setup("swapaccount=", setup_swap_account); diff --git a/mm/memory.c b/mm/memory.c index 15f8b10ea17c..0bfc8b007c01 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3799,6 +3799,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) struct page *page; struct swap_info_struct *si = NULL; rmap_t rmap_flags = RMAP_NONE; + bool need_clear_cache = false; bool exclusive = false; swp_entry_t entry; pte_t pte; @@ -3867,6 +3868,20 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) if (!folio) { if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && __swap_count(entry) == 1) { + /* + * Prevent parallel swapin from proceeding with + * the cache flag. Otherwise, another thread may + * finish swapin first, free the entry, and swapout + * reusing the same entry. It's undetectable as + * pte_same() returns true due to entry reuse. + */ + if (swapcache_prepare(entry)) { + /* Relax a bit to prevent rapid repeated page faults */ + schedule_timeout_uninterruptible(1); + goto out; + } + need_clear_cache = true; + /* skip swapcache */ folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vmf->address, false); @@ -4117,6 +4132,9 @@ unlock: if (vmf->pte) pte_unmap_unlock(vmf->pte, vmf->ptl); out: + /* Clear the swap cache pin for direct swapin after PTL unlock */ + if (need_clear_cache) + swapcache_clear(si, entry); if (si) put_swap_device(si); return ret; @@ -4131,6 +4149,8 @@ out_release: folio_unlock(swapcache); folio_put(swapcache); } + if (need_clear_cache) + swapcache_clear(si, entry); if (si) put_swap_device(si); return ret; diff --git a/mm/migrate.c b/mm/migrate.c index cc9f2bcd73b4..c27b1f8097d4 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2519,6 +2519,14 @@ static int numamigrate_isolate_folio(pg_data_t *pgdat, struct folio *folio) if (managed_zone(pgdat->node_zones + z)) break; } + + /* + * If there are no managed zones, it should not proceed + * further. + */ + if (z < 0) + return 0; + wakeup_kswapd(pgdat->node_zones + z, 0, folio_order(folio), ZONE_MOVABLE); return 0; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 150d4f23b010..06aa1ebbd21c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4685,8 +4685,8 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, gfp_t gfp = gfp_mask; #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) - gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY | - __GFP_NOMEMALLOC; + gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP | + __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC; page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, PAGE_FRAG_CACHE_MAX_ORDER); nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE; @@ -4699,6 +4699,16 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, return page; } +void page_frag_cache_drain(struct page_frag_cache *nc) +{ + if (!nc->va) + return; + + __page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias); + nc->va = NULL; +} +EXPORT_SYMBOL(page_frag_cache_drain); + void __page_frag_cache_drain(struct page *page, unsigned int count) { VM_BUG_ON_PAGE(page_ref_count(page) == 0, page); @@ -4708,9 +4718,9 @@ void __page_frag_cache_drain(struct page *page, unsigned int count) } EXPORT_SYMBOL(__page_frag_cache_drain); -void *page_frag_alloc_align(struct page_frag_cache *nc, - unsigned int fragsz, gfp_t gfp_mask, - unsigned int align_mask) +void *__page_frag_alloc_align(struct page_frag_cache *nc, + unsigned int fragsz, gfp_t gfp_mask, + unsigned int align_mask) { unsigned int size = PAGE_SIZE; struct page *page; @@ -4779,7 +4789,7 @@ refill: return nc->va + offset; } -EXPORT_SYMBOL(page_frag_alloc_align); +EXPORT_SYMBOL(__page_frag_alloc_align); /* * Frees a page fragment allocated out of either a compound or order 0 page. diff --git a/mm/swap.h b/mm/swap.h index 758c46ca671e..fc2f6ade7f80 100644 --- a/mm/swap.h +++ b/mm/swap.h @@ -41,6 +41,7 @@ void __delete_from_swap_cache(struct folio *folio, void delete_from_swap_cache(struct folio *folio); void clear_shadow_from_swap_cache(int type, unsigned long begin, unsigned long end); +void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry); struct folio *swap_cache_get_folio(swp_entry_t entry, struct vm_area_struct *vma, unsigned long addr); struct folio *filemap_get_incore_folio(struct address_space *mapping, @@ -97,6 +98,10 @@ static inline int swap_writepage(struct page *p, struct writeback_control *wbc) return 0; } +static inline void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry) +{ +} + static inline struct folio *swap_cache_get_folio(swp_entry_t entry, struct vm_area_struct *vma, unsigned long addr) { diff --git a/mm/swap_state.c b/mm/swap_state.c index e671266ad772..7255c01a1e4e 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -680,9 +680,10 @@ skip: /* The page was likely read above, so no need for plugging here */ folio = __read_swap_cache_async(entry, gfp_mask, mpol, ilx, &page_allocated, false); - if (unlikely(page_allocated)) + if (unlikely(page_allocated)) { + zswap_folio_swapin(folio); swap_read_folio(folio, false, NULL); - zswap_folio_swapin(folio); + } return folio; } @@ -855,9 +856,10 @@ skip: /* The folio was likely read above, so no need for plugging here */ folio = __read_swap_cache_async(targ_entry, gfp_mask, mpol, targ_ilx, &page_allocated, false); - if (unlikely(page_allocated)) + if (unlikely(page_allocated)) { + zswap_folio_swapin(folio); swap_read_folio(folio, false, NULL); - zswap_folio_swapin(folio); + } return folio; } diff --git a/mm/swapfile.c b/mm/swapfile.c index 556ff7347d5f..746aa9da5302 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3365,6 +3365,19 @@ int swapcache_prepare(swp_entry_t entry) return __swap_duplicate(entry, SWAP_HAS_CACHE); } +void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry) +{ + struct swap_cluster_info *ci; + unsigned long offset = swp_offset(entry); + unsigned char usage; + + ci = lock_cluster_or_swap_info(si, offset); + usage = __swap_entry_free_locked(si, offset, SWAP_HAS_CACHE); + unlock_cluster_or_swap_info(si, ci); + if (!usage) + free_swap_slot(entry); +} + struct swap_info_struct *swp_swap_info(swp_entry_t entry) { return swap_type_to_swap_info(swp_type(entry)); diff --git a/mm/zswap.c b/mm/zswap.c index 350dd2fc8159..db4625af65fb 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -377,10 +377,9 @@ void zswap_folio_swapin(struct folio *folio) { struct lruvec *lruvec; - if (folio) { - lruvec = folio_lruvec(folio); - atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected); - } + VM_WARN_ON_ONCE(!folio_test_locked(folio)); + lruvec = folio_lruvec(folio); + atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected); } /********************************* @@ -1440,6 +1439,8 @@ static int zswap_writeback_entry(struct zswap_entry *entry, if (zswap_rb_search(&tree->rbroot, swp_offset(entry->swpentry)) != entry) { spin_unlock(&tree->lock); delete_from_swap_cache(folio); + folio_unlock(folio); + folio_put(folio); return -ENOMEM; } spin_unlock(&tree->lock); @@ -1517,7 +1518,7 @@ bool zswap_store(struct folio *folio) if (folio_test_large(folio)) return false; - if (!zswap_enabled || !tree) + if (!tree) return false; /* @@ -1532,6 +1533,10 @@ bool zswap_store(struct folio *folio) zswap_invalidate_entry(tree, dupentry); } spin_unlock(&tree->lock); + + if (!zswap_enabled) + return false; + objcg = get_obj_cgroup_from_folio(folio); if (objcg && !obj_cgroup_may_zswap(objcg)) { memcg = get_mem_cgroup_from_objcg(objcg); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 65601aa52e0d..2821a42cefdc 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1049,6 +1049,7 @@ static void hci_error_reset(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, error_reset); + hci_dev_hold(hdev); BT_DBG("%s", hdev->name); if (hdev->hw_error) @@ -1056,10 +1057,10 @@ static void hci_error_reset(struct work_struct *work) else bt_dev_err(hdev, "hardware error 0x%2.2x", hdev->hw_error_code); - if (hci_dev_do_close(hdev)) - return; + if (!hci_dev_do_close(hdev)) + hci_dev_do_open(hdev); - hci_dev_do_open(hdev); + hci_dev_put(hdev); } void hci_uuids_clear(struct hci_dev *hdev) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index ef8c3bed7361..2a5f5a7d2412 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5329,9 +5329,12 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, void *data, hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); - if (!conn || !hci_conn_ssp_enabled(conn)) + if (!conn || !hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) goto unlock; + /* Assume remote supports SSP since it has triggered this event */ + set_bit(HCI_CONN_SSP_ENABLED, &conn->flags); + hci_conn_hold(conn); if (!hci_dev_test_flag(hdev, HCI_MGMT)) @@ -6794,6 +6797,10 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, void *data, return send_conn_param_neg_reply(hdev, handle, HCI_ERROR_UNKNOWN_CONN_ID); + if (max > hcon->le_conn_max_interval) + return send_conn_param_neg_reply(hdev, handle, + HCI_ERROR_INVALID_LL_PARAMS); + if (hci_check_conn_params(min, max, latency, timeout)) return send_conn_param_neg_reply(hdev, handle, HCI_ERROR_INVALID_LL_PARAMS); @@ -7420,10 +7427,10 @@ static void hci_store_wake_reason(struct hci_dev *hdev, u8 event, * keep track of the bdaddr of the connection event that woke us up. */ if (event == HCI_EV_CONN_REQUEST) { - bacpy(&hdev->wake_addr, &conn_complete->bdaddr); + bacpy(&hdev->wake_addr, &conn_request->bdaddr); hdev->wake_addr_type = BDADDR_BREDR; } else if (event == HCI_EV_CONN_COMPLETE) { - bacpy(&hdev->wake_addr, &conn_request->bdaddr); + bacpy(&hdev->wake_addr, &conn_complete->bdaddr); hdev->wake_addr_type = BDADDR_BREDR; } else if (event == HCI_EV_LE_META) { struct hci_ev_le_meta *le_ev = (void *)skb->data; diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index a6fc8a2a5c67..5716345a26df 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -2206,8 +2206,11 @@ static int hci_le_add_accept_list_sync(struct hci_dev *hdev, /* During suspend, only wakeable devices can be in acceptlist */ if (hdev->suspended && - !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP)) + !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP)) { + hci_le_del_accept_list_sync(hdev, ¶ms->addr, + params->addr_type); return 0; + } /* Select filter policy to accept all advertising */ if (*num_entries >= hdev->le_accept_list_size) @@ -5559,7 +5562,7 @@ static int hci_inquiry_sync(struct hci_dev *hdev, u8 length) bt_dev_dbg(hdev, ""); - if (hci_dev_test_flag(hdev, HCI_INQUIRY)) + if (test_bit(HCI_INQUIRY, &hdev->flags)) return 0; hci_dev_lock(hdev); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 60298975d5c4..656f49b299d2 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5613,7 +5613,13 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, memset(&rsp, 0, sizeof(rsp)); - err = hci_check_conn_params(min, max, latency, to_multiplier); + if (max > hcon->le_conn_max_interval) { + BT_DBG("requested connection interval exceeds current bounds."); + err = -EINVAL; + } else { + err = hci_check_conn_params(min, max, latency, to_multiplier); + } + if (err) rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED); else diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index bb72ff6eb22f..ee3b4aad8bd8 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1045,6 +1045,8 @@ static void rpa_expired(struct work_struct *work) hci_cmd_sync_queue(hdev, rpa_expired_sync, NULL, NULL); } +static int set_discoverable_sync(struct hci_dev *hdev, void *data); + static void discov_off(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, @@ -1063,7 +1065,7 @@ static void discov_off(struct work_struct *work) hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); hdev->discov_timeout = 0; - hci_update_discoverable(hdev); + hci_cmd_sync_queue(hdev, set_discoverable_sync, NULL, NULL); mgmt_new_settings(hdev); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 053ef8f25fae..1d34d8497033 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -1941,7 +1941,7 @@ static struct rfcomm_session *rfcomm_process_rx(struct rfcomm_session *s) /* Get data directly from socket receive queue without copying it. */ while ((skb = skb_dequeue(&sk->sk_receive_queue))) { skb_orphan(skb); - if (!skb_linearize(skb)) { + if (!skb_linearize(skb) && sk->sk_state != BT_CLOSED) { s = rfcomm_recv_frame(s, skb); if (!s) break; diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index dfd919374017..5535f9adc658 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -617,21 +617,21 @@ CFI_NOSEAL(bpf_kfunc_call_memb_release_dtor); __bpf_kfunc_end_defs(); -BTF_SET8_START(bpf_test_modify_return_ids) +BTF_KFUNCS_START(bpf_test_modify_return_ids) BTF_ID_FLAGS(func, bpf_modify_return_test) BTF_ID_FLAGS(func, bpf_modify_return_test2) BTF_ID_FLAGS(func, bpf_fentry_test1, KF_SLEEPABLE) -BTF_SET8_END(bpf_test_modify_return_ids) +BTF_KFUNCS_END(bpf_test_modify_return_ids) static const struct btf_kfunc_id_set bpf_test_modify_return_set = { .owner = THIS_MODULE, .set = &bpf_test_modify_return_ids, }; -BTF_SET8_START(test_sk_check_kfunc_ids) +BTF_KFUNCS_START(test_sk_check_kfunc_ids) BTF_ID_FLAGS(func, bpf_kfunc_call_test_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_kfunc_call_memb_release, KF_RELEASE) -BTF_SET8_END(test_sk_check_kfunc_ids) +BTF_KFUNCS_END(test_sk_check_kfunc_ids) static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size, u32 size, u32 headroom, u32 tailroom) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index ed1720890757..35e10c5a766d 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -43,6 +43,10 @@ #include <linux/sysctl.h> #endif +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include <net/netfilter/nf_conntrack_core.h> +#endif + static unsigned int brnf_net_id __read_mostly; struct brnf_net { @@ -553,6 +557,90 @@ static unsigned int br_nf_pre_routing(void *priv, return NF_STOLEN; } +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +/* conntracks' nf_confirm logic cannot handle cloned skbs referencing + * the same nf_conn entry, which will happen for multicast (broadcast) + * Frames on bridges. + * + * Example: + * macvlan0 + * br0 + * ethX ethY + * + * ethX (or Y) receives multicast or broadcast packet containing + * an IP packet, not yet in conntrack table. + * + * 1. skb passes through bridge and fake-ip (br_netfilter)Prerouting. + * -> skb->_nfct now references a unconfirmed entry + * 2. skb is broad/mcast packet. bridge now passes clones out on each bridge + * interface. + * 3. skb gets passed up the stack. + * 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb + * and schedules a work queue to send them out on the lower devices. + * + * The clone skb->_nfct is not a copy, it is the same entry as the + * original skb. The macvlan rx handler then returns RX_HANDLER_PASS. + * 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb. + * + * The Macvlan broadcast worker and normal confirm path will race. + * + * This race will not happen if step 2 already confirmed a clone. In that + * case later steps perform skb_clone() with skb->_nfct already confirmed (in + * hash table). This works fine. + * + * But such confirmation won't happen when eb/ip/nftables rules dropped the + * packets before they reached the nf_confirm step in postrouting. + * + * Work around this problem by explicit confirmation of the entry at + * LOCAL_IN time, before upper layer has a chance to clone the unconfirmed + * entry. + * + */ +static unsigned int br_nf_local_in(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nf_conntrack *nfct = skb_nfct(skb); + const struct nf_ct_hook *ct_hook; + struct nf_conn *ct; + int ret; + + if (!nfct || skb->pkt_type == PACKET_HOST) + return NF_ACCEPT; + + ct = container_of(nfct, struct nf_conn, ct_general); + if (likely(nf_ct_is_confirmed(ct))) + return NF_ACCEPT; + + WARN_ON_ONCE(skb_shared(skb)); + WARN_ON_ONCE(refcount_read(&nfct->use) != 1); + + /* We can't call nf_confirm here, it would create a dependency + * on nf_conntrack module. + */ + ct_hook = rcu_dereference(nf_ct_hook); + if (!ct_hook) { + skb->_nfct = 0ul; + nf_conntrack_put(nfct); + return NF_ACCEPT; + } + + nf_bridge_pull_encap_header(skb); + ret = ct_hook->confirm(skb); + switch (ret & NF_VERDICT_MASK) { + case NF_STOLEN: + return NF_STOLEN; + default: + nf_bridge_push_encap_header(skb); + break; + } + + ct = container_of(nfct, struct nf_conn, ct_general); + WARN_ON_ONCE(!nf_ct_is_confirmed(ct)); + + return ret; +} +#endif /* PF_BRIDGE/FORWARD *************************************************/ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) @@ -964,6 +1052,14 @@ static const struct nf_hook_ops br_nf_ops[] = { .hooknum = NF_BR_PRE_ROUTING, .priority = NF_BR_PRI_BRNF, }, +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + { + .hook = br_nf_local_in, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_LOCAL_IN, + .priority = NF_BR_PRI_LAST, + }, +#endif { .hook = br_nf_forward, .pf = NFPROTO_BRIDGE, diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index abb090f94ed2..6f877e31709b 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -291,6 +291,30 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb, return nf_conntrack_in(skb, &bridge_state); } +static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + if (skb->pkt_type == PACKET_HOST) + return NF_ACCEPT; + + /* nf_conntrack_confirm() cannot handle concurrent clones, + * this happens for broad/multicast frames with e.g. macvlan on top + * of the bridge device. + */ + ct = nf_ct_get(skb, &ctinfo); + if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) + return NF_ACCEPT; + + /* let inet prerouting call conntrack again */ + skb->_nfct = 0; + nf_ct_put(ct); + + return NF_ACCEPT; +} + static void nf_ct_bridge_frag_save(struct sk_buff *skb, struct nf_bridge_frag_data *data) { @@ -386,6 +410,12 @@ static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = { .priority = NF_IP_PRI_CONNTRACK, }, { + .hook = nf_ct_bridge_in, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_LOCAL_IN, + .priority = NF_IP_PRI_CONNTRACK_CONFIRM, + }, + { .hook = nf_ct_bridge_post, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_POST_ROUTING, diff --git a/net/core/dev.c b/net/core/dev.c index 34b39c03e97d..bf933eeaa688 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -180,8 +180,9 @@ static DECLARE_RWSEM(devnet_rename_sem); static inline void dev_base_seq_inc(struct net *net) { - while (++net->dev_base_seq == 0) - ; + unsigned int val = net->dev_base_seq + 1; + + WRITE_ONCE(net->dev_base_seq, val ?: 1); } static inline struct hlist_head *dev_name_hash(struct net *net, const char *name) @@ -9121,28 +9122,6 @@ bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b) } EXPORT_SYMBOL(netdev_port_same_parent_id); -static void netdev_dpll_pin_assign(struct net_device *dev, struct dpll_pin *dpll_pin) -{ -#if IS_ENABLED(CONFIG_DPLL) - rtnl_lock(); - dev->dpll_pin = dpll_pin; - rtnl_unlock(); -#endif -} - -void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin) -{ - WARN_ON(!dpll_pin); - netdev_dpll_pin_assign(dev, dpll_pin); -} -EXPORT_SYMBOL(netdev_dpll_pin_set); - -void netdev_dpll_pin_clear(struct net_device *dev) -{ - netdev_dpll_pin_assign(dev, NULL); -} -EXPORT_SYMBOL(netdev_dpll_pin_clear); - /** * dev_change_proto_down - set carrier according to proto_down. * diff --git a/net/core/filter.c b/net/core/filter.c index 358870408a51..8adf95765cdd 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5988,7 +5988,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, return -ENODEV; idev = __in6_dev_get_safely(dev); - if (unlikely(!idev || !idev->cnf.forwarding)) + if (unlikely(!idev || !READ_ONCE(idev->cnf.forwarding))) return BPF_FIB_LKUP_RET_FWD_DISABLED; if (flags & BPF_FIB_LOOKUP_OUTPUT) { @@ -11982,21 +11982,21 @@ int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, return 0; } -BTF_SET8_START(bpf_kfunc_check_set_skb) +BTF_KFUNCS_START(bpf_kfunc_check_set_skb) BTF_ID_FLAGS(func, bpf_dynptr_from_skb) -BTF_SET8_END(bpf_kfunc_check_set_skb) +BTF_KFUNCS_END(bpf_kfunc_check_set_skb) -BTF_SET8_START(bpf_kfunc_check_set_xdp) +BTF_KFUNCS_START(bpf_kfunc_check_set_xdp) BTF_ID_FLAGS(func, bpf_dynptr_from_xdp) -BTF_SET8_END(bpf_kfunc_check_set_xdp) +BTF_KFUNCS_END(bpf_kfunc_check_set_xdp) -BTF_SET8_START(bpf_kfunc_check_set_sock_addr) +BTF_KFUNCS_START(bpf_kfunc_check_set_sock_addr) BTF_ID_FLAGS(func, bpf_sock_addr_set_sun_path) -BTF_SET8_END(bpf_kfunc_check_set_sock_addr) +BTF_KFUNCS_END(bpf_kfunc_check_set_sock_addr) -BTF_SET8_START(bpf_kfunc_check_set_tcp_reqsk) +BTF_KFUNCS_START(bpf_kfunc_check_set_tcp_reqsk) BTF_ID_FLAGS(func, bpf_sk_assign_tcp_reqsk, KF_TRUSTED_ARGS) -BTF_SET8_END(bpf_kfunc_check_set_tcp_reqsk) +BTF_KFUNCS_END(bpf_kfunc_check_set_tcp_reqsk) static const struct btf_kfunc_id_set bpf_kfunc_set_skb = { .owner = THIS_MODULE, @@ -12075,9 +12075,9 @@ __bpf_kfunc int bpf_sock_destroy(struct sock_common *sock) __bpf_kfunc_end_defs(); -BTF_SET8_START(bpf_sk_iter_kfunc_ids) +BTF_KFUNCS_START(bpf_sk_iter_kfunc_ids) BTF_ID_FLAGS(func, bpf_sock_destroy, KF_TRUSTED_ARGS) -BTF_SET8_END(bpf_sk_iter_kfunc_ids) +BTF_KFUNCS_END(bpf_sk_iter_kfunc_ids) static int tracing_iter_filter(const struct bpf_prog *prog, u32 kfunc_id) { diff --git a/net/core/gro.c b/net/core/gro.c index 0759277dc14e..6a0edbd826a1 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -369,15 +369,21 @@ static void gro_list_prepare(const struct list_head *head, static inline void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff) { - const struct skb_shared_info *pinfo = skb_shinfo(skb); - const skb_frag_t *frag0 = &pinfo->frags[0]; + const struct skb_shared_info *pinfo; + const skb_frag_t *frag0; + unsigned int headlen; NAPI_GRO_CB(skb)->data_offset = 0; - NAPI_GRO_CB(skb)->frag0 = NULL; - NAPI_GRO_CB(skb)->frag0_len = 0; + headlen = skb_headlen(skb); + NAPI_GRO_CB(skb)->frag0 = skb->data; + NAPI_GRO_CB(skb)->frag0_len = headlen; + if (headlen) + return; + + pinfo = skb_shinfo(skb); + frag0 = &pinfo->frags[0]; - if (!skb_headlen(skb) && pinfo->nr_frags && - !PageHighMem(skb_frag_page(frag0)) && + if (pinfo->nr_frags && !PageHighMem(skb_frag_page(frag0)) && (!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) { NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int, @@ -700,7 +706,7 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi) skb_reset_mac_header(skb); skb_gro_reset_offset(skb, hlen); - if (unlikely(skb_gro_header_hard(skb, hlen))) { + if (unlikely(!skb_gro_may_pull(skb, hlen))) { eth = skb_gro_header_slow(skb, hlen, 0); if (unlikely(!eth)) { net_warn_ratelimited("%s: dropping impossible skb from %s\n", @@ -710,7 +716,10 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi) } } else { eth = (const struct ethhdr *)skb->data; - gro_pull_from_frag0(skb, hlen); + + if (NAPI_GRO_CB(skb)->frag0 != skb->data) + gro_pull_from_frag0(skb, hlen); + NAPI_GRO_CB(skb)->frag0 += hlen; NAPI_GRO_CB(skb)->frag0_len -= hlen; } diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index fd98936da3ae..918b109e0cf4 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -152,10 +152,7 @@ int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) } rtnl_unlock(); - if (err != -EMSGSIZE) - return err; - - return skb->len; + return err; } static int @@ -287,10 +284,7 @@ int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) } rtnl_unlock(); - if (err != -EMSGSIZE) - return err; - - return skb->len; + return err; } static int @@ -463,10 +457,7 @@ int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) } rtnl_unlock(); - if (err != -EMSGSIZE) - return err; - - return skb->len; + return err; } static int netdev_genl_netdevice_event(struct notifier_block *nb, diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c index ffe5244e5597..3a3277ba167b 100644 --- a/net/core/page_pool_user.c +++ b/net/core/page_pool_user.c @@ -94,16 +94,15 @@ netdev_nl_page_pool_get_dump(struct sk_buff *skb, struct netlink_callback *cb, state->pp_id = pool->user.id; err = fill(skb, pool, info); if (err) - break; + goto out; } state->pp_id = 0; } +out: mutex_unlock(&page_pools_lock); rtnl_unlock(); - if (skb->len && err == -EMSGSIZE) - return skb->len; return err; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 39f17d0b6cea..a3d7847ce69d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1056,7 +1056,7 @@ static size_t rtnl_dpll_pin_size(const struct net_device *dev) { size_t size = nla_total_size(0); /* nest IFLA_DPLL_PIN */ - size += dpll_msg_pin_handle_size(netdev_dpll_pin(dev)); + size += dpll_netdev_pin_handle_size(dev); return size; } @@ -1793,7 +1793,7 @@ static int rtnl_fill_dpll_pin(struct sk_buff *skb, if (!dpll_pin_nest) return -EMSGSIZE; - ret = dpll_msg_add_pin_handle(skb, netdev_dpll_pin(dev)); + ret = dpll_netdev_add_pin_handle(skb, dev); if (ret < 0) goto nest_cancel; @@ -2267,11 +2267,8 @@ walk_entries: nlh->nlmsg_seq, 0, flags, ext_filter_mask, 0, NULL, 0, netnsid, GFP_KERNEL); - if (err < 0) { - if (likely(skb->len)) - err = skb->len; + if (err < 0) break; - } } cb->seq = tgt_net->dev_base_seq; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); @@ -5148,10 +5145,9 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; struct net_device *dev; - struct nlattr *br_spec, *attr = NULL; + struct nlattr *br_spec, *attr, *br_flags_attr = NULL; int rem, err = -EOPNOTSUPP; u16 flags = 0; - bool have_flags = false; if (nlmsg_len(nlh) < sizeof(*ifm)) return -EINVAL; @@ -5169,11 +5165,11 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (br_spec) { nla_for_each_nested(attr, br_spec, rem) { - if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !have_flags) { + if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !br_flags_attr) { if (nla_len(attr) < sizeof(flags)) return -EINVAL; - have_flags = true; + br_flags_attr = attr; flags = nla_get_u16(attr); } @@ -5217,8 +5213,8 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, } } - if (have_flags) - memcpy(nla_data(attr), &flags, sizeof(flags)); + if (br_flags_attr) + memcpy(nla_data(br_flags_attr), &flags, sizeof(flags)); out: return err; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1f918e602bc4..43d7fc150acc 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -315,7 +315,8 @@ void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) fragsz = SKB_DATA_ALIGN(fragsz); - return page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask); + return __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, + align_mask); } EXPORT_SYMBOL(__napi_alloc_frag_align); @@ -327,13 +328,15 @@ void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) if (in_hardirq() || irqs_disabled()) { struct page_frag_cache *nc = this_cpu_ptr(&netdev_alloc_cache); - data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align_mask); + data = __page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, + align_mask); } else { struct napi_alloc_cache *nc; local_bh_disable(); nc = this_cpu_ptr(&napi_alloc_cache); - data = page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask); + data = __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, + align_mask); local_bh_enable(); } return data; diff --git a/net/core/sock.c b/net/core/sock.c index df2ac54a8f74..43bf3818c19e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2053,8 +2053,9 @@ static void sock_copy(struct sock *nsk, const struct sock *osk) memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin)); - memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end, - prot->obj_size - offsetof(struct sock, sk_dontcopy_end)); + unsafe_memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end, + prot->obj_size - offsetof(struct sock, sk_dontcopy_end), + /* alloc is larger than struct, see sk_prot_alloc() */); #ifdef CONFIG_SECURITY_NETWORK nsk->sk_security = sptr; diff --git a/net/core/xdp.c b/net/core/xdp.c index 27b585f3fa81..0e3709a29175 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -771,11 +771,11 @@ __bpf_kfunc int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx, __bpf_kfunc_end_defs(); -BTF_SET8_START(xdp_metadata_kfunc_ids) +BTF_KFUNCS_START(xdp_metadata_kfunc_ids) #define XDP_METADATA_KFUNC(_, __, name, ___) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS) XDP_METADATA_KFUNC_xxx #undef XDP_METADATA_KFUNC -BTF_SET8_END(xdp_metadata_kfunc_ids) +BTF_KFUNCS_END(xdp_metadata_kfunc_ids) static const struct btf_kfunc_id_set xdp_metadata_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/dsa/user.c b/net/dsa/user.c index 9c42a6edcdc8..16d395bb1a1f 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -2625,11 +2625,7 @@ int dsa_user_create(struct dsa_port *port) user_dev->vlan_features = conduit->vlan_features; p = netdev_priv(user_dev); - user_dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!user_dev->tstats) { - free_netdev(user_dev); - return -ENOMEM; - } + user_dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; ret = gro_cells_init(&p->gcells, user_dev); if (ret) @@ -2695,7 +2691,6 @@ out_phy: out_gcells: gro_cells_destroy(&p->gcells); out_free: - free_percpu(user_dev->tstats); free_netdev(user_dev); port->user = NULL; return ret; @@ -2716,7 +2711,6 @@ void dsa_user_destroy(struct net_device *user_dev) dsa_port_phylink_destroy(dp); gro_cells_destroy(&p->gcells); - free_percpu(user_dev->tstats); free_netdev(user_dev); } diff --git a/net/ethtool/common.c b/net/ethtool/common.c index ce486cec346c..6b2a360dcdf0 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -712,8 +712,3 @@ ethtool_forced_speed_maps_init(struct ethtool_forced_speed_map *maps, u32 size) } } EXPORT_SYMBOL_GPL(ethtool_forced_speed_maps_init); - -bool ethtool_eee_use_linkmodes(const struct ethtool_keee *eee) -{ - return !linkmode_empty(eee->supported); -} diff --git a/net/ethtool/common.h b/net/ethtool/common.h index 0f2b5f7eacee..28b8aaaf9bcb 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -55,6 +55,5 @@ int ethtool_get_module_eeprom_call(struct net_device *dev, struct ethtool_eeprom *ee, u8 *data); bool __ethtool_dev_mm_supported(struct net_device *dev); -bool ethtool_eee_use_linkmodes(const struct ethtool_keee *eee); #endif /* _ETHTOOL_COMMON_H */ diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 317308bdbda9..5a55270aa86e 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1514,17 +1514,12 @@ static void eee_to_keee(struct ethtool_keee *keee, { memset(keee, 0, sizeof(*keee)); - keee->eee_active = eee->eee_active; keee->eee_enabled = eee->eee_enabled; keee->tx_lpi_enabled = eee->tx_lpi_enabled; keee->tx_lpi_timer = eee->tx_lpi_timer; - ethtool_convert_legacy_u32_to_link_mode(keee->supported, - eee->supported); ethtool_convert_legacy_u32_to_link_mode(keee->advertised, eee->advertised); - ethtool_convert_legacy_u32_to_link_mode(keee->lp_advertised, - eee->lp_advertised); } static void keee_to_eee(struct ethtool_eee *eee, diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 80cdc6f6b34c..5d68cb181695 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -83,7 +83,7 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb) return false; /* Get next tlv */ - total_length += sizeof(struct hsr_sup_tlv) + hsr_sup_tag->tlv.HSR_TLV_length; + total_length += hsr_sup_tag->tlv.HSR_TLV_length; if (!pskb_may_pull(skb, total_length)) return false; skb_pull(skb, total_length); @@ -435,7 +435,7 @@ static void hsr_forward_do(struct hsr_frame_info *frame) continue; /* Don't send frame over port where it has been sent before. - * Also fro SAN, this shouldn't be done. + * Also for SAN, this shouldn't be done. */ if (!frame->is_from_san && hsr_register_frame_out(port, frame->node_src, diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 834edc18463a..7f518ea5f4ac 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -201,13 +201,13 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id, } } -BTF_SET8_START(bpf_tcp_ca_check_kfunc_ids) +BTF_KFUNCS_START(bpf_tcp_ca_check_kfunc_ids) BTF_ID_FLAGS(func, tcp_reno_ssthresh) BTF_ID_FLAGS(func, tcp_reno_cong_avoid) BTF_ID_FLAGS(func, tcp_reno_undo_cwnd) BTF_ID_FLAGS(func, tcp_slow_start) BTF_ID_FLAGS(func, tcp_cong_avoid_ai) -BTF_SET8_END(bpf_tcp_ca_check_kfunc_ids) +BTF_KFUNCS_END(bpf_tcp_ca_check_kfunc_ids) static const struct btf_kfunc_id_set bpf_tcp_ca_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index af741af61830..7a437f0d4190 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -713,34 +713,37 @@ static void check_lifetime(struct work_struct *work) rcu_read_lock(); hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) { - unsigned long age; + unsigned long age, tstamp; + u32 preferred_lft; + u32 valid_lft; + u32 flags; - if (ifa->ifa_flags & IFA_F_PERMANENT) + flags = READ_ONCE(ifa->ifa_flags); + if (flags & IFA_F_PERMANENT) continue; + preferred_lft = READ_ONCE(ifa->ifa_preferred_lft); + valid_lft = READ_ONCE(ifa->ifa_valid_lft); + tstamp = READ_ONCE(ifa->ifa_tstamp); /* We try to batch several events at once. */ - age = (now - ifa->ifa_tstamp + + age = (now - tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; - if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && - age >= ifa->ifa_valid_lft) { + if (valid_lft != INFINITY_LIFE_TIME && + age >= valid_lft) { change_needed = true; - } else if (ifa->ifa_preferred_lft == + } else if (preferred_lft == INFINITY_LIFE_TIME) { continue; - } else if (age >= ifa->ifa_preferred_lft) { - if (time_before(ifa->ifa_tstamp + - ifa->ifa_valid_lft * HZ, next)) - next = ifa->ifa_tstamp + - ifa->ifa_valid_lft * HZ; + } else if (age >= preferred_lft) { + if (time_before(tstamp + valid_lft * HZ, next)) + next = tstamp + valid_lft * HZ; - if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) + if (!(flags & IFA_F_DEPRECATED)) change_needed = true; - } else if (time_before(ifa->ifa_tstamp + - ifa->ifa_preferred_lft * HZ, + } else if (time_before(tstamp + preferred_lft * HZ, next)) { - next = ifa->ifa_tstamp + - ifa->ifa_preferred_lft * HZ; + next = tstamp + preferred_lft * HZ; } } rcu_read_unlock(); @@ -804,24 +807,26 @@ static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft, __u32 prefered_lft) { unsigned long timeout; + u32 flags; - ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED); + flags = ifa->ifa_flags & ~(IFA_F_PERMANENT | IFA_F_DEPRECATED); timeout = addrconf_timeout_fixup(valid_lft, HZ); if (addrconf_finite_timeout(timeout)) - ifa->ifa_valid_lft = timeout; + WRITE_ONCE(ifa->ifa_valid_lft, timeout); else - ifa->ifa_flags |= IFA_F_PERMANENT; + flags |= IFA_F_PERMANENT; timeout = addrconf_timeout_fixup(prefered_lft, HZ); if (addrconf_finite_timeout(timeout)) { if (timeout == 0) - ifa->ifa_flags |= IFA_F_DEPRECATED; - ifa->ifa_preferred_lft = timeout; + flags |= IFA_F_DEPRECATED; + WRITE_ONCE(ifa->ifa_preferred_lft, timeout); } - ifa->ifa_tstamp = jiffies; + WRITE_ONCE(ifa->ifa_flags, flags); + WRITE_ONCE(ifa->ifa_tstamp, jiffies); if (!ifa->ifa_cstamp) - ifa->ifa_cstamp = ifa->ifa_tstamp; + WRITE_ONCE(ifa->ifa_cstamp, ifa->ifa_tstamp); } static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, @@ -1312,7 +1317,7 @@ static __be32 in_dev_select_addr(const struct in_device *in_dev, const struct in_ifaddr *ifa; in_dev_for_each_ifa_rcu(ifa, in_dev) { - if (ifa->ifa_flags & IFA_F_SECONDARY) + if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY) continue; if (ifa->ifa_scope != RT_SCOPE_LINK && ifa->ifa_scope <= scope) @@ -1340,7 +1345,7 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) localnet_scope = RT_SCOPE_LINK; in_dev_for_each_ifa_rcu(ifa, in_dev) { - if (ifa->ifa_flags & IFA_F_SECONDARY) + if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY) continue; if (min(ifa->ifa_scope, localnet_scope) > scope) continue; @@ -1671,11 +1676,12 @@ static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp, return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci); } -static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, +static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa, struct inet_fill_args *args) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; + unsigned long tstamp; u32 preferred, valid; nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm), @@ -1686,7 +1692,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, ifm = nlmsg_data(nlh); ifm->ifa_family = AF_INET; ifm->ifa_prefixlen = ifa->ifa_prefixlen; - ifm->ifa_flags = ifa->ifa_flags; + ifm->ifa_flags = READ_ONCE(ifa->ifa_flags); ifm->ifa_scope = ifa->ifa_scope; ifm->ifa_index = ifa->ifa_dev->dev->ifindex; @@ -1694,11 +1700,12 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) goto nla_put_failure; + tstamp = READ_ONCE(ifa->ifa_tstamp); if (!(ifm->ifa_flags & IFA_F_PERMANENT)) { - preferred = ifa->ifa_preferred_lft; - valid = ifa->ifa_valid_lft; + preferred = READ_ONCE(ifa->ifa_preferred_lft); + valid = READ_ONCE(ifa->ifa_valid_lft); if (preferred != INFINITY_LIFE_TIME) { - long tval = (jiffies - ifa->ifa_tstamp) / HZ; + long tval = (jiffies - tstamp) / HZ; if (preferred > tval) preferred -= tval; @@ -1725,10 +1732,10 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || (ifa->ifa_proto && nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) || - nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) || + nla_put_u32(skb, IFA_FLAGS, ifm->ifa_flags) || (ifa->ifa_rt_priority && nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) || - put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp, + put_cacheinfo(skb, READ_ONCE(ifa->ifa_cstamp), tstamp, preferred, valid)) goto nla_put_failure; @@ -1798,15 +1805,15 @@ static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh, } static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb, - struct netlink_callback *cb, int s_ip_idx, + struct netlink_callback *cb, int *s_ip_idx, struct inet_fill_args *fillargs) { struct in_ifaddr *ifa; int ip_idx = 0; int err; - in_dev_for_each_ifa_rtnl(ifa, in_dev) { - if (ip_idx < s_ip_idx) { + in_dev_for_each_ifa_rcu(ifa, in_dev) { + if (ip_idx < *s_ip_idx) { ip_idx++; continue; } @@ -1818,9 +1825,9 @@ static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb, ip_idx++; } err = 0; - + ip_idx = 0; done: - cb->args[2] = ip_idx; + *s_ip_idx = ip_idx; return err; } @@ -1830,7 +1837,7 @@ done: static u32 inet_base_seq(const struct net *net) { u32 res = atomic_read(&net->ipv4.dev_addr_genid) + - net->dev_base_seq; + READ_ONCE(net->dev_base_seq); /* Must not return 0 (see nl_dump_check_consistent()). * Chose a value far away from 0. @@ -1852,75 +1859,51 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) }; struct net *net = sock_net(skb->sk); struct net *tgt_net = net; - int h, s_h; - int idx, s_idx; - int s_ip_idx; - struct net_device *dev; + struct { + unsigned long ifindex; + int ip_idx; + } *ctx = (void *)cb->ctx; struct in_device *in_dev; - struct hlist_head *head; + struct net_device *dev; int err = 0; - s_h = cb->args[0]; - s_idx = idx = cb->args[1]; - s_ip_idx = cb->args[2]; - + rcu_read_lock(); if (cb->strict_check) { err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net, skb->sk, cb); if (err < 0) - goto put_tgt_net; + goto done; - err = 0; if (fillargs.ifindex) { - dev = __dev_get_by_index(tgt_net, fillargs.ifindex); - if (!dev) { - err = -ENODEV; - goto put_tgt_net; - } - - in_dev = __in_dev_get_rtnl(dev); - if (in_dev) { - err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx, - &fillargs); - } - goto put_tgt_net; - } - } - - for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { - idx = 0; - head = &tgt_net->dev_index_head[h]; - rcu_read_lock(); - cb->seq = inet_base_seq(tgt_net); - hlist_for_each_entry_rcu(dev, head, index_hlist) { - if (idx < s_idx) - goto cont; - if (h > s_h || idx > s_idx) - s_ip_idx = 0; + err = -ENODEV; + dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex); + if (!dev) + goto done; in_dev = __in_dev_get_rcu(dev); if (!in_dev) - goto cont; - - err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx, - &fillargs); - if (err < 0) { - rcu_read_unlock(); goto done; - } -cont: - idx++; + err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx, + &fillargs); + goto done; } - rcu_read_unlock(); } + cb->seq = inet_base_seq(tgt_net); + + for_each_netdev_dump(net, dev, ctx->ifindex) { + in_dev = __in_dev_get_rcu(dev); + if (!in_dev) + continue; + err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx, + &fillargs); + if (err < 0) + goto done; + } done: - cb->args[0] = h; - cb->args[1] = idx; -put_tgt_net: if (fillargs.netnsid >= 0) put_net(tgt_net); - - return skb->len ? : err; + rcu_read_unlock(); + return err; } static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, @@ -2327,8 +2310,6 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb, ctx->all_default++; } done: - if (err < 0 && likely(skb->len)) - err = skb->len; rcu_read_unlock(); return err; } @@ -2811,7 +2792,8 @@ void __init devinet_init(void) rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0); rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0); - rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0); + rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, + RTNL_FLAG_DUMP_UNLOCKED); rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf, inet_netconf_dump_devconf, RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index bf3a2214fe29..48741352a88a 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1026,8 +1026,6 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) goto unlock; } err = fib_table_dump(tb, skb, cb, &filter); - if (err < 0 && skb->len) - err = skb->len; goto unlock; } @@ -1045,11 +1043,8 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) memset(&cb->args[2], 0, sizeof(cb->args) - 2 * sizeof(cb->args[0])); err = fib_table_dump(tb, skb, cb, &filter); - if (err < 0) { - if (likely(skb->len)) - err = skb->len; + if (err < 0) goto out; - } dumped = 1; next: e++; diff --git a/net/ipv4/fou_bpf.c b/net/ipv4/fou_bpf.c index 4da03bf45c9b..06e5572f296f 100644 --- a/net/ipv4/fou_bpf.c +++ b/net/ipv4/fou_bpf.c @@ -100,10 +100,10 @@ __bpf_kfunc int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx, __bpf_kfunc_end_defs(); -BTF_SET8_START(fou_kfunc_set) +BTF_KFUNCS_START(fou_kfunc_set) BTF_ID_FLAGS(func, bpf_skb_set_fou_encap) BTF_ID_FLAGS(func, bpf_skb_get_fou_encap) -BTF_SET8_END(fou_kfunc_set) +BTF_KFUNCS_END(fou_kfunc_set) static const struct btf_kfunc_id_set fou_bpf_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/ipv4/fou_core.c b/net/ipv4/fou_core.c index 0c41076e31ed..a8494f796dca 100644 --- a/net/ipv4/fou_core.c +++ b/net/ipv4/fou_core.c @@ -351,7 +351,7 @@ static struct sk_buff *gue_gro_receive(struct sock *sk, optlen = guehdr->hlen << 2; len += optlen; - if (skb_gro_header_hard(skb, len)) { + if (!skb_gro_may_pull(skb, len)) { guehdr = skb_gro_header_slow(skb, len, off); if (unlikely(!guehdr)) goto out; diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 311e70bfce40..5028c72d494a 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -174,7 +174,7 @@ static struct sk_buff *gre_gro_receive(struct list_head *head, grehlen += GRE_HEADER_SECTION; hlen = off + grehlen; - if (skb_gro_header_hard(skb, hlen)) { + if (!skb_gro_may_pull(skb, hlen)) { greh = skb_gro_header_slow(skb, hlen, off); if (unlikely(!greh)) goto out; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 747ed7344cbe..7d8090f109ef 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -906,8 +906,9 @@ static struct request_sock *inet_reqsk_clone(struct request_sock *req, memcpy(nreq_sk, req_sk, offsetof(struct sock, sk_dontcopy_begin)); - memcpy(&nreq_sk->sk_dontcopy_end, &req_sk->sk_dontcopy_end, - req->rsk_ops->obj_size - offsetof(struct sock, sk_dontcopy_end)); + unsafe_memcpy(&nreq_sk->sk_dontcopy_end, &req_sk->sk_dontcopy_end, + req->rsk_ops->obj_size - offsetof(struct sock, sk_dontcopy_end), + /* alloc is larger than struct, see above */); sk_node_init(&nreq_sk->sk_node); nreq_sk->sk_tx_queue_mapping = req_sk->sk_tx_queue_mapping; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 1fe794967211..33f93dc730a3 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1458,6 +1458,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority); skb->mark = cork->mark; skb->tstamp = cork->transmit_time; + skb->mono_delivery_time = !!skb->tstamp; /* * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec * on dst refcount diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 21d2ffa919e9..cf377377b52d 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -894,7 +894,7 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, { struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); - int val = 0, err; + int val = 0, err, retv; bool needs_rtnl = setsockopt_needs_rtnl(optname); switch (optname) { @@ -938,8 +938,12 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, /* If optlen==0, it is equivalent to val == 0 */ - if (optname == IP_ROUTER_ALERT) - return ip_ra_control(sk, val ? 1 : 0, NULL); + if (optname == IP_ROUTER_ALERT) { + retv = ip_ra_control(sk, val ? 1 : 0, NULL); + if (retv == 0) + inet_assign_bit(RTALERT, sk, val); + return retv; + } if (ip_mroute_opt(optname)) return ip_mroute_setsockopt(sk, optname, optval, optlen); @@ -1575,6 +1579,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_BIND_ADDRESS_NO_PORT: val = inet_test_bit(BIND_ADDRESS_NO_PORT, sk); goto copyval; + case IP_ROUTER_ALERT: + val = inet_test_bit(RTALERT, sk); + goto copyval; case IP_TTL: val = READ_ONCE(inet->uc_ttl); if (val < 0) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 756f8b923883..d3ee6565fcfb 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -553,6 +553,20 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, return 0; } +static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom) +{ + /* we must cap headroom to some upperlimit, else pskb_expand_head + * will overflow header offsets in skb_headers_offset_update(). + */ + static const unsigned int max_allowed = 512; + + if (headroom > max_allowed) + headroom = max_allowed; + + if (headroom > READ_ONCE(dev->needed_headroom)) + WRITE_ONCE(dev->needed_headroom, headroom); +} + void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto, int tunnel_hlen) { @@ -631,13 +645,13 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; - if (headroom > READ_ONCE(dev->needed_headroom)) - WRITE_ONCE(dev->needed_headroom, headroom); - - if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { + if (skb_cow_head(skb, headroom)) { ip_rt_put(rt); goto tx_dropped; } + + ip_tunnel_adj_headroom(dev, headroom); + iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); return; @@ -817,16 +831,16 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) + rt->dst.header_len + ip_encap_hlen(&tunnel->encap); - if (max_headroom > READ_ONCE(dev->needed_headroom)) - WRITE_ONCE(dev->needed_headroom, max_headroom); - if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { + if (skb_cow_head(skb, max_headroom)) { ip_rt_put(rt); DEV_STATS_INC(dev, tx_dropped); kfree_skb(skb); return; } + ip_tunnel_adj_headroom(dev, max_headroom); + iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); return; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 7d2bdfd7e7d7..494a6284bd7e 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -310,7 +310,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) } nf_reset_ct(skb); - skb_push(skb, skb->data - skb_network_header(skb)); + skb_push(skb, -skb_network_offset(skb)); raw_rcv_skb(sk, skb); return 0; @@ -353,6 +353,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc->mark; skb->tstamp = sockc->transmit_time; + skb->mono_delivery_time = !!skb->tstamp; skb_dst_set(skb, &rt->dst); *rtp = NULL; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c82dc42f57c6..7e1b848398d0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4651,7 +4651,7 @@ static void __init tcp_struct_check(void) CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tsorted_sent_queue); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, highest_sack); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, ecn_flags); - CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_tx, 113); + CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_tx, 105); /* TXRX read-write hotpath cache lines */ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, pred_flags); diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 22358032dd48..05dc2d05bc7c 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -1155,7 +1155,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { .set_state = bbr_set_state, }; -BTF_SET8_START(tcp_bbr_check_kfunc_ids) +BTF_KFUNCS_START(tcp_bbr_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE BTF_ID_FLAGS(func, bbr_init) @@ -1168,7 +1168,7 @@ BTF_ID_FLAGS(func, bbr_min_tso_segs) BTF_ID_FLAGS(func, bbr_set_state) #endif #endif -BTF_SET8_END(tcp_bbr_check_kfunc_ids) +BTF_KFUNCS_END(tcp_bbr_check_kfunc_ids) static const struct btf_kfunc_id_set tcp_bbr_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 0fd78ecb67e7..44869ea089e3 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -485,7 +485,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = { .name = "cubic", }; -BTF_SET8_START(tcp_cubic_check_kfunc_ids) +BTF_KFUNCS_START(tcp_cubic_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE BTF_ID_FLAGS(func, cubictcp_init) @@ -496,7 +496,7 @@ BTF_ID_FLAGS(func, cubictcp_cwnd_event) BTF_ID_FLAGS(func, cubictcp_acked) #endif #endif -BTF_SET8_END(tcp_cubic_check_kfunc_ids) +BTF_KFUNCS_END(tcp_cubic_check_kfunc_ids) static const struct btf_kfunc_id_set tcp_cubic_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index bb23bb5b387a..e33fbe4933e4 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -260,7 +260,7 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = { .name = "dctcp-reno", }; -BTF_SET8_START(tcp_dctcp_check_kfunc_ids) +BTF_KFUNCS_START(tcp_dctcp_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE BTF_ID_FLAGS(func, dctcp_init) @@ -271,7 +271,7 @@ BTF_ID_FLAGS(func, dctcp_cwnd_undo) BTF_ID_FLAGS(func, dctcp_state) #endif #endif -BTF_SET8_END(tcp_dctcp_check_kfunc_ids) +BTF_KFUNCS_END(tcp_dctcp_check_kfunc_ids) static const struct btf_kfunc_id_set tcp_dctcp_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 8311c38267b5..b955ab3b236d 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -204,7 +204,7 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb) goto out; hlen = off + thlen; - if (skb_gro_header_hard(skb, hlen)) { + if (!skb_gro_may_pull(skb, hlen)) { th = skb_gro_header_slow(skb, hlen, off); if (unlikely(!th)) goto out; @@ -299,18 +299,20 @@ out: void tcp_gro_complete(struct sk_buff *skb) { struct tcphdr *th = tcp_hdr(skb); + struct skb_shared_info *shinfo; + + if (skb->encapsulation) + skb->inner_transport_header = skb->transport_header; skb->csum_start = (unsigned char *)th - skb->head; skb->csum_offset = offsetof(struct tcphdr, check); skb->ip_summed = CHECKSUM_PARTIAL; - skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; + shinfo = skb_shinfo(skb); + shinfo->gso_segs = NAPI_GRO_CB(skb)->count; if (th->cwr) - skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; - - if (skb->encapsulation) - skb->inner_transport_header = skb->transport_header; + shinfo->gso_type |= SKB_GSO_TCP_ECN; } EXPORT_SYMBOL(tcp_gro_complete); @@ -335,10 +337,9 @@ INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff) th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr, iph->daddr, 0); - skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; - if (NAPI_GRO_CB(skb)->is_atomic) - skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID; + skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4 | + (NAPI_GRO_CB(skb)->is_atomic * SKB_GSO_TCP_FIXEDID); tcp_gro_complete(skb); return 0; diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index c54676998eb6..dae35101d189 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -58,7 +58,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) return -iph->protocol; #endif - __skb_push(skb, skb->data - skb_network_header(skb)); + __skb_push(skb, -skb_network_offset(skb)); iph->tot_len = htons(skb->len); ip_send_check(iph); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 953a95898e4a..f786b65d12e4 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -551,7 +551,8 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, goto out; if ((all || type == NETCONFA_FORWARDING) && - nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0) + nla_put_s32(skb, NETCONFA_FORWARDING, + READ_ONCE(devconf->forwarding)) < 0) goto nla_put_failure; #ifdef CONFIG_IPV6_MROUTE if ((all || type == NETCONFA_MC_FORWARDING) && @@ -560,12 +561,13 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, goto nla_put_failure; #endif if ((all || type == NETCONFA_PROXY_NEIGH) && - nla_put_s32(skb, NETCONFA_PROXY_NEIGH, devconf->proxy_ndp) < 0) + nla_put_s32(skb, NETCONFA_PROXY_NEIGH, + READ_ONCE(devconf->proxy_ndp)) < 0) goto nla_put_failure; if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) && nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, - devconf->ignore_routes_with_linkdown) < 0) + READ_ONCE(devconf->ignore_routes_with_linkdown)) < 0) goto nla_put_failure; out: @@ -725,17 +727,18 @@ static u32 inet6_base_seq(const struct net *net) return res; } - static int inet6_netconf_dump_devconf(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); - int h, s_h; - int idx, s_idx; + struct { + unsigned long ifindex; + unsigned int all_default; + } *ctx = (void *)cb->ctx; struct net_device *dev; struct inet6_dev *idev; - struct hlist_head *head; + int err = 0; if (cb->strict_check) { struct netlink_ext_ack *extack = cb->extack; @@ -752,64 +755,46 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb, } } - s_h = cb->args[0]; - s_idx = idx = cb->args[1]; - - for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { - idx = 0; - head = &net->dev_index_head[h]; - rcu_read_lock(); - cb->seq = inet6_base_seq(net); - hlist_for_each_entry_rcu(dev, head, index_hlist) { - if (idx < s_idx) - goto cont; - idev = __in6_dev_get(dev); - if (!idev) - goto cont; - - if (inet6_netconf_fill_devconf(skb, dev->ifindex, - &idev->cnf, - NETLINK_CB(cb->skb).portid, - nlh->nlmsg_seq, - RTM_NEWNETCONF, - NLM_F_MULTI, - NETCONFA_ALL) < 0) { - rcu_read_unlock(); - goto done; - } - nl_dump_check_consistent(cb, nlmsg_hdr(skb)); -cont: - idx++; - } - rcu_read_unlock(); + rcu_read_lock(); + for_each_netdev_dump(net, dev, ctx->ifindex) { + idev = __in6_dev_get(dev); + if (!idev) + continue; + err = inet6_netconf_fill_devconf(skb, dev->ifindex, + &idev->cnf, + NETLINK_CB(cb->skb).portid, + nlh->nlmsg_seq, + RTM_NEWNETCONF, + NLM_F_MULTI, + NETCONFA_ALL); + if (err < 0) + goto done; } - if (h == NETDEV_HASHENTRIES) { - if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, - net->ipv6.devconf_all, - NETLINK_CB(cb->skb).portid, - nlh->nlmsg_seq, - RTM_NEWNETCONF, NLM_F_MULTI, - NETCONFA_ALL) < 0) + if (ctx->all_default == 0) { + err = inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, + net->ipv6.devconf_all, + NETLINK_CB(cb->skb).portid, + nlh->nlmsg_seq, + RTM_NEWNETCONF, NLM_F_MULTI, + NETCONFA_ALL); + if (err < 0) goto done; - else - h++; - } - if (h == NETDEV_HASHENTRIES + 1) { - if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, - net->ipv6.devconf_dflt, - NETLINK_CB(cb->skb).portid, - nlh->nlmsg_seq, - RTM_NEWNETCONF, NLM_F_MULTI, - NETCONFA_ALL) < 0) + ctx->all_default++; + } + if (ctx->all_default == 1) { + err = inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, + net->ipv6.devconf_dflt, + NETLINK_CB(cb->skb).portid, + nlh->nlmsg_seq, + RTM_NEWNETCONF, NLM_F_MULTI, + NETCONFA_ALL); + if (err < 0) goto done; - else - h++; + ctx->all_default++; } done: - cb->args[0] = h; - cb->args[1] = idx; - - return skb->len; + rcu_read_unlock(); + return err; } #ifdef CONFIG_SYSCTL @@ -869,7 +854,8 @@ static void addrconf_forward_change(struct net *net, __s32 newf) idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.forwarding) ^ (!newf); - idev->cnf.forwarding = newf; + + WRITE_ONCE(idev->cnf.forwarding, newf); if (changed) dev_forward_change(idev); } @@ -886,7 +872,7 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) net = (struct net *)table->extra2; old = *p; - *p = newf; + WRITE_ONCE(*p, newf); if (p == &net->ipv6.devconf_dflt->forwarding) { if ((!newf) ^ (!old)) @@ -901,7 +887,7 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) if (p == &net->ipv6.devconf_all->forwarding) { int old_dflt = net->ipv6.devconf_dflt->forwarding; - net->ipv6.devconf_dflt->forwarding = newf; + WRITE_ONCE(net->ipv6.devconf_dflt->forwarding, newf); if ((!newf) ^ (!old_dflt)) inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_FORWARDING, @@ -933,7 +919,7 @@ static void addrconf_linkdown_change(struct net *net, __s32 newf) if (idev) { int changed = (!idev->cnf.ignore_routes_with_linkdown) ^ (!newf); - idev->cnf.ignore_routes_with_linkdown = newf; + WRITE_ONCE(idev->cnf.ignore_routes_with_linkdown, newf); if (changed) inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, @@ -954,7 +940,7 @@ static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf) net = (struct net *)table->extra2; old = *p; - *p = newf; + WRITE_ONCE(*p, newf); if (p == &net->ipv6.devconf_dflt->ignore_routes_with_linkdown) { if ((!newf) ^ (!old)) @@ -968,7 +954,7 @@ static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf) } if (p == &net->ipv6.devconf_all->ignore_routes_with_linkdown) { - net->ipv6.devconf_dflt->ignore_routes_with_linkdown = newf; + WRITE_ONCE(net->ipv6.devconf_dflt->ignore_routes_with_linkdown, newf); addrconf_linkdown_change(net, newf); if ((!newf) ^ (!old)) inet6_netconf_notify_devconf(net, @@ -1356,11 +1342,12 @@ out: in6_ifa_put(ifp); } -static unsigned long ipv6_get_regen_advance(struct inet6_dev *idev) +static unsigned long ipv6_get_regen_advance(const struct inet6_dev *idev) { - return idev->cnf.regen_min_advance + idev->cnf.regen_max_retry * - idev->cnf.dad_transmits * - max(NEIGH_VAR(idev->nd_parms, RETRANS_TIME), HZ/100) / HZ; + return READ_ONCE(idev->cnf.regen_min_advance) + + READ_ONCE(idev->cnf.regen_max_retry) * + READ_ONCE(idev->cnf.dad_transmits) * + max(NEIGH_VAR(idev->nd_parms, RETRANS_TIME), HZ/100) / HZ; } static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, bool block) @@ -1381,7 +1368,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, bool block) retry: in6_dev_hold(idev); - if (idev->cnf.use_tempaddr <= 0) { + if (READ_ONCE(idev->cnf.use_tempaddr) <= 0) { write_unlock_bh(&idev->lock); pr_info("%s: use_tempaddr is disabled\n", __func__); in6_dev_put(idev); @@ -1389,8 +1376,8 @@ retry: goto out; } spin_lock_bh(&ifp->lock); - if (ifp->regen_count++ >= idev->cnf.regen_max_retry) { - idev->cnf.use_tempaddr = -1; /*XXX*/ + if (ifp->regen_count++ >= READ_ONCE(idev->cnf.regen_max_retry)) { + WRITE_ONCE(idev->cnf.use_tempaddr, -1); /*XXX*/ spin_unlock_bh(&ifp->lock); write_unlock_bh(&idev->lock); pr_warn("%s: regeneration time exceeded - disabled temporary address support\n", @@ -1412,7 +1399,7 @@ retry: */ cnf_temp_preferred_lft = READ_ONCE(idev->cnf.temp_prefered_lft); max_desync_factor = min_t(long, - idev->cnf.max_desync_factor, + READ_ONCE(idev->cnf.max_desync_factor), cnf_temp_preferred_lft - regen_advance); if (unlikely(idev->desync_factor > max_desync_factor)) { @@ -1429,7 +1416,7 @@ retry: memset(&cfg, 0, sizeof(cfg)); cfg.valid_lft = min_t(__u32, ifp->valid_lft, - idev->cnf.temp_valid_lft + age); + READ_ONCE(idev->cnf.temp_valid_lft) + age); cfg.preferred_lft = cnf_temp_preferred_lft + age - idev->desync_factor; cfg.preferred_lft = min_t(__u32, if_public_preferred_lft, cfg.preferred_lft); cfg.preferred_lft = min_t(__u32, cfg.valid_lft, cfg.preferred_lft); @@ -1553,15 +1540,17 @@ static inline int ipv6_saddr_preferred(int type) return 0; } -static bool ipv6_use_optimistic_addr(struct net *net, - struct inet6_dev *idev) +static bool ipv6_use_optimistic_addr(const struct net *net, + const struct inet6_dev *idev) { #ifdef CONFIG_IPV6_OPTIMISTIC_DAD if (!idev) return false; - if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad) + if (!READ_ONCE(net->ipv6.devconf_all->optimistic_dad) && + !READ_ONCE(idev->cnf.optimistic_dad)) return false; - if (!net->ipv6.devconf_all->use_optimistic && !idev->cnf.use_optimistic) + if (!READ_ONCE(net->ipv6.devconf_all->use_optimistic) && + !READ_ONCE(idev->cnf.use_optimistic)) return false; return true; @@ -1570,13 +1559,14 @@ static bool ipv6_use_optimistic_addr(struct net *net, #endif } -static bool ipv6_allow_optimistic_dad(struct net *net, - struct inet6_dev *idev) +static bool ipv6_allow_optimistic_dad(const struct net *net, + const struct inet6_dev *idev) { #ifdef CONFIG_IPV6_OPTIMISTIC_DAD if (!idev) return false; - if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad) + if (!READ_ONCE(net->ipv6.devconf_all->optimistic_dad) && + !READ_ONCE(idev->cnf.optimistic_dad)) return false; return true; @@ -1682,7 +1672,7 @@ static int ipv6_get_saddr_eval(struct net *net, */ int preftmp = dst->prefs & (IPV6_PREFER_SRC_PUBLIC|IPV6_PREFER_SRC_TMP) ? !!(dst->prefs & IPV6_PREFER_SRC_TMP) : - score->ifa->idev->cnf.use_tempaddr >= 2; + READ_ONCE(score->ifa->idev->cnf.use_tempaddr) >= 2; ret = (!(score->ifa->flags & IFA_F_TEMPORARY)) ^ preftmp; break; } @@ -1858,7 +1848,7 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, idev = __in6_dev_get(dst_dev); if ((dst_type & IPV6_ADDR_MULTICAST) || dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL || - (idev && idev->cnf.use_oif_addrs_only)) { + (idev && READ_ONCE(idev->cnf.use_oif_addrs_only))) { use_oif_addr = true; } } @@ -2165,6 +2155,7 @@ void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; struct net *net = dev_net(idev->dev); + int max_addresses; if (addrconf_dad_end(ifp)) { in6_ifa_put(ifp); @@ -2202,9 +2193,9 @@ void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp) spin_unlock_bh(&ifp->lock); - if (idev->cnf.max_addresses && - ipv6_count_addresses(idev) >= - idev->cnf.max_addresses) + max_addresses = READ_ONCE(idev->cnf.max_addresses); + if (max_addresses && + ipv6_count_addresses(idev) >= max_addresses) goto lock_errdad; net_info_ratelimited("%s: generating new stable privacy address because of DAD conflict\n", @@ -2601,11 +2592,11 @@ static void manage_tempaddrs(struct inet6_dev *idev, * (TEMP_PREFERRED_LIFETIME - DESYNC_FACTOR), respectively. */ age = (now - ift->cstamp) / HZ; - max_valid = idev->cnf.temp_valid_lft - age; + max_valid = READ_ONCE(idev->cnf.temp_valid_lft) - age; if (max_valid < 0) max_valid = 0; - max_prefered = idev->cnf.temp_prefered_lft - + max_prefered = READ_ONCE(idev->cnf.temp_prefered_lft) - idev->desync_factor - age; if (max_prefered < 0) max_prefered = 0; @@ -2638,7 +2629,7 @@ static void manage_tempaddrs(struct inet6_dev *idev, if (list_empty(&idev->tempaddr_list) && (valid_lft || prefered_lft)) create = true; - if (create && idev->cnf.use_tempaddr > 0) { + if (create && READ_ONCE(idev->cnf.use_tempaddr) > 0) { /* When a new public address is created as described * in [ADDRCONF], also create a new temporary address. */ @@ -2666,7 +2657,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, int create = 0, update_lft = 0; if (!ifp && valid_lft) { - int max_addresses = in6_dev->cnf.max_addresses; + int max_addresses = READ_ONCE(in6_dev->cnf.max_addresses); struct ifa6_config cfg = { .pfx = addr, .plen = pinfo->prefix_len, @@ -2678,8 +2669,8 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, }; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD - if ((net->ipv6.devconf_all->optimistic_dad || - in6_dev->cnf.optimistic_dad) && + if ((READ_ONCE(net->ipv6.devconf_all->optimistic_dad) || + READ_ONCE(in6_dev->cnf.optimistic_dad)) && !net->ipv6.devconf_all->forwarding && sllao) cfg.ifa_flags |= IFA_F_OPTIMISTIC; #endif @@ -2728,7 +2719,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, */ update_lft = !create && stored_lft; - if (update_lft && !in6_dev->cnf.ra_honor_pio_life) { + if (update_lft && !READ_ONCE(in6_dev->cnf.ra_honor_pio_life)) { const u32 minimum_lft = min_t(u32, stored_lft, MIN_VALID_LIFETIME); valid_lft = max(valid_lft, minimum_lft); @@ -3312,8 +3303,8 @@ void addrconf_add_linklocal(struct inet6_dev *idev, struct inet6_ifaddr *ifp; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD - if ((dev_net(idev->dev)->ipv6.devconf_all->optimistic_dad || - idev->cnf.optimistic_dad) && + if ((READ_ONCE(dev_net(idev->dev)->ipv6.devconf_all->optimistic_dad) || + READ_ONCE(idev->cnf.optimistic_dad)) && !dev_net(idev->dev)->ipv6.devconf_all->forwarding) cfg.ifa_flags |= IFA_F_OPTIMISTIC; #endif @@ -3671,7 +3662,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (idev) { rt6_mtu_change(dev, dev->mtu); - idev->cnf.mtu6 = dev->mtu; + WRITE_ONCE(idev->cnf.mtu6, dev->mtu); break; } @@ -3763,7 +3754,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (idev->cnf.mtu6 != dev->mtu && dev->mtu >= IPV6_MIN_MTU) { rt6_mtu_change(dev, dev->mtu); - idev->cnf.mtu6 = dev->mtu; + WRITE_ONCE(idev->cnf.mtu6, dev->mtu); } WRITE_ONCE(idev->tstamp, jiffies); inet6_ifinfo_notify(RTM_NEWLINK, idev); @@ -3885,10 +3876,10 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister) */ if (!unregister && !idev->cnf.disable_ipv6) { /* aggregate the system setting and interface setting */ - int _keep_addr = net->ipv6.devconf_all->keep_addr_on_down; + int _keep_addr = READ_ONCE(net->ipv6.devconf_all->keep_addr_on_down); if (!_keep_addr) - _keep_addr = idev->cnf.keep_addr_on_down; + _keep_addr = READ_ONCE(idev->cnf.keep_addr_on_down); keep_addr = (_keep_addr > 0); } @@ -4025,6 +4016,7 @@ static void addrconf_rs_timer(struct timer_list *t) struct inet6_dev *idev = from_timer(idev, t, rs_timer); struct net_device *dev = idev->dev; struct in6_addr lladdr; + int rtr_solicits; write_lock(&idev->lock); if (idev->dead || !(idev->if_flags & IF_READY)) @@ -4037,7 +4029,9 @@ static void addrconf_rs_timer(struct timer_list *t) if (idev->if_flags & IF_RA_RCVD) goto out; - if (idev->rs_probes++ < idev->cnf.rtr_solicits || idev->cnf.rtr_solicits < 0) { + rtr_solicits = READ_ONCE(idev->cnf.rtr_solicits); + + if (idev->rs_probes++ < rtr_solicits || rtr_solicits < 0) { write_unlock(&idev->lock); if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE)) ndisc_send_rs(dev, &lladdr, @@ -4047,11 +4041,12 @@ static void addrconf_rs_timer(struct timer_list *t) write_lock(&idev->lock); idev->rs_interval = rfc3315_s14_backoff_update( - idev->rs_interval, idev->cnf.rtr_solicit_max_interval); + idev->rs_interval, + READ_ONCE(idev->cnf.rtr_solicit_max_interval)); /* The wait after the last probe can be shorter */ addrconf_mod_rs_timer(idev, (idev->rs_probes == - idev->cnf.rtr_solicits) ? - idev->cnf.rtr_solicit_delay : + READ_ONCE(idev->cnf.rtr_solicits)) ? + READ_ONCE(idev->cnf.rtr_solicit_delay) : idev->rs_interval); } else { /* @@ -4072,24 +4067,25 @@ put: */ static void addrconf_dad_kick(struct inet6_ifaddr *ifp) { - unsigned long rand_num; struct inet6_dev *idev = ifp->idev; + unsigned long rand_num; u64 nonce; if (ifp->flags & IFA_F_OPTIMISTIC) rand_num = 0; else - rand_num = get_random_u32_below(idev->cnf.rtr_solicit_delay ? : 1); + rand_num = get_random_u32_below( + READ_ONCE(idev->cnf.rtr_solicit_delay) ? : 1); nonce = 0; - if (idev->cnf.enhanced_dad || - dev_net(idev->dev)->ipv6.devconf_all->enhanced_dad) { + if (READ_ONCE(idev->cnf.enhanced_dad) || + READ_ONCE(dev_net(idev->dev)->ipv6.devconf_all->enhanced_dad)) { do get_random_bytes(&nonce, 6); while (nonce == 0); } ifp->dad_nonce = nonce; - ifp->dad_probes = idev->cnf.dad_transmits; + ifp->dad_probes = READ_ONCE(idev->cnf.dad_transmits); addrconf_mod_dad_work(ifp, rand_num); } @@ -4109,8 +4105,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) net = dev_net(dev); if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || - (net->ipv6.devconf_all->accept_dad < 1 && - idev->cnf.accept_dad < 1) || + (READ_ONCE(net->ipv6.devconf_all->accept_dad) < 1 && + READ_ONCE(idev->cnf.accept_dad) < 1) || !(ifp->flags&IFA_F_TENTATIVE) || ifp->flags & IFA_F_NODAD) { bool send_na = false; @@ -4202,8 +4198,8 @@ static void addrconf_dad_work(struct work_struct *w) action = DAD_ABORT; ifp->state = INET6_IFADDR_STATE_POSTDAD; - if ((dev_net(idev->dev)->ipv6.devconf_all->accept_dad > 1 || - idev->cnf.accept_dad > 1) && + if ((READ_ONCE(dev_net(idev->dev)->ipv6.devconf_all->accept_dad) > 1 || + READ_ONCE(idev->cnf.accept_dad) > 1) && !idev->cnf.disable_ipv6 && !(ifp->flags & IFA_F_STABLE_PRIVACY)) { struct in6_addr addr; @@ -4214,7 +4210,7 @@ static void addrconf_dad_work(struct work_struct *w) if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) && ipv6_addr_equal(&ifp->addr, &addr)) { /* DAD failed for link-local based on MAC */ - idev->cnf.disable_ipv6 = 1; + WRITE_ONCE(idev->cnf.disable_ipv6, 1); pr_info("%s: IPv6 being disabled!\n", ifp->idev->dev->name); @@ -4328,7 +4324,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, send_mld = ifp->scope == IFA_LINK && ipv6_lonely_lladdr(ifp); send_rs = send_mld && ipv6_accept_ra(ifp->idev) && - ifp->idev->cnf.rtr_solicits != 0 && + READ_ONCE(ifp->idev->cnf.rtr_solicits) != 0 && (dev->flags & IFF_LOOPBACK) == 0 && (dev->type != ARPHRD_TUNNEL) && !netif_is_team_port(dev); @@ -4342,8 +4338,8 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, /* send unsolicited NA if enabled */ if (send_na && - (ifp->idev->cnf.ndisc_notify || - dev_net(dev)->ipv6.devconf_all->ndisc_notify)) { + (READ_ONCE(ifp->idev->cnf.ndisc_notify) || + READ_ONCE(dev_net(dev)->ipv6.devconf_all->ndisc_notify))) { ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifp->addr, /*router=*/ !!ifp->idev->cnf.forwarding, /*solicited=*/ false, /*override=*/ true, @@ -4363,7 +4359,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, write_lock_bh(&ifp->idev->lock); spin_lock(&ifp->lock); ifp->idev->rs_interval = rfc3315_s14_backoff_init( - ifp->idev->cnf.rtr_solicit_interval); + READ_ONCE(ifp->idev->cnf.rtr_solicit_interval)); ifp->idev->rs_probes = 1; ifp->idev->if_flags |= IF_RS_SENT; addrconf_mod_rs_timer(ifp->idev, ifp->idev->rs_interval); @@ -5567,9 +5563,10 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, } addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer); - if (!addr) - return -EINVAL; - + if (!addr) { + err = -EINVAL; + goto errout; + } ifm = nlmsg_data(nlh); if (ifm->ifa_index) dev = dev_get_by_index(tgt_net, ifm->ifa_index); @@ -5911,7 +5908,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token, return -EINVAL; } - if (idev->cnf.rtr_solicits == 0) { + if (READ_ONCE(idev->cnf.rtr_solicits) == 0) { NL_SET_ERR_MSG(extack, "Router solicitation is disabled on device"); return -EINVAL; @@ -5944,7 +5941,7 @@ update_lft: if (update_rs) { idev->if_flags |= IF_RS_SENT; idev->rs_interval = rfc3315_s14_backoff_init( - idev->cnf.rtr_solicit_interval); + READ_ONCE(idev->cnf.rtr_solicit_interval)); idev->rs_probes = 1; addrconf_mod_rs_timer(idev, idev->rs_interval); } @@ -6159,11 +6156,8 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI); - if (err < 0) { - if (likely(skb->len)) - err = skb->len; + if (err < 0) break; - } } rcu_read_unlock(); @@ -6388,7 +6382,8 @@ static void addrconf_disable_change(struct net *net, __s32 newf) idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.disable_ipv6) ^ (!newf); - idev->cnf.disable_ipv6 = newf; + + WRITE_ONCE(idev->cnf.disable_ipv6, newf); if (changed) dev_disable_change(idev); } @@ -6397,23 +6392,22 @@ static void addrconf_disable_change(struct net *net, __s32 newf) static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf) { - struct net *net; + struct net *net = (struct net *)table->extra2; int old; + if (p == &net->ipv6.devconf_dflt->disable_ipv6) { + WRITE_ONCE(*p, newf); + return 0; + } + if (!rtnl_trylock()) return restart_syscall(); - net = (struct net *)table->extra2; old = *p; - *p = newf; - - if (p == &net->ipv6.devconf_dflt->disable_ipv6) { - rtnl_unlock(); - return 0; - } + WRITE_ONCE(*p, newf); if (p == &net->ipv6.devconf_all->disable_ipv6) { - net->ipv6.devconf_dflt->disable_ipv6 = newf; + WRITE_ONCE(net->ipv6.devconf_dflt->disable_ipv6, newf); addrconf_disable_change(net, newf); } else if ((!newf) ^ (!old)) dev_disable_change((struct inet6_dev *)table->extra1); @@ -6530,7 +6524,7 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, } else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) { struct net_device *dev; - net->ipv6.devconf_dflt->addr_gen_mode = new_val; + WRITE_ONCE(net->ipv6.devconf_dflt->addr_gen_mode, new_val); for_each_netdev(net, dev) { idev = __in6_dev_get(dev); if (idev && @@ -6542,7 +6536,7 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, } } - *((u32 *)ctl->data) = new_val; + WRITE_ONCE(*((u32 *)ctl->data), new_val); } out: @@ -6689,20 +6683,19 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val) static int addrconf_disable_policy(struct ctl_table *ctl, int *valp, int val) { + struct net *net = (struct net *)ctl->extra2; struct inet6_dev *idev; - struct net *net; - if (!rtnl_trylock()) - return restart_syscall(); - - *valp = val; - - net = (struct net *)ctl->extra2; if (valp == &net->ipv6.devconf_dflt->disable_policy) { - rtnl_unlock(); + WRITE_ONCE(*valp, val); return 0; } + if (!rtnl_trylock()) + return restart_syscall(); + + WRITE_ONCE(*valp, val); + if (valp == &net->ipv6.devconf_all->disable_policy) { struct net_device *dev; @@ -7490,7 +7483,8 @@ int __init addrconf_init(void) err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf, inet6_netconf_dump_devconf, - RTNL_FLAG_DOIT_UNLOCKED); + RTNL_FLAG_DOIT_UNLOCKED | + RTNL_FLAG_DUMP_UNLOCKED); if (err < 0) goto errout; err = ipv6_addr_label_rtnl_register(); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 727792907d6c..25a5b394481b 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -379,9 +379,8 @@ static int ipv6_srh_rcv(struct sk_buff *skb) idev = __in6_dev_get(skb->dev); - accept_seg6 = net->ipv6.devconf_all->seg6_enabled; - if (accept_seg6 > idev->cnf.seg6_enabled) - accept_seg6 = idev->cnf.seg6_enabled; + accept_seg6 = min(READ_ONCE(net->ipv6.devconf_all->seg6_enabled), + READ_ONCE(idev->cnf.seg6_enabled)); if (!accept_seg6) { kfree_skb(skb); @@ -655,10 +654,13 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) struct ipv6_rt_hdr *hdr; struct rt0_hdr *rthdr; struct net *net = dev_net(skb->dev); - int accept_source_route = net->ipv6.devconf_all->accept_source_route; + int accept_source_route; - if (idev && accept_source_route > idev->cnf.accept_source_route) - accept_source_route = idev->cnf.accept_source_route; + accept_source_route = READ_ONCE(net->ipv6.devconf_all->accept_source_route); + + if (idev) + accept_source_route = min(accept_source_route, + READ_ONCE(idev->cnf.accept_source_route)); if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + @@ -802,7 +804,7 @@ looped_back: ip6_route_input(skb); if (skb_dst(skb)->error) { - skb_push(skb, skb->data - skb_network_header(skb)); + skb_push(skb, -skb_network_offset(skb)); dst_input(skb); return -1; } @@ -819,7 +821,7 @@ looped_back: goto looped_back; } - skb_push(skb, skb->data - skb_network_header(skb)); + skb_push(skb, -skb_network_offset(skb)); dst_input(skb); return -1; @@ -919,7 +921,7 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) goto drop; /* Ignore if IOAM is not enabled on ingress */ - if (!__in6_dev_get(skb->dev)->cnf.ioam6_enabled) + if (!READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_enabled)) goto ignore; /* Truncated Option header */ diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c index 5fa923f06632..08c929513065 100644 --- a/net/ipv6/ioam6.c +++ b/net/ipv6/ioam6.c @@ -727,7 +727,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, if (!skb->dev) raw16 = IOAM6_U16_UNAVAILABLE; else - raw16 = (__force u16)__in6_dev_get(skb->dev)->cnf.ioam6_id; + raw16 = (__force u16)READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_id); *(__be16 *)data = cpu_to_be16(raw16); data += sizeof(__be16); @@ -735,7 +735,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) raw16 = IOAM6_U16_UNAVAILABLE; else - raw16 = (__force u16)__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id; + raw16 = (__force u16)READ_ONCE(__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id); *(__be16 *)data = cpu_to_be16(raw16); data += sizeof(__be16); @@ -822,7 +822,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, if (!skb->dev) raw32 = IOAM6_U32_UNAVAILABLE; else - raw32 = __in6_dev_get(skb->dev)->cnf.ioam6_id_wide; + raw32 = READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_id_wide); *(__be32 *)data = cpu_to_be32(raw32); data += sizeof(__be32); @@ -830,7 +830,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) raw32 = IOAM6_U32_UNAVAILABLE; else - raw32 = __in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide; + raw32 = READ_ONCE(__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide); *(__be32 *)data = cpu_to_be32(raw32); data += sizeof(__be32); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index b8378814532c..133610a49da6 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -168,9 +168,9 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, SKB_DR_SET(reason, NOT_SPECIFIED); if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL || - !idev || unlikely(idev->cnf.disable_ipv6)) { + !idev || unlikely(READ_ONCE(idev->cnf.disable_ipv6))) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); - if (idev && unlikely(idev->cnf.disable_ipv6)) + if (idev && unlikely(READ_ONCE(idev->cnf.disable_ipv6))) SKB_DR_SET(reason, IPV6DISABLED); goto drop; } @@ -236,7 +236,7 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, if (!ipv6_addr_is_multicast(&hdr->daddr) && (skb->pkt_type == PACKET_BROADCAST || skb->pkt_type == PACKET_MULTICAST) && - idev->cnf.drop_unicast_in_l2_multicast) { + READ_ONCE(idev->cnf.drop_unicast_in_l2_multicast)) { SKB_DR_SET(reason, UNICAST_IN_L2_MULTICAST); goto err; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 31b86fe661aa..02eeca5492cd 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -234,7 +234,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; - if (unlikely(idev->cnf.disable_ipv6)) { + if (unlikely(READ_ONCE(idev->cnf.disable_ipv6))) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED); return 0; @@ -501,7 +501,7 @@ int ip6_forward(struct sk_buff *skb) u32 mtu; idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); - if (net->ipv6.devconf_all->forwarding == 0) + if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0) goto error; if (skb->pkt_type != PACKET_HOST) @@ -513,8 +513,8 @@ int ip6_forward(struct sk_buff *skb) if (skb_warn_if_lro(skb)) goto drop; - if (!net->ipv6.devconf_all->disable_policy && - (!idev || !idev->cnf.disable_policy) && + if (!READ_ONCE(net->ipv6.devconf_all->disable_policy) && + (!idev || !READ_ONCE(idev->cnf.disable_policy)) && !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); goto drop; @@ -552,7 +552,7 @@ int ip6_forward(struct sk_buff *skb) } /* XXX: idev->cnf.proxy_ndp? */ - if (net->ipv6.devconf_all->proxy_ndp && + if (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) && pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { int proxied = ip6_forward_proxy_check(skb); if (proxied > 0) { @@ -1925,7 +1925,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, skb->priority = READ_ONCE(sk->sk_priority); skb->mark = cork->base.mark; skb->tstamp = cork->base.transmit_time; - + skb->mono_delivery_time = !!skb->tstamp; ip6_cork_steal_dst(skb, cork); IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); if (proto == IPPROTO_ICMPV6) { diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 5fd07581efaf..e9cc315832cb 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -247,7 +247,6 @@ static void ip6_dev_free(struct net_device *dev) gro_cells_destroy(&t->gro_cells); dst_cache_destroy(&t->dst_cache); - free_percpu(dev->tstats); } static int ip6_tnl_create2(struct net_device *dev) @@ -1848,6 +1847,7 @@ static void ip6_tnl_dev_setup(struct net_device *dev) dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); dev->features |= NETIF_F_LLTX; + dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; netif_keep_dst(dev); dev->features |= IPXIPX_FEATURES; @@ -1873,13 +1873,10 @@ ip6_tnl_dev_init_gen(struct net_device *dev) t->dev = dev; t->net = dev_net(dev); - dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!dev->tstats) - return -ENOMEM; ret = dst_cache_init(&t->dst_cache, GFP_KERNEL); if (ret) - goto free_stats; + return ret; ret = gro_cells_init(&t->gro_cells, dev); if (ret) @@ -1903,9 +1900,6 @@ ip6_tnl_dev_init_gen(struct net_device *dev) destroy_dst: dst_cache_destroy(&t->dst_cache); -free_stats: - free_percpu(dev->tstats); - dev->tstats = NULL; return ret; } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 56c3c467f9de..d4c28ec1bc51 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -948,6 +948,8 @@ done: if (optlen < sizeof(int)) goto e_inval; retv = ip6_ra_control(sk, val); + if (retv == 0) + inet6_assign_bit(RTALERT, sk, valbool); break; case IPV6_FLOWLABEL_MGR: retv = ipv6_flowlabel_opt(sk, optval, optlen); @@ -1346,7 +1348,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, } if (val < 0) - val = sock_net(sk)->ipv6.devconf_all->hop_limit; + val = READ_ONCE(sock_net(sk)->ipv6.devconf_all->hop_limit); break; } @@ -1445,6 +1447,10 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->rxopt.bits.recvfragsize; break; + case IPV6_ROUTER_ALERT: + val = inet6_test_bit(RTALERT, sk); + break; + case IPV6_ROUTER_ALERT_ISOLATE: val = inet6_test_bit(RTALERT_ISOLATE, sk); break; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 76ee1615ff2a..7ba01d8cfbae 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -159,9 +159,9 @@ static int unsolicited_report_interval(struct inet6_dev *idev) int iv; if (mld_in_v1_mode(idev)) - iv = idev->cnf.mldv1_unsolicited_report_interval; + iv = READ_ONCE(idev->cnf.mldv1_unsolicited_report_interval); else - iv = idev->cnf.mldv2_unsolicited_report_interval; + iv = READ_ONCE(idev->cnf.mldv2_unsolicited_report_interval); return iv > 0 ? iv : 1; } @@ -1202,15 +1202,15 @@ static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs, static int mld_force_mld_version(const struct inet6_dev *idev) { + const struct net *net = dev_net(idev->dev); + int all_force; + + all_force = READ_ONCE(net->ipv6.devconf_all->force_mld_version); /* Normally, both are 0 here. If enforcement to a particular is * being used, individual device enforcement will have a lower * precedence over 'all' device (.../conf/all/force_mld_version). */ - - if (dev_net(idev->dev)->ipv6.devconf_all->force_mld_version != 0) - return dev_net(idev->dev)->ipv6.devconf_all->force_mld_version; - else - return idev->cnf.force_mld_version; + return all_force ?: READ_ONCE(idev->cnf.force_mld_version); } static bool mld_in_v2_mode_only(const struct inet6_dev *idev) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 8523f0595b01..ae134634c323 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -451,7 +451,7 @@ static void ip6_nd_hdr(struct sk_buff *skb, rcu_read_lock(); idev = __in6_dev_get(skb->dev); - tclass = idev ? idev->cnf.ndisc_tclass : 0; + tclass = idev ? READ_ONCE(idev->cnf.ndisc_tclass) : 0; rcu_read_unlock(); skb_push(skb, sizeof(*hdr)); @@ -535,7 +535,7 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr, src_addr = solicited_addr; if (ifp->flags & IFA_F_OPTIMISTIC) override = false; - inc_opt |= ifp->idev->cnf.force_tllao; + inc_opt |= READ_ONCE(ifp->idev->cnf.force_tllao); in6_ifa_put(ifp); } else { if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr, @@ -903,8 +903,9 @@ have_ifp: } if (ipv6_chk_acast_addr(net, dev, &msg->target) || - (idev->cnf.forwarding && - (net->ipv6.devconf_all->proxy_ndp || idev->cnf.proxy_ndp) && + (READ_ONCE(idev->cnf.forwarding) && + (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) || + READ_ONCE(idev->cnf.proxy_ndp)) && (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && @@ -929,7 +930,7 @@ have_ifp: } if (is_router < 0) - is_router = idev->cnf.forwarding; + is_router = READ_ONCE(idev->cnf.forwarding); if (dad) { ndisc_send_na(dev, &in6addr_linklocal_allnodes, &msg->target, @@ -973,7 +974,7 @@ static int accept_untracked_na(struct net_device *dev, struct in6_addr *saddr) { struct inet6_dev *idev = __in6_dev_get(dev); - switch (idev->cnf.accept_untracked_na) { + switch (READ_ONCE(idev->cnf.accept_untracked_na)) { case 0: /* Don't accept untracked na (absent in neighbor cache) */ return 0; case 1: /* Create new entries from na if currently untracked */ @@ -1024,7 +1025,7 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) * drop_unsolicited_na takes precedence over accept_untracked_na */ if (!msg->icmph.icmp6_solicited && idev && - idev->cnf.drop_unsolicited_na) + READ_ONCE(idev->cnf.drop_unsolicited_na)) return reason; if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) @@ -1080,7 +1081,7 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) * Note that we don't do a (daddr == all-routers-mcast) check. */ new_state = msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE; - if (!neigh && lladdr && idev && idev->cnf.forwarding) { + if (!neigh && lladdr && idev && READ_ONCE(idev->cnf.forwarding)) { if (accept_untracked_na(dev, saddr)) { neigh = neigh_create(&nd_tbl, &msg->target, dev); new_state = NUD_STALE; @@ -1100,7 +1101,8 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) * has already sent a NA to us. */ if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && - net->ipv6.devconf_all->forwarding && net->ipv6.devconf_all->proxy_ndp && + READ_ONCE(net->ipv6.devconf_all->forwarding) && + READ_ONCE(net->ipv6.devconf_all->proxy_ndp) && pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) { /* XXX: idev->cnf.proxy_ndp */ goto out; @@ -1148,7 +1150,7 @@ static enum skb_drop_reason ndisc_recv_rs(struct sk_buff *skb) } /* Don't accept RS if we're not in router mode */ - if (!idev->cnf.forwarding) + if (!READ_ONCE(idev->cnf.forwarding)) goto out; /* @@ -1318,7 +1320,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) if (old_if_flags != in6_dev->if_flags) send_ifinfo_notify = true; - if (!in6_dev->cnf.accept_ra_defrtr) { + if (!READ_ONCE(in6_dev->cnf.accept_ra_defrtr)) { ND_PRINTK(2, info, "RA: %s, defrtr is false for dev: %s\n", __func__, skb->dev->name); @@ -1326,7 +1328,8 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) } lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime); - if (lifetime != 0 && lifetime < in6_dev->cnf.accept_ra_min_lft) { + if (lifetime != 0 && + lifetime < READ_ONCE(in6_dev->cnf.accept_ra_min_lft)) { ND_PRINTK(2, info, "RA: router lifetime (%ds) is too short: %s\n", lifetime, skb->dev->name); @@ -1337,7 +1340,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) * accept_ra_from_local is set to true. */ net = dev_net(in6_dev->dev); - if (!in6_dev->cnf.accept_ra_from_local && + if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) && ipv6_chk_addr(net, &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) { ND_PRINTK(2, info, "RA from local address detected on dev: %s: default router ignored\n", @@ -1349,7 +1352,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) pref = ra_msg->icmph.icmp6_router_pref; /* 10b is handled as if it were 00b (medium) */ if (pref == ICMPV6_ROUTER_PREF_INVALID || - !in6_dev->cnf.accept_ra_rtr_pref) + !READ_ONCE(in6_dev->cnf.accept_ra_rtr_pref)) pref = ICMPV6_ROUTER_PREF_MEDIUM; #endif /* routes added from RAs do not use nexthop objects */ @@ -1420,10 +1423,12 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) spin_unlock_bh(&table->tb6_lock); } - if (in6_dev->cnf.accept_ra_min_hop_limit < 256 && + if (READ_ONCE(in6_dev->cnf.accept_ra_min_hop_limit) < 256 && ra_msg->icmph.icmp6_hop_limit) { - if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) { - in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; + if (READ_ONCE(in6_dev->cnf.accept_ra_min_hop_limit) <= + ra_msg->icmph.icmp6_hop_limit) { + WRITE_ONCE(in6_dev->cnf.hop_limit, + ra_msg->icmph.icmp6_hop_limit); fib6_metric_set(rt, RTAX_HOPLIMIT, ra_msg->icmph.icmp6_hop_limit); } else { @@ -1505,7 +1510,7 @@ skip_linkparms: } #ifdef CONFIG_IPV6_ROUTE_INFO - if (!in6_dev->cnf.accept_ra_from_local && + if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) && ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) { ND_PRINTK(2, info, @@ -1514,7 +1519,7 @@ skip_linkparms: goto skip_routeinfo; } - if (in6_dev->cnf.accept_ra_rtr_pref && ndopts.nd_opts_ri) { + if (READ_ONCE(in6_dev->cnf.accept_ra_rtr_pref) && ndopts.nd_opts_ri) { struct nd_opt_hdr *p; for (p = ndopts.nd_opts_ri; p; @@ -1526,14 +1531,14 @@ skip_linkparms: continue; #endif if (ri->prefix_len == 0 && - !in6_dev->cnf.accept_ra_defrtr) + !READ_ONCE(in6_dev->cnf.accept_ra_defrtr)) continue; if (ri->lifetime != 0 && - ntohl(ri->lifetime) < in6_dev->cnf.accept_ra_min_lft) + ntohl(ri->lifetime) < READ_ONCE(in6_dev->cnf.accept_ra_min_lft)) continue; - if (ri->prefix_len < in6_dev->cnf.accept_ra_rt_info_min_plen) + if (ri->prefix_len < READ_ONCE(in6_dev->cnf.accept_ra_rt_info_min_plen)) continue; - if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen) + if (ri->prefix_len > READ_ONCE(in6_dev->cnf.accept_ra_rt_info_max_plen)) continue; rt6_route_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3, &ipv6_hdr(skb)->saddr); @@ -1553,7 +1558,7 @@ skip_routeinfo: } #endif - if (in6_dev->cnf.accept_ra_pinfo && ndopts.nd_opts_pi) { + if (READ_ONCE(in6_dev->cnf.accept_ra_pinfo) && ndopts.nd_opts_pi) { struct nd_opt_hdr *p; for (p = ndopts.nd_opts_pi; p; @@ -1564,7 +1569,7 @@ skip_routeinfo: } } - if (ndopts.nd_opts_mtu && in6_dev->cnf.accept_ra_mtu) { + if (ndopts.nd_opts_mtu && READ_ONCE(in6_dev->cnf.accept_ra_mtu)) { __be32 n; u32 mtu; @@ -1578,8 +1583,8 @@ skip_routeinfo: if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) { ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu); - } else if (in6_dev->cnf.mtu6 != mtu) { - in6_dev->cnf.mtu6 = mtu; + } else if (READ_ONCE(in6_dev->cnf.mtu6) != mtu) { + WRITE_ONCE(in6_dev->cnf.mtu6, mtu); fib6_metric_set(rt, RTAX_MTU, mtu); rt6_mtu_change(skb->dev, mtu); } @@ -1813,7 +1818,7 @@ static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb) if (!idev) return true; if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED && - idev->cnf.suppress_frag_ndisc) { + READ_ONCE(idev->cnf.suppress_frag_ndisc)) { net_warn_ratelimited("Received fragmented ndisc packet. Carefully consider disabling suppress_frag_ndisc.\n"); return true; } @@ -1890,8 +1895,8 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, idev = in6_dev_get(dev); if (!idev) break; - if (idev->cnf.ndisc_notify || - net->ipv6.devconf_all->ndisc_notify) + if (READ_ONCE(idev->cnf.ndisc_notify) || + READ_ONCE(net->ipv6.devconf_all->ndisc_notify)) ndisc_send_unsol_na(dev); in6_dev_put(idev); break; @@ -1900,8 +1905,8 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, if (!idev) evict_nocarrier = true; else { - evict_nocarrier = idev->cnf.ndisc_evict_nocarrier && - net->ipv6.devconf_all->ndisc_evict_nocarrier; + evict_nocarrier = READ_ONCE(idev->cnf.ndisc_evict_nocarrier) && + READ_ONCE(net->ipv6.devconf_all->ndisc_evict_nocarrier); in6_dev_put(idev); } diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index b2dd48911c8d..1a51a44571c3 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -327,9 +327,9 @@ static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb, if (!reasm_data) goto err; - payload_len = ((skb->data - skb_network_header(skb)) - + payload_len = -skb_network_offset(skb) - sizeof(struct ipv6hdr) + fq->q.len - - sizeof(struct frag_hdr)); + sizeof(struct frag_hdr); if (payload_len > IPV6_MAXPLEN) { net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n", payload_len); diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 196dd4ecb5e2..dedee264b8f6 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -83,7 +83,7 @@ struct sk_buff *nf_reject_skb_v6_tcp_reset(struct net *net, skb_reserve(nskb, LL_MAX_HEADER); nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, - net->ipv6.devconf_all->hop_limit); + READ_ONCE(net->ipv6.devconf_all->hop_limit)); nf_reject_ip6_tcphdr_put(nskb, oldskb, oth, otcplen); nip6h->payload_len = htons(nskb->len - sizeof(struct ipv6hdr)); @@ -124,7 +124,7 @@ struct sk_buff *nf_reject_skb_v6_unreach(struct net *net, skb_reserve(nskb, LL_MAX_HEADER); nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_ICMPV6, - net->ipv6.devconf_all->hop_limit); + READ_ONCE(net->ipv6.devconf_all->hop_limit)); skb_reset_transport_header(nskb); icmp6h = skb_put_zero(nskb, sizeof(struct icmp6hdr)); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index b5205311f372..806d4b5dd1e6 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -111,9 +111,9 @@ int ip6_dst_hoplimit(struct dst_entry *dst) rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) - hoplimit = idev->cnf.hop_limit; + hoplimit = READ_ONCE(idev->cnf.hop_limit); else - hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit; + hoplimit = READ_ONCE(dev_net(dev)->ipv6.devconf_all->hop_limit); rcu_read_unlock(); } return hoplimit; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 76e6eb3b643d..779274055abf 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -615,7 +615,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc->mark; skb->tstamp = sockc->transmit_time; - + skb->mono_delivery_time = !!skb->tstamp; skb_put(skb, length); skb_reset_network_header(skb); iph = ipv6_hdr(skb); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 5ebc47da1000..acb4f119e11f 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -272,9 +272,9 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, if (!reasm_data) goto out_oom; - payload_len = ((skb->data - skb_network_header(skb)) - + payload_len = -skb_network_offset(skb) - sizeof(struct ipv6hdr) + fq->q.len - - sizeof(struct frag_hdr)); + sizeof(struct frag_hdr); if (payload_len > IPV6_MAXPLEN) goto out_oversize; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 707d65bc9c0e..1f4b935a0e57 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -645,14 +645,15 @@ static void rt6_probe(struct fib6_nh *fib6_nh) write_lock_bh(&neigh->lock); if (!(neigh->nud_state & NUD_VALID) && time_after(jiffies, - neigh->updated + idev->cnf.rtr_probe_interval)) { + neigh->updated + + READ_ONCE(idev->cnf.rtr_probe_interval))) { work = kmalloc(sizeof(*work), GFP_ATOMIC); if (work) __neigh_set_probe_once(neigh); } write_unlock_bh(&neigh->lock); } else if (time_after(jiffies, last_probe + - idev->cnf.rtr_probe_interval)) { + READ_ONCE(idev->cnf.rtr_probe_interval))) { work = kmalloc(sizeof(*work), GFP_ATOMIC); } @@ -1596,7 +1597,7 @@ static unsigned int fib6_mtu(const struct fib6_result *res) rcu_read_lock(); idev = __in6_dev_get(dev); - mtu = idev->cnf.mtu6; + mtu = READ_ONCE(idev->cnf.mtu6); rcu_read_unlock(); } @@ -2220,7 +2221,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, strict |= flags & RT6_LOOKUP_F_IFACE; strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE; - if (net->ipv6.devconf_all->forwarding == 0) + if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0) strict |= RT6_LOOKUP_F_REACHABLE; rcu_read_lock(); @@ -3249,8 +3250,8 @@ u32 ip6_mtu_from_fib6(const struct fib6_result *res, mtu = IPV6_MIN_MTU; idev = __in6_dev_get(dev); - if (idev && idev->cnf.mtu6 > mtu) - mtu = idev->cnf.mtu6; + if (idev) + mtu = max_t(u32, mtu, READ_ONCE(idev->cnf.mtu6)); } mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); @@ -4149,7 +4150,8 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu in6_dev = __in6_dev_get(skb->dev); if (!in6_dev) return; - if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) + if (READ_ONCE(in6_dev->cnf.forwarding) || + !READ_ONCE(in6_dev->cnf.accept_redirects)) return; /* RFC2461 8.1: @@ -4583,8 +4585,8 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net, f6i->dst_nocount = true; if (!anycast && - (net->ipv6.devconf_all->disable_policy || - idev->cnf.disable_policy)) + (READ_ONCE(net->ipv6.devconf_all->disable_policy) || + READ_ONCE(idev->cnf.disable_policy))) f6i->dst_nopolicy = true; } @@ -5341,19 +5343,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, err_nh = NULL; list_for_each_entry(nh, &rt6_nh_list, next) { err = __ip6_ins_rt(nh->fib6_info, info, extack); - fib6_info_release(nh->fib6_info); - - if (!err) { - /* save reference to last route successfully inserted */ - rt_last = nh->fib6_info; - - /* save reference to first route for notification */ - if (!rt_notif) - rt_notif = nh->fib6_info; - } - /* nh->fib6_info is used or freed at this point, reset to NULL*/ - nh->fib6_info = NULL; if (err) { if (replace && nhn) NL_SET_ERR_MSG_MOD(extack, @@ -5361,6 +5351,12 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, err_nh = nh; goto add_errout; } + /* save reference to last route successfully inserted */ + rt_last = nh->fib6_info; + + /* save reference to first route for notification */ + if (!rt_notif) + rt_notif = nh->fib6_info; /* Because each route is added like a single route we remove * these flags after the first nexthop: if there is a collision, @@ -5421,8 +5417,7 @@ add_errout: cleanup: list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) { - if (nh->fib6_info) - fib6_info_release(nh->fib6_info); + fib6_info_release(nh->fib6_info); list_del(&nh->next); kfree(nh); } diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index d43c50a7310d..861e0366f549 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -241,6 +241,7 @@ bool seg6_hmac_validate_skb(struct sk_buff *skb) struct sr6_tlv_hmac *tlv; struct ipv6_sr_hdr *srh; struct inet6_dev *idev; + int require_hmac; idev = __in6_dev_get(skb->dev); @@ -248,16 +249,17 @@ bool seg6_hmac_validate_skb(struct sk_buff *skb) tlv = seg6_get_tlv_hmac(srh); + require_hmac = READ_ONCE(idev->cnf.seg6_require_hmac); /* mandatory check but no tlv */ - if (idev->cnf.seg6_require_hmac > 0 && !tlv) + if (require_hmac > 0 && !tlv) return false; /* no check */ - if (idev->cnf.seg6_require_hmac < 0) + if (require_hmac < 0) return true; /* check only if present */ - if (idev->cnf.seg6_require_hmac == 0 && !tlv) + if (require_hmac == 0 && !tlv) return true; /* now, seg6_require_hmac >= 0 && tlv */ diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 6e36e5047fba..a17d783dc7c0 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -43,7 +43,7 @@ static int xfrm6_transport_finish2(struct net *net, struct sock *sk, int xfrm6_transport_finish(struct sk_buff *skb, int async) { struct xfrm_offload *xo = xfrm_offload(skb); - int nhlen = skb->data - skb_network_header(skb); + int nhlen = -skb_network_offset(skb); skb_network_header(skb)[IP6CB(skb)->nhoff] = XFRM_MODE_SKB_CB(skb)->protocol; diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 34e03b9522c8..cfc8eb70e966 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -119,7 +119,8 @@ void rate_control_rate_update(struct ieee80211_local *local, rcu_read_unlock(); } - drv_sta_rc_update(local, sta->sdata, &sta->sta, changed); + if (sta->uploaded) + drv_sta_rc_update(local, sta->sdata, &sta->sta, changed); } int ieee80211_rate_control_register(const struct rate_control_ops *ops) diff --git a/net/mctp/route.c b/net/mctp/route.c index 40dd64c9e618..eefd7834d9a0 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -954,7 +954,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, dev = dev_get_by_index_rcu(sock_net(sk), cb->ifindex); if (!dev) { rcu_read_unlock(); - return rc; + goto out_free; } rt->dev = __mctp_dev_get(dev); rcu_read_unlock(); @@ -969,7 +969,8 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, rt->mtu = 0; } else { - return -EINVAL; + rc = -EINVAL; + goto out_free; } spin_lock_irqsave(&rt->dev->addrs_lock, flags); @@ -1033,12 +1034,17 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, rc = mctp_do_fragment_route(rt, skb, mtu, tag); } + /* route output functions consume the skb, even on error */ + skb = NULL; + out_release: if (!ext_rt) mctp_route_release(rt); mctp_dev_put(tmp_rt.dev); +out_free: + kfree_skb(skb); return rc; } diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index ef59e25dc482..8fc790f2a01b 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -55,8 +55,6 @@ static int mpls_xmit(struct sk_buff *skb) out_dev = dst->dev; net = dev_net(out_dev); - skb_orphan(skb); - if (!mpls_output_possible(out_dev) || !dst->lwtstate || skb_warn_if_lro(skb)) goto drop; diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c index 6ff6f14674aa..3ae46b545d2c 100644 --- a/net/mptcp/diag.c +++ b/net/mptcp/diag.c @@ -10,7 +10,6 @@ #include <linux/net.h> #include <linux/inet_diag.h> #include <net/netlink.h> -#include <uapi/linux/mptcp.h> #include "protocol.h" static int subflow_get_info(struct sock *sk, struct sk_buff *skb) @@ -21,6 +20,9 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb) bool slow; int err; + if (inet_sk_state_load(sk) == TCP_LISTEN) + return 0; + start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP); if (!start) return -EMSGSIZE; diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c index bd8ff5950c8d..0566dd793810 100644 --- a/net/mptcp/mptcp_diag.c +++ b/net/mptcp/mptcp_diag.c @@ -10,7 +10,6 @@ #include <linux/net.h> #include <linux/inet_diag.h> #include <net/netlink.h> -#include <uapi/linux/mptcp.h> #include "protocol.h" static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, diff --git a/net/mptcp/mptcp_pm_gen.c b/net/mptcp/mptcp_pm_gen.c index 670da7822e6c..c30a2a90a192 100644 --- a/net/mptcp/mptcp_pm_gen.c +++ b/net/mptcp/mptcp_pm_gen.c @@ -32,8 +32,9 @@ const struct nla_policy mptcp_pm_del_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1] }; /* MPTCP_PM_CMD_GET_ADDR - do */ -const struct nla_policy mptcp_pm_get_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1] = { - [MPTCP_PM_ENDPOINT_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), +const struct nla_policy mptcp_pm_get_addr_nl_policy[MPTCP_PM_ATTR_TOKEN + 1] = { + [MPTCP_PM_ATTR_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), + [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, }, }; /* MPTCP_PM_CMD_FLUSH_ADDRS - do */ @@ -110,7 +111,7 @@ const struct genl_ops mptcp_pm_nl_ops[11] = { .doit = mptcp_pm_nl_get_addr_doit, .dumpit = mptcp_pm_nl_get_addr_dumpit, .policy = mptcp_pm_get_addr_nl_policy, - .maxattr = MPTCP_PM_ENDPOINT_ADDR, + .maxattr = MPTCP_PM_ATTR_TOKEN, .flags = GENL_UNS_ADMIN_PERM, }, { diff --git a/net/mptcp/mptcp_pm_gen.h b/net/mptcp/mptcp_pm_gen.h index ac9fc7225b6a..e24258f6f819 100644 --- a/net/mptcp/mptcp_pm_gen.h +++ b/net/mptcp/mptcp_pm_gen.h @@ -18,7 +18,7 @@ extern const struct nla_policy mptcp_pm_add_addr_nl_policy[MPTCP_PM_ENDPOINT_ADD extern const struct nla_policy mptcp_pm_del_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1]; -extern const struct nla_policy mptcp_pm_get_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1]; +extern const struct nla_policy mptcp_pm_get_addr_nl_policy[MPTCP_PM_ATTR_TOKEN + 1]; extern const struct nla_policy mptcp_pm_flush_addrs_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1]; diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 23e317ffc901..27ca42c77b02 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -981,10 +981,10 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, if (mp_opt->deny_join_id0) WRITE_ONCE(msk->pm.remote_deny_join_id0, true); -set_fully_established: if (unlikely(!READ_ONCE(msk->pm.server_side))) pr_warn_once("bogus mpc option on established client sk"); +set_fully_established: mptcp_data_lock((struct sock *)msk); __mptcp_subflow_fully_established(msk, subflow, mp_opt); mptcp_data_unlock((struct sock *)msk); diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 53e0b08b1123..55406720c607 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -6,7 +6,6 @@ #define pr_fmt(fmt) "MPTCP: " fmt #include <linux/kernel.h> -#include <net/tcp.h> #include <net/mptcp.h> #include "protocol.h" @@ -441,13 +440,27 @@ int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id return mptcp_pm_nl_get_flags_and_ifindex_by_id(msk, id, flags, ifindex); } -int mptcp_pm_set_flags(struct net *net, struct nlattr *token, - struct mptcp_pm_addr_entry *loc, - struct mptcp_pm_addr_entry *rem, u8 bkup) +int mptcp_pm_get_addr(struct sk_buff *skb, struct genl_info *info) { - if (token) - return mptcp_userspace_pm_set_flags(net, token, loc, rem, bkup); - return mptcp_pm_nl_set_flags(net, loc, bkup); + if (info->attrs[MPTCP_PM_ATTR_TOKEN]) + return mptcp_userspace_pm_get_addr(skb, info); + return mptcp_pm_nl_get_addr(skb, info); +} + +int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb) +{ + const struct genl_info *info = genl_info_dump(cb); + + if (info->attrs[MPTCP_PM_ATTR_TOKEN]) + return mptcp_userspace_pm_dump_addr(msg, cb); + return mptcp_pm_nl_dump_addr(msg, cb); +} + +int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info) +{ + if (info->attrs[MPTCP_PM_ATTR_TOKEN]) + return mptcp_userspace_pm_set_flags(skb, info); + return mptcp_pm_nl_set_flags(skb, info); } void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index d5a942b9ab29..5c17d39146ea 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -8,19 +8,13 @@ #include <linux/inet.h> #include <linux/kernel.h> -#include <net/tcp.h> #include <net/inet_common.h> #include <net/netns/generic.h> #include <net/mptcp.h> -#include <net/genetlink.h> -#include <uapi/linux/mptcp.h> #include "protocol.h" #include "mib.h" -/* forward declaration */ -static struct genl_family mptcp_genl_family; - static int pm_nl_pernet_id; struct mptcp_pm_add_entry { @@ -505,15 +499,12 @@ __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id) } static struct mptcp_pm_addr_entry * -__lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info, - bool lookup_by_id) +__lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info) { struct mptcp_pm_addr_entry *entry; list_for_each_entry(entry, &pernet->local_addr_list, list) { - if ((!lookup_by_id && - mptcp_addresses_equal(&entry->addr, info, entry->addr.port)) || - (lookup_by_id && entry->addr.id == info->id)) + if (mptcp_addresses_equal(&entry->addr, info, entry->addr.port)) return entry; } return NULL; @@ -543,7 +534,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) mptcp_local_address((struct sock_common *)msk->first, &mpc_addr); rcu_read_lock(); - entry = __lookup_addr(pernet, &mpc_addr, false); + entry = __lookup_addr(pernet, &mpc_addr); if (entry) { __clear_bit(entry->addr.id, msk->pm.id_avail_bitmap); msk->mpc_endpoint_id = entry->addr.id; @@ -1550,8 +1541,8 @@ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list) } } -void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, - struct list_head *rm_list) +static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, + struct list_head *rm_list) { struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 }; struct mptcp_pm_addr_entry *entry; @@ -1636,8 +1627,8 @@ int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info) return 0; } -static int mptcp_nl_fill_addr(struct sk_buff *skb, - struct mptcp_pm_addr_entry *entry) +int mptcp_nl_fill_addr(struct sk_buff *skb, + struct mptcp_pm_addr_entry *entry) { struct mptcp_addr_info *addr = &entry->addr; struct nlattr *attr; @@ -1675,7 +1666,7 @@ nla_put_failure: return -EMSGSIZE; } -int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info) { struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; struct pm_nl_pernet *pernet = genl_info_pm_nl(info); @@ -1725,8 +1716,13 @@ fail: return ret; } -int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg, - struct netlink_callback *cb) +int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info) +{ + return mptcp_pm_get_addr(skb, info); +} + +int mptcp_pm_nl_dump_addr(struct sk_buff *msg, + struct netlink_callback *cb) { struct net *net = sock_net(msg->sk); struct mptcp_pm_addr_entry *entry; @@ -1768,6 +1764,12 @@ int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg, return msg->len; } +int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + return mptcp_pm_dump_addr(msg, cb); +} + static int parse_limit(struct genl_info *info, int id, unsigned int *limit) { struct nlattr *attr = info->attrs[id]; @@ -1882,66 +1884,63 @@ next: return ret; } -int mptcp_pm_nl_set_flags(struct net *net, struct mptcp_pm_addr_entry *addr, u8 bkup) +int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info) { - struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); + struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }; + struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP | MPTCP_PM_ADDR_FLAG_FULLMESH; + struct net *net = sock_net(skb->sk); struct mptcp_pm_addr_entry *entry; + struct pm_nl_pernet *pernet; u8 lookup_by_id = 0; + u8 bkup = 0; + int ret; + + pernet = pm_nl_get_pernet(net); + + ret = mptcp_pm_parse_entry(attr, info, false, &addr); + if (ret < 0) + return ret; - if (addr->addr.family == AF_UNSPEC) { + if (addr.addr.family == AF_UNSPEC) { lookup_by_id = 1; - if (!addr->addr.id) + if (!addr.addr.id) { + GENL_SET_ERR_MSG(info, "missing required inputs"); return -EOPNOTSUPP; + } } + if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP) + bkup = 1; + spin_lock_bh(&pernet->lock); - entry = __lookup_addr(pernet, &addr->addr, lookup_by_id); + entry = lookup_by_id ? __lookup_addr_by_id(pernet, addr.addr.id) : + __lookup_addr(pernet, &addr.addr); if (!entry) { spin_unlock_bh(&pernet->lock); + GENL_SET_ERR_MSG(info, "address not found"); return -EINVAL; } - if ((addr->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && + if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { spin_unlock_bh(&pernet->lock); + GENL_SET_ERR_MSG(info, "invalid addr flags"); return -EINVAL; } - changed = (addr->flags ^ entry->flags) & mask; - entry->flags = (entry->flags & ~mask) | (addr->flags & mask); - *addr = *entry; + changed = (addr.flags ^ entry->flags) & mask; + entry->flags = (entry->flags & ~mask) | (addr.flags & mask); + addr = *entry; spin_unlock_bh(&pernet->lock); - mptcp_nl_set_flags(net, &addr->addr, bkup, changed); + mptcp_nl_set_flags(net, &addr.addr, bkup, changed); return 0; } int mptcp_pm_nl_set_flags_doit(struct sk_buff *skb, struct genl_info *info) { - struct mptcp_pm_addr_entry remote = { .addr = { .family = AF_UNSPEC }, }; - struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }; - struct nlattr *attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; - struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; - struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; - struct net *net = sock_net(skb->sk); - u8 bkup = 0; - int ret; - - ret = mptcp_pm_parse_entry(attr, info, false, &addr); - if (ret < 0) - return ret; - - if (attr_rem) { - ret = mptcp_pm_parse_entry(attr_rem, info, false, &remote); - if (ret < 0) - return ret; - } - - if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP) - bkup = 1; - - return mptcp_pm_set_flags(net, token, &addr, &remote, bkup); + return mptcp_pm_set_flags(skb, info); } static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gfp) @@ -2281,7 +2280,7 @@ nla_put_failure: nlmsg_free(skb); } -static struct genl_family mptcp_genl_family __ro_after_init = { +struct genl_family mptcp_genl_family __ro_after_init = { .name = MPTCP_PM_NAME, .version = MPTCP_PM_VER, .netnsok = true, diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index d396a5973429..9f5d422d5ef6 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -106,19 +106,26 @@ static int mptcp_userspace_pm_delete_local_addr(struct mptcp_sock *msk, return -EINVAL; } +static struct mptcp_pm_addr_entry * +mptcp_userspace_pm_lookup_addr_by_id(struct mptcp_sock *msk, unsigned int id) +{ + struct mptcp_pm_addr_entry *entry; + + list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { + if (entry->addr.id == id) + return entry; + } + return NULL; +} + int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, u8 *flags, int *ifindex) { - struct mptcp_pm_addr_entry *entry, *match = NULL; + struct mptcp_pm_addr_entry *match; spin_lock_bh(&msk->pm.lock); - list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { - if (id == entry->addr.id) { - match = entry; - break; - } - } + match = mptcp_userspace_pm_lookup_addr_by_id(msk, id); spin_unlock_bh(&msk->pm.lock); if (match) { *flags = match->flags; @@ -261,7 +268,7 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; struct nlattr *id = info->attrs[MPTCP_PM_ATTR_LOC_ID]; - struct mptcp_pm_addr_entry *match = NULL; + struct mptcp_pm_addr_entry *match; struct mptcp_pm_addr_entry *entry; struct mptcp_sock *msk; LIST_HEAD(free_list); @@ -298,13 +305,7 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) lock_sock(sk); - list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { - if (entry->addr.id == id_val) { - match = entry; - break; - } - } - + match = mptcp_userspace_pm_lookup_addr_by_id(msk, id_val); if (!match) { GENL_SET_ERR_MSG(info, "address with specified id not found"); release_sock(sk); @@ -334,7 +335,6 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; struct mptcp_pm_addr_entry local = { 0 }; struct mptcp_addr_info addr_r; - struct mptcp_addr_info addr_l; struct mptcp_sock *msk; int err = -EINVAL; struct sock *sk; @@ -360,25 +360,31 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) goto create_err; } - err = mptcp_pm_parse_addr(laddr, info, &addr_l); + err = mptcp_pm_parse_entry(laddr, info, true, &local); if (err < 0) { NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr"); goto create_err; } + if (local.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { + GENL_SET_ERR_MSG(info, "invalid addr flags"); + err = -EINVAL; + goto create_err; + } + local.flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW; + err = mptcp_pm_parse_addr(raddr, info, &addr_r); if (err < 0) { NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr"); goto create_err; } - if (!mptcp_pm_addr_families_match(sk, &addr_l, &addr_r)) { + if (!mptcp_pm_addr_families_match(sk, &local.addr, &addr_r)) { GENL_SET_ERR_MSG(info, "families mismatch"); err = -EINVAL; goto create_err; } - local.addr = addr_l; err = mptcp_userspace_pm_append_new_local_addr(msk, &local, false); if (err < 0) { GENL_SET_ERR_MSG(info, "did not match address and id"); @@ -387,7 +393,7 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) lock_sock(sk); - err = __mptcp_subflow_connect(sk, &addr_l, &addr_r); + err = __mptcp_subflow_connect(sk, &local.addr, &addr_r); release_sock(sk); @@ -495,6 +501,16 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info goto destroy_err; } +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if (addr_l.family == AF_INET && ipv6_addr_v4mapped(&addr_r.addr6)) { + ipv6_addr_set_v4mapped(addr_l.addr.s_addr, &addr_l.addr6); + addr_l.family = AF_INET6; + } + if (addr_r.family == AF_INET && ipv6_addr_v4mapped(&addr_l.addr6)) { + ipv6_addr_set_v4mapped(addr_r.addr.s_addr, &addr_r.addr6); + addr_r.family = AF_INET6; + } +#endif if (addr_l.family != addr_r.family) { GENL_SET_ERR_MSG(info, "address families do not match"); err = -EINVAL; @@ -530,35 +546,194 @@ destroy_err: return err; } -int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token, - struct mptcp_pm_addr_entry *loc, - struct mptcp_pm_addr_entry *rem, u8 bkup) +int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info) { + struct mptcp_pm_addr_entry loc = { .addr = { .family = AF_UNSPEC }, }; + struct mptcp_pm_addr_entry rem = { .addr = { .family = AF_UNSPEC }, }; + struct nlattr *attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; + struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; + struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; + struct net *net = sock_net(skb->sk); struct mptcp_sock *msk; int ret = -EINVAL; struct sock *sk; u32 token_val; + u8 bkup = 0; token_val = nla_get_u32(token); msk = mptcp_token_get_sock(net, token_val); - if (!msk) + if (!msk) { + NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); return ret; + } sk = (struct sock *)msk; - if (!mptcp_pm_is_userspace(msk)) + if (!mptcp_pm_is_userspace(msk)) { + GENL_SET_ERR_MSG(info, "userspace PM not selected"); goto set_flags_err; + } - if (loc->addr.family == AF_UNSPEC || - rem->addr.family == AF_UNSPEC) + ret = mptcp_pm_parse_entry(attr, info, false, &loc); + if (ret < 0) goto set_flags_err; + if (attr_rem) { + ret = mptcp_pm_parse_entry(attr_rem, info, false, &rem); + if (ret < 0) + goto set_flags_err; + } + + if (loc.addr.family == AF_UNSPEC || + rem.addr.family == AF_UNSPEC) { + GENL_SET_ERR_MSG(info, "invalid address families"); + ret = -EINVAL; + goto set_flags_err; + } + + if (loc.flags & MPTCP_PM_ADDR_FLAG_BACKUP) + bkup = 1; + lock_sock(sk); - ret = mptcp_pm_nl_mp_prio_send_ack(msk, &loc->addr, &rem->addr, bkup); + ret = mptcp_pm_nl_mp_prio_send_ack(msk, &loc.addr, &rem.addr, bkup); release_sock(sk); set_flags_err: sock_put(sk); return ret; } + +int mptcp_userspace_pm_dump_addr(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct id_bitmap { + DECLARE_BITMAP(map, MPTCP_PM_MAX_ADDR_ID + 1); + } *bitmap; + const struct genl_info *info = genl_info_dump(cb); + struct net *net = sock_net(msg->sk); + struct mptcp_pm_addr_entry *entry; + struct mptcp_sock *msk; + struct nlattr *token; + int ret = -EINVAL; + struct sock *sk; + void *hdr; + + bitmap = (struct id_bitmap *)cb->ctx; + token = info->attrs[MPTCP_PM_ATTR_TOKEN]; + + msk = mptcp_token_get_sock(net, nla_get_u32(token)); + if (!msk) { + NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); + return ret; + } + + sk = (struct sock *)msk; + + if (!mptcp_pm_is_userspace(msk)) { + GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); + goto out; + } + + lock_sock(sk); + spin_lock_bh(&msk->pm.lock); + list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { + if (test_bit(entry->addr.id, bitmap->map)) + continue; + + hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, &mptcp_genl_family, + NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR); + if (!hdr) + break; + + if (mptcp_nl_fill_addr(msg, entry) < 0) { + genlmsg_cancel(msg, hdr); + break; + } + + __set_bit(entry->addr.id, bitmap->map); + genlmsg_end(msg, hdr); + } + spin_unlock_bh(&msk->pm.lock); + release_sock(sk); + ret = msg->len; + +out: + sock_put(sk); + return ret; +} + +int mptcp_userspace_pm_get_addr(struct sk_buff *skb, + struct genl_info *info) +{ + struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; + struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; + struct mptcp_pm_addr_entry addr, *entry; + struct net *net = sock_net(skb->sk); + struct mptcp_sock *msk; + struct sk_buff *msg; + int ret = -EINVAL; + struct sock *sk; + void *reply; + + msk = mptcp_token_get_sock(net, nla_get_u32(token)); + if (!msk) { + NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); + return ret; + } + + sk = (struct sock *)msk; + + if (!mptcp_pm_is_userspace(msk)) { + GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); + goto out; + } + + ret = mptcp_pm_parse_entry(attr, info, false, &addr); + if (ret < 0) + goto out; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + ret = -ENOMEM; + goto out; + } + + reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0, + info->genlhdr->cmd); + if (!reply) { + GENL_SET_ERR_MSG(info, "not enough space in Netlink message"); + ret = -EMSGSIZE; + goto fail; + } + + lock_sock(sk); + spin_lock_bh(&msk->pm.lock); + entry = mptcp_userspace_pm_lookup_addr_by_id(msk, addr.addr.id); + if (!entry) { + GENL_SET_ERR_MSG(info, "address not found"); + ret = -EINVAL; + goto unlock_fail; + } + + ret = mptcp_nl_fill_addr(msg, entry); + if (ret) + goto unlock_fail; + + genlmsg_end(msg, reply); + ret = genlmsg_reply(msg, info); + spin_unlock_bh(&msk->pm.lock); + release_sock(sk); + sock_put(sk); + return ret; + +unlock_fail: + spin_unlock_bh(&msk->pm.lock); + release_sock(sk); +fail: + nlmsg_free(msg); +out: + sock_put(sk); + return ret; +} diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 9df4eaddfd48..3a1967bc7bad 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -15,7 +15,6 @@ #include <net/inet_common.h> #include <net/inet_hashtables.h> #include <net/protocol.h> -#include <net/tcp.h> #include <net/tcp_states.h> #if IS_ENABLED(CONFIG_MPTCP_IPV6) #include <net/transp_v6.h> @@ -1265,6 +1264,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, mpext = mptcp_get_ext(skb); if (!mptcp_skb_can_collapse_to(data_seq, skb, mpext)) { TCP_SKB_CB(skb)->eor = 1; + tcp_mark_push(tcp_sk(ssk), skb); goto alloc_skb; } @@ -1691,15 +1691,6 @@ out: } } -static void mptcp_set_nospace(struct sock *sk) -{ - /* enable autotune */ - set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); - - /* will be cleared on avail space */ - set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags); -} - static int mptcp_disconnect(struct sock *sk, int flags); static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, @@ -1770,6 +1761,30 @@ static int do_copy_data_nocache(struct sock *sk, int copy, return 0; } +/* open-code sk_stream_memory_free() plus sent limit computation to + * avoid indirect calls in fast-path. + * Called under the msk socket lock, so we can avoid a bunch of ONCE + * annotations. + */ +static u32 mptcp_send_limit(const struct sock *sk) +{ + const struct mptcp_sock *msk = mptcp_sk(sk); + u32 limit, not_sent; + + if (sk->sk_wmem_queued >= READ_ONCE(sk->sk_sndbuf)) + return 0; + + limit = mptcp_notsent_lowat(sk); + if (limit == UINT_MAX) + return UINT_MAX; + + not_sent = msk->write_seq - msk->snd_nxt; + if (not_sent >= limit) + return 0; + + return limit - not_sent; +} + static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct mptcp_sock *msk = mptcp_sk(sk); @@ -1814,6 +1829,12 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct mptcp_data_frag *dfrag; bool dfrag_collapsed; size_t psize, offset; + u32 copy_limit; + + /* ensure fitting the notsent_lowat() constraint */ + copy_limit = mptcp_send_limit(sk); + if (!copy_limit) + goto wait_for_memory; /* reuse tail pfrag, if possible, or carve a new one from the * page allocator @@ -1821,9 +1842,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) dfrag = mptcp_pending_tail(sk); dfrag_collapsed = mptcp_frag_can_collapse_to(msk, pfrag, dfrag); if (!dfrag_collapsed) { - if (!sk_stream_memory_free(sk)) - goto wait_for_memory; - if (!mptcp_page_frag_refill(sk, pfrag)) goto wait_for_memory; @@ -1838,6 +1856,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) offset = dfrag->offset + dfrag->data_len; psize = pfrag->size - offset; psize = min_t(size_t, psize, msg_data_left(msg)); + psize = min_t(size_t, psize, copy_limit); total_ts = psize + frag_truesize; if (!sk_wmem_schedule(sk, total_ts)) @@ -1873,7 +1892,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) continue; wait_for_memory: - mptcp_set_nospace(sk); + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); __mptcp_push_pending(sk, msg->msg_flags); ret = sk_stream_wait_memory(sk, &timeo); if (ret) @@ -3182,8 +3201,50 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk) return (struct ipv6_pinfo *)(((u8 *)sk) + offset); } + +static void mptcp_copy_ip6_options(struct sock *newsk, const struct sock *sk) +{ + const struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_txoptions *opt; + struct ipv6_pinfo *newnp; + + newnp = inet6_sk(newsk); + + rcu_read_lock(); + opt = rcu_dereference(np->opt); + if (opt) { + opt = ipv6_dup_options(newsk, opt); + if (!opt) + net_warn_ratelimited("%s: Failed to copy ip6 options\n", __func__); + } + RCU_INIT_POINTER(newnp->opt, opt); + rcu_read_unlock(); +} #endif +static void mptcp_copy_ip_options(struct sock *newsk, const struct sock *sk) +{ + struct ip_options_rcu *inet_opt, *newopt = NULL; + const struct inet_sock *inet = inet_sk(sk); + struct inet_sock *newinet; + + newinet = inet_sk(newsk); + + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); + if (inet_opt) { + newopt = sock_kmalloc(newsk, sizeof(*inet_opt) + + inet_opt->opt.optlen, GFP_ATOMIC); + if (newopt) + memcpy(newopt, inet_opt, sizeof(*inet_opt) + + inet_opt->opt.optlen); + else + net_warn_ratelimited("%s: Failed to copy ip options\n", __func__); + } + RCU_INIT_POINTER(newinet->inet_opt, newopt); + rcu_read_unlock(); +} + struct sock *mptcp_sk_clone_init(const struct sock *sk, const struct mptcp_options_received *mp_opt, struct sock *ssk, @@ -3204,6 +3265,13 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, __mptcp_init_sock(nsk); +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if (nsk->sk_family == AF_INET6) + mptcp_copy_ip6_options(nsk, sk); + else +#endif + mptcp_copy_ip_options(nsk, sk); + msk = mptcp_sk(nsk); WRITE_ONCE(msk->local_key, subflow_req->local_key); WRITE_ONCE(msk->token, subflow_req->token); @@ -3215,7 +3283,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, WRITE_ONCE(msk->write_seq, subflow_req->idsn + 1); WRITE_ONCE(msk->snd_nxt, msk->write_seq); WRITE_ONCE(msk->snd_una, msk->write_seq); - WRITE_ONCE(msk->wnd_end, msk->snd_nxt + req->rsk_rcv_wnd); + WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd); msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq; mptcp_init_sched(msk, mptcp_sk(sk)->sched); @@ -3719,6 +3787,7 @@ static struct proto mptcp_prot = { .unhash = mptcp_unhash, .get_port = mptcp_get_port, .forward_alloc_get = mptcp_forward_alloc_get, + .stream_memory_free = mptcp_stream_memory_free, .sockets_allocated = &mptcp_sockets_allocated, .memory_allocated = &tcp_memory_allocated, @@ -3892,12 +3961,12 @@ static __poll_t mptcp_check_writeable(struct mptcp_sock *msk) { struct sock *sk = (struct sock *)msk; - if (sk_stream_is_writeable(sk)) + if (__mptcp_stream_is_writeable(sk, 1)) return EPOLLOUT | EPOLLWRNORM; - mptcp_set_nospace(sk); - smp_mb__after_atomic(); /* msk->flags is changed by write_space cb */ - if (sk_stream_is_writeable(sk)) + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + smp_mb__after_atomic(); /* NOSPACE is changed by mptcp_write_space() */ + if (__mptcp_stream_is_writeable(sk, 1)) return EPOLLOUT | EPOLLWRNORM; return 0; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 051b100d9403..f16edef6026a 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -113,10 +113,9 @@ #define MPTCP_RST_TRANSIENT BIT(0) /* MPTCP socket atomic flags */ -#define MPTCP_NOSPACE 1 -#define MPTCP_WORK_RTX 2 -#define MPTCP_FALLBACK_DONE 4 -#define MPTCP_WORK_CLOSE_SUBFLOW 5 +#define MPTCP_WORK_RTX 1 +#define MPTCP_FALLBACK_DONE 2 +#define MPTCP_WORK_CLOSE_SUBFLOW 3 /* MPTCP socket release cb flags */ #define MPTCP_PUSH_PENDING 1 @@ -308,6 +307,7 @@ struct mptcp_sock { in_accept_queue:1, free_first:1, rcvspace_init:1; + u32 notsent_lowat; struct work_struct work; struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; @@ -343,6 +343,8 @@ struct mptcp_sock { #define mptcp_for_each_subflow_safe(__msk, __subflow, __tmp) \ list_for_each_entry_safe(__subflow, __tmp, &((__msk)->conn_list), node) +extern struct genl_family mptcp_genl_family; + static inline void msk_owned_by_me(const struct mptcp_sock *msk) { sock_owned_by_me((const struct sock *)msk); @@ -808,6 +810,38 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk) READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt); } +static inline u32 mptcp_notsent_lowat(const struct sock *sk) +{ + struct net *net = sock_net(sk); + u32 val; + + val = READ_ONCE(mptcp_sk(sk)->notsent_lowat); + return val ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat); +} + +static inline bool mptcp_stream_memory_free(const struct sock *sk, int wake) +{ + const struct mptcp_sock *msk = mptcp_sk(sk); + u32 notsent_bytes; + + notsent_bytes = READ_ONCE(msk->write_seq) - READ_ONCE(msk->snd_nxt); + return (notsent_bytes << wake) < mptcp_notsent_lowat(sk); +} + +static inline bool __mptcp_stream_is_writeable(const struct sock *sk, int wake) +{ + return mptcp_stream_memory_free(sk, wake) && + __sk_stream_is_writeable(sk, wake); +} + +static inline void mptcp_write_space(struct sock *sk) +{ + /* pairs with memory barrier in mptcp_poll */ + smp_mb(); + if (mptcp_stream_memory_free(sk, 1)) + sk_stream_write_space(sk); +} + static inline void __mptcp_sync_sndbuf(struct sock *sk) { struct mptcp_subflow_context *subflow; @@ -826,6 +860,7 @@ static inline void __mptcp_sync_sndbuf(struct sock *sk) /* the msk max wmem limit is <nr_subflows> * tcp wmem[2] */ WRITE_ONCE(sk->sk_sndbuf, new_sndbuf); + mptcp_write_space(sk); } /* The called held both the msk socket and the subflow socket locks, @@ -856,16 +891,6 @@ static inline void mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk) local_bh_enable(); } -static inline void mptcp_write_space(struct sock *sk) -{ - if (sk_stream_is_writeable(sk)) { - /* pairs with memory barrier in mptcp_poll */ - smp_mb(); - if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags)) - sk_stream_write_space(sk); - } -} - void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags); #define MPTCP_TOKEN_MAX_RETRIES 4 @@ -945,21 +970,15 @@ int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, u8 *flags, int *ifindex); -int mptcp_pm_set_flags(struct net *net, struct nlattr *token, - struct mptcp_pm_addr_entry *loc, - struct mptcp_pm_addr_entry *rem, u8 bkup); -int mptcp_pm_nl_set_flags(struct net *net, struct mptcp_pm_addr_entry *addr, u8 bkup); -int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token, - struct mptcp_pm_addr_entry *loc, - struct mptcp_pm_addr_entry *rem, u8 bkup); +int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info); +int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info); int mptcp_pm_announce_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, bool echo); int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list); -void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, - struct list_head *rm_list); void mptcp_free_local_addr_list(struct mptcp_sock *msk); @@ -975,6 +994,8 @@ void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflo const struct mptcp_options_received *mp_opt); void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow, struct request_sock *req); +int mptcp_nl_fill_addr(struct sk_buff *skb, + struct mptcp_pm_addr_entry *entry); static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) { @@ -1039,6 +1060,15 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); +int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb); +int mptcp_pm_nl_dump_addr(struct sk_buff *msg, + struct netlink_callback *cb); +int mptcp_userspace_pm_dump_addr(struct sk_buff *msg, + struct netlink_callback *cb); +int mptcp_pm_get_addr(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info); +int mptcp_userspace_pm_get_addr(struct sk_buff *skb, + struct genl_info *info); static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow) { diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index da37e4541a5d..dcd1c76d2a3b 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -624,20 +624,11 @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t return ret; } -static int mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, sockptr_t optval, - unsigned int optlen) +static int __mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, int val) { struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; - int val; - if (optlen < sizeof(int)) - return -EINVAL; - - if (copy_from_sockptr(&val, optval, sizeof(val))) - return -EFAULT; - - lock_sock(sk); sockopt_seq_inc(msk); msk->cork = !!val; mptcp_for_each_subflow(msk, subflow) { @@ -649,25 +640,15 @@ static int mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, sockptr_t optva } if (!val) mptcp_check_and_set_pending(sk); - release_sock(sk); return 0; } -static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t optval, - unsigned int optlen) +static int __mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, int val) { struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; - int val; - - if (optlen < sizeof(int)) - return -EINVAL; - - if (copy_from_sockptr(&val, optval, sizeof(val))) - return -EFAULT; - lock_sock(sk); sockopt_seq_inc(msk); msk->nodelay = !!val; mptcp_for_each_subflow(msk, subflow) { @@ -679,8 +660,6 @@ static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t op } if (val) mptcp_check_and_set_pending(sk); - release_sock(sk); - return 0; } @@ -803,25 +782,10 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, int ret, val; switch (optname) { - case TCP_INQ: - ret = mptcp_get_int_option(msk, optval, optlen, &val); - if (ret) - return ret; - if (val < 0 || val > 1) - return -EINVAL; - - lock_sock(sk); - msk->recvmsg_inq = !!val; - release_sock(sk); - return 0; case TCP_ULP: return -EOPNOTSUPP; case TCP_CONGESTION: return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen); - case TCP_CORK: - return mptcp_setsockopt_sol_tcp_cork(msk, optval, optlen); - case TCP_NODELAY: - return mptcp_setsockopt_sol_tcp_nodelay(msk, optval, optlen); case TCP_DEFER_ACCEPT: /* See tcp.c: TCP_DEFER_ACCEPT does not fail */ mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); @@ -834,7 +798,34 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, optval, optlen); } - return -EOPNOTSUPP; + ret = mptcp_get_int_option(msk, optval, optlen, &val); + if (ret) + return ret; + + lock_sock(sk); + switch (optname) { + case TCP_INQ: + if (val < 0 || val > 1) + ret = -EINVAL; + else + msk->recvmsg_inq = !!val; + break; + case TCP_NOTSENT_LOWAT: + WRITE_ONCE(msk->notsent_lowat, val); + mptcp_write_space(sk); + break; + case TCP_CORK: + ret = __mptcp_setsockopt_sol_tcp_cork(msk, val); + break; + case TCP_NODELAY: + ret = __mptcp_setsockopt_sol_tcp_nodelay(msk, val); + break; + default: + ret = -ENOPROTOOPT; + } + + release_sock(sk); + return ret; } int mptcp_setsockopt(struct sock *sk, int level, int optname, @@ -1349,6 +1340,8 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, return mptcp_put_int_option(msk, optval, optlen, msk->cork); case TCP_NODELAY: return mptcp_put_int_option(msk, optval, optlen, msk->nodelay); + case TCP_NOTSENT_LOWAT: + return mptcp_put_int_option(msk, optval, optlen, msk->notsent_lowat); } return -EOPNOTSUPP; } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 6403c56f2902..1626dd20c68f 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -15,13 +15,11 @@ #include <net/inet_common.h> #include <net/inet_hashtables.h> #include <net/protocol.h> -#include <net/tcp.h> #if IS_ENABLED(CONFIG_MPTCP_IPV6) #include <net/ip6_route.h> #include <net/transp_v6.h> #endif #include <net/mptcp.h> -#include <uapi/linux/mptcp.h> #include "protocol.h" #include "mib.h" diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index 475358ec8212..d2492d050fe6 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -467,7 +467,7 @@ __bpf_kfunc int bpf_ct_change_status(struct nf_conn *nfct, u32 status) __bpf_kfunc_end_defs(); -BTF_SET8_START(nf_ct_kfunc_set) +BTF_KFUNCS_START(nf_ct_kfunc_set) BTF_ID_FLAGS(func, bpf_xdp_ct_alloc, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_xdp_ct_lookup, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_skb_ct_alloc, KF_ACQUIRE | KF_RET_NULL) @@ -478,7 +478,7 @@ BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_ct_change_timeout, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_ct_set_status, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_ct_change_status, KF_TRUSTED_ARGS) -BTF_SET8_END(nf_ct_kfunc_set) +BTF_KFUNCS_END(nf_ct_kfunc_set) static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 90e6bd2c3000..c63868666bd9 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2756,6 +2756,7 @@ static const struct nf_ct_hook nf_conntrack_hook = { .get_tuple_skb = nf_conntrack_get_tuple_skb, .attach = nf_conntrack_attach, .set_closing = nf_conntrack_set_closing, + .confirm = __nf_conntrack_confirm, }; void nf_conntrack_init_end(void) diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index e697a824b001..540d97715bd2 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -533,6 +533,8 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, /* Get fields bitmap */ if (nf_h323_error_boundary(bs, 0, f->sz)) return H323_ERROR_BOUND; + if (f->sz > 32) + return H323_ERROR_RANGE; bmp = get_bitmap(bs, f->sz); if (base) *(unsigned int *)base = bmp; @@ -589,6 +591,8 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, bmp2_len = get_bits(bs, 7) + 1; if (nf_h323_error_boundary(bs, 0, bmp2_len)) return H323_ERROR_BOUND; + if (bmp2_len > 32) + return H323_ERROR_RANGE; bmp2 = get_bitmap(bs, bmp2_len); bmp |= bmp2 >> f->sz; if (base) diff --git a/net/netfilter/nf_nat_bpf.c b/net/netfilter/nf_nat_bpf.c index 6e3b2f58855f..481be15609b1 100644 --- a/net/netfilter/nf_nat_bpf.c +++ b/net/netfilter/nf_nat_bpf.c @@ -54,9 +54,9 @@ __bpf_kfunc int bpf_ct_set_nat_info(struct nf_conn___init *nfct, __bpf_kfunc_end_defs(); -BTF_SET8_START(nf_nat_kfunc_set) +BTF_KFUNCS_START(nf_nat_kfunc_set) BTF_ID_FLAGS(func, bpf_ct_set_nat_info, KF_TRUSTED_ARGS) -BTF_SET8_END(nf_nat_kfunc_set) +BTF_KFUNCS_END(nf_nat_kfunc_set) static const struct btf_kfunc_id_set nf_bpf_nat_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index fbbc4fd37349..5b140c12b7df 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -800,7 +800,7 @@ synproxy_build_ip_ipv6(struct net *net, struct sk_buff *skb, skb_reset_network_header(skb); iph = skb_put(skb, sizeof(*iph)); ip6_flow_hdr(iph, 0, 0); - iph->hop_limit = net->ipv6.devconf_all->hop_limit; + iph->hop_limit = READ_ONCE(net->ipv6.devconf_all->hop_limit); iph->nexthdr = IPPROTO_TCP; iph->saddr = *saddr; iph->daddr = *daddr; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 224e5fb6a916..e93f905e60b6 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5008,6 +5008,12 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if ((flags & (NFT_SET_EVAL | NFT_SET_OBJECT)) == (NFT_SET_EVAL | NFT_SET_OBJECT)) return -EOPNOTSUPP; + if ((flags & (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT | NFT_SET_EVAL)) == + (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT)) + return -EOPNOTSUPP; + if ((flags & (NFT_SET_CONSTANT | NFT_SET_TIMEOUT)) == + (NFT_SET_CONSTANT | NFT_SET_TIMEOUT)) + return -EOPNOTSUPP; } desc.dtype = 0; @@ -5431,6 +5437,7 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) { list_del_rcu(&set->list); + set->dead = 1; if (event) nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_KERNEL); diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 1f9474fefe84..d3d11dede545 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -359,10 +359,20 @@ static int nft_target_validate(const struct nft_ctx *ctx, if (ctx->family != NFPROTO_IPV4 && ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET && ctx->family != NFPROTO_BRIDGE && ctx->family != NFPROTO_ARP) return -EOPNOTSUPP; + ret = nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING)); + if (ret) + return ret; + if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); @@ -610,10 +620,20 @@ static int nft_match_validate(const struct nft_ctx *ctx, if (ctx->family != NFPROTO_IPV4 && ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET && ctx->family != NFPROTO_BRIDGE && ctx->family != NFPROTO_ARP) return -EOPNOTSUPP; + ret = nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING)); + if (ret) + return ret; + if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index bfd3e5a14dab..255640013ab8 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1256,14 +1256,13 @@ static int nft_ct_expect_obj_init(const struct nft_ctx *ctx, switch (priv->l3num) { case NFPROTO_IPV4: case NFPROTO_IPV6: - if (priv->l3num != ctx->family) - return -EINVAL; + if (priv->l3num == ctx->family || ctx->family == NFPROTO_INET) + break; - fallthrough; - case NFPROTO_INET: - break; + return -EINVAL; + case NFPROTO_INET: /* tuple.src.l3num supports NFPROTO_IPV4/6 only */ default: - return -EOPNOTSUPP; + return -EAFNOSUPPORT; } priv->l4proto = nla_get_u8(tb[NFTA_CT_EXPECT_L4PROTO]); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 765921cf1194..da846212fb9b 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -167,7 +167,7 @@ static inline u32 netlink_group_mask(u32 group) static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb, gfp_t gfp_mask) { - unsigned int len = skb_end_offset(skb); + unsigned int len = skb->len; struct sk_buff *new; new = alloc_skb(len, gfp_mask); @@ -2267,6 +2267,15 @@ static int netlink_dump(struct sock *sk, bool lock_taken) if (extra_mutex) mutex_unlock(extra_mutex); + /* EMSGSIZE plus something already in the skb means + * that there's more to dump but current skb has filled up. + * If the callback really wants to return EMSGSIZE to user space + * it needs to do so again, on the next cb->dump() call, + * without putting data in the skb. + */ + if (nlk->dump_done_errno == -EMSGSIZE && skb->len) + nlk->dump_done_errno = skb->len; + cb->extack = NULL; } diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 50ec599a5cff..3b7666944b11 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1232,7 +1232,7 @@ static int ctrl_fill_info(const struct genl_family *family, u32 portid, u32 seq, hdr = genlmsg_put(skb, portid, seq, &genl_ctrl, flags, cmd); if (hdr == NULL) - return -1; + return -EMSGSIZE; if (nla_put_string(skb, CTRL_ATTR_FAMILY_NAME, family->name) || nla_put_u16(skb, CTRL_ATTR_FAMILY_ID, family->id) || @@ -1355,6 +1355,7 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) struct net *net = sock_net(skb->sk); int fams_to_skip = cb->args[0]; unsigned int id; + int err = 0; idr_for_each_entry(&genl_fam_idr, rt, id) { if (!rt->netnsok && !net_eq(net, &init_net)) @@ -1363,16 +1364,17 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) if (n++ < fams_to_skip) continue; - if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, - skb, CTRL_CMD_NEWFAMILY) < 0) { + err = ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + skb, CTRL_CMD_NEWFAMILY); + if (err) { n--; break; } } cb->args[0] = n; - return skb->len; + return err; } static struct sk_buff *ctrl_build_family_msg(const struct genl_family *family, diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 0eed00184adf..104a80b75477 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -453,16 +453,16 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, nr_init_timers(sk); nr->t1 = - msecs_to_jiffies(sysctl_netrom_transport_timeout); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_timeout)); nr->t2 = - msecs_to_jiffies(sysctl_netrom_transport_acknowledge_delay); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_acknowledge_delay)); nr->n2 = - msecs_to_jiffies(sysctl_netrom_transport_maximum_tries); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_maximum_tries)); nr->t4 = - msecs_to_jiffies(sysctl_netrom_transport_busy_delay); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_busy_delay)); nr->idle = - msecs_to_jiffies(sysctl_netrom_transport_no_activity_timeout); - nr->window = sysctl_netrom_transport_requested_window_size; + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_no_activity_timeout)); + nr->window = READ_ONCE(sysctl_netrom_transport_requested_window_size); nr->bpqext = 1; nr->state = NR_STATE_0; @@ -954,7 +954,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) * G8PZT's Xrouter which is sending packets with command type 7 * as an extension of the protocol. */ - if (sysctl_netrom_reset_circuit && + if (READ_ONCE(sysctl_netrom_reset_circuit) && (frametype != NR_RESET || flags != 0)) nr_transmit_reset(skb, 1); diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 3aaac4a22b38..2c34389c3ce6 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -81,7 +81,7 @@ static int nr_header(struct sk_buff *skb, struct net_device *dev, buff[6] |= AX25_SSSID_SPARE; buff += AX25_ADDR_LEN; - *buff++ = sysctl_netrom_network_ttl_initialiser; + *buff++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); *buff++ = NR_PROTO_IP; *buff++ = NR_PROTO_IP; diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c index 2f084b6f69d7..97944db6b5ac 100644 --- a/net/netrom/nr_in.c +++ b/net/netrom/nr_in.c @@ -97,7 +97,7 @@ static int nr_state1_machine(struct sock *sk, struct sk_buff *skb, break; case NR_RESET: - if (sysctl_netrom_reset_circuit) + if (READ_ONCE(sysctl_netrom_reset_circuit)) nr_disconnect(sk, ECONNRESET); break; @@ -128,7 +128,7 @@ static int nr_state2_machine(struct sock *sk, struct sk_buff *skb, break; case NR_RESET: - if (sysctl_netrom_reset_circuit) + if (READ_ONCE(sysctl_netrom_reset_circuit)) nr_disconnect(sk, ECONNRESET); break; @@ -262,7 +262,7 @@ static int nr_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype break; case NR_RESET: - if (sysctl_netrom_reset_circuit) + if (READ_ONCE(sysctl_netrom_reset_circuit)) nr_disconnect(sk, ECONNRESET); break; diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c index 44929657f5b7..5e531394a724 100644 --- a/net/netrom/nr_out.c +++ b/net/netrom/nr_out.c @@ -204,7 +204,7 @@ void nr_transmit_buffer(struct sock *sk, struct sk_buff *skb) dptr[6] |= AX25_SSSID_SPARE; dptr += AX25_ADDR_LEN; - *dptr++ = sysctl_netrom_network_ttl_initialiser; + *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); if (!nr_route_frame(skb, NULL)) { kfree_skb(skb); diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index baea3cbd76ca..70480869ad1c 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -153,7 +153,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, nr_neigh->digipeat = NULL; nr_neigh->ax25 = NULL; nr_neigh->dev = dev; - nr_neigh->quality = sysctl_netrom_default_path_quality; + nr_neigh->quality = READ_ONCE(sysctl_netrom_default_path_quality); nr_neigh->locked = 0; nr_neigh->count = 0; nr_neigh->number = nr_neigh_no++; @@ -728,7 +728,7 @@ void nr_link_failed(ax25_cb *ax25, int reason) nr_neigh->ax25 = NULL; ax25_cb_put(ax25); - if (++nr_neigh->failed < sysctl_netrom_link_fails_count) { + if (++nr_neigh->failed < READ_ONCE(sysctl_netrom_link_fails_count)) { nr_neigh_put(nr_neigh); return; } @@ -766,7 +766,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) if (ax25 != NULL) { ret = nr_add_node(nr_src, "", &ax25->dest_addr, ax25->digipeat, ax25->ax25_dev->dev, 0, - sysctl_netrom_obsolescence_count_initialiser); + READ_ONCE(sysctl_netrom_obsolescence_count_initialiser)); if (ret) return ret; } @@ -780,7 +780,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) return ret; } - if (!sysctl_netrom_routing_control && ax25 != NULL) + if (!READ_ONCE(sysctl_netrom_routing_control) && ax25 != NULL) return 0; /* Its Time-To-Live has expired */ diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c index e2d2af924cff..c3bbd5880850 100644 --- a/net/netrom/nr_subr.c +++ b/net/netrom/nr_subr.c @@ -182,7 +182,8 @@ void nr_write_internal(struct sock *sk, int frametype) *dptr++ = nr->my_id; *dptr++ = frametype; *dptr++ = nr->window; - if (nr->bpqext) *dptr++ = sysctl_netrom_network_ttl_initialiser; + if (nr->bpqext) + *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); break; case NR_DISCREQ: @@ -236,7 +237,7 @@ void __nr_transmit_reply(struct sk_buff *skb, int mine, unsigned char cmdflags) dptr[6] |= AX25_SSSID_SPARE; dptr += AX25_ADDR_LEN; - *dptr++ = sysctl_netrom_network_ttl_initialiser; + *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); if (mine) { *dptr++ = 0; diff --git a/net/nfc/core.c b/net/nfc/core.c index eb2c0959e5b6..e58dc6405054 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -1015,7 +1015,7 @@ static void nfc_check_pres_timeout(struct timer_list *t) schedule_work(&dev->check_pres_work); } -struct class nfc_class = { +const struct class nfc_class = { .name = "nfc", .dev_release = nfc_release, }; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c9bbc2686690..0db31ca4982d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2057,7 +2057,7 @@ retry: skb->priority = READ_ONCE(sk->sk_priority); skb->mark = READ_ONCE(sk->sk_mark); skb->tstamp = sockc.transmit_time; - + skb->mono_delivery_time = !!skb->tstamp; skb_setup_tx_timestamp(skb, sockc.tsflags); if (unlikely(extra_len == 4)) @@ -2586,6 +2586,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->priority = READ_ONCE(po->sk.sk_priority); skb->mark = READ_ONCE(po->sk.sk_mark); skb->tstamp = sockc->transmit_time; + skb->mono_delivery_time = !!skb->tstamp; skb_setup_tx_timestamp(skb, sockc->tsflags); skb_zcopy_set_nouarg(skb, ph.raw); @@ -3064,6 +3065,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc.mark; skb->tstamp = sockc.transmit_time; + skb->mono_delivery_time = !!skb->tstamp; if (unlikely(extra_len == 4)) skb->no_fcs = 1; diff --git a/net/rds/rdma.c b/net/rds/rdma.c index fba82d36593a..a4e3c5de998b 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -301,6 +301,9 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, kfree(sg); } ret = PTR_ERR(trans_private); + /* Trigger connection so that its ready for the next retry */ + if (ret == -ENODEV) + rds_conn_connect_if_down(cp->cp_conn); goto out; } diff --git a/net/rds/send.c b/net/rds/send.c index 5e57a1581dc6..2899def23865 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1313,12 +1313,8 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) /* Parse any control messages the user may have included. */ ret = rds_cmsg_send(rs, rm, msg, &allocated_mr, &vct); - if (ret) { - /* Trigger connection so that its ready for the next retry */ - if (ret == -EAGAIN) - rds_conn_connect_if_down(conn); + if (ret) goto out; - } if (rm->rdma.op_active && !conn->c_trans->xmit_rdma) { printk_ratelimited(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 9f2c58c5a86b..2adb92b8c469 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -806,6 +806,16 @@ static void smc_pnet_create_pnetids_list(struct net *net) u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; struct net_device *dev; + /* Newly created netns do not have devices. + * Do not even acquire rtnl. + */ + if (list_empty(&net->dev_base_head)) + return; + + /* Note: This might not be needed, because smc_pnet_netdev_event() + * is also calling smc_pnet_add_base_pnetid() when handling + * NETDEV_UP event. + */ rtnl_lock(); for_each_netdev(net, dev) smc_pnet_add_base_pnetid(net, dev, ndev_pnetid); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index de96959336c4..211f57164cb6 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -52,6 +52,7 @@ struct tls_decrypt_arg { struct_group(inargs, bool zc; bool async; + bool async_done; u8 tail; ); @@ -274,22 +275,30 @@ static int tls_do_decryption(struct sock *sk, DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->decrypt_pending) < 1); atomic_inc(&ctx->decrypt_pending); } else { + DECLARE_CRYPTO_WAIT(wait); + aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, - crypto_req_done, &ctx->async_wait); + crypto_req_done, &wait); + ret = crypto_aead_decrypt(aead_req); + if (ret == -EINPROGRESS || ret == -EBUSY) + ret = crypto_wait_req(ret, &wait); + return ret; } ret = crypto_aead_decrypt(aead_req); + if (ret == -EINPROGRESS) + return 0; + if (ret == -EBUSY) { ret = tls_decrypt_async_wait(ctx); - ret = ret ?: -EINPROGRESS; + darg->async_done = true; + /* all completions have run, we're not doing async anymore */ + darg->async = false; + return ret; } - if (ret == -EINPROGRESS) { - if (darg->async) - return 0; - ret = crypto_wait_req(ret, &ctx->async_wait); - } + atomic_dec(&ctx->decrypt_pending); darg->async = false; return ret; @@ -1588,8 +1597,11 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov, /* Prepare and submit AEAD request */ err = tls_do_decryption(sk, sgin, sgout, dctx->iv, data_len + prot->tail_size, aead_req, darg); - if (err) + if (err) { + if (darg->async_done) + goto exit_free_skb; goto exit_free_pages; + } darg->skb = clear_skb ?: tls_strp_msg(ctx); clear_skb = NULL; @@ -1601,6 +1613,9 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov, return err; } + if (unlikely(darg->async_done)) + return 0; + if (prot->tail_size) darg->tail = dctx->tail; @@ -1948,6 +1963,7 @@ int tls_sw_recvmsg(struct sock *sk, struct strp_msg *rxm; struct tls_msg *tlm; ssize_t copied = 0; + ssize_t peeked = 0; bool async = false; int target, err; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); @@ -2095,8 +2111,10 @@ put_on_rx_list: if (err < 0) goto put_on_rx_list_err; - if (is_peek) + if (is_peek) { + peeked += chunk; goto put_on_rx_list; + } if (partially_consumed) { rxm->offset += chunk; @@ -2135,8 +2153,8 @@ recv_end: /* Drain records from the rx_list & copy if required */ if (is_peek || is_kvec) - err = process_rx_list(ctx, msg, &control, copied, - decrypted, is_peek, NULL); + err = process_rx_list(ctx, msg, &control, copied + peeked, + decrypted - peeked, is_peek, NULL); else err = process_rx_list(ctx, msg, &control, 0, async_copy_bytes, is_peek, NULL); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index bb65b684e26a..b4edba6b0b7b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4202,6 +4202,8 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) if (ntype != NL80211_IFTYPE_MESH_POINT) return -EINVAL; + if (otype != NL80211_IFTYPE_MESH_POINT) + return -EINVAL; if (netif_running(dev)) return -EBUSY; diff --git a/net/x25/Kconfig b/net/x25/Kconfig index 68729aa3a5d5..dc72302cbd07 100644 --- a/net/x25/Kconfig +++ b/net/x25/Kconfig @@ -17,8 +17,6 @@ config X25 if you want that) and the lower level data link layer protocol LAPB (say Y to "LAPB Data Link Driver" below if you want that). - You can read more about X.25 at <https://www.sangoma.com/tutorials/x25/> and - <http://docwiki.cisco.com/wiki/X.25>. Information about X.25 for Linux is contained in the files <file:Documentation/networking/x25.rst> and <file:Documentation/networking/x25-iface.rst>. diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index b78c0e095e22..3404d076a8a3 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -313,10 +313,13 @@ static bool xsk_is_bound(struct xdp_sock *xs) static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) { + struct net_device *dev = xdp->rxq->dev; + u32 qid = xdp->rxq->queue_index; + if (!xsk_is_bound(xs)) return -ENXIO; - if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index) + if (!dev->_rx[qid].pool || xs->umem != dev->_rx[qid].pool->umem) return -EINVAL; if (len > xsk_pool_get_rx_frame_size(xs->pool) && !xs->sg) { diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 3784534c9185..653e51ae3964 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -407,7 +407,7 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x) struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct net_device *dev = x->xso.dev; - if (!x->type_offload || x->encap) + if (!x->type_offload) return false; if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET || diff --git a/net/xfrm/xfrm_interface_bpf.c b/net/xfrm/xfrm_interface_bpf.c index 7d5e920141e9..5ea15037ebd1 100644 --- a/net/xfrm/xfrm_interface_bpf.c +++ b/net/xfrm/xfrm_interface_bpf.c @@ -93,10 +93,10 @@ __bpf_kfunc int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx, const struct bp __bpf_kfunc_end_defs(); -BTF_SET8_START(xfrm_ifc_kfunc_set) +BTF_KFUNCS_START(xfrm_ifc_kfunc_set) BTF_ID_FLAGS(func, bpf_skb_get_xfrm_info) BTF_ID_FLAGS(func, bpf_skb_set_xfrm_info) -BTF_SET8_END(xfrm_ifc_kfunc_set) +BTF_KFUNCS_END(xfrm_ifc_kfunc_set) static const struct btf_kfunc_id_set xfrm_interface_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 662c83beb345..e5722c95b8bb 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -704,9 +704,13 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(skb_dst(skb)->dev); struct xfrm_state *x = skb_dst(skb)->xfrm; + int family; int err; - switch (x->outer_mode.family) { + family = (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) ? x->outer_mode.family + : skb_dst(skb)->ops->family; + + switch (family) { case AF_INET: memset(IPCB(skb), 0, sizeof(*IPCB(skb))); IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 1b7e75159727..da6ecc6b3e15 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2694,7 +2694,9 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, if (xfrm[i]->props.smark.v || xfrm[i]->props.smark.m) mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]); - family = xfrm[i]->props.family; + if (xfrm[i]->xso.type != XFRM_DEV_OFFLOAD_PACKET) + family = xfrm[i]->props.family; + oif = fl->flowi_oif ? : fl->flowi_l3mdev; dst = xfrm_dst_lookup(xfrm[i], tos, oif, &saddr, &daddr, family, mark); @@ -3416,7 +3418,7 @@ decode_session4(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reve } fl4->flowi4_proto = flkeys->basic.ip_proto; - fl4->flowi4_tos = flkeys->ip.tos; + fl4->flowi4_tos = flkeys->ip.tos & ~INET_ECN_MASK; } #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/xfrm/xfrm_state_bpf.c b/net/xfrm/xfrm_state_bpf.c index 9e20d4a377f7..2248eda741f8 100644 --- a/net/xfrm/xfrm_state_bpf.c +++ b/net/xfrm/xfrm_state_bpf.c @@ -117,10 +117,10 @@ __bpf_kfunc void bpf_xdp_xfrm_state_release(struct xfrm_state *x) __bpf_kfunc_end_defs(); -BTF_SET8_START(xfrm_state_kfunc_set) +BTF_KFUNCS_START(xfrm_state_kfunc_set) BTF_ID_FLAGS(func, bpf_xdp_get_xfrm_state, KF_RET_NULL | KF_ACQUIRE) BTF_ID_FLAGS(func, bpf_xdp_xfrm_state_release, KF_RELEASE) -BTF_SET8_END(xfrm_state_kfunc_set) +BTF_KFUNCS_END(xfrm_state_kfunc_set) static const struct btf_kfunc_id_set xfrm_state_xdp_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index a5232dcfea46..810b520493f3 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2017,6 +2017,9 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb) if (xp->xfrm_nr == 0) return 0; + if (xp->xfrm_nr > XFRM_MAX_DEPTH) + return -ENOBUFS; + for (i = 0; i < xp->xfrm_nr; i++) { struct xfrm_user_tmpl *up = &vec[i]; struct xfrm_tmpl *kp = &xp->xfrm_vec[i]; diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c index d2fbcf963cdf..07ff471ed6ae 100644 --- a/samples/bpf/map_perf_test_user.c +++ b/samples/bpf/map_perf_test_user.c @@ -370,7 +370,7 @@ static void run_perf_test(int tasks) static void fill_lpm_trie(void) { - struct bpf_lpm_trie_key *key; + struct bpf_lpm_trie_key_u8 *key; unsigned long value = 0; unsigned int i; int r; diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c index 9d41db09c480..266fdd0b025d 100644 --- a/samples/bpf/xdp_router_ipv4_user.c +++ b/samples/bpf/xdp_router_ipv4_user.c @@ -91,7 +91,7 @@ static int recv_msg(struct sockaddr_nl sock_addr, int sock) static void read_route(struct nlmsghdr *nh, int nll) { char dsts[24], gws[24], ifs[16], dsts_len[24], metrics[24]; - struct bpf_lpm_trie_key *prefix_key; + struct bpf_lpm_trie_key_u8 *prefix_key; struct rtattr *rt_attr; struct rtmsg *rt_msg; int rtm_family; diff --git a/scripts/Kconfig.include b/scripts/Kconfig.include index 5a84b6443875..3ee8ecfb8c04 100644 --- a/scripts/Kconfig.include +++ b/scripts/Kconfig.include @@ -33,7 +33,7 @@ ld-option = $(success,$(LD) -v $(1)) # $(as-instr,<instr>) # Return y if the assembler supports <instr>, n otherwise -as-instr = $(success,printf "%b\n" "$(1)" | $(CC) $(CLANG_FLAGS) -c -x assembler-with-cpp -o /dev/null -) +as-instr = $(success,printf "%b\n" "$(1)" | $(CC) $(CLANG_FLAGS) -Wa$(comma)--fatal-warnings -c -x assembler-with-cpp -o /dev/null -) # check if $(CC) and $(LD) exist $(error-if,$(failure,command -v $(CC)),C compiler '$(CC)' not found) diff --git a/scripts/Makefile.compiler b/scripts/Makefile.compiler index 8fcb427405a6..92be0c9a13ee 100644 --- a/scripts/Makefile.compiler +++ b/scripts/Makefile.compiler @@ -38,7 +38,7 @@ as-option = $(call try-run,\ # Usage: aflags-y += $(call as-instr,instr,option1,option2) as-instr = $(call try-run,\ - printf "%b\n" "$(1)" | $(CC) -Werror $(CLANG_FLAGS) $(KBUILD_AFLAGS) -c -x assembler-with-cpp -o "$$TMP" -,$(2),$(3)) + printf "%b\n" "$(1)" | $(CC) -Werror $(CLANG_FLAGS) $(KBUILD_AFLAGS) -Wa$(comma)--fatal-warnings -c -x assembler-with-cpp -o "$$TMP" -,$(2),$(3)) # __cc-option # Usage: MY_CFLAGS += $(call __cc-option,$(CC),$(MY_CFLAGS),-march=winchip-c6,-march=i586) diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index 0669bac5e900..4606944984ee 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -827,7 +827,7 @@ class PrinterHelpers(Printer): print(' *{}{}'.format(' \t' if line else '', line)) print(' */') - print('static %s %s(*%s)(' % (self.map_type(proto['ret_type']), + print('static %s %s(* const %s)(' % (self.map_type(proto['ret_type']), proto['ret_star'], proto['name']), end='') comma = '' for i, a in enumerate(proto['args']): diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 98e1150bee9d..9a3dcaafb5b1 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -784,7 +784,7 @@ static int apparmor_getselfattr(unsigned int attr, struct lsm_ctx __user *lx, int error = -ENOENT; struct aa_task_ctx *ctx = task_ctx(current); struct aa_label *label = NULL; - char *value; + char *value = NULL; switch (attr) { case LSM_ATTR_CURRENT: diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c index df387de29bfa..45c3e5dda355 100644 --- a/security/integrity/digsig.c +++ b/security/integrity/digsig.c @@ -179,7 +179,8 @@ static int __init integrity_add_key(const unsigned int id, const void *data, KEY_ALLOC_NOT_IN_QUOTA); if (IS_ERR(key)) { rc = PTR_ERR(key); - pr_err("Problem loading X.509 certificate %d\n", rc); + if (id != INTEGRITY_KEYRING_MACHINE) + pr_err("Problem loading X.509 certificate %d\n", rc); } else { pr_notice("Loaded X.509 cert '%s'\n", key_ref_to_ptr(key)->description); diff --git a/security/landlock/fs.c b/security/landlock/fs.c index fc520a06f9af..0171f7eb6ee1 100644 --- a/security/landlock/fs.c +++ b/security/landlock/fs.c @@ -737,8 +737,8 @@ static int current_check_refer_path(struct dentry *const old_dentry, bool allow_parent1, allow_parent2; access_mask_t access_request_parent1, access_request_parent2; struct path mnt_dir; - layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS], - layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS]; + layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS] = {}, + layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS] = {}; if (!dom) return 0; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 8dd506ab9b1f..7da35f51a0ef 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -6559,7 +6559,7 @@ static int selinux_getselfattr(unsigned int attr, struct lsm_ctx __user *ctx, size_t *size, u32 flags) { int rc; - char *val; + char *val = NULL; int val_len; val_len = selinux_lsm_getattr(attr, current, &val); diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c index 57ee70ae50f2..ea3140d510ec 100644 --- a/security/tomoyo/common.c +++ b/security/tomoyo/common.c @@ -2649,13 +2649,14 @@ ssize_t tomoyo_write_control(struct tomoyo_io_buffer *head, { int error = buffer_len; size_t avail_len = buffer_len; - char *cp0 = head->write_buf; + char *cp0; int idx; if (!head->write) return -EINVAL; if (mutex_lock_interruptible(&head->io_sem)) return -EINTR; + cp0 = head->write_buf; head->read_user_buf_avail = 0; idx = tomoyo_read_lock(); /* Read a line and dispatch it to the policy handler. */ diff --git a/sound/core/Makefile b/sound/core/Makefile index a6b444ee2832..f6526b337137 100644 --- a/sound/core/Makefile +++ b/sound/core/Makefile @@ -32,7 +32,6 @@ snd-ump-objs := ump.o snd-ump-$(CONFIG_SND_UMP_LEGACY_RAWMIDI) += ump_convert.o snd-timer-objs := timer.o snd-hrtimer-objs := hrtimer.o -snd-rtctimer-objs := rtctimer.o snd-hwdep-objs := hwdep.o snd-seq-device-objs := seq_device.o diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index f5ff00f99788..21baf6bf7e25 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -486,6 +486,11 @@ static int fixup_unreferenced_params(struct snd_pcm_substream *substream, i = hw_param_interval_c(params, SNDRV_PCM_HW_PARAM_SAMPLE_BITS); if (snd_interval_single(i)) params->msbits = snd_interval_value(i); + m = hw_param_mask_c(params, SNDRV_PCM_HW_PARAM_FORMAT); + if (snd_mask_single(m)) { + snd_pcm_format_t format = (__force snd_pcm_format_t)snd_mask_min(m); + params->msbits = snd_pcm_format_width(format); + } } if (params->msbits) { diff --git a/sound/core/ump.c b/sound/core/ump.c index 3bef1944e955..fe7911498cc4 100644 --- a/sound/core/ump.c +++ b/sound/core/ump.c @@ -985,7 +985,7 @@ static int snd_ump_legacy_open(struct snd_rawmidi_substream *substream) struct snd_ump_endpoint *ump = substream->rmidi->private_data; int dir = substream->stream; int group = ump->legacy_mapping[substream->number]; - int err; + int err = 0; mutex_lock(&ump->open_mutex); if (ump->legacy_substreams[dir][group]) { @@ -1009,7 +1009,7 @@ static int snd_ump_legacy_open(struct snd_rawmidi_substream *substream) spin_unlock_irq(&ump->legacy_locks[dir]); unlock: mutex_unlock(&ump->open_mutex); - return 0; + return err; } static int snd_ump_legacy_close(struct snd_rawmidi_substream *substream) diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c index a13c0b408aad..7be17bca257f 100644 --- a/sound/firewire/amdtp-stream.c +++ b/sound/firewire/amdtp-stream.c @@ -951,7 +951,7 @@ static int generate_tx_packet_descs(struct amdtp_stream *s, struct pkt_desc *des // to the reason. unsigned int safe_cycle = increment_ohci_cycle_count(next_cycle, IR_JUMBO_PAYLOAD_MAX_SKIP_CYCLES); - lost = (compare_ohci_cycle_count(safe_cycle, cycle) > 0); + lost = (compare_ohci_cycle_count(safe_cycle, cycle) < 0); } if (lost) { dev_err(&s->unit->device, "Detect discontinuity of cycle: %d %d\n", diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0ec1312bffd5..62701197a019 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3684,6 +3684,7 @@ static void alc285_hp_init(struct hda_codec *codec) int i, val; int coef38, coef0d, coef36; + alc_write_coefex_idx(codec, 0x58, 0x00, 0x1888); /* write default value */ alc_update_coef_idx(codec, 0x4a, 1<<15, 1<<15); /* Reset HP JD */ coef38 = alc_read_coef_idx(codec, 0x38); /* Amp control */ coef0d = alc_read_coef_idx(codec, 0x0d); /* Digital Misc control */ @@ -7444,6 +7445,7 @@ enum { ALC287_FIXUP_LEGION_15IMHG05_AUTOMUTE, ALC287_FIXUP_YOGA7_14ITL_SPEAKERS, ALC298_FIXUP_LENOVO_C940_DUET7, + ALC287_FIXUP_LENOVO_14IRP8_DUETITL, ALC287_FIXUP_13S_GEN2_SPEAKERS, ALC256_FIXUP_SET_COEF_DEFAULTS, ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE, @@ -7495,6 +7497,26 @@ static void alc298_fixup_lenovo_c940_duet7(struct hda_codec *codec, __snd_hda_apply_fixup(codec, id, action, 0); } +/* A special fixup for Lenovo Slim/Yoga Pro 9 14IRP8 and Yoga DuetITL 2021; + * 14IRP8 PCI SSID will mistakenly be matched with the DuetITL codec SSID, + * so we need to apply a different fixup in this case. The only DuetITL codec + * SSID reported so far is the 17aa:3802 while the 14IRP8 has the 17aa:38be + * and 17aa:38bf. If it weren't for the PCI SSID, the 14IRP8 models would + * have matched correctly by their codecs. + */ +static void alc287_fixup_lenovo_14irp8_duetitl(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + int id; + + if (codec->core.subsystem_id == 0x17aa3802) + id = ALC287_FIXUP_YOGA7_14ITL_SPEAKERS; /* DuetITL */ + else + id = ALC287_FIXUP_TAS2781_I2C; /* 14IRP8 */ + __snd_hda_apply_fixup(codec, id, action, 0); +} + static const struct hda_fixup alc269_fixups[] = { [ALC269_FIXUP_GPIO2] = { .type = HDA_FIXUP_FUNC, @@ -9379,6 +9401,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc298_fixup_lenovo_c940_duet7, }, + [ALC287_FIXUP_LENOVO_14IRP8_DUETITL] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc287_fixup_lenovo_14irp8_duetitl, + }, [ALC287_FIXUP_13S_GEN2_SPEAKERS] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { @@ -9585,7 +9611,7 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = tas2781_fixup_i2c, .chained = true, - .chain_id = ALC269_FIXUP_THINKPAD_ACPI, + .chain_id = ALC285_FIXUP_THINKPAD_HEADSET_JACK, }, [ALC287_FIXUP_YOGA7_14ARB7_I2C] = { .type = HDA_FIXUP_FUNC, @@ -9746,6 +9772,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0c1c, "Dell Precision 3540", ALC236_FIXUP_DELL_DUAL_CODECS), SND_PCI_QUIRK(0x1028, 0x0c1d, "Dell Precision 3440", ALC236_FIXUP_DELL_DUAL_CODECS), SND_PCI_QUIRK(0x1028, 0x0c1e, "Dell Precision 3540", ALC236_FIXUP_DELL_DUAL_CODECS), + SND_PCI_QUIRK(0x1028, 0x0c28, "Dell Inspiron 16 Plus 7630", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS), SND_PCI_QUIRK(0x1028, 0x0c4d, "Dell", ALC287_FIXUP_CS35L41_I2C_4), SND_PCI_QUIRK(0x1028, 0x0cbd, "Dell Oasis 13 CS MTL-U", ALC289_FIXUP_DELL_CS35L41_SPI_2), SND_PCI_QUIRK(0x1028, 0x0cbe, "Dell Oasis 13 2-IN-1 MTL-U", ALC289_FIXUP_DELL_CS35L41_SPI_2), @@ -9902,6 +9929,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8973, "HP EliteBook 860 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8974, "HP EliteBook 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8975, "HP EliteBook x360 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x897d, "HP mt440 Mobile Thin Client U74", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8981, "HP Elite Dragonfly G3", ALC245_FIXUP_CS35L41_SPI_4), SND_PCI_QUIRK(0x103c, 0x898e, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x898f, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2), @@ -9927,11 +9955,13 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8aa3, "HP ProBook 450 G9 (MB 8AA1)", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8aa8, "HP EliteBook 640 G9 (MB 8AA6)", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8aab, "HP EliteBook 650 G9 (MB 8AA9)", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8ab9, "HP EliteBook 840 G8 (MB 8AB8)", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8abb, "HP ZBook Firefly 14 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ad1, "HP EliteBook 840 14 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b0f, "HP Elite mt645 G7 Mobile Thin Client U81", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b2f, "HP 255 15.6 inch G10 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), + SND_PCI_QUIRK(0x103c, 0x8b3f, "HP mt440 Mobile Thin Client U91", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b42, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b43, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b44, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), @@ -10247,7 +10277,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x31af, "ThinkCentre Station", ALC623_FIXUP_LENOVO_THINKSTATION_P340), SND_PCI_QUIRK(0x17aa, 0x334b, "Lenovo ThinkCentre M70 Gen5", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x3801, "Lenovo Yoga9 14IAP7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), - SND_PCI_QUIRK(0x17aa, 0x3802, "Lenovo Yoga DuetITL 2021", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), + SND_PCI_QUIRK(0x17aa, 0x3802, "Lenovo Yoga Pro 9 14IRP8 / DuetITL 2021", ALC287_FIXUP_LENOVO_14IRP8_DUETITL), SND_PCI_QUIRK(0x17aa, 0x3813, "Legion 7i 15IMHG05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940 / Yoga Duet 7", ALC298_FIXUP_LENOVO_C940_DUET7), SND_PCI_QUIRK(0x17aa, 0x3819, "Lenovo 13s Gen2 ITL", ALC287_FIXUP_13S_GEN2_SPEAKERS), diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index cc231185d72c..abb9589b8477 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -203,6 +203,20 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { .driver_data = &acp6x_card, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21J2"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21J0"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "21J5"), } }, diff --git a/sound/soc/amd/yc/pci-acp6x.c b/sound/soc/amd/yc/pci-acp6x.c index 7af6a349b1d4..694b8e313902 100644 --- a/sound/soc/amd/yc/pci-acp6x.c +++ b/sound/soc/amd/yc/pci-acp6x.c @@ -162,6 +162,7 @@ static int snd_acp6x_probe(struct pci_dev *pci, /* Yellow Carp device check */ switch (pci->revision) { case 0x60: + case 0x63: case 0x6f: break; default: diff --git a/sound/soc/codecs/cs35l45.c b/sound/soc/codecs/cs35l45.c index 44c221745c3b..2392c6effed8 100644 --- a/sound/soc/codecs/cs35l45.c +++ b/sound/soc/codecs/cs35l45.c @@ -184,7 +184,7 @@ static int cs35l45_activate_ctl(struct snd_soc_component *component, else snprintf(name, SNDRV_CTL_ELEM_ID_NAME_MAXLEN, "%s", ctl_name); - kcontrol = snd_soc_card_get_kcontrol(component->card, name); + kcontrol = snd_soc_card_get_kcontrol_locked(component->card, name); if (!kcontrol) { dev_err(component->dev, "Can't find kcontrol %s\n", name); return -EINVAL; diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index 995d979b6d87..cb4e83126b08 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -335,6 +335,7 @@ void cs35l56_wait_min_reset_pulse(void) EXPORT_SYMBOL_NS_GPL(cs35l56_wait_min_reset_pulse, SND_SOC_CS35L56_SHARED); static const struct reg_sequence cs35l56_system_reset_seq[] = { + REG_SEQ0(CS35L56_DSP1_HALO_STATE, 0), REG_SEQ0(CS35L56_DSP_VIRTUAL1_MBOX_1, CS35L56_MBOX_CMD_SYSTEM_RESET), }; diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 2c1313e34cce..6dd0319bc843 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -114,7 +114,7 @@ static int cs35l56_sync_asp1_mixer_widgets_with_firmware(struct cs35l56_private name = full_name; } - kcontrol = snd_soc_card_get_kcontrol(dapm->card, name); + kcontrol = snd_soc_card_get_kcontrol_locked(dapm->card, name); if (!kcontrol) { dev_warn(cs35l56->base.dev, "Could not find control %s\n", name); continue; diff --git a/sound/soc/fsl/fsl_xcvr.c b/sound/soc/fsl/fsl_xcvr.c index f0fb33d719c2..c46f64557a7f 100644 --- a/sound/soc/fsl/fsl_xcvr.c +++ b/sound/soc/fsl/fsl_xcvr.c @@ -174,7 +174,9 @@ static int fsl_xcvr_activate_ctl(struct snd_soc_dai *dai, const char *name, struct snd_kcontrol *kctl; bool enabled; - kctl = snd_soc_card_get_kcontrol(card, name); + lockdep_assert_held(&card->snd_card->controls_rwsem); + + kctl = snd_soc_card_get_kcontrol_locked(card, name); if (kctl == NULL) return -ENOENT; @@ -576,10 +578,14 @@ static int fsl_xcvr_startup(struct snd_pcm_substream *substream, xcvr->streams |= BIT(substream->stream); if (!xcvr->soc_data->spdif_only) { + struct snd_soc_card *card = dai->component->card; + /* Disable XCVR controls if there is stream started */ + down_read(&card->snd_card->controls_rwsem); fsl_xcvr_activate_ctl(dai, fsl_xcvr_mode_kctl.name, false); fsl_xcvr_activate_ctl(dai, fsl_xcvr_arc_mode_kctl.name, false); fsl_xcvr_activate_ctl(dai, fsl_xcvr_earc_capds_kctl.name, false); + up_read(&card->snd_card->controls_rwsem); } return 0; @@ -598,11 +604,15 @@ static void fsl_xcvr_shutdown(struct snd_pcm_substream *substream, /* Enable XCVR controls if there is no stream started */ if (!xcvr->streams) { if (!xcvr->soc_data->spdif_only) { + struct snd_soc_card *card = dai->component->card; + + down_read(&card->snd_card->controls_rwsem); fsl_xcvr_activate_ctl(dai, fsl_xcvr_mode_kctl.name, true); fsl_xcvr_activate_ctl(dai, fsl_xcvr_arc_mode_kctl.name, (xcvr->mode == FSL_XCVR_MODE_ARC)); fsl_xcvr_activate_ctl(dai, fsl_xcvr_earc_capds_kctl.name, (xcvr->mode == FSL_XCVR_MODE_EARC)); + up_read(&card->snd_card->controls_rwsem); } ret = regmap_update_bits(xcvr->regmap, FSL_XCVR_EXT_IER0, FSL_XCVR_IRQ_EARC_ALL, 0); diff --git a/sound/soc/qcom/lpass-cdc-dma.c b/sound/soc/qcom/lpass-cdc-dma.c index 48b03e60e3a3..8106c586f68a 100644 --- a/sound/soc/qcom/lpass-cdc-dma.c +++ b/sound/soc/qcom/lpass-cdc-dma.c @@ -259,7 +259,7 @@ static int lpass_cdc_dma_daiops_trigger(struct snd_pcm_substream *substream, int cmd, struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *soc_runtime = snd_soc_substream_to_rtd(substream); - struct lpaif_dmactl *dmactl; + struct lpaif_dmactl *dmactl = NULL; int ret = 0, id; switch (cmd) { diff --git a/sound/soc/soc-card.c b/sound/soc/soc-card.c index 285ab4c9c716..8a2f163da6bc 100644 --- a/sound/soc/soc-card.c +++ b/sound/soc/soc-card.c @@ -5,6 +5,9 @@ // Copyright (C) 2019 Renesas Electronics Corp. // Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> // + +#include <linux/lockdep.h> +#include <linux/rwsem.h> #include <sound/soc.h> #include <sound/jack.h> @@ -26,12 +29,15 @@ static inline int _soc_card_ret(struct snd_soc_card *card, return ret; } -struct snd_kcontrol *snd_soc_card_get_kcontrol(struct snd_soc_card *soc_card, - const char *name) +struct snd_kcontrol *snd_soc_card_get_kcontrol_locked(struct snd_soc_card *soc_card, + const char *name) { struct snd_card *card = soc_card->snd_card; struct snd_kcontrol *kctl; + /* must be held read or write */ + lockdep_assert_held(&card->controls_rwsem); + if (unlikely(!name)) return NULL; @@ -40,6 +46,20 @@ struct snd_kcontrol *snd_soc_card_get_kcontrol(struct snd_soc_card *soc_card, return kctl; return NULL; } +EXPORT_SYMBOL_GPL(snd_soc_card_get_kcontrol_locked); + +struct snd_kcontrol *snd_soc_card_get_kcontrol(struct snd_soc_card *soc_card, + const char *name) +{ + struct snd_card *card = soc_card->snd_card; + struct snd_kcontrol *kctl; + + down_read(&card->controls_rwsem); + kctl = snd_soc_card_get_kcontrol_locked(soc_card, name); + up_read(&card->controls_rwsem); + + return kctl; +} EXPORT_SYMBOL_GPL(snd_soc_card_get_kcontrol); static int jack_new(struct snd_soc_card *card, const char *id, int type, diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst index 5006e724d1bc..5e60825818dd 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst @@ -257,18 +257,48 @@ EXAMPLES return 0; } -This is example BPF application with two BPF programs and a mix of BPF maps -and global variables. Source code is split across two source code files. +**$ cat example3.bpf.c** + +:: + + #include <linux/ptrace.h> + #include <linux/bpf.h> + #include <bpf/bpf_helpers.h> + /* This header file is provided by the bpf_testmod module. */ + #include "bpf_testmod.h" + + int test_2_result = 0; + + /* bpf_Testmod.ko calls this function, passing a "4" + * and testmod_map->data. + */ + SEC("struct_ops/test_2") + void BPF_PROG(test_2, int a, int b) + { + test_2_result = a + b; + } + + SEC(".struct_ops") + struct bpf_testmod_ops testmod_map = { + .test_2 = (void *)test_2, + .data = 0x1, + }; + +This is example BPF application with three BPF programs and a mix of BPF +maps and global variables. Source code is split across three source code +files. **$ clang --target=bpf -g example1.bpf.c -o example1.bpf.o** **$ clang --target=bpf -g example2.bpf.c -o example2.bpf.o** -**$ bpftool gen object example.bpf.o example1.bpf.o example2.bpf.o** +**$ clang --target=bpf -g example3.bpf.c -o example3.bpf.o** + +**$ bpftool gen object example.bpf.o example1.bpf.o example2.bpf.o example3.bpf.o** -This set of commands compiles *example1.bpf.c* and *example2.bpf.c* -individually and then statically links respective object files into the final -BPF ELF object file *example.bpf.o*. +This set of commands compiles *example1.bpf.c*, *example2.bpf.c* and +*example3.bpf.c* individually and then statically links respective object +files into the final BPF ELF object file *example.bpf.o*. **$ bpftool gen skeleton example.bpf.o name example | tee example.skel.h** @@ -291,7 +321,15 @@ BPF ELF object file *example.bpf.o*. struct bpf_map *data; struct bpf_map *bss; struct bpf_map *my_map; + struct bpf_map *testmod_map; } maps; + struct { + struct example__testmod_map__bpf_testmod_ops { + const struct bpf_program *test_1; + const struct bpf_program *test_2; + int data; + } *testmod_map; + } struct_ops; struct { struct bpf_program *handle_sys_enter; struct bpf_program *handle_sys_exit; @@ -304,6 +342,7 @@ BPF ELF object file *example.bpf.o*. struct { int x; } data; + int test_2_result; } *bss; struct example__data { _Bool global_flag; @@ -342,10 +381,16 @@ BPF ELF object file *example.bpf.o*. skel->rodata->param1 = 128; + /* Change the value through the pointer of shadow type */ + skel->struct_ops.testmod_map->data = 13; + err = example__load(skel); if (err) goto cleanup; + /* The result of the function test_2() */ + printf("test_2_result: %d\n", skel->bss->test_2_result); + err = example__attach(skel); if (err) goto cleanup; @@ -372,6 +417,7 @@ BPF ELF object file *example.bpf.o*. :: + test_2_result: 17 my_map name: my_map sys_enter prog FD: 8 my_static_var: 7 diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index ee3ce2b8000d..1f579eacd9d4 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -7,6 +7,7 @@ #include <ctype.h> #include <errno.h> #include <fcntl.h> +#include <libgen.h> #include <linux/err.h> #include <stdbool.h> #include <stdio.h> @@ -54,11 +55,27 @@ static bool str_has_suffix(const char *str, const char *suffix) return true; } +static const struct btf_type * +resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id) +{ + const struct btf_type *t; + + t = skip_mods_and_typedefs(btf, id, NULL); + if (!btf_is_ptr(t)) + return NULL; + + t = skip_mods_and_typedefs(btf, t->type, res_id); + + return btf_is_func_proto(t) ? t : NULL; +} + static void get_obj_name(char *name, const char *file) { - /* Using basename() GNU version which doesn't modify arg. */ - strncpy(name, basename(file), MAX_OBJ_NAME_LEN - 1); - name[MAX_OBJ_NAME_LEN - 1] = '\0'; + char file_copy[PATH_MAX]; + + /* Using basename() POSIX version to be more portable. */ + strncpy(file_copy, file, PATH_MAX - 1)[PATH_MAX - 1] = '\0'; + strncpy(name, basename(file_copy), MAX_OBJ_NAME_LEN - 1)[MAX_OBJ_NAME_LEN - 1] = '\0'; if (str_has_suffix(name, ".o")) name[strlen(name) - 2] = '\0'; sanitize_identifier(name); @@ -906,6 +923,207 @@ codegen_progs_skeleton(struct bpf_object *obj, size_t prog_cnt, bool populate_li } } +static int walk_st_ops_shadow_vars(struct btf *btf, const char *ident, + const struct btf_type *map_type, __u32 map_type_id) +{ + LIBBPF_OPTS(btf_dump_emit_type_decl_opts, opts, .indent_level = 3); + const struct btf_type *member_type; + __u32 offset, next_offset = 0; + const struct btf_member *m; + struct btf_dump *d = NULL; + const char *member_name; + __u32 member_type_id; + int i, err = 0, n; + int size; + + d = btf_dump__new(btf, codegen_btf_dump_printf, NULL, NULL); + if (!d) + return -errno; + + n = btf_vlen(map_type); + for (i = 0, m = btf_members(map_type); i < n; i++, m++) { + member_type = skip_mods_and_typedefs(btf, m->type, &member_type_id); + member_name = btf__name_by_offset(btf, m->name_off); + + offset = m->offset / 8; + if (next_offset < offset) + printf("\t\t\tchar __padding_%d[%d];\n", i, offset - next_offset); + + switch (btf_kind(member_type)) { + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + /* scalar type */ + printf("\t\t\t"); + opts.field_name = member_name; + err = btf_dump__emit_type_decl(d, member_type_id, &opts); + if (err) { + p_err("Failed to emit type declaration for %s: %d", member_name, err); + goto out; + } + printf(";\n"); + + size = btf__resolve_size(btf, member_type_id); + if (size < 0) { + p_err("Failed to resolve size of %s: %d\n", member_name, size); + err = size; + goto out; + } + + next_offset = offset + size; + break; + + case BTF_KIND_PTR: + if (resolve_func_ptr(btf, m->type, NULL)) { + /* Function pointer */ + printf("\t\t\tstruct bpf_program *%s;\n", member_name); + + next_offset = offset + sizeof(void *); + break; + } + /* All pointer types are unsupported except for + * function pointers. + */ + fallthrough; + + default: + /* Unsupported types + * + * Types other than scalar types and function + * pointers are currently not supported in order to + * prevent conflicts in the generated code caused + * by multiple definitions. For instance, if the + * struct type FOO is used in a struct_ops map, + * bpftool has to generate definitions for FOO, + * which may result in conflicts if FOO is defined + * in different skeleton files. + */ + size = btf__resolve_size(btf, member_type_id); + if (size < 0) { + p_err("Failed to resolve size of %s: %d\n", member_name, size); + err = size; + goto out; + } + printf("\t\t\tchar __unsupported_%d[%d];\n", i, size); + + next_offset = offset + size; + break; + } + } + + /* Cannot fail since it must be a struct type */ + size = btf__resolve_size(btf, map_type_id); + if (next_offset < (__u32)size) + printf("\t\t\tchar __padding_end[%d];\n", size - next_offset); + +out: + btf_dump__free(d); + + return err; +} + +/* Generate the pointer of the shadow type for a struct_ops map. + * + * This function adds a pointer of the shadow type for a struct_ops map. + * The members of a struct_ops map can be exported through a pointer to a + * shadow type. The user can access these members through the pointer. + * + * A shadow type includes not all members, only members of some types. + * They are scalar types and function pointers. The function pointers are + * translated to the pointer of the struct bpf_program. The scalar types + * are translated to the original type without any modifiers. + * + * Unsupported types will be translated to a char array to occupy the same + * space as the original field, being renamed as __unsupported_*. The user + * should treat these fields as opaque data. + */ +static int gen_st_ops_shadow_type(const char *obj_name, struct btf *btf, const char *ident, + const struct bpf_map *map) +{ + const struct btf_type *map_type; + const char *type_name; + __u32 map_type_id; + int err; + + map_type_id = bpf_map__btf_value_type_id(map); + if (map_type_id == 0) + return -EINVAL; + map_type = btf__type_by_id(btf, map_type_id); + if (!map_type) + return -EINVAL; + + type_name = btf__name_by_offset(btf, map_type->name_off); + + printf("\t\tstruct %s__%s__%s {\n", obj_name, ident, type_name); + + err = walk_st_ops_shadow_vars(btf, ident, map_type, map_type_id); + if (err) + return err; + + printf("\t\t} *%s;\n", ident); + + return 0; +} + +static int gen_st_ops_shadow(const char *obj_name, struct btf *btf, struct bpf_object *obj) +{ + int err, st_ops_cnt = 0; + struct bpf_map *map; + char ident[256]; + + if (!btf) + return 0; + + /* Generate the pointers to shadow types of + * struct_ops maps. + */ + bpf_object__for_each_map(map, obj) { + if (bpf_map__type(map) != BPF_MAP_TYPE_STRUCT_OPS) + continue; + if (!get_map_ident(map, ident, sizeof(ident))) + continue; + + if (st_ops_cnt == 0) /* first struct_ops map */ + printf("\tstruct {\n"); + st_ops_cnt++; + + err = gen_st_ops_shadow_type(obj_name, btf, ident, map); + if (err) + return err; + } + + if (st_ops_cnt) + printf("\t} struct_ops;\n"); + + return 0; +} + +/* Generate the code to initialize the pointers of shadow types. */ +static void gen_st_ops_shadow_init(struct btf *btf, struct bpf_object *obj) +{ + struct bpf_map *map; + char ident[256]; + + if (!btf) + return; + + /* Initialize the pointers to_ops shadow types of + * struct_ops maps. + */ + bpf_object__for_each_map(map, obj) { + if (bpf_map__type(map) != BPF_MAP_TYPE_STRUCT_OPS) + continue; + if (!get_map_ident(map, ident, sizeof(ident))) + continue; + codegen("\ + \n\ + obj->struct_ops.%1$s = bpf_map__initial_value(obj->maps.%1$s, NULL);\n\ + \n\ + ", ident); + } +} + static int do_skeleton(int argc, char **argv) { char header_guard[MAX_OBJ_NAME_LEN + sizeof("__SKEL_H__")]; @@ -1049,6 +1267,11 @@ static int do_skeleton(int argc, char **argv) printf("\t} maps;\n"); } + btf = bpf_object__btf(obj); + err = gen_st_ops_shadow(obj_name, btf, obj); + if (err) + goto out; + if (prog_cnt) { printf("\tstruct {\n"); bpf_object__for_each_program(prog, obj) { @@ -1072,7 +1295,6 @@ static int do_skeleton(int argc, char **argv) printf("\t} links;\n"); } - btf = bpf_object__btf(obj); if (btf) { err = codegen_datasecs(obj, obj_name); if (err) @@ -1130,6 +1352,12 @@ static int do_skeleton(int argc, char **argv) if (err) \n\ goto err_out; \n\ \n\ + ", obj_name); + + gen_st_ops_shadow_init(btf, obj); + + codegen("\ + \n\ return obj; \n\ err_out: \n\ %1$s__destroy(obj); \n\ @@ -1439,6 +1667,10 @@ static int do_subskeleton(int argc, char **argv) printf("\t} maps;\n"); } + err = gen_st_ops_shadow(obj_name, btf, obj); + if (err) + goto out; + if (prog_cnt) { printf("\tstruct {\n"); bpf_object__for_each_program(prog, obj) { @@ -1550,6 +1782,12 @@ static int do_subskeleton(int argc, char **argv) if (err) \n\ goto err; \n\ \n\ + "); + + gen_st_ops_shadow_init(btf, obj); + + codegen("\ + \n\ return obj; \n\ err: \n\ %1$s__destroy(obj); \n\ diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 27a23196d58e..d9520cb826b3 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -70,6 +70,7 @@ #include <sys/stat.h> #include <fcntl.h> #include <errno.h> +#include <linux/btf_ids.h> #include <linux/rbtree.h> #include <linux/zalloc.h> #include <linux/err.h> @@ -78,7 +79,7 @@ #include <subcmd/parse-options.h> #define BTF_IDS_SECTION ".BTF_ids" -#define BTF_ID "__BTF_ID__" +#define BTF_ID_PREFIX "__BTF_ID__" #define BTF_STRUCT "struct" #define BTF_UNION "union" @@ -89,6 +90,14 @@ #define ADDR_CNT 100 +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define ELFDATANATIVE ELFDATA2LSB +#elif __BYTE_ORDER == __BIG_ENDIAN +# define ELFDATANATIVE ELFDATA2MSB +#else +# error "Unknown machine endianness!" +#endif + struct btf_id { struct rb_node rb_node; char *name; @@ -116,6 +125,7 @@ struct object { int idlist_shndx; size_t strtabidx; unsigned long idlist_addr; + int encoding; } efile; struct rb_root sets; @@ -161,7 +171,7 @@ static int eprintf(int level, int var, const char *fmt, ...) static bool is_btf_id(const char *name) { - return name && !strncmp(name, BTF_ID, sizeof(BTF_ID) - 1); + return name && !strncmp(name, BTF_ID_PREFIX, sizeof(BTF_ID_PREFIX) - 1); } static struct btf_id *btf_id__find(struct rb_root *root, const char *name) @@ -319,6 +329,7 @@ static int elf_collect(struct object *obj) { Elf_Scn *scn = NULL; size_t shdrstrndx; + GElf_Ehdr ehdr; int idx = 0; Elf *elf; int fd; @@ -350,6 +361,13 @@ static int elf_collect(struct object *obj) return -1; } + if (gelf_getehdr(obj->efile.elf, &ehdr) == NULL) { + pr_err("FAILED cannot get ELF header: %s\n", + elf_errmsg(-1)); + return -1; + } + obj->efile.encoding = ehdr.e_ident[EI_DATA]; + /* * Scan all the elf sections and look for save data * from .BTF_ids section and symbols. @@ -441,7 +459,7 @@ static int symbols_collect(struct object *obj) * __BTF_ID__TYPE__vfs_truncate__0 * prefix = ^ */ - prefix = name + sizeof(BTF_ID) - 1; + prefix = name + sizeof(BTF_ID_PREFIX) - 1; /* struct */ if (!strncmp(prefix, BTF_STRUCT, sizeof(BTF_STRUCT) - 1)) { @@ -649,19 +667,18 @@ static int cmp_id(const void *pa, const void *pb) static int sets_patch(struct object *obj) { Elf_Data *data = obj->efile.idlist; - int *ptr = data->d_buf; struct rb_node *next; next = rb_first(&obj->sets); while (next) { - unsigned long addr, idx; + struct btf_id_set8 *set8; + struct btf_id_set *set; + unsigned long addr, off; struct btf_id *id; - int *base; - int cnt; id = rb_entry(next, struct btf_id, rb_node); addr = id->addr[0]; - idx = addr - obj->efile.idlist_addr; + off = addr - obj->efile.idlist_addr; /* sets are unique */ if (id->addr_cnt != 1) { @@ -670,14 +687,39 @@ static int sets_patch(struct object *obj) return -1; } - idx = idx / sizeof(int); - base = &ptr[idx] + (id->is_set8 ? 2 : 1); - cnt = ptr[idx]; + if (id->is_set) { + set = data->d_buf + off; + qsort(set->ids, set->cnt, sizeof(set->ids[0]), cmp_id); + } else { + set8 = data->d_buf + off; + /* + * Make sure id is at the beginning of the pairs + * struct, otherwise the below qsort would not work. + */ + BUILD_BUG_ON(set8->pairs != &set8->pairs[0].id); + qsort(set8->pairs, set8->cnt, sizeof(set8->pairs[0]), cmp_id); - pr_debug("sorting addr %5lu: cnt %6d [%s]\n", - (idx + 1) * sizeof(int), cnt, id->name); + /* + * When ELF endianness does not match endianness of the + * host, libelf will do the translation when updating + * the ELF. This, however, corrupts SET8 flags which are + * already in the target endianness. So, let's bswap + * them to the host endianness and libelf will then + * correctly translate everything. + */ + if (obj->efile.encoding != ELFDATANATIVE) { + int i; + + set8->flags = bswap_32(set8->flags); + for (i = 0; i < set8->cnt; i++) { + set8->pairs[i].flags = + bswap_32(set8->pairs[i].flags); + } + } + } - qsort(base, cnt, id->is_set8 ? sizeof(uint64_t) : sizeof(int), cmp_id); + pr_debug("sorting addr %5lu: cnt %6d [%s]\n", + off, id->is_set ? set->cnt : set8->cnt, id->name); next = rb_next(next); } diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h index 2f882d5cb30f..72535f00572f 100644 --- a/tools/include/linux/btf_ids.h +++ b/tools/include/linux/btf_ids.h @@ -8,6 +8,15 @@ struct btf_id_set { u32 ids[]; }; +struct btf_id_set8 { + u32 cnt; + u32 flags; + struct { + u32 id; + u32 flags; + } pairs[]; +}; + #ifdef CONFIG_DEBUG_INFO_BTF #include <linux/compiler.h> /* for __PASTE */ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d96708380e52..a241f407c234 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -77,12 +77,29 @@ struct bpf_insn { __s32 imm; /* signed immediate constant */ }; -/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ +/* Deprecated: use struct bpf_lpm_trie_key_u8 (when the "data" member is needed for + * byte access) or struct bpf_lpm_trie_key_hdr (when using an alternative type for + * the trailing flexible array member) instead. + */ struct bpf_lpm_trie_key { __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ __u8 data[0]; /* Arbitrary size */ }; +/* Header for bpf_lpm_trie_key structs */ +struct bpf_lpm_trie_key_hdr { + __u32 prefixlen; +}; + +/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry, with trailing byte array. */ +struct bpf_lpm_trie_key_u8 { + union { + struct bpf_lpm_trie_key_hdr hdr; + __u32 prefixlen; + }; + __u8 data[]; /* Arbitrary size */ +}; + struct bpf_cgroup_storage_key { __u64 cgroup_inode_id; /* cgroup inode id */ __u32 attach_type; /* program attach type (enum bpf_attach_type) */ @@ -617,7 +634,11 @@ union bpf_iter_link_info { * to NULL to begin the batched operation. After each subsequent * **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant * *out_batch* as the *in_batch* for the next operation to - * continue iteration from the current point. + * continue iteration from the current point. Both *in_batch* and + * *out_batch* must point to memory large enough to hold a key, + * except for maps of type **BPF_MAP_TYPE_{HASH, PERCPU_HASH, + * LRU_HASH, LRU_PERCPU_HASH}**, for which batch parameters + * must be at least 4 bytes wide regardless of key size. * * The *keys* and *values* are output parameters which must point * to memory large enough to hold *count* items based on the key diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 1441f642c563..df0db2f0cdb7 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -35,7 +35,7 @@ extern "C" { #endif -int libbpf_set_memlock_rlim(size_t memlock_bytes); +LIBBPF_API int libbpf_set_memlock_rlim(size_t memlock_bytes); struct bpf_map_create_opts { size_t sz; /* size of this struct for forward/backward compatibility */ @@ -190,10 +190,14 @@ LIBBPF_API int bpf_map_delete_batch(int fd, const void *keys, /** * @brief **bpf_map_lookup_batch()** allows for batch lookup of BPF map elements. * - * The parameter *in_batch* is the address of the first element in the batch to read. - * *out_batch* is an output parameter that should be passed as *in_batch* to subsequent - * calls to **bpf_map_lookup_batch()**. NULL can be passed for *in_batch* to indicate - * that the batched lookup starts from the beginning of the map. + * The parameter *in_batch* is the address of the first element in the batch to + * read. *out_batch* is an output parameter that should be passed as *in_batch* + * to subsequent calls to **bpf_map_lookup_batch()**. NULL can be passed for + * *in_batch* to indicate that the batched lookup starts from the beginning of + * the map. Both *in_batch* and *out_batch* must point to memory large enough to + * hold a single key, except for maps of type **BPF_MAP_TYPE_{HASH, PERCPU_HASH, + * LRU_HASH, LRU_PERCPU_HASH}**, for which the memory size must be at + * least 4 bytes wide regardless of key size. * * The *keys* and *values* are output parameters which must point to memory large enough to * hold *count* items based on the key and value size of the map *map_fd*. The *keys* @@ -226,7 +230,10 @@ LIBBPF_API int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, * * @param fd BPF map file descriptor * @param in_batch address of the first element in batch to read, can pass NULL to - * get address of the first element in *out_batch* + * get address of the first element in *out_batch*. If not NULL, must be large + * enough to hold a key. For **BPF_MAP_TYPE_{HASH, PERCPU_HASH, LRU_HASH, + * LRU_PERCPU_HASH}**, the memory size must be at least 4 bytes wide regardless + * of key size. * @param out_batch output parameter that should be passed to next call as *in_batch* * @param keys pointer to an array of *count* keys * @param values pointer to an array large enough for *count* values @@ -500,7 +507,10 @@ LIBBPF_API int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len); * program corresponding to *prog_fd*. * * Populates up to *info_len* bytes of *info* and updates *info_len* with the - * actual number of bytes written to *info*. + * actual number of bytes written to *info*. Note that *info* should be + * zero-initialized or initialized as expected by the requested *info* + * type. Failing to (zero-)initialize *info* under certain circumstances can + * result in this helper returning an error. * * @param prog_fd BPF program file descriptor * @param info pointer to **struct bpf_prog_info** that will be populated with @@ -517,7 +527,10 @@ LIBBPF_API int bpf_prog_get_info_by_fd(int prog_fd, struct bpf_prog_info *info, * map corresponding to *map_fd*. * * Populates up to *info_len* bytes of *info* and updates *info_len* with the - * actual number of bytes written to *info*. + * actual number of bytes written to *info*. Note that *info* should be + * zero-initialized or initialized as expected by the requested *info* + * type. Failing to (zero-)initialize *info* under certain circumstances can + * result in this helper returning an error. * * @param map_fd BPF map file descriptor * @param info pointer to **struct bpf_map_info** that will be populated with @@ -530,11 +543,14 @@ LIBBPF_API int bpf_prog_get_info_by_fd(int prog_fd, struct bpf_prog_info *info, LIBBPF_API int bpf_map_get_info_by_fd(int map_fd, struct bpf_map_info *info, __u32 *info_len); /** - * @brief **bpf_btf_get_info_by_fd()** obtains information about the + * @brief **bpf_btf_get_info_by_fd()** obtains information about the * BTF object corresponding to *btf_fd*. * * Populates up to *info_len* bytes of *info* and updates *info_len* with the - * actual number of bytes written to *info*. + * actual number of bytes written to *info*. Note that *info* should be + * zero-initialized or initialized as expected by the requested *info* + * type. Failing to (zero-)initialize *info* under certain circumstances can + * result in this helper returning an error. * * @param btf_fd BTF object file descriptor * @param info pointer to **struct bpf_btf_info** that will be populated with @@ -551,7 +567,10 @@ LIBBPF_API int bpf_btf_get_info_by_fd(int btf_fd, struct bpf_btf_info *info, __u * link corresponding to *link_fd*. * * Populates up to *info_len* bytes of *info* and updates *info_len* with the - * actual number of bytes written to *info*. + * actual number of bytes written to *info*. Note that *info* should be + * zero-initialized or initialized as expected by the requested *info* + * type. Failing to (zero-)initialize *info* under certain circumstances can + * result in this helper returning an error. * * @param link_fd BPF link file descriptor * @param info pointer to **struct bpf_link_info** that will be populated with diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index 5aec301e9585..1ce738d91685 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -2,6 +2,8 @@ #ifndef __BPF_CORE_READ_H__ #define __BPF_CORE_READ_H__ +#include <bpf/bpf_helpers.h> + /* * enum bpf_field_info_kind is passed as a second argument into * __builtin_preserve_field_info() built-in to get a specific aspect of @@ -44,7 +46,7 @@ enum bpf_enum_value_kind { #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \ bpf_probe_read_kernel( \ - (void *)dst, \ + (void *)dst, \ __CORE_RELO(src, fld, BYTE_SIZE), \ (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) #else @@ -143,8 +145,29 @@ enum bpf_enum_value_kind { } \ }) +/* Differentiator between compilers builtin implementations. This is a + * requirement due to the compiler parsing differences where GCC optimizes + * early in parsing those constructs of type pointers to the builtin specific + * type, resulting in not being possible to collect the required type + * information in the builtin expansion. + */ +#ifdef __clang__ +#define ___bpf_typeof(type) ((typeof(type) *) 0) +#else +#define ___bpf_typeof1(type, NR) ({ \ + extern typeof(type) *___concat(bpf_type_tmp_, NR); \ + ___concat(bpf_type_tmp_, NR); \ +}) +#define ___bpf_typeof(type) ___bpf_typeof1(type, __COUNTER__) +#endif + +#ifdef __clang__ #define ___bpf_field_ref1(field) (field) -#define ___bpf_field_ref2(type, field) (((typeof(type) *)0)->field) +#define ___bpf_field_ref2(type, field) (___bpf_typeof(type)->field) +#else +#define ___bpf_field_ref1(field) (&(field)) +#define ___bpf_field_ref2(type, field) (&(___bpf_typeof(type)->field)) +#endif #define ___bpf_field_ref(args...) \ ___bpf_apply(___bpf_field_ref, ___bpf_narg(args))(args) @@ -194,7 +217,7 @@ enum bpf_enum_value_kind { * BTF. Always succeeds. */ #define bpf_core_type_id_local(type) \ - __builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_LOCAL) + __builtin_btf_type_id(*___bpf_typeof(type), BPF_TYPE_ID_LOCAL) /* * Convenience macro to get BTF type ID of a target kernel's type that matches @@ -204,7 +227,7 @@ enum bpf_enum_value_kind { * - 0, if no matching type was found in a target kernel BTF. */ #define bpf_core_type_id_kernel(type) \ - __builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_TARGET) + __builtin_btf_type_id(*___bpf_typeof(type), BPF_TYPE_ID_TARGET) /* * Convenience macro to check that provided named type @@ -214,7 +237,7 @@ enum bpf_enum_value_kind { * 0, if no matching type is found. */ #define bpf_core_type_exists(type) \ - __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_EXISTS) + __builtin_preserve_type_info(*___bpf_typeof(type), BPF_TYPE_EXISTS) /* * Convenience macro to check that provided named type @@ -224,7 +247,7 @@ enum bpf_enum_value_kind { * 0, if the type does not match any in the target kernel */ #define bpf_core_type_matches(type) \ - __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_MATCHES) + __builtin_preserve_type_info(*___bpf_typeof(type), BPF_TYPE_MATCHES) /* * Convenience macro to get the byte size of a provided named type @@ -234,7 +257,7 @@ enum bpf_enum_value_kind { * 0, if no matching type is found. */ #define bpf_core_type_size(type) \ - __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_SIZE) + __builtin_preserve_type_info(*___bpf_typeof(type), BPF_TYPE_SIZE) /* * Convenience macro to check that provided enumerator value is defined in @@ -244,8 +267,13 @@ enum bpf_enum_value_kind { * kernel's BTF; * 0, if no matching enum and/or enum value within that enum is found. */ +#ifdef __clang__ #define bpf_core_enum_value_exists(enum_type, enum_value) \ __builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_EXISTS) +#else +#define bpf_core_enum_value_exists(enum_type, enum_value) \ + __builtin_preserve_enum_value(___bpf_typeof(enum_type), enum_value, BPF_ENUMVAL_EXISTS) +#endif /* * Convenience macro to get the integer value of an enumerator value in @@ -255,8 +283,13 @@ enum bpf_enum_value_kind { * present in target kernel's BTF; * 0, if no matching enum and/or enum value within that enum is found. */ +#ifdef __clang__ #define bpf_core_enum_value(enum_type, enum_value) \ __builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_VALUE) +#else +#define bpf_core_enum_value(enum_type, enum_value) \ + __builtin_preserve_enum_value(___bpf_typeof(enum_type), enum_value, BPF_ENUMVAL_VALUE) +#endif /* * bpf_core_read() abstracts away bpf_probe_read_kernel() call and captures @@ -292,6 +325,17 @@ enum bpf_enum_value_kind { #define bpf_core_read_user_str(dst, sz, src) \ bpf_probe_read_user_str(dst, sz, (const void *)__builtin_preserve_access_index(src)) +extern void *bpf_rdonly_cast(const void *obj, __u32 btf_id) __ksym __weak; + +/* + * Cast provided pointer *ptr* into a pointer to a specified *type* in such + * a way that BPF verifier will become aware of associated kernel-side BTF + * type. This allows to access members of kernel types directly without the + * need to use BPF_CORE_READ() macros. + */ +#define bpf_core_cast(ptr, type) \ + ((typeof(type) *)bpf_rdonly_cast((ptr), bpf_core_type_id_kernel(type))) + #define ___concat(a, b) a ## b #define ___apply(fn, n) ___concat(fn, n) #define ___nth(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, __11, N, ...) N diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 2324cc42b017..79eaa581be98 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -190,6 +190,8 @@ enum libbpf_tristate { #define __arg_ctx __attribute__((btf_decl_tag("arg:ctx"))) #define __arg_nonnull __attribute((btf_decl_tag("arg:nonnull"))) +#define __arg_nullable __attribute((btf_decl_tag("arg:nullable"))) +#define __arg_trusted __attribute((btf_decl_tag("arg:trusted"))) #ifndef ___bpf_concat #define ___bpf_concat(a, b) a ## b diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index ec92b87cae01..a17b4c9c4213 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1079,6 +1079,11 @@ struct btf *btf__new(const void *data, __u32 size) return libbpf_ptr(btf_new(data, size, NULL)); } +struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf) +{ + return libbpf_ptr(btf_new(data, size, base_btf)); +} + static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, struct btf_ext **btf_ext) { @@ -3045,12 +3050,16 @@ done: return btf_ext; } -const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size) +const void *btf_ext__raw_data(const struct btf_ext *btf_ext, __u32 *size) { *size = btf_ext->data_size; return btf_ext->data; } +__attribute__((alias("btf_ext__raw_data"))) +const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size); + + struct btf_dedup; static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_opts *opts); @@ -4932,10 +4941,9 @@ static int btf_dedup_remap_types(struct btf_dedup *d) */ struct btf *btf__load_vmlinux_btf(void) { + const char *sysfs_btf_path = "/sys/kernel/btf/vmlinux"; + /* fall back locations, trying to find vmlinux on disk */ const char *locations[] = { - /* try canonical vmlinux BTF through sysfs first */ - "/sys/kernel/btf/vmlinux", - /* fall back to trying to find vmlinux on disk otherwise */ "/boot/vmlinux-%1$s", "/lib/modules/%1$s/vmlinux-%1$s", "/lib/modules/%1$s/build/vmlinux", @@ -4949,8 +4957,23 @@ struct btf *btf__load_vmlinux_btf(void) struct btf *btf; int i, err; - uname(&buf); + /* is canonical sysfs location accessible? */ + if (faccessat(AT_FDCWD, sysfs_btf_path, F_OK, AT_EACCESS) < 0) { + pr_warn("kernel BTF is missing at '%s', was CONFIG_DEBUG_INFO_BTF enabled?\n", + sysfs_btf_path); + } else { + btf = btf__parse(sysfs_btf_path, NULL); + if (!btf) { + err = -errno; + pr_warn("failed to read kernel BTF from '%s': %d\n", sysfs_btf_path, err); + return libbpf_err_ptr(err); + } + pr_debug("loaded kernel BTF from '%s'\n", path); + return btf; + } + /* try fallback locations */ + uname(&buf); for (i = 0; i < ARRAY_SIZE(locations); i++) { snprintf(path, PATH_MAX, locations[i], buf.release); diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c index 5a5c766bf615..6b0738ad7063 100644 --- a/tools/lib/bpf/features.c +++ b/tools/lib/bpf/features.c @@ -407,6 +407,61 @@ static int probe_kern_btf_enum64(int token_fd) strs, sizeof(strs), token_fd)); } +static int probe_kern_arg_ctx_tag(int token_fd) +{ + static const char strs[] = "\0a\0b\0arg:ctx\0"; + const __u32 types[] = { + /* [1] INT */ + BTF_TYPE_INT_ENC(1 /* "a" */, BTF_INT_SIGNED, 0, 32, 4), + /* [2] PTR -> VOID */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0), + /* [3] FUNC_PROTO `int(void *a)` */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1), + BTF_PARAM_ENC(1 /* "a" */, 2), + /* [4] FUNC 'a' -> FUNC_PROTO (main prog) */ + BTF_TYPE_ENC(1 /* "a" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 3), + /* [5] FUNC_PROTO `int(void *b __arg_ctx)` */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1), + BTF_PARAM_ENC(3 /* "b" */, 2), + /* [6] FUNC 'b' -> FUNC_PROTO (subprog) */ + BTF_TYPE_ENC(3 /* "b" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 5), + /* [7] DECL_TAG 'arg:ctx' -> func 'b' arg 'b' */ + BTF_TYPE_DECL_TAG_ENC(5 /* "arg:ctx" */, 6, 0), + }; + const struct bpf_insn insns[] = { + /* main prog */ + BPF_CALL_REL(+1), + BPF_EXIT_INSN(), + /* global subprog */ + BPF_EMIT_CALL(BPF_FUNC_get_func_ip), /* needs PTR_TO_CTX */ + BPF_EXIT_INSN(), + }; + const struct bpf_func_info_min func_infos[] = { + { 0, 4 }, /* main prog -> FUNC 'a' */ + { 2, 6 }, /* subprog -> FUNC 'b' */ + }; + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); + int prog_fd, btf_fd, insn_cnt = ARRAY_SIZE(insns); + + btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), token_fd); + if (btf_fd < 0) + return 0; + + opts.prog_btf_fd = btf_fd; + opts.func_info = &func_infos; + opts.func_info_cnt = ARRAY_SIZE(func_infos); + opts.func_info_rec_size = sizeof(func_infos[0]); + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, "det_arg_ctx", + "GPL", insns, insn_cnt, &opts); + close(btf_fd); + + return probe_fd(prog_fd); +} + typedef int (*feature_probe_fn)(int /* token_fd */); static struct kern_feature_cache feature_cache; @@ -476,6 +531,9 @@ static struct kern_feature_desc { [FEAT_UPROBE_MULTI_LINK] = { "BPF multi-uprobe link support", probe_uprobe_multi_link, }, + [FEAT_ARG_CTX_TAG] = { + "kernel-side __arg_ctx tag", probe_kern_arg_ctx_tag, + }, }; bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index fa7094ff3e66..6c2979f1b471 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -33,6 +33,7 @@ #include <linux/filter.h> #include <linux/limits.h> #include <linux/perf_event.h> +#include <linux/bpf_perf_event.h> #include <linux/ring_buffer.h> #include <sys/epoll.h> #include <sys/ioctl.h> @@ -1013,6 +1014,19 @@ static bool bpf_map__is_struct_ops(const struct bpf_map *map) return map->def.type == BPF_MAP_TYPE_STRUCT_OPS; } +static bool is_valid_st_ops_program(struct bpf_object *obj, + const struct bpf_program *prog) +{ + int i; + + for (i = 0; i < obj->nr_programs; i++) { + if (&obj->programs[i] == prog) + return prog->type == BPF_PROG_TYPE_STRUCT_OPS; + } + + return false; +} + /* Init the map's fields that depend on kern_btf */ static int bpf_map__init_kern_struct_ops(struct bpf_map *map) { @@ -1101,9 +1115,16 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map) if (btf_is_ptr(mtype)) { struct bpf_program *prog; - prog = st_ops->progs[i]; + /* Update the value from the shadow type */ + prog = *(void **)mdata; + st_ops->progs[i] = prog; if (!prog) continue; + if (!is_valid_st_ops_program(obj, prog)) { + pr_warn("struct_ops init_kern %s: member %s is not a struct_ops program\n", + map->name, mname); + return -ENOTSUP; + } kern_mtype = skip_mods_and_typedefs(kern_btf, kern_mtype->type, @@ -1228,6 +1249,7 @@ static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name, map->name = strdup(var_name); if (!map->name) return -ENOMEM; + map->btf_value_type_id = type_id; map->def.type = BPF_MAP_TYPE_STRUCT_OPS; map->def.key_size = sizeof(int); @@ -1524,11 +1546,20 @@ static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *nam return ERR_PTR(-ENOENT); } +/* Some versions of Android don't provide memfd_create() in their libc + * implementation, so avoid complications and just go straight to Linux + * syscall. + */ +static int sys_memfd_create(const char *name, unsigned flags) +{ + return syscall(__NR_memfd_create, name, flags); +} + static int create_placeholder_fd(void) { int fd; - fd = ensure_good_fd(memfd_create("libbpf-placeholder-fd", MFD_CLOEXEC)); + fd = ensure_good_fd(sys_memfd_create("libbpf-placeholder-fd", MFD_CLOEXEC)); if (fd < 0) return -errno; return fd; @@ -4660,7 +4691,7 @@ bpf_object__probe_loading(struct bpf_object *obj) bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) { - if (obj && obj->gen_loader) + if (obj->gen_loader) /* To generate loader program assume the latest kernel * to avoid doing extra prog_load, map_create syscalls. */ @@ -4847,6 +4878,10 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.btf_value_type_id = 0; map->btf_key_type_id = 0; map->btf_value_type_id = 0; + break; + case BPF_MAP_TYPE_STRUCT_OPS: + create_attr.btf_value_type_id = 0; + break; default: break; } @@ -6339,6 +6374,14 @@ static struct { /* all other program types don't have "named" context structs */ }; +/* forward declarations for arch-specific underlying types of bpf_user_pt_regs_t typedef, + * for below __builtin_types_compatible_p() checks; + * with this approach we don't need any extra arch-specific #ifdef guards + */ +struct pt_regs; +struct user_pt_regs; +struct user_regs_struct; + static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_program *prog, const char *subprog_name, int arg_idx, int arg_type_id, const char *ctx_name) @@ -6379,11 +6422,21 @@ static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_pro /* special cases */ switch (prog->type) { case BPF_PROG_TYPE_KPROBE: - case BPF_PROG_TYPE_PERF_EVENT: /* `struct pt_regs *` is expected, but we need to fix up */ if (btf_is_struct(t) && strcmp(tname, "pt_regs") == 0) return true; break; + case BPF_PROG_TYPE_PERF_EVENT: + if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct pt_regs) && + btf_is_struct(t) && strcmp(tname, "pt_regs") == 0) + return true; + if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_pt_regs) && + btf_is_struct(t) && strcmp(tname, "user_pt_regs") == 0) + return true; + if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_regs_struct) && + btf_is_struct(t) && strcmp(tname, "user_regs_struct") == 0) + return true; + break; case BPF_PROG_TYPE_RAW_TRACEPOINT: case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: /* allow u64* as ctx */ @@ -6462,69 +6515,6 @@ static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_progr return fn_id; } -static int probe_kern_arg_ctx_tag(void) -{ - /* To minimize merge conflicts with BPF token series that refactors - * feature detection code a lot, we don't integrate - * probe_kern_arg_ctx_tag() into kernel_supports() feature-detection - * framework yet, doing our own caching internally. - * This will be cleaned up a bit later when bpf/bpf-next trees settle. - */ - static int cached_result = -1; - static const char strs[] = "\0a\0b\0arg:ctx\0"; - const __u32 types[] = { - /* [1] INT */ - BTF_TYPE_INT_ENC(1 /* "a" */, BTF_INT_SIGNED, 0, 32, 4), - /* [2] PTR -> VOID */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0), - /* [3] FUNC_PROTO `int(void *a)` */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1), - BTF_PARAM_ENC(1 /* "a" */, 2), - /* [4] FUNC 'a' -> FUNC_PROTO (main prog) */ - BTF_TYPE_ENC(1 /* "a" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 3), - /* [5] FUNC_PROTO `int(void *b __arg_ctx)` */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1), - BTF_PARAM_ENC(3 /* "b" */, 2), - /* [6] FUNC 'b' -> FUNC_PROTO (subprog) */ - BTF_TYPE_ENC(3 /* "b" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 5), - /* [7] DECL_TAG 'arg:ctx' -> func 'b' arg 'b' */ - BTF_TYPE_DECL_TAG_ENC(5 /* "arg:ctx" */, 6, 0), - }; - const struct bpf_insn insns[] = { - /* main prog */ - BPF_CALL_REL(+1), - BPF_EXIT_INSN(), - /* global subprog */ - BPF_EMIT_CALL(BPF_FUNC_get_func_ip), /* needs PTR_TO_CTX */ - BPF_EXIT_INSN(), - }; - const struct bpf_func_info_min func_infos[] = { - { 0, 4 }, /* main prog -> FUNC 'a' */ - { 2, 6 }, /* subprog -> FUNC 'b' */ - }; - LIBBPF_OPTS(bpf_prog_load_opts, opts); - int prog_fd, btf_fd, insn_cnt = ARRAY_SIZE(insns); - - if (cached_result >= 0) - return cached_result; - - btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), 0); - if (btf_fd < 0) - return 0; - - opts.prog_btf_fd = btf_fd; - opts.func_info = &func_infos; - opts.func_info_cnt = ARRAY_SIZE(func_infos); - opts.func_info_rec_size = sizeof(func_infos[0]); - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, "det_arg_ctx", - "GPL", insns, insn_cnt, &opts); - close(btf_fd); - - cached_result = probe_fd(prog_fd); - return cached_result; -} - /* Check if main program or global subprog's function prototype has `arg:ctx` * argument tags, and, if necessary, substitute correct type to match what BPF * verifier would expect, taking into account specific program type. This @@ -6549,7 +6539,7 @@ static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_progra return 0; /* don't do any fix ups if kernel natively supports __arg_ctx */ - if (probe_kern_arg_ctx_tag() > 0) + if (kernel_supports(obj, FEAT_ARG_CTX_TAG)) return 0; /* some BPF program types just don't have named context structs, so @@ -9340,7 +9330,9 @@ static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, return NULL; } -/* Collect the reloc from ELF and populate the st_ops->progs[] */ +/* Collect the reloc from ELF, populate the st_ops->progs[], and update + * st_ops->data for shadow type. + */ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data) { @@ -9454,6 +9446,14 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, } st_ops->progs[member_idx] = prog; + + /* st_ops->data will be exposed to users, being returned by + * bpf_map__initial_value() as a pointer to the shadow + * type. All function pointers in the original struct type + * should be converted to a pointer to struct bpf_program + * in the shadow type. + */ + *((struct bpf_program **)(st_ops->data + moff)) = prog; } return 0; @@ -9912,6 +9912,12 @@ int bpf_map__set_initial_value(struct bpf_map *map, void *bpf_map__initial_value(struct bpf_map *map, size_t *psize) { + if (bpf_map__is_struct_ops(map)) { + if (psize) + *psize = map->def.value_size; + return map->st_ops->data; + } + if (!map->mmaped) return NULL; *psize = map->def.value_size; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index d9e1f57534fa..86804fd90dd1 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -245,7 +245,6 @@ LIBBPF_0.3.0 { btf__parse_raw_split; btf__parse_split; btf__new_empty_split; - btf__new_split; ring_buffer__epoll_fd; } LIBBPF_0.2.0; @@ -326,7 +325,6 @@ LIBBPF_0.7.0 { bpf_xdp_detach; bpf_xdp_query; bpf_xdp_query_id; - btf_ext__raw_data; libbpf_probe_bpf_helper; libbpf_probe_bpf_map_type; libbpf_probe_bpf_prog_type; @@ -411,5 +409,8 @@ LIBBPF_1.3.0 { } LIBBPF_1.2.0; LIBBPF_1.4.0 { + global: bpf_token_create; + btf__new_split; + btf_ext__raw_data; } LIBBPF_1.3.0; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 930cc9616527..ad936ac5e639 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -19,6 +19,20 @@ #include <libelf.h> #include "relo_core.h" +/* Android's libc doesn't support AT_EACCESS in faccessat() implementation + * ([0]), and just returns -EINVAL even if file exists and is accessible. + * See [1] for issues caused by this. + * + * So just redefine it to 0 on Android. + * + * [0] https://android.googlesource.com/platform/bionic/+/refs/heads/android13-release/libc/bionic/faccessat.cpp#50 + * [1] https://github.com/libbpf/libbpf-bootstrap/issues/250#issuecomment-1911324250 + */ +#ifdef __ANDROID__ +#undef AT_EACCESS +#define AT_EACCESS 0 +#endif + /* make sure libbpf doesn't use kernel-only integer typedefs */ #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 @@ -358,6 +372,8 @@ enum kern_feature_id { FEAT_SYSCALL_WRAPPER, /* BPF multi-uprobe link support */ FEAT_UPROBE_MULTI_LINK, + /* Kernel supports arg:ctx tag (__arg_ctx) for global subprogs natively */ + FEAT_ARG_CTX_TAG, __FEAT_CNT, }; diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 16bca56002ab..0d4be829551b 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -2732,7 +2732,7 @@ static int finalize_btf(struct bpf_linker *linker) /* Emit .BTF.ext section */ if (linker->btf_ext) { - raw_data = btf_ext__get_raw_data(linker->btf_ext, &raw_sz); + raw_data = btf_ext__raw_data(linker->btf_ext, &raw_sz); if (!raw_data) return -ENOMEM; diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 090bcf6e3b3d..68a2def17175 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -496,8 +496,8 @@ int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts) if (err) return libbpf_err(err); - opts->feature_flags = md.flags; - opts->xdp_zc_max_segs = md.xdp_zc_max_segs; + OPTS_SET(opts, feature_flags, md.flags); + OPTS_SET(opts, xdp_zc_max_segs, md.xdp_zc_max_segs); skip_feature_flags: return 0; diff --git a/tools/net/ynl/Makefile b/tools/net/ynl/Makefile index da1aa10bbcc3..8e9e09d84e26 100644 --- a/tools/net/ynl/Makefile +++ b/tools/net/ynl/Makefile @@ -11,11 +11,11 @@ $(SUBDIRS): $(MAKE) -C $@ ; \ fi -clean hardclean: +clean distclean: @for dir in $(SUBDIRS) ; do \ if [ -f "$$dir/Makefile" ] ; then \ $(MAKE) -C $$dir $@; \ fi \ done -.PHONY: clean all $(SUBDIRS) +.PHONY: all clean distclean $(SUBDIRS) diff --git a/tools/net/ynl/cli.py b/tools/net/ynl/cli.py index 0f8239979670..f131e33ac3ee 100755 --- a/tools/net/ynl/cli.py +++ b/tools/net/ynl/cli.py @@ -6,7 +6,7 @@ import json import pprint import time -from lib import YnlFamily, Netlink +from lib import YnlFamily, Netlink, NlError class YnlEncoder(json.JSONEncoder): @@ -38,6 +38,8 @@ def main(): const=Netlink.NLM_F_APPEND) parser.add_argument('--process-unknown', action=argparse.BooleanOptionalAction) parser.add_argument('--output-json', action='store_true') + parser.add_argument('--dbg-small-recv', default=0, const=4000, + action='store', nargs='?', type=int) args = parser.parse_args() def output(msg): @@ -53,7 +55,10 @@ def main(): if args.json_text: attrs = json.loads(args.json_text) - ynl = YnlFamily(args.spec, args.schema, args.process_unknown) + ynl = YnlFamily(args.spec, args.schema, args.process_unknown, + recv_size=args.dbg_small_recv) + if args.dbg_small_recv: + ynl.set_recv_dbg(True) if args.ntf: ynl.ntf_subscribe(args.ntf) @@ -61,12 +66,16 @@ def main(): if args.sleep: time.sleep(args.sleep) - if args.do: - reply = ynl.do(args.do, attrs, args.flags) - output(reply) - if args.dump: - reply = ynl.dump(args.dump, attrs) - output(reply) + try: + if args.do: + reply = ynl.do(args.do, attrs, args.flags) + output(reply) + if args.dump: + reply = ynl.dump(args.dump, attrs) + output(reply) + except NlError as e: + print(e) + exit(1) if args.ntf: ynl.check_ntf() diff --git a/tools/net/ynl/generated/Makefile b/tools/net/ynl/generated/Makefile index 7135028cb449..713f5fb9cc2d 100644 --- a/tools/net/ynl/generated/Makefile +++ b/tools/net/ynl/generated/Makefile @@ -43,11 +43,11 @@ protos.a: $(OBJS) clean: rm -f *.o -hardclean: clean +distclean: clean rm -f *.c *.h *.a regen: @../ynl-regen.sh -.PHONY: all clean hardclean regen +.PHONY: all clean distclean regen .DEFAULT_GOAL: all diff --git a/tools/net/ynl/lib/Makefile b/tools/net/ynl/lib/Makefile index d2e50fd0a52d..dfff3ecd1cba 100644 --- a/tools/net/ynl/lib/Makefile +++ b/tools/net/ynl/lib/Makefile @@ -17,12 +17,13 @@ ynl.a: $(OBJS) ar rcs $@ $(OBJS) clean: rm -f *.o *.d *~ + rm -rf __pycache__ -hardclean: clean +distclean: clean rm -f *.a %.o: %.c $(COMPILE.c) -MMD -c -o $@ $< -.PHONY: all clean +.PHONY: all clean distclean .DEFAULT_GOAL=all diff --git a/tools/net/ynl/lib/__init__.py b/tools/net/ynl/lib/__init__.py index f7eaa07783e7..9137b83e580a 100644 --- a/tools/net/ynl/lib/__init__.py +++ b/tools/net/ynl/lib/__init__.py @@ -2,7 +2,7 @@ from .nlspec import SpecAttr, SpecAttrSet, SpecEnumEntry, SpecEnumSet, \ SpecFamily, SpecOperation -from .ynl import YnlFamily, Netlink +from .ynl import YnlFamily, Netlink, NlError __all__ = ["SpecAttr", "SpecAttrSet", "SpecEnumEntry", "SpecEnumSet", - "SpecFamily", "SpecOperation", "YnlFamily", "Netlink"] + "SpecFamily", "SpecOperation", "YnlFamily", "Netlink", "NlError"] diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h index a8099fab035d..6cf890080dc0 100644 --- a/tools/net/ynl/lib/ynl-priv.h +++ b/tools/net/ynl/lib/ynl-priv.h @@ -135,6 +135,8 @@ int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg); /* Netlink message handling helpers */ +#define YNL_MSG_OVERFLOW 1 + static inline struct nlmsghdr *ynl_nlmsg_put_header(void *buf) { struct nlmsghdr *nlh = buf; @@ -239,11 +241,29 @@ ynl_attr_first(const void *start, size_t len, size_t skip) return ynl_attr_if_good(start + len, attr); } +static inline bool +__ynl_attr_put_overflow(struct nlmsghdr *nlh, size_t size) +{ + bool o; + + /* ynl_msg_start() stashed buffer length in nlmsg_pid. */ + o = nlh->nlmsg_len + NLA_HDRLEN + NLMSG_ALIGN(size) > nlh->nlmsg_pid; + if (o) + /* YNL_MSG_OVERFLOW is < NLMSG_HDRLEN, all subsequent checks + * are guaranteed to fail. + */ + nlh->nlmsg_pid = YNL_MSG_OVERFLOW; + return o; +} + static inline struct nlattr * ynl_attr_nest_start(struct nlmsghdr *nlh, unsigned int attr_type) { struct nlattr *attr; + if (__ynl_attr_put_overflow(nlh, 0)) + return ynl_nlmsg_end_addr(nlh) - NLA_HDRLEN; + attr = ynl_nlmsg_end_addr(nlh); attr->nla_type = attr_type | NLA_F_NESTED; nlh->nlmsg_len += NLA_HDRLEN; @@ -263,6 +283,9 @@ ynl_attr_put(struct nlmsghdr *nlh, unsigned int attr_type, { struct nlattr *attr; + if (__ynl_attr_put_overflow(nlh, size)) + return; + attr = ynl_nlmsg_end_addr(nlh); attr->nla_type = attr_type; attr->nla_len = NLA_HDRLEN + size; @@ -276,14 +299,17 @@ static inline void ynl_attr_put_str(struct nlmsghdr *nlh, unsigned int attr_type, const char *str) { struct nlattr *attr; - const char *end; + size_t len; + + len = strlen(str); + if (__ynl_attr_put_overflow(nlh, len)) + return; attr = ynl_nlmsg_end_addr(nlh); attr->nla_type = attr_type; - end = stpcpy(ynl_attr_data(attr), str); - attr->nla_len = - NLA_HDRLEN + NLA_ALIGN(end - (char *)ynl_attr_data(attr)); + strcpy(ynl_attr_data(attr), str); + attr->nla_len = NLA_HDRLEN + NLA_ALIGN(len); nlh->nlmsg_len += NLMSG_ALIGN(attr->nla_len); } diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index 86729119e1ef..b9e77af5af5f 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -404,9 +404,33 @@ struct nlmsghdr *ynl_msg_start(struct ynl_sock *ys, __u32 id, __u16 flags) nlh->nlmsg_flags = flags; nlh->nlmsg_seq = ++ys->seq; + /* This is a local YNL hack for length checking, we put the buffer + * length in nlmsg_pid, since messages sent to the kernel always use + * PID 0. Message needs to be terminated with ynl_msg_end(). + */ + nlh->nlmsg_pid = YNL_SOCKET_BUFFER_SIZE; + return nlh; } +static int ynl_msg_end(struct ynl_sock *ys, struct nlmsghdr *nlh) +{ + /* We stash buffer length in nlmsg_pid. */ + if (nlh->nlmsg_pid == 0) { + yerr(ys, YNL_ERROR_INPUT_INVALID, + "Unknown input buffer length"); + return -EINVAL; + } + if (nlh->nlmsg_pid == YNL_MSG_OVERFLOW) { + yerr(ys, YNL_ERROR_INPUT_TOO_BIG, + "Constructred message longer than internal buffer"); + return -EMSGSIZE; + } + + nlh->nlmsg_pid = 0; + return 0; +} + struct nlmsghdr * ynl_gemsg_start(struct ynl_sock *ys, __u32 id, __u16 flags, __u8 cmd, __u8 version) @@ -561,6 +585,7 @@ ynl_get_family_info_mcast(struct ynl_sock *ys, const struct nlattr *mcasts) ys->mcast_groups[i].name[GENL_NAMSIZ - 1] = 0; } } + i++; } return 0; @@ -606,6 +631,10 @@ static int ynl_sock_read_family(struct ynl_sock *ys, const char *family_name) nlh = ynl_gemsg_start_req(ys, GENL_ID_CTRL, CTRL_CMD_GETFAMILY, 1); ynl_attr_put_str(nlh, CTRL_ATTR_FAMILY_NAME, family_name); + err = ynl_msg_end(ys, nlh); + if (err < 0) + return err; + err = send(ys->socket, nlh, nlh->nlmsg_len, 0); if (err < 0) { perr(ys, "failed to request socket family info"); @@ -867,6 +896,10 @@ int ynl_exec(struct ynl_sock *ys, struct nlmsghdr *req_nlh, { int err; + err = ynl_msg_end(ys, req_nlh); + if (err < 0) + return err; + err = send(ys->socket, req_nlh, req_nlh->nlmsg_len, 0); if (err < 0) return err; @@ -920,6 +953,10 @@ int ynl_exec_dump(struct ynl_sock *ys, struct nlmsghdr *req_nlh, { int err; + err = ynl_msg_end(ys, req_nlh); + if (err < 0) + return err; + err = send(ys->socket, req_nlh, req_nlh->nlmsg_len, 0); if (err < 0) return err; diff --git a/tools/net/ynl/lib/ynl.h b/tools/net/ynl/lib/ynl.h index dbeeef8ce91a..9842e85a8c57 100644 --- a/tools/net/ynl/lib/ynl.h +++ b/tools/net/ynl/lib/ynl.h @@ -20,6 +20,8 @@ enum ynl_error_code { YNL_ERROR_ATTR_INVALID, YNL_ERROR_UNKNOWN_NTF, YNL_ERROR_INV_RESP, + YNL_ERROR_INPUT_INVALID, + YNL_ERROR_INPUT_TOO_BIG, }; /** diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index ac55aa5a3083..2d7fdd903d9e 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -7,6 +7,7 @@ import random import socket import struct from struct import Struct +import sys import yaml import ipaddress import uuid @@ -84,6 +85,10 @@ class NlError(Exception): return f"Netlink error: {os.strerror(-self.nl_msg.error)}\n{self.nl_msg}" +class ConfigError(Exception): + pass + + class NlAttr: ScalarFormat = namedtuple('ScalarFormat', ['native', 'big', 'little']) type_formats = { @@ -213,11 +218,11 @@ class NlMsg: return self.nl_type def __repr__(self): - msg = f"nl_len = {self.nl_len} ({len(self.raw)}) nl_flags = 0x{self.nl_flags:x} nl_type = {self.nl_type}\n" + msg = f"nl_len = {self.nl_len} ({len(self.raw)}) nl_flags = 0x{self.nl_flags:x} nl_type = {self.nl_type}" if self.error: - msg += '\terror: ' + str(self.error) + msg += '\n\terror: ' + str(self.error) if self.extack: - msg += '\textack: ' + repr(self.extack) + msg += '\n\textack: ' + repr(self.extack) return msg @@ -348,6 +353,9 @@ class NetlinkProtocol: raise Exception(f'Multicast group "{mcast_name}" not present in the spec') return mcast_groups[mcast_name].value + def msghdr_size(self): + return 16 + class GenlProtocol(NetlinkProtocol): def __init__(self, family_name): @@ -373,6 +381,8 @@ class GenlProtocol(NetlinkProtocol): raise Exception(f'Multicast group "{mcast_name}" not present in the family') return self.genl_family['mcast'][mcast_name] + def msghdr_size(self): + return super().msghdr_size() + 4 class SpaceAttrs: @@ -400,7 +410,8 @@ class SpaceAttrs: class YnlFamily(SpecFamily): - def __init__(self, def_path, schema=None, process_unknown=False): + def __init__(self, def_path, schema=None, process_unknown=False, + recv_size=0): super().__init__(def_path, schema) self.include_raw = False @@ -415,6 +426,17 @@ class YnlFamily(SpecFamily): except KeyError: raise Exception(f"Family '{self.yaml['name']}' not supported by the kernel") + self._recv_dbg = False + # Note that netlink will use conservative (min) message size for + # the first dump recv() on the socket, our setting will only matter + # from the second recv() on. + self._recv_size = recv_size if recv_size else 131072 + # Netlink will always allocate at least PAGE_SIZE - sizeof(skb_shinfo) + # for a message, so smaller receive sizes will lead to truncation. + # Note that the min size for other families may be larger than 4k! + if self._recv_size < 4000: + raise ConfigError() + self.sock = socket.socket(socket.AF_NETLINK, socket.SOCK_RAW, self.nlproto.proto_num) self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_CAP_ACK, 1) self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_EXT_ACK, 1) @@ -438,6 +460,17 @@ class YnlFamily(SpecFamily): self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_ADD_MEMBERSHIP, mcast_id) + def set_recv_dbg(self, enabled): + self._recv_dbg = enabled + + def _recv_dbg_print(self, reply, nl_msgs): + if not self._recv_dbg: + return + print("Recv: read", len(reply), "bytes,", + len(nl_msgs.msgs), "messages", file=sys.stderr) + for nl_msg in nl_msgs: + print(" ", nl_msg, file=sys.stderr) + def _encode_enum(self, attr_spec, value): enum = self.consts[attr_spec['enum']] if enum.type == 'flags' or attr_spec.get('enum-as-flags', False): @@ -562,6 +595,16 @@ class YnlFamily(SpecFamily): decoded.append({ item.type: subattrs }) return decoded + def _decode_nest_type_value(self, attr, attr_spec): + decoded = {} + value = attr + for name in attr_spec['type-value']: + value = NlAttr(value.raw, 0) + decoded[name] = value.type + subattrs = self._decode(NlAttrs(value.raw), attr_spec['nested-attributes']) + decoded.update(subattrs) + return decoded + def _decode_unknown(self, attr): if attr.is_nest: return self._decode(NlAttrs(attr.raw), None) @@ -653,6 +696,8 @@ class YnlFamily(SpecFamily): decoded = {"value": value, "selector": selector} elif attr_spec["type"] == 'sub-message': decoded = self._decode_sub_msg(attr, attr_spec, search_attrs) + elif attr_spec["type"] == 'nest-type-value': + decoded = self._decode_nest_type_value(attr, attr_spec) else: if not self.process_unknown: raise Exception(f'Unknown {attr_spec["type"]} with name {attr_spec["name"]}') @@ -693,7 +738,7 @@ class YnlFamily(SpecFamily): return msg = self.nlproto.decode(self, NlMsg(request, 0, op.attr_set)) - offset = 20 + self._struct_size(op.fixed_header) + offset = self.nlproto.msghdr_size() + self._struct_size(op.fixed_header) path = self._decode_extack_path(msg.raw_attrs, op.attr_set, offset, extack['bad-attr-offs']) if path: @@ -799,11 +844,12 @@ class YnlFamily(SpecFamily): def check_ntf(self): while True: try: - reply = self.sock.recv(128 * 1024, socket.MSG_DONTWAIT) + reply = self.sock.recv(self._recv_size, socket.MSG_DONTWAIT) except BlockingIOError: return nms = NlMsgs(reply) + self._recv_dbg_print(reply, nms) for nl_msg in nms: if nl_msg.error: print("Netlink error in ntf!?", os.strerror(-nl_msg.error)) @@ -854,8 +900,9 @@ class YnlFamily(SpecFamily): done = False rsp = [] while not done: - reply = self.sock.recv(128 * 1024) + reply = self.sock.recv(self._recv_size) nms = NlMsgs(reply, attr_space=op.attr_set) + self._recv_dbg_print(reply, nms) for nl_msg in nms: if nl_msg.extack: self._decode_extack(msg, op, nl_msg.extack) diff --git a/tools/net/ynl/samples/Makefile b/tools/net/ynl/samples/Makefile index 1d33e98e3ffe..e194a7565861 100644 --- a/tools/net/ynl/samples/Makefile +++ b/tools/net/ynl/samples/Makefile @@ -28,8 +28,8 @@ $(BINS): ../lib/ynl.a ../generated/protos.a $(SRCS) clean: rm -f *.o *.d *~ -hardclean: clean +distclean: clean rm -f $(BINS) -.PHONY: all clean +.PHONY: all clean distclean .DEFAULT_GOAL=all diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 2f5febfe66a1..67bfaff05154 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -1667,7 +1667,7 @@ def _multi_parse(ri, struct, init_lines, local_vars): aspec = struct[anest] ri.cw.block_start(line=f"if (n_{aspec.c_name})") - ri.cw.p(f"dst->{aspec.c_name} = calloc({aspec.c_name}, sizeof(*dst->{aspec.c_name}));") + ri.cw.p(f"dst->{aspec.c_name} = calloc(n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));") ri.cw.p(f"dst->n_{aspec.c_name} = n_{aspec.c_name};") ri.cw.p('i = 0;') ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;") diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index caff3834671f..030b388800f0 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -13,6 +13,7 @@ ldflags-y += --wrap=cxl_hdm_decode_init ldflags-y += --wrap=cxl_dvsec_rr_decode ldflags-y += --wrap=devm_cxl_add_rch_dport ldflags-y += --wrap=cxl_rcd_component_reg_phys +ldflags-y += --wrap=cxl_endpoint_parse_cdat DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index a3cdbb2be038..908e0d083936 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -15,6 +15,8 @@ static int interleave_arithmetic; +#define FAKE_QTG_ID 42 + #define NR_CXL_HOST_BRIDGES 2 #define NR_CXL_SINGLE_HOST 1 #define NR_CXL_RCH 1 @@ -209,7 +211,7 @@ static struct { .granularity = 4, .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | ACPI_CEDT_CFMWS_RESTRICT_VOLATILE, - .qtg_id = 0, + .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 4UL, }, .target = { 0 }, @@ -224,7 +226,7 @@ static struct { .granularity = 4, .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | ACPI_CEDT_CFMWS_RESTRICT_VOLATILE, - .qtg_id = 1, + .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 8UL, }, .target = { 0, 1, }, @@ -239,7 +241,7 @@ static struct { .granularity = 4, .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | ACPI_CEDT_CFMWS_RESTRICT_PMEM, - .qtg_id = 2, + .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 4UL, }, .target = { 0 }, @@ -254,7 +256,7 @@ static struct { .granularity = 4, .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | ACPI_CEDT_CFMWS_RESTRICT_PMEM, - .qtg_id = 3, + .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 8UL, }, .target = { 0, 1, }, @@ -269,7 +271,7 @@ static struct { .granularity = 4, .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | ACPI_CEDT_CFMWS_RESTRICT_PMEM, - .qtg_id = 4, + .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 4UL, }, .target = { 2 }, @@ -284,7 +286,7 @@ static struct { .granularity = 4, .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | ACPI_CEDT_CFMWS_RESTRICT_VOLATILE, - .qtg_id = 5, + .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M, }, .target = { 3 }, @@ -301,7 +303,7 @@ static struct { .granularity = 4, .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | ACPI_CEDT_CFMWS_RESTRICT_PMEM, - .qtg_id = 0, + .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 8UL, }, .target = { 0, }, @@ -317,7 +319,7 @@ static struct { .granularity = 0, .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | ACPI_CEDT_CFMWS_RESTRICT_PMEM, - .qtg_id = 1, + .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 8UL, }, .target = { 0, 1, }, @@ -333,7 +335,7 @@ static struct { .granularity = 0, .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | ACPI_CEDT_CFMWS_RESTRICT_PMEM, - .qtg_id = 0, + .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 16UL, }, .target = { 0, 1, 0, 1, }, @@ -976,6 +978,48 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port) return 0; } +/* + * Faking the cxl_dpa_perf for the memdev when appropriate. + */ +static void dpa_perf_setup(struct cxl_port *endpoint, struct range *range, + struct cxl_dpa_perf *dpa_perf) +{ + dpa_perf->qos_class = FAKE_QTG_ID; + dpa_perf->dpa_range = *range; + dpa_perf->coord.read_latency = 500; + dpa_perf->coord.write_latency = 500; + dpa_perf->coord.read_bandwidth = 1000; + dpa_perf->coord.write_bandwidth = 1000; +} + +static void mock_cxl_endpoint_parse_cdat(struct cxl_port *port) +{ + struct cxl_root *cxl_root __free(put_cxl_root) = + find_cxl_root(port); + struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + struct range pmem_range = { + .start = cxlds->pmem_res.start, + .end = cxlds->pmem_res.end, + }; + struct range ram_range = { + .start = cxlds->ram_res.start, + .end = cxlds->ram_res.end, + }; + + if (!cxl_root) + return; + + if (range_len(&ram_range)) + dpa_perf_setup(port, &ram_range, &mds->ram_perf); + + if (range_len(&pmem_range)) + dpa_perf_setup(port, &pmem_range, &mds->pmem_perf); + + cxl_memdev_update_perf(cxlmd); +} + static struct cxl_mock_ops cxl_mock_ops = { .is_mock_adev = is_mock_adev, .is_mock_bridge = is_mock_bridge, @@ -989,6 +1033,7 @@ static struct cxl_mock_ops cxl_mock_ops = { .devm_cxl_setup_hdm = mock_cxl_setup_hdm, .devm_cxl_add_passthrough_decoder = mock_cxl_add_passthrough_decoder, .devm_cxl_enumerate_decoders = mock_cxl_enumerate_decoders, + .cxl_endpoint_parse_cdat = mock_cxl_endpoint_parse_cdat, .list = LIST_HEAD_INIT(cxl_mock_ops.list), }; diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 1a61e68e3095..6f737941dc0e 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -285,6 +285,20 @@ resource_size_t __wrap_cxl_rcd_component_reg_phys(struct device *dev, } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_rcd_component_reg_phys, CXL); +void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port) +{ + int index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); + + if (ops && ops->is_mock_dev(cxlmd->dev.parent)) + ops->cxl_endpoint_parse_cdat(port); + else + cxl_endpoint_parse_cdat(port); + put_cxl_mock_ops(index); +} +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_endpoint_parse_cdat, CXL); + MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(ACPI); MODULE_IMPORT_NS(CXL); diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h index a94223750346..d1b0271d2822 100644 --- a/tools/testing/cxl/test/mock.h +++ b/tools/testing/cxl/test/mock.h @@ -25,6 +25,7 @@ struct cxl_mock_ops { int (*devm_cxl_add_passthrough_decoder)(struct cxl_port *port); int (*devm_cxl_enumerate_decoders)( struct cxl_hdm *hdm, struct cxl_endpoint_dvsec_info *info); + void (*cxl_endpoint_parse_cdat)(struct cxl_port *port); }; void register_cxl_mock_ops(struct cxl_mock_ops *ops); diff --git a/tools/testing/selftests/alsa/test-pcmtest-driver.c b/tools/testing/selftests/alsa/test-pcmtest-driver.c index a52ecd43dbe3..ca81afa4ee90 100644 --- a/tools/testing/selftests/alsa/test-pcmtest-driver.c +++ b/tools/testing/selftests/alsa/test-pcmtest-driver.c @@ -127,11 +127,11 @@ FIXTURE_SETUP(pcmtest) { int err; if (geteuid()) - SKIP(exit(-1), "This test needs root to run!"); + SKIP(return, "This test needs root to run!"); err = read_patterns(); if (err) - SKIP(exit(-1), "Can't read patterns. Probably, module isn't loaded"); + SKIP(return, "Can't read patterns. Probably, module isn't loaded"); card_name = malloc(127); ASSERT_NE(card_name, NULL); diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 index 5c2cc7e8c5d0..0445ac38bc07 100644 --- a/tools/testing/selftests/bpf/DENYLIST.aarch64 +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 @@ -1,6 +1,5 @@ bpf_cookie/multi_kprobe_attach_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3 bpf_cookie/multi_kprobe_link_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3 -exceptions # JIT does not support calling kfunc bpf_throw: -524 fexit_sleep # The test never returns. The remaining tests cannot start. kprobe_multi_bench_attach # needs CONFIG_FPROBE kprobe_multi_test # needs CONFIG_FPROBE diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index fd15017ed3b1..84cb5500e8ef 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -41,6 +41,19 @@ CFLAGS += -g $(OPT_FLAGS) -rdynamic \ LDFLAGS += $(SAN_LDFLAGS) LDLIBS += $(LIBELF_LIBS) -lz -lrt -lpthread +# The following tests perform type punning and they may break strict +# aliasing rules, which are exploited by both GCC and clang by default +# while optimizing. This can lead to broken programs. +progs/bind4_prog.c-CFLAGS := -fno-strict-aliasing +progs/bind6_prog.c-CFLAGS := -fno-strict-aliasing +progs/dynptr_fail.c-CFLAGS := -fno-strict-aliasing +progs/linked_list_fail.c-CFLAGS := -fno-strict-aliasing +progs/map_kptr_fail.c-CFLAGS := -fno-strict-aliasing +progs/syscall.c-CFLAGS := -fno-strict-aliasing +progs/test_pkt_md_access.c-CFLAGS := -fno-strict-aliasing +progs/test_sk_lookup.c-CFLAGS := -fno-strict-aliasing +progs/timer_crash.c-CFLAGS := -fno-strict-aliasing + ifneq ($(LLVM),) # Silence some warnings when compiled with clang CFLAGS += -Wno-unused-command-line-argument @@ -64,6 +77,15 @@ TEST_INST_SUBDIRS := no_alu32 ifneq ($(BPF_GCC),) TEST_GEN_PROGS += test_progs-bpf_gcc TEST_INST_SUBDIRS += bpf_gcc + +# The following tests contain C code that, although technically legal, +# triggers GCC warnings that cannot be disabled: declaration of +# anonymous struct types in function parameter lists. +progs/btf_dump_test_case_bitfields.c-CFLAGS := -Wno-error +progs/btf_dump_test_case_namespacing.c-CFLAGS := -Wno-error +progs/btf_dump_test_case_packing.c-CFLAGS := -Wno-error +progs/btf_dump_test_case_padding.c-CFLAGS := -Wno-error +progs/btf_dump_test_case_syntax.c-CFLAGS := -Wno-error endif ifneq ($(CLANG_CPUV4),) @@ -110,7 +132,7 @@ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \ test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \ xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata \ - xdp_features + xdp_features bpf_test_no_cfi.ko TEST_GEN_FILES += liburandom_read.so urandom_read sign-file uprobe_multi @@ -175,8 +197,7 @@ endif # NOTE: Semicolon at the end is critical to override lib.mk's default static # rule for binaries. $(notdir $(TEST_GEN_PROGS) \ - $(TEST_GEN_PROGS_EXTENDED) \ - $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ; + $(TEST_GEN_PROGS_EXTENDED)): %: $(OUTPUT)/% ; # sort removes libbpf duplicates when not cross-building MAKE_DIRS := $(sort $(BUILD_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf \ @@ -233,6 +254,12 @@ $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_testmo $(Q)$(MAKE) $(submake_extras) RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) -C bpf_testmod $(Q)cp bpf_testmod/bpf_testmod.ko $@ +$(OUTPUT)/bpf_test_no_cfi.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_test_no_cfi/Makefile bpf_test_no_cfi/*.[ch]) + $(call msg,MOD,,$@) + $(Q)$(RM) bpf_test_no_cfi/bpf_test_no_cfi.ko # force re-compilation + $(Q)$(MAKE) $(submake_extras) RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) -C bpf_test_no_cfi + $(Q)cp bpf_test_no_cfi/bpf_test_no_cfi.ko $@ + DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool ifneq ($(CROSS_COMPILE),) CROSS_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool @@ -382,11 +409,11 @@ endif CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \ - -I$(abspath $(OUTPUT)/../usr/include) + -I$(abspath $(OUTPUT)/../usr/include) \ + -Wno-compare-distinct-pointer-types # TODO: enable me -Wsign-compare -CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \ - -Wno-compare-distinct-pointer-types +CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) $(OUTPUT)/test_l4lb_noinline.o: BPF_CFLAGS += -fno-inline $(OUTPUT)/test_xdp_noinline.o: BPF_CFLAGS += -fno-inline @@ -504,7 +531,8 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.bpf.o: \ $(wildcard $(BPFDIR)/*.bpf.h) \ | $(TRUNNER_OUTPUT) $$(BPFOBJ) $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \ - $(TRUNNER_BPF_CFLAGS)) + $(TRUNNER_BPF_CFLAGS) \ + $$($$<-CFLAGS)) $(TRUNNER_BPF_SKELS): %.skel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT) $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@) @@ -514,6 +542,7 @@ $(TRUNNER_BPF_SKELS): %.skel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT) $(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o) $(Q)$$(BPFTOOL) gen skeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.bpf.o=)) > $$@ $(Q)$$(BPFTOOL) gen subskeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.bpf.o=)) > $$(@:.skel.h=.subskel.h) + $(Q)rm -f $$(<:.o=.linked1.o) $$(<:.o=.linked2.o) $$(<:.o=.linked3.o) $(TRUNNER_BPF_LSKELS): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT) $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@) @@ -522,6 +551,7 @@ $(TRUNNER_BPF_LSKELS): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT) $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked3.o) $$(<:.o=.llinked2.o) $(Q)diff $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o) $(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.llinked3.o) name $$(notdir $$(<:.bpf.o=_lskel)) > $$@ + $(Q)rm -f $$(<:.o=.llinked1.o) $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o) $(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT) $$(call msg,LINK-BPF,$(TRUNNER_BINARY),$$(@:.skel.h=.bpf.o)) @@ -532,6 +562,7 @@ $(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT) $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@) $(Q)$$(BPFTOOL) gen skeleton $$(@:.skel.h=.linked3.o) name $$(notdir $$(@:.skel.h=)) > $$@ $(Q)$$(BPFTOOL) gen subskeleton $$(@:.skel.h=.linked3.o) name $$(notdir $$(@:.skel.h=)) > $$(@:.skel.h=.subskel.h) + $(Q)rm -f $$(@:.skel.h=.linked1.o) $$(@:.skel.h=.linked2.o) $$(@:.skel.h=.linked3.o) endif # ensure we set up tests.h header generation rule just once @@ -606,6 +637,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c \ flow_dissector_load.h \ ip_check_defrag_frags.h TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ + $(OUTPUT)/bpf_test_no_cfi.ko \ $(OUTPUT)/liburandom_read.so \ $(OUTPUT)/xdp_synproxy \ $(OUTPUT)/sign-file \ @@ -729,11 +761,12 @@ $(OUTPUT)/uprobe_multi: uprobe_multi.c $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $^ $(LDLIBS) -o $@ -EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ +EXTRA_CLEAN := $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ feature bpftool \ $(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h *.subskel.h \ no_alu32 cpuv4 bpf_gcc bpf_testmod.ko \ + bpf_test_no_cfi.ko \ liburandom_read.so) .PHONY: docs docs-clean diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 73ce11b0547d..1724d50ba942 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -323,14 +323,14 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) break; case 'p': env.producer_cnt = strtol(arg, NULL, 10); - if (env.producer_cnt <= 0) { + if (env.producer_cnt < 0) { fprintf(stderr, "Invalid producer count: %s\n", arg); argp_usage(state); } break; case 'c': env.consumer_cnt = strtol(arg, NULL, 10); - if (env.consumer_cnt <= 0) { + if (env.consumer_cnt < 0) { fprintf(stderr, "Invalid consumer count: %s\n", arg); argp_usage(state); } @@ -607,6 +607,10 @@ static void setup_benchmark(void) bench->setup(); for (i = 0; i < env.consumer_cnt; i++) { + if (!bench->consumer_thread) { + fprintf(stderr, "benchmark doesn't support consumers!\n"); + exit(1); + } err = pthread_create(&state.consumers[i], NULL, bench->consumer_thread, (void *)(long)i); if (err) { @@ -626,6 +630,10 @@ static void setup_benchmark(void) env.prod_cpus.next_cpu = env.cons_cpus.next_cpu; for (i = 0; i < env.producer_cnt; i++) { + if (!bench->producer_thread) { + fprintf(stderr, "benchmark doesn't support producers!\n"); + exit(1); + } err = pthread_create(&state.producers[i], NULL, bench->producer_thread, (void *)(long)i); if (err) { diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index 23c24f852f4f..14ebe7d9e1a3 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -9,7 +9,7 @@ struct bpf_sock_addr_kern; * Error code */ extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, - struct bpf_dynptr *ptr__uninit) __ksym; + struct bpf_dynptr *ptr__uninit) __ksym __weak; /* Description * Initializes an xdp-type dynptr @@ -17,7 +17,7 @@ extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, * Error code */ extern int bpf_dynptr_from_xdp(struct xdp_md *xdp, __u64 flags, - struct bpf_dynptr *ptr__uninit) __ksym; + struct bpf_dynptr *ptr__uninit) __ksym __weak; /* Description * Obtain a read-only pointer to the dynptr's data @@ -26,7 +26,7 @@ extern int bpf_dynptr_from_xdp(struct xdp_md *xdp, __u64 flags, * buffer if unable to obtain a direct pointer */ extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset, - void *buffer, __u32 buffer__szk) __ksym; + void *buffer, __u32 buffer__szk) __ksym __weak; /* Description * Obtain a read-write pointer to the dynptr's data @@ -35,13 +35,13 @@ extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset, * buffer if unable to obtain a direct pointer */ extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u32 offset, - void *buffer, __u32 buffer__szk) __ksym; + void *buffer, __u32 buffer__szk) __ksym __weak; -extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u32 start, __u32 end) __ksym; -extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym; -extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym; -extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym; -extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym; +extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u32 start, __u32 end) __ksym __weak; +extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym __weak; +extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym __weak; +extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym __weak; +extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym __weak; /* Description * Modify the address of a AF_UNIX sockaddr. @@ -63,7 +63,7 @@ extern int bpf_sk_assign_tcp_reqsk(struct __sk_buff *skb, struct sock *sk, void *bpf_cast_to_kern_ctx(void *) __ksym; -void *bpf_rdonly_cast(void *obj, __u32 btf_id) __ksym; +extern void *bpf_rdonly_cast(const void *obj, __u32 btf_id) __ksym __weak; extern int bpf_get_file_xattr(struct file *file, const char *name, struct bpf_dynptr *value_ptr) __ksym; diff --git a/tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile b/tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile new file mode 100644 index 000000000000..ed5143b79edf --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile @@ -0,0 +1,19 @@ +BPF_TEST_NO_CFI_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) +KDIR ?= $(abspath $(BPF_TEST_NO_CFI_DIR)/../../../../..) + +ifeq ($(V),1) +Q = +else +Q = @ +endif + +MODULES = bpf_test_no_cfi.ko + +obj-m += bpf_test_no_cfi.o + +all: + +$(Q)make -C $(KDIR) M=$(BPF_TEST_NO_CFI_DIR) modules + +clean: + +$(Q)make -C $(KDIR) M=$(BPF_TEST_NO_CFI_DIR) clean + diff --git a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c b/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c new file mode 100644 index 000000000000..b1dd889d5d7d --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include <linux/bpf.h> +#include <linux/btf.h> +#include <linux/init.h> +#include <linux/module.h> + +struct bpf_test_no_cfi_ops { + void (*fn_1)(void); + void (*fn_2)(void); +}; + +static int dummy_init(struct btf *btf) +{ + return 0; +} + +static int dummy_init_member(const struct btf_type *t, + const struct btf_member *member, + void *kdata, const void *udata) +{ + return 0; +} + +static int dummy_reg(void *kdata) +{ + return 0; +} + +static void dummy_unreg(void *kdata) +{ +} + +static const struct bpf_verifier_ops dummy_verifier_ops; + +static void bpf_test_no_cfi_ops__fn_1(void) +{ +} + +static void bpf_test_no_cfi_ops__fn_2(void) +{ +} + +static struct bpf_test_no_cfi_ops __test_no_cif_ops = { + .fn_1 = bpf_test_no_cfi_ops__fn_1, + .fn_2 = bpf_test_no_cfi_ops__fn_2, +}; + +static struct bpf_struct_ops test_no_cif_ops = { + .verifier_ops = &dummy_verifier_ops, + .init = dummy_init, + .init_member = dummy_init_member, + .reg = dummy_reg, + .unreg = dummy_unreg, + .name = "bpf_test_no_cfi_ops", + .owner = THIS_MODULE, +}; + +static int bpf_test_no_cfi_init(void) +{ + int ret; + + ret = register_bpf_struct_ops(&test_no_cif_ops, + bpf_test_no_cfi_ops); + if (!ret) + return -EINVAL; + + test_no_cif_ops.cfi_stubs = &__test_no_cif_ops; + ret = register_bpf_struct_ops(&test_no_cif_ops, + bpf_test_no_cfi_ops); + return ret; +} + +static void bpf_test_no_cfi_exit(void) +{ +} + +module_init(bpf_test_no_cfi_init); +module_exit(bpf_test_no_cfi_exit); + +MODULE_AUTHOR("Kuifeng Lee"); +MODULE_DESCRIPTION("BPF no cfi_stubs test module"); +MODULE_LICENSE("Dual BSD/GPL"); + diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 8befaf17d454..098ddd067224 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -343,12 +343,12 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { .write = bpf_testmod_test_write, }; -BTF_SET8_START(bpf_testmod_common_kfunc_ids) +BTF_KFUNCS_START(bpf_testmod_common_kfunc_ids) BTF_ID_FLAGS(func, bpf_iter_testmod_seq_new, KF_ITER_NEW) BTF_ID_FLAGS(func, bpf_iter_testmod_seq_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_testmod_seq_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, bpf_kfunc_common_test) -BTF_SET8_END(bpf_testmod_common_kfunc_ids) +BTF_KFUNCS_END(bpf_testmod_common_kfunc_ids) static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = { .owner = THIS_MODULE, @@ -494,7 +494,7 @@ __bpf_kfunc static u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused return arg; } -BTF_SET8_START(bpf_testmod_check_kfunc_ids) +BTF_KFUNCS_START(bpf_testmod_check_kfunc_ids) BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc) BTF_ID_FLAGS(func, bpf_kfunc_call_test1) BTF_ID_FLAGS(func, bpf_kfunc_call_test2) @@ -520,7 +520,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU) BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE) BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg) BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset) -BTF_SET8_END(bpf_testmod_check_kfunc_ids) +BTF_KFUNCS_END(bpf_testmod_check_kfunc_ids) static int bpf_testmod_ops_init(struct btf *btf) { @@ -539,6 +539,15 @@ static int bpf_testmod_ops_init_member(const struct btf_type *t, const struct btf_member *member, void *kdata, const void *udata) { + if (member->offset == offsetof(struct bpf_testmod_ops, data) * 8) { + /* For data fields, this function has to copy it and return + * 1 to indicate that the data has been handled by the + * struct_ops type, or the verifier will reject the map if + * the value of the data field is not zero. + */ + ((struct bpf_testmod_ops *)kdata)->data = ((struct bpf_testmod_ops *)udata)->data; + return 1; + } return 0; } @@ -554,9 +563,12 @@ static const struct bpf_verifier_ops bpf_testmod_verifier_ops = { static int bpf_dummy_reg(void *kdata) { struct bpf_testmod_ops *ops = kdata; - int r; - r = ops->test_2(4, 3); + /* Some test cases (ex. struct_ops_maybe_null) may not have test_2 + * initialized, so we need to check for NULL. + */ + if (ops->test_2) + ops->test_2(4, ops->data); return 0; } @@ -570,7 +582,12 @@ static int bpf_testmod_test_1(void) return 0; } -static int bpf_testmod_test_2(int a, int b) +static void bpf_testmod_test_2(int a, int b) +{ +} + +static int bpf_testmod_ops__test_maybe_null(int dummy, + struct task_struct *task__nullable) { return 0; } @@ -578,6 +595,7 @@ static int bpf_testmod_test_2(int a, int b) static struct bpf_testmod_ops __bpf_testmod_ops = { .test_1 = bpf_testmod_test_1, .test_2 = bpf_testmod_test_2, + .test_maybe_null = bpf_testmod_ops__test_maybe_null, }; struct bpf_struct_ops bpf_bpf_testmod_ops = { @@ -619,7 +637,7 @@ static void bpf_testmod_exit(void) while (refcount_read(&prog_test_struct.cnt) > 1) msleep(20); - return sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); + sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); } module_init(bpf_testmod_init); diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h index ca5435751c79..971458acfac3 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h @@ -5,6 +5,8 @@ #include <linux/types.h> +struct task_struct; + struct bpf_testmod_test_read_ctx { char *buf; loff_t off; @@ -30,7 +32,17 @@ struct bpf_iter_testmod_seq { struct bpf_testmod_ops { int (*test_1)(void); - int (*test_2)(int a, int b); + void (*test_2)(int a, int b); + /* Used to test nullable arguments. */ + int (*test_maybe_null)(int dummy, struct task_struct *task); + + /* The following fields are used to test shadow copies. */ + char onebyte; + struct { + int a; + int b; + } unsupported; + int data; }; #endif /* _BPF_TESTMOD_H */ diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c index c2e886399e3c..ecf89df78109 100644 --- a/tools/testing/selftests/bpf/prog_tests/cpumask.c +++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c @@ -27,7 +27,7 @@ static void verify_success(const char *prog_name) struct bpf_program *prog; struct bpf_link *link = NULL; pid_t child_pid; - int status; + int status, err; skel = cpumask_success__open(); if (!ASSERT_OK_PTR(skel, "cpumask_success__open")) @@ -36,8 +36,8 @@ static void verify_success(const char *prog_name) skel->bss->pid = getpid(); skel->bss->nr_cpus = libbpf_num_possible_cpus(); - cpumask_success__load(skel); - if (!ASSERT_OK_PTR(skel, "cpumask_success__load")) + err = cpumask_success__load(skel); + if (!ASSERT_OK(err, "cpumask_success__load")) goto cleanup; prog = bpf_object__find_program_by_name(skel->obj, prog_name); diff --git a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c index 5c0ebe6ba866..dcb9e5070cc3 100644 --- a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c +++ b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c @@ -72,6 +72,6 @@ fail: bpf_tc_hook_destroy(&qdisc_hook); close_netns(nstoken); } - SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null"); + SYS_NOFAIL("ip netns del " NS_TEST); decap_sanity__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c index 4ad4cd69152e..3379df2d4cf2 100644 --- a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c @@ -298,6 +298,6 @@ void test_fib_lookup(void) fail: if (nstoken) close_netns(nstoken); - SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null"); + SYS_NOFAIL("ip netns del " NS_TEST); fib_lookup__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c b/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c index 57c814f5f6a7..8dd2af9081f4 100644 --- a/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c +++ b/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c @@ -59,9 +59,9 @@ static int setup_topology(bool ipv6) /* Wait for up to 5s for links to come up */ for (i = 0; i < 5; ++i) { if (ipv6) - up = !system("ip netns exec " NS0 " ping -6 -c 1 -W 1 " VETH1_ADDR6 " &>/dev/null"); + up = !SYS_NOFAIL("ip netns exec " NS0 " ping -6 -c 1 -W 1 " VETH1_ADDR6); else - up = !system("ip netns exec " NS0 " ping -c 1 -W 1 " VETH1_ADDR " &>/dev/null"); + up = !SYS_NOFAIL("ip netns exec " NS0 " ping -c 1 -W 1 " VETH1_ADDR); if (up) break; diff --git a/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c b/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c index 15144943e88b..7def158da9eb 100644 --- a/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c +++ b/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c @@ -13,7 +13,8 @@ void test_kptr_xchg_inline(void) unsigned int cnt; int err; -#if !(defined(__x86_64__) || defined(__aarch64__)) +#if !(defined(__x86_64__) || defined(__aarch64__) || \ + (defined(__riscv) && __riscv_xlen == 64)) test__skip(); return; #endif diff --git a/tools/testing/selftests/bpf/prog_tests/log_fixup.c b/tools/testing/selftests/bpf/prog_tests/log_fixup.c index 7a3fa2ff567b..90a98e23be61 100644 --- a/tools/testing/selftests/bpf/prog_tests/log_fixup.c +++ b/tools/testing/selftests/bpf/prog_tests/log_fixup.c @@ -169,9 +169,9 @@ void test_log_fixup(void) if (test__start_subtest("bad_core_relo_trunc_none")) bad_core_relo(0, TRUNC_NONE /* full buf */); if (test__start_subtest("bad_core_relo_trunc_partial")) - bad_core_relo(280, TRUNC_PARTIAL /* truncate original log a bit */); + bad_core_relo(300, TRUNC_PARTIAL /* truncate original log a bit */); if (test__start_subtest("bad_core_relo_trunc_full")) - bad_core_relo(220, TRUNC_FULL /* truncate also libbpf's message patch */); + bad_core_relo(240, TRUNC_FULL /* truncate also libbpf's message patch */); if (test__start_subtest("bad_core_relo_subprog")) bad_core_relo_subprog(); if (test__start_subtest("missing_map")) diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h index e9190574e79f..fb1eb8c67361 100644 --- a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h @@ -27,8 +27,6 @@ } \ }) -#define NETNS "ns_lwt" - static inline int netns_create(void) { return system("ip netns add " NETNS); diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c b/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c index 59b38569f310..835a1d756c16 100644 --- a/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c @@ -54,6 +54,7 @@ #include <stdbool.h> #include <stdlib.h> +#define NETNS "ns_lwt_redirect" #include "lwt_helpers.h" #include "test_progs.h" #include "network_helpers.h" @@ -85,7 +86,7 @@ static void ping_dev(const char *dev, bool is_ingress) snprintf(ip, sizeof(ip), "20.0.0.%d", link_index); /* We won't get a reply. Don't fail here */ - SYS_NOFAIL("ping %s -c1 -W1 -s %d >/dev/null 2>&1", + SYS_NOFAIL("ping %s -c1 -W1 -s %d", ip, ICMP_PAYLOAD_SIZE); } @@ -203,6 +204,7 @@ static int setup_redirect_target(const char *target_dev, bool need_mac) if (!ASSERT_GE(target_index, 0, "if_nametoindex")) goto fail; + SYS(fail, "sysctl -w net.ipv6.conf.all.disable_ipv6=1"); SYS(fail, "ip link add link_err type dummy"); SYS(fail, "ip link set lo up"); SYS(fail, "ip addr add dev lo " LOCAL_SRC "/32"); diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c b/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c index f4bb2d5fcae0..03825d2b45a8 100644 --- a/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c +++ b/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c @@ -48,6 +48,7 @@ * For case 2, force UDP packets to overflow fq limit. As long as kernel * is not crashed, it is considered successful. */ +#define NETNS "ns_lwt_reroute" #include "lwt_helpers.h" #include "network_helpers.h" #include <linux/net_tstamp.h> @@ -63,7 +64,7 @@ static void ping_once(const char *ip) { /* We won't get a reply. Don't fail here */ - SYS_NOFAIL("ping %s -c1 -W1 -s %d >/dev/null 2>&1", + SYS_NOFAIL("ping %s -c1 -W1 -s %d", ip, ICMP_PAYLOAD_SIZE); } diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index 7c0be7cf550b..8f8d792307c1 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -79,7 +79,7 @@ static void cleanup_netns(struct nstoken *nstoken) if (nstoken) close_netns(nstoken); - SYS_NOFAIL("ip netns del %s &> /dev/null", NS_TEST); + SYS_NOFAIL("ip netns del %s", NS_TEST); } static int verify_tsk(int map_fd, int client_fd) diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c index 3f1f58d3a729..a1f7e7378a64 100644 --- a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c @@ -29,6 +29,10 @@ static void test_success(void) bpf_program__set_autoload(skel->progs.non_sleepable_1, true); bpf_program__set_autoload(skel->progs.non_sleepable_2, true); bpf_program__set_autoload(skel->progs.task_trusted_non_rcuptr, true); + bpf_program__set_autoload(skel->progs.rcu_read_lock_subprog, true); + bpf_program__set_autoload(skel->progs.rcu_read_lock_global_subprog, true); + bpf_program__set_autoload(skel->progs.rcu_read_lock_subprog_lock, true); + bpf_program__set_autoload(skel->progs.rcu_read_lock_subprog_unlock, true); err = rcu_read_lock__load(skel); if (!ASSERT_OK(err, "skel_load")) goto out; @@ -75,6 +79,8 @@ static const char * const inproper_region_tests[] = { "inproper_sleepable_helper", "inproper_sleepable_kfunc", "nested_rcu_region", + "rcu_read_lock_global_subprog_lock", + "rcu_read_lock_global_subprog_unlock", }; static void test_inproper_region(void) diff --git a/tools/testing/selftests/bpf/prog_tests/sock_destroy.c b/tools/testing/selftests/bpf/prog_tests/sock_destroy.c index b0583309a94e..9c11938fe597 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_destroy.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_destroy.c @@ -214,7 +214,7 @@ void test_sock_destroy(void) cleanup: if (nstoken) close_netns(nstoken); - SYS_NOFAIL("ip netns del " TEST_NS " &> /dev/null"); + SYS_NOFAIL("ip netns del " TEST_NS); if (cgroup_fd >= 0) close(cgroup_fd); sock_destroy_prog__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c index 0c365f36c73b..d56e18b25528 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c @@ -112,7 +112,7 @@ void test_sock_iter_batch(void) { struct nstoken *nstoken = NULL; - SYS_NOFAIL("ip netns del " TEST_NS " &> /dev/null"); + SYS_NOFAIL("ip netns del " TEST_NS); SYS(done, "ip netns add %s", TEST_NS); SYS(done, "ip -net %s link set dev lo up", TEST_NS); @@ -131,5 +131,5 @@ void test_sock_iter_batch(void) close_netns(nstoken); done: - SYS_NOFAIL("ip netns del " TEST_NS " &> /dev/null"); + SYS_NOFAIL("ip netns del " TEST_NS); } diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c index 18d451be57c8..2b0068742ef9 100644 --- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c @@ -48,6 +48,8 @@ static struct { { "lock_id_mismatch_innermapval_kptr", "bpf_spin_unlock of different lock" }, { "lock_id_mismatch_innermapval_global", "bpf_spin_unlock of different lock" }, { "lock_id_mismatch_innermapval_mapval", "bpf_spin_unlock of different lock" }, + { "lock_global_subprog_call1", "global function calls are not allowed while holding a lock" }, + { "lock_global_subprog_call2", "global function calls are not allowed while holding a lock" }, }; static int match_regex(const char *pattern, const char *string) diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c index ea8537c54413..c33c05161a9e 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c @@ -117,12 +117,6 @@ static void test_recursion(void) ASSERT_OK(err, "lookup map_b"); ASSERT_EQ(value, 100, "map_b value"); - prog_fd = bpf_program__fd(skel->progs.on_lookup); - memset(&info, 0, sizeof(info)); - err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); - ASSERT_OK(err, "get prog info"); - ASSERT_GT(info.recursion_misses, 0, "on_lookup prog recursion"); - prog_fd = bpf_program__fd(skel->progs.on_update); memset(&info, 0, sizeof(info)); err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_maybe_null.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_maybe_null.c new file mode 100644 index 000000000000..01dc2613c8a5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_maybe_null.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> + +#include "struct_ops_maybe_null.skel.h" +#include "struct_ops_maybe_null_fail.skel.h" + +/* Test that the verifier accepts a program that access a nullable pointer + * with a proper check. + */ +static void maybe_null(void) +{ + struct struct_ops_maybe_null *skel; + + skel = struct_ops_maybe_null__open_and_load(); + if (!ASSERT_OK_PTR(skel, "struct_ops_module_open_and_load")) + return; + + struct_ops_maybe_null__destroy(skel); +} + +/* Test that the verifier rejects a program that access a nullable pointer + * without a check beforehand. + */ +static void maybe_null_fail(void) +{ + struct struct_ops_maybe_null_fail *skel; + + skel = struct_ops_maybe_null_fail__open_and_load(); + if (ASSERT_ERR_PTR(skel, "struct_ops_module_fail__open_and_load")) + return; + + struct_ops_maybe_null_fail__destroy(skel); +} + +void test_struct_ops_maybe_null(void) +{ + /* The verifier verifies the programs at load time, so testing both + * programs in the same compile-unit is complicated. We run them in + * separate objects to simplify the testing. + */ + if (test__start_subtest("maybe_null")) + maybe_null(); + if (test__start_subtest("maybe_null_fail")) + maybe_null_fail(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c index 8d833f0c7580..7d6facf46ebb 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c +++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c @@ -32,17 +32,23 @@ cleanup: static void test_struct_ops_load(void) { - DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts); struct struct_ops_module *skel; struct bpf_map_info info = {}; struct bpf_link *link; int err; u32 len; - skel = struct_ops_module__open_opts(&opts); + skel = struct_ops_module__open(); if (!ASSERT_OK_PTR(skel, "struct_ops_module_open")) return; + skel->struct_ops.testmod_1->data = 13; + skel->struct_ops.testmod_1->test_2 = skel->progs.test_3; + /* Since test_2() is not being used, it should be disabled from + * auto-loading, or it will fail to load. + */ + bpf_program__set_autoload(skel->progs.test_2, false); + err = struct_ops_module__load(skel); if (!ASSERT_OK(err, "struct_ops_module_load")) goto cleanup; @@ -56,8 +62,13 @@ static void test_struct_ops_load(void) link = bpf_map__attach_struct_ops(skel->maps.testmod_1); ASSERT_OK_PTR(link, "attach_test_mod_1"); - /* test_2() will be called from bpf_dummy_reg() in bpf_testmod.c */ - ASSERT_EQ(skel->bss->test_2_result, 7, "test_2_result"); + /* test_3() will be called from bpf_dummy_reg() in bpf_testmod.c + * + * In bpf_testmod.c it will pass 4 and 13 (the value of data) to + * .test_2. So, the value of test_2_result should be 20 (4 + 13 + + * 3). + */ + ASSERT_EQ(skel->bss->test_2_result, 20, "check_shadow_variables"); bpf_link__destroy(link); diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_no_cfi.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_no_cfi.c new file mode 100644 index 000000000000..106ea447965a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_no_cfi.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include <testing_helpers.h> + +static void load_bpf_test_no_cfi(void) +{ + int fd; + int err; + + fd = open("bpf_test_no_cfi.ko", O_RDONLY); + if (!ASSERT_GE(fd, 0, "open")) + return; + + /* The module will try to register a struct_ops type without + * cfi_stubs and with cfi_stubs. + * + * The one without cfi_stub should fail. The module will be loaded + * successfully only if the result of the registration is as + * expected, or it fails. + */ + err = finit_module(fd, "", 0); + close(fd); + if (!ASSERT_OK(err, "finit_module")) + return; + + err = delete_module("bpf_test_no_cfi", 0); + ASSERT_OK(err, "delete_module"); +} + +void test_struct_ops_no_cfi(void) +{ + if (test__start_subtest("load_bpf_test_no_cfi")) + load_bpf_test_no_cfi(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index 2b3c6dd66259..5f1fb0a2ea56 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -118,9 +118,9 @@ fail: static void cleanup(void) { SYS_NOFAIL("test -f /var/run/netns/at_ns0 && ip netns delete at_ns0"); - SYS_NOFAIL("ip link del veth1 2> /dev/null"); - SYS_NOFAIL("ip link del %s 2> /dev/null", VXLAN_TUNL_DEV1); - SYS_NOFAIL("ip link del %s 2> /dev/null", IP6VXLAN_TUNL_DEV1); + SYS_NOFAIL("ip link del veth1"); + SYS_NOFAIL("ip link del %s", VXLAN_TUNL_DEV1); + SYS_NOFAIL("ip link del %s", IP6VXLAN_TUNL_DEV1); } static int add_vxlan_tunnel(void) @@ -265,9 +265,9 @@ fail: static void delete_ipip_tunnel(void) { SYS_NOFAIL("ip -n at_ns0 link delete dev %s", IPIP_TUNL_DEV0); - SYS_NOFAIL("ip -n at_ns0 fou del port 5555 2> /dev/null"); + SYS_NOFAIL("ip -n at_ns0 fou del port 5555"); SYS_NOFAIL("ip link delete dev %s", IPIP_TUNL_DEV1); - SYS_NOFAIL("ip fou del port 5555 2> /dev/null"); + SYS_NOFAIL("ip fou del port 5555"); } static int add_xfrm_tunnel(void) @@ -346,13 +346,13 @@ fail: static void delete_xfrm_tunnel(void) { - SYS_NOFAIL("ip xfrm policy delete dir out src %s/32 dst %s/32 2> /dev/null", + SYS_NOFAIL("ip xfrm policy delete dir out src %s/32 dst %s/32", IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0); - SYS_NOFAIL("ip xfrm policy delete dir in src %s/32 dst %s/32 2> /dev/null", + SYS_NOFAIL("ip xfrm policy delete dir in src %s/32 dst %s/32", IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1); - SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d 2> /dev/null", + SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d", IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT); - SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d 2> /dev/null", + SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d", IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN); } diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c new file mode 100644 index 000000000000..a222df765bc3 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include "tracing_failure.skel.h" + +static void test_bpf_spin_lock(bool is_spin_lock) +{ + struct tracing_failure *skel; + int err; + + skel = tracing_failure__open(); + if (!ASSERT_OK_PTR(skel, "tracing_failure__open")) + return; + + if (is_spin_lock) + bpf_program__set_autoload(skel->progs.test_spin_lock, true); + else + bpf_program__set_autoload(skel->progs.test_spin_unlock, true); + + err = tracing_failure__load(skel); + if (!ASSERT_OK(err, "tracing_failure__load")) + goto out; + + err = tracing_failure__attach(skel); + ASSERT_ERR(err, "tracing_failure__attach"); + +out: + tracing_failure__destroy(skel); +} + +void test_tracing_failure(void) +{ + if (test__start_subtest("bpf_spin_lock")) + test_bpf_spin_lock(true); + if (test__start_subtest("bpf_spin_unlock")) + test_bpf_spin_lock(false); +} diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index d62c5bf00e71..9c6072a19745 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -28,6 +28,7 @@ #include "verifier_div0.skel.h" #include "verifier_div_overflow.skel.h" #include "verifier_global_subprogs.skel.h" +#include "verifier_global_ptr_args.skel.h" #include "verifier_gotol.skel.h" #include "verifier_helper_access_var_len.skel.h" #include "verifier_helper_packet_access.skel.h" @@ -140,6 +141,7 @@ void test_verifier_direct_stack_access_wraparound(void) { RUN(verifier_direct_st void test_verifier_div0(void) { RUN(verifier_div0); } void test_verifier_div_overflow(void) { RUN(verifier_div_overflow); } void test_verifier_global_subprogs(void) { RUN(verifier_global_subprogs); } +void test_verifier_global_ptr_args(void) { RUN(verifier_global_ptr_args); } void test_verifier_gotol(void) { RUN(verifier_gotol); } void test_verifier_helper_access_var_len(void) { RUN(verifier_helper_access_var_len); } void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_access); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c index c3b45745cbcc..6d8b54124cb3 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c @@ -511,7 +511,7 @@ static void test_xdp_bonding_features(struct skeletons *skeletons) if (!ASSERT_OK(err, "bond bpf_xdp_query")) goto out; - if (!ASSERT_EQ(query_opts.feature_flags, NETDEV_XDP_ACT_MASK, + if (!ASSERT_EQ(query_opts.feature_flags, 0, "bond query_opts.feature_flags")) goto out; @@ -601,7 +601,7 @@ static void test_xdp_bonding_features(struct skeletons *skeletons) if (!ASSERT_OK(err, "bond bpf_xdp_query")) goto out; - ASSERT_EQ(query_opts.feature_flags, NETDEV_XDP_ACT_MASK, + ASSERT_EQ(query_opts.feature_flags, 0, "bond query_opts.feature_flags"); out: bpf_link__destroy(link); diff --git a/tools/testing/selftests/bpf/progs/async_stack_depth.c b/tools/testing/selftests/bpf/progs/async_stack_depth.c index 3517c0e01206..36734683acbd 100644 --- a/tools/testing/selftests/bpf/progs/async_stack_depth.c +++ b/tools/testing/selftests/bpf/progs/async_stack_depth.c @@ -30,7 +30,7 @@ static int bad_timer_cb(void *map, int *key, struct bpf_timer *timer) } SEC("tc") -__failure __msg("combined stack size of 2 calls is 576. Too large") +__failure __msg("combined stack size of 2 calls is") int pseudo_call_check(struct __sk_buff *ctx) { struct hmap_elem *elem; @@ -45,7 +45,7 @@ int pseudo_call_check(struct __sk_buff *ctx) } SEC("tc") -__failure __msg("combined stack size of 2 calls is 608. Too large") +__failure __msg("combined stack size of 2 calls is") int async_call_root_check(struct __sk_buff *ctx) { struct hmap_elem *elem; diff --git a/tools/testing/selftests/bpf/progs/bpf_compiler.h b/tools/testing/selftests/bpf/progs/bpf_compiler.h new file mode 100644 index 000000000000..a7c343dc82e6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_compiler.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __BPF_COMPILER_H__ +#define __BPF_COMPILER_H__ + +#define DO_PRAGMA_(X) _Pragma(#X) + +#if __clang__ +#define __pragma_loop_unroll DO_PRAGMA_(clang loop unroll(enable)) +#else +/* In GCC -funroll-loops, which is enabled with -O2, should have the + same impact than the loop-unroll-enable pragma above. */ +#define __pragma_loop_unroll +#endif + +#if __clang__ +#define __pragma_loop_unroll_count(N) DO_PRAGMA_(clang loop unroll_count(N)) +#else +#define __pragma_loop_unroll_count(N) DO_PRAGMA_(GCC unroll N) +#endif + +#if __clang__ +#define __pragma_loop_unroll_full DO_PRAGMA_(clang loop unroll(full)) +#else +#define __pragma_loop_unroll_full DO_PRAGMA_(GCC unroll 65534) +#endif + +#if __clang__ +#define __pragma_loop_no_unroll DO_PRAGMA_(clang loop unroll(disable)) +#else +#define __pragma_loop_no_unroll DO_PRAGMA_(GCC unroll 1) +#endif + +#endif diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c index 610c2427fd93..3500e4b69ebe 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c @@ -27,32 +27,6 @@ bool is_cgroup1 = 0; struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym; void bpf_cgroup_release(struct cgroup *cgrp) __ksym; -static void __on_lookup(struct cgroup *cgrp) -{ - bpf_cgrp_storage_delete(&map_a, cgrp); - bpf_cgrp_storage_delete(&map_b, cgrp); -} - -SEC("fentry/bpf_local_storage_lookup") -int BPF_PROG(on_lookup) -{ - struct task_struct *task = bpf_get_current_task_btf(); - struct cgroup *cgrp; - - if (is_cgroup1) { - cgrp = bpf_task_get_cgroup1(task, target_hid); - if (!cgrp) - return 0; - - __on_lookup(cgrp); - bpf_cgroup_release(cgrp); - return 0; - } - - __on_lookup(task->cgroups->dfl_cgrp); - return 0; -} - static void __on_update(struct cgroup *cgrp) { long *ptr; diff --git a/tools/testing/selftests/bpf/progs/connect_unix_prog.c b/tools/testing/selftests/bpf/progs/connect_unix_prog.c index ca8aa2f116b3..2ef0e0c46d17 100644 --- a/tools/testing/selftests/bpf/progs/connect_unix_prog.c +++ b/tools/testing/selftests/bpf/progs/connect_unix_prog.c @@ -28,8 +28,7 @@ int connect_unix_prog(struct bpf_sock_addr *ctx) if (sa_kern->uaddrlen != unaddrlen) return 0; - sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr, - bpf_core_type_id_kernel(struct sockaddr_un)); + sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un); if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS, sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0) return 0; diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h index 0cd4aebb97cf..c705d8112a35 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_common.h +++ b/tools/testing/selftests/bpf/progs/cpumask_common.h @@ -23,41 +23,42 @@ struct array_map { __uint(max_entries, 1); } __cpumask_map SEC(".maps"); -struct bpf_cpumask *bpf_cpumask_create(void) __ksym; -void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym; -struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) __ksym; -u32 bpf_cpumask_first(const struct cpumask *cpumask) __ksym; -u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym; +struct bpf_cpumask *bpf_cpumask_create(void) __ksym __weak; +void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym __weak; +struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) __ksym __weak; +u32 bpf_cpumask_first(const struct cpumask *cpumask) __ksym __weak; +u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym __weak; u32 bpf_cpumask_first_and(const struct cpumask *src1, - const struct cpumask *src2) __ksym; -void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym; -void bpf_cpumask_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym; -bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask) __ksym; -bool bpf_cpumask_test_and_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym; -bool bpf_cpumask_test_and_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym; -void bpf_cpumask_setall(struct bpf_cpumask *cpumask) __ksym; -void bpf_cpumask_clear(struct bpf_cpumask *cpumask) __ksym; + const struct cpumask *src2) __ksym __weak; +void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym __weak; +void bpf_cpumask_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym __weak; +bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask) __ksym __weak; +bool bpf_cpumask_test_and_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym __weak; +bool bpf_cpumask_test_and_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym __weak; +void bpf_cpumask_setall(struct bpf_cpumask *cpumask) __ksym __weak; +void bpf_cpumask_clear(struct bpf_cpumask *cpumask) __ksym __weak; bool bpf_cpumask_and(struct bpf_cpumask *cpumask, const struct cpumask *src1, - const struct cpumask *src2) __ksym; + const struct cpumask *src2) __ksym __weak; void bpf_cpumask_or(struct bpf_cpumask *cpumask, const struct cpumask *src1, - const struct cpumask *src2) __ksym; + const struct cpumask *src2) __ksym __weak; void bpf_cpumask_xor(struct bpf_cpumask *cpumask, const struct cpumask *src1, - const struct cpumask *src2) __ksym; -bool bpf_cpumask_equal(const struct cpumask *src1, const struct cpumask *src2) __ksym; -bool bpf_cpumask_intersects(const struct cpumask *src1, const struct cpumask *src2) __ksym; -bool bpf_cpumask_subset(const struct cpumask *src1, const struct cpumask *src2) __ksym; -bool bpf_cpumask_empty(const struct cpumask *cpumask) __ksym; -bool bpf_cpumask_full(const struct cpumask *cpumask) __ksym; -void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym; -u32 bpf_cpumask_any_distribute(const struct cpumask *src) __ksym; -u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, const struct cpumask *src2) __ksym; -u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym; - -void bpf_rcu_read_lock(void) __ksym; -void bpf_rcu_read_unlock(void) __ksym; + const struct cpumask *src2) __ksym __weak; +bool bpf_cpumask_equal(const struct cpumask *src1, const struct cpumask *src2) __ksym __weak; +bool bpf_cpumask_intersects(const struct cpumask *src1, const struct cpumask *src2) __ksym __weak; +bool bpf_cpumask_subset(const struct cpumask *src1, const struct cpumask *src2) __ksym __weak; +bool bpf_cpumask_empty(const struct cpumask *cpumask) __ksym __weak; +bool bpf_cpumask_full(const struct cpumask *cpumask) __ksym __weak; +void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym __weak; +u32 bpf_cpumask_any_distribute(const struct cpumask *src) __ksym __weak; +u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, + const struct cpumask *src2) __ksym __weak; +u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym __weak; + +void bpf_rcu_read_lock(void) __ksym __weak; +void bpf_rcu_read_unlock(void) __ksym __weak; static inline const struct cpumask *cast(struct bpf_cpumask *cpumask) { diff --git a/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c b/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c index 9c078f34bbb2..5a76754f846b 100644 --- a/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c +++ b/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c @@ -27,8 +27,7 @@ int getpeername_unix_prog(struct bpf_sock_addr *ctx) if (sa_kern->uaddrlen != unaddrlen) return 1; - sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr, - bpf_core_type_id_kernel(struct sockaddr_un)); + sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un); if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS, sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0) return 1; diff --git a/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c b/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c index ac7145111497..7867113c696f 100644 --- a/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c +++ b/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c @@ -27,8 +27,7 @@ int getsockname_unix_prog(struct bpf_sock_addr *ctx) if (sa_kern->uaddrlen != unaddrlen) return 1; - sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr, - bpf_core_type_id_kernel(struct sockaddr_un)); + sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un); if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS, sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0) return 1; diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index 225f02dd66d0..3db416606f2f 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -5,6 +5,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include "bpf_misc.h" +#include "bpf_compiler.h" #define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof((x)[0])) @@ -183,7 +184,7 @@ int iter_pragma_unroll_loop(const void *ctx) MY_PID_GUARD(); bpf_iter_num_new(&it, 0, 2); -#pragma nounroll + __pragma_loop_no_unroll for (i = 0; i < 3; i++) { v = bpf_iter_num_next(&it); bpf_printk("ITER_BASIC: E3 VAL: i=%d v=%d", i, v ? *v : -1); @@ -238,7 +239,7 @@ int iter_multiple_sequential_loops(const void *ctx) bpf_iter_num_destroy(&it); bpf_iter_num_new(&it, 0, 2); -#pragma nounroll + __pragma_loop_no_unroll for (i = 0; i < 3; i++) { v = bpf_iter_num_next(&it); bpf_printk("ITER_BASIC: E3 VAL: i=%d v=%d", i, v ? *v : -1); diff --git a/tools/testing/selftests/bpf/progs/loop4.c b/tools/testing/selftests/bpf/progs/loop4.c index b35337926d66..0de0357f57cc 100644 --- a/tools/testing/selftests/bpf/progs/loop4.c +++ b/tools/testing/selftests/bpf/progs/loop4.c @@ -3,6 +3,8 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include "bpf_compiler.h" + char _license[] SEC("license") = "GPL"; SEC("socket") @@ -10,7 +12,7 @@ int combinations(volatile struct __sk_buff* skb) { int ret = 0, i; -#pragma nounroll + __pragma_loop_no_unroll for (i = 0; i < 20; i++) if (skb->len) ret |= 1 << i; diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c index 3325da17ec81..efaf622c28dd 100644 --- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c +++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c @@ -316,7 +316,7 @@ struct lpm_trie { } __attribute__((preserve_access_index)); struct lpm_key { - struct bpf_lpm_trie_key trie_key; + struct bpf_lpm_trie_key_hdr trie_key; __u32 data; }; diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h index de3b6e4e4d0a..6957d9f2805e 100644 --- a/tools/testing/selftests/bpf/progs/profiler.inc.h +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -8,6 +8,7 @@ #include "profiler.h" #include "err.h" #include "bpf_experimental.h" +#include "bpf_compiler.h" #ifndef NULL #define NULL 0 @@ -169,7 +170,7 @@ static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct, int spid) { #ifdef UNROLL -#pragma unroll + __pragma_loop_unroll #endif for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) if (arr_struct->array[i].meta.pid == spid) @@ -185,7 +186,7 @@ static INLINE void populate_ancestors(struct task_struct* task, ancestors_data->num_ancestors = 0; #ifdef UNROLL -#pragma unroll + __pragma_loop_unroll #endif for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) { parent = BPF_CORE_READ(parent, real_parent); @@ -212,7 +213,7 @@ static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node, size_t filepart_length; #ifdef UNROLL -#pragma unroll + __pragma_loop_unroll #endif for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) { filepart_length = @@ -261,7 +262,7 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local, pids_cgrp_id___local); #ifdef UNROLL -#pragma unroll + __pragma_loop_unroll #endif for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) { struct cgroup_subsys_state* subsys = @@ -402,7 +403,7 @@ static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig) if (kill_data == NULL) return 0; #ifdef UNROLL -#pragma unroll + __pragma_loop_unroll #endif for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) if (arr_struct->array[i].meta.pid == 0) { @@ -482,7 +483,7 @@ read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload) struct dentry* parent_dentry; #ifdef UNROLL -#pragma unroll + __pragma_loop_unroll #endif for (int i = 0; i < MAX_PATH_DEPTH; i++) { filepart_length = @@ -508,7 +509,7 @@ is_ancestor_in_allowed_inodes(struct dentry* filp_dentry) { struct dentry* parent_dentry; #ifdef UNROLL -#pragma unroll + __pragma_loop_unroll #endif for (int i = 0; i < MAX_PATH_DEPTH; i++) { u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino); @@ -629,7 +630,7 @@ int raw_tracepoint__sched_process_exit(void* ctx) struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn); #ifdef UNROLL -#pragma unroll + __pragma_loop_unroll #endif for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) { struct var_kill_data_t* past_kill_data = &arr_struct->array[i]; diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h index 026d573ce179..86484f07e1d1 100644 --- a/tools/testing/selftests/bpf/progs/pyperf.h +++ b/tools/testing/selftests/bpf/progs/pyperf.h @@ -8,6 +8,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include "bpf_misc.h" +#include "bpf_compiler.h" #define FUNCTION_NAME_LEN 64 #define FILE_NAME_LEN 128 @@ -298,11 +299,11 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx) #if defined(USE_ITER) /* no for loop, no unrolling */ #elif defined(NO_UNROLL) -#pragma clang loop unroll(disable) + __pragma_loop_no_unroll #elif defined(UNROLL_COUNT) -#pragma clang loop unroll_count(UNROLL_COUNT) + __pragma_loop_unroll_count(UNROLL_COUNT) #else -#pragma clang loop unroll(full) + __pragma_loop_unroll_full #endif /* NO_UNROLL */ /* Unwind python stack */ #ifdef USE_ITER diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c index 14fb01437fb8..ab3a532b7dd6 100644 --- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c @@ -319,3 +319,123 @@ int cross_rcu_region(void *ctx) bpf_rcu_read_unlock(); return 0; } + +__noinline +static int static_subprog(void *ctx) +{ + volatile int ret = 0; + + if (bpf_get_prandom_u32()) + return ret + 42; + return ret + bpf_get_prandom_u32(); +} + +__noinline +int global_subprog(u64 a) +{ + volatile int ret = a; + + return ret + static_subprog(NULL); +} + +__noinline +static int static_subprog_lock(void *ctx) +{ + volatile int ret = 0; + + bpf_rcu_read_lock(); + if (bpf_get_prandom_u32()) + return ret + 42; + return ret + bpf_get_prandom_u32(); +} + +__noinline +int global_subprog_lock(u64 a) +{ + volatile int ret = a; + + return ret + static_subprog_lock(NULL); +} + +__noinline +static int static_subprog_unlock(void *ctx) +{ + volatile int ret = 0; + + bpf_rcu_read_unlock(); + if (bpf_get_prandom_u32()) + return ret + 42; + return ret + bpf_get_prandom_u32(); +} + +__noinline +int global_subprog_unlock(u64 a) +{ + volatile int ret = a; + + return ret + static_subprog_unlock(NULL); +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int rcu_read_lock_subprog(void *ctx) +{ + volatile int ret = 0; + + bpf_rcu_read_lock(); + if (bpf_get_prandom_u32()) + ret += static_subprog(ctx); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int rcu_read_lock_global_subprog(void *ctx) +{ + volatile int ret = 0; + + bpf_rcu_read_lock(); + if (bpf_get_prandom_u32()) + ret += global_subprog(ret); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int rcu_read_lock_subprog_lock(void *ctx) +{ + volatile int ret = 0; + + ret += static_subprog_lock(ctx); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int rcu_read_lock_global_subprog_lock(void *ctx) +{ + volatile int ret = 0; + + ret += global_subprog_lock(ret); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int rcu_read_lock_subprog_unlock(void *ctx) +{ + volatile int ret = 0; + + bpf_rcu_read_lock(); + ret += static_subprog_unlock(ctx); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int rcu_read_lock_global_subprog_unlock(void *ctx) +{ + volatile int ret = 0; + + bpf_rcu_read_lock(); + ret += global_subprog_unlock(ret); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c b/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c index 4dfbc8552558..1c7ab44bccfa 100644 --- a/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c +++ b/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c @@ -27,8 +27,7 @@ int recvmsg_unix_prog(struct bpf_sock_addr *ctx) if (sa_kern->uaddrlen != unaddrlen) return 1; - sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr, - bpf_core_type_id_kernel(struct sockaddr_un)); + sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un); if (memcmp(sa_kern_unaddr->sun_path, SERVUN_ADDRESS, sizeof(SERVUN_ADDRESS) - 1) != 0) return 1; diff --git a/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c b/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c index 1f67e832666e..d8869b03dda9 100644 --- a/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c +++ b/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c @@ -28,8 +28,7 @@ int sendmsg_unix_prog(struct bpf_sock_addr *ctx) if (sa_kern->uaddrlen != unaddrlen) return 0; - sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr, - bpf_core_type_id_kernel(struct sockaddr_un)); + sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un); if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS, sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0) return 0; diff --git a/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c b/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c index 3e745793b27a..46d6eb2a3b17 100644 --- a/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c +++ b/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c @@ -12,8 +12,6 @@ int cookie_found = 0; __u64 cookie = 0; __u32 omem = 0; -void *bpf_rdonly_cast(void *, __u32) __ksym; - struct { __uint(type, BPF_MAP_TYPE_SK_STORAGE); __uint(map_flags, BPF_F_NO_PREALLOC); @@ -29,7 +27,7 @@ int BPF_PROG(bpf_local_storage_destroy, struct bpf_local_storage *local_storage) if (local_storage_ptr != local_storage) return 0; - sk = bpf_rdonly_cast(sk_ptr, bpf_core_type_id_kernel(struct sock)); + sk = bpf_core_cast(sk_ptr, struct sock); if (sk->sk_cookie.counter != cookie) return 0; diff --git a/tools/testing/selftests/bpf/progs/sock_iter_batch.c b/tools/testing/selftests/bpf/progs/sock_iter_batch.c index ffbbfe1fa1c1..96531b0d9d55 100644 --- a/tools/testing/selftests/bpf/progs/sock_iter_batch.c +++ b/tools/testing/selftests/bpf/progs/sock_iter_batch.c @@ -32,7 +32,7 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx) if (!sk) return 0; - sk = bpf_rdonly_cast(sk, bpf_core_type_id_kernel(struct sock)); + sk = bpf_core_cast(sk, struct sock); if (sk->sk_family != AF_INET6 || sk->sk_state != TCP_LISTEN || !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr)) @@ -68,7 +68,7 @@ int iter_udp_soreuse(struct bpf_iter__udp *ctx) if (!sk) return 0; - sk = bpf_rdonly_cast(sk, bpf_core_type_id_kernel(struct sock)); + sk = bpf_core_cast(sk, struct sock); if (sk->sk_family != AF_INET6 || !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr)) return 0; diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h index 40df2cc26eaf..f74459eead26 100644 --- a/tools/testing/selftests/bpf/progs/strobemeta.h +++ b/tools/testing/selftests/bpf/progs/strobemeta.h @@ -10,6 +10,8 @@ #include <linux/types.h> #include <bpf/bpf_helpers.h> +#include "bpf_compiler.h" + typedef uint32_t pid_t; struct task_struct {}; @@ -419,9 +421,9 @@ static __always_inline uint64_t read_map_var(struct strobemeta_cfg *cfg, } #ifdef NO_UNROLL -#pragma clang loop unroll(disable) + __pragma_loop_no_unroll #else -#pragma unroll + __pragma_loop_unroll #endif for (int i = 0; i < STROBE_MAX_MAP_ENTRIES; ++i) { if (i >= map.cnt) @@ -560,25 +562,25 @@ static void *read_strobe_meta(struct task_struct *task, payload_off = sizeof(data->payload); #else #ifdef NO_UNROLL -#pragma clang loop unroll(disable) + __pragma_loop_no_unroll #else -#pragma unroll + __pragma_loop_unroll #endif /* NO_UNROLL */ for (int i = 0; i < STROBE_MAX_INTS; ++i) { read_int_var(cfg, i, tls_base, &value, data); } #ifdef NO_UNROLL -#pragma clang loop unroll(disable) + __pragma_loop_no_unroll #else -#pragma unroll + __pragma_loop_unroll #endif /* NO_UNROLL */ for (int i = 0; i < STROBE_MAX_STRS; ++i) { payload_off = read_str_var(cfg, i, tls_base, &value, data, payload_off); } #ifdef NO_UNROLL -#pragma clang loop unroll(disable) + __pragma_loop_no_unroll #else -#pragma unroll + __pragma_loop_unroll #endif /* NO_UNROLL */ for (int i = 0; i < STROBE_MAX_MAPS; ++i) { payload_off = read_map_var(cfg, i, tls_base, &value, data, payload_off); diff --git a/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c new file mode 100644 index 000000000000..b450f72e744a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include "../bpf_testmod/bpf_testmod.h" + +char _license[] SEC("license") = "GPL"; + +pid_t tgid = 0; + +/* This is a test BPF program that uses struct_ops to access an argument + * that may be NULL. This is a test for the verifier to ensure that it can + * rip PTR_MAYBE_NULL correctly. + */ +SEC("struct_ops/test_maybe_null") +int BPF_PROG(test_maybe_null, int dummy, + struct task_struct *task) +{ + if (task) + tgid = task->tgid; + + return 0; +} + +SEC(".struct_ops.link") +struct bpf_testmod_ops testmod_1 = { + .test_maybe_null = (void *)test_maybe_null, +}; + diff --git a/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c new file mode 100644 index 000000000000..6283099ec383 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include "../bpf_testmod/bpf_testmod.h" + +char _license[] SEC("license") = "GPL"; + +pid_t tgid = 0; + +SEC("struct_ops/test_maybe_null_struct_ptr") +int BPF_PROG(test_maybe_null_struct_ptr, int dummy, + struct task_struct *task) +{ + tgid = task->tgid; + + return 0; +} + +SEC(".struct_ops.link") +struct bpf_testmod_ops testmod_struct_ptr = { + .test_maybe_null = (void *)test_maybe_null_struct_ptr, +}; + diff --git a/tools/testing/selftests/bpf/progs/struct_ops_module.c b/tools/testing/selftests/bpf/progs/struct_ops_module.c index e44ac55195ca..25952fa09348 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_module.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_module.c @@ -16,15 +16,22 @@ int BPF_PROG(test_1) } SEC("struct_ops/test_2") -int BPF_PROG(test_2, int a, int b) +void BPF_PROG(test_2, int a, int b) { test_2_result = a + b; - return a + b; +} + +SEC("struct_ops/test_3") +int BPF_PROG(test_3, int a, int b) +{ + test_2_result = a + b + 3; + return a + b + 3; } SEC(".struct_ops.link") struct bpf_testmod_ops testmod_1 = { .test_1 = (void *)test_1, .test_2 = (void *)test_2, + .data = 0x1, }; diff --git a/tools/testing/selftests/bpf/progs/task_ls_recursion.c b/tools/testing/selftests/bpf/progs/task_ls_recursion.c index 4542dc683b44..f1853c38aada 100644 --- a/tools/testing/selftests/bpf/progs/task_ls_recursion.c +++ b/tools/testing/selftests/bpf/progs/task_ls_recursion.c @@ -27,23 +27,6 @@ struct { __type(value, long); } map_b SEC(".maps"); -SEC("fentry/bpf_local_storage_lookup") -int BPF_PROG(on_lookup) -{ - struct task_struct *task = bpf_get_current_task_btf(); - - if (!test_pid || task->pid != test_pid) - return 0; - - /* The bpf_task_storage_delete will call - * bpf_local_storage_lookup. The prog->active will - * stop the recursion. - */ - bpf_task_storage_delete(&map_a, task); - bpf_task_storage_delete(&map_b, task); - return 0; -} - SEC("fentry/bpf_local_storage_update") int BPF_PROG(on_update) { diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c index 66b304982245..683c8aaa63da 100644 --- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c @@ -20,8 +20,11 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> +#include "bpf_compiler.h" #include "test_cls_redirect.h" +#pragma GCC diagnostic ignored "-Waddress-of-packed-member" + #ifdef SUBPROGS #define INLINING __noinline #else @@ -267,7 +270,7 @@ static INLINING void pkt_ipv4_checksum(struct iphdr *iph) uint32_t acc = 0; uint16_t *ipw = (uint16_t *)iph; -#pragma clang loop unroll(full) + __pragma_loop_unroll_full for (size_t i = 0; i < sizeof(struct iphdr) / 2; i++) { acc += ipw[i]; } @@ -294,7 +297,7 @@ bool pkt_skip_ipv6_extension_headers(buf_t *pkt, }; *is_fragment = false; -#pragma clang loop unroll(full) + __pragma_loop_unroll_full for (int i = 0; i < 6; i++) { switch (exthdr.next) { case IPPROTO_FRAGMENT: diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c index f41c81212ee9..da54c09e9a15 100644 --- a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c @@ -23,6 +23,8 @@ #include "test_cls_redirect.h" #include "bpf_kfuncs.h" +#pragma GCC diagnostic ignored "-Waddress-of-packed-member" + #define offsetofend(TYPE, MEMBER) \ (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER))) diff --git a/tools/testing/selftests/bpf/progs/test_global_func1.c b/tools/testing/selftests/bpf/progs/test_global_func1.c index 17a9f59bf5f3..fc69ff18880d 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func1.c +++ b/tools/testing/selftests/bpf/progs/test_global_func1.c @@ -5,7 +5,7 @@ #include <bpf/bpf_helpers.h> #include "bpf_misc.h" -#define MAX_STACK (512 - 3 * 32 + 8) +#define MAX_STACK 260 static __attribute__ ((noinline)) int f0(int var, struct __sk_buff *skb) @@ -30,6 +30,10 @@ int f3(int, struct __sk_buff *skb, int); __attribute__ ((noinline)) int f2(int val, struct __sk_buff *skb) { + volatile char buf[MAX_STACK] = {}; + + __sink(buf[MAX_STACK - 1]); + return f1(skb) + f3(val, skb, 1); } @@ -44,7 +48,7 @@ int f3(int val, struct __sk_buff *skb, int var) } SEC("tc") -__failure __msg("combined stack size of 4 calls is 544") +__failure __msg("combined stack size of 3 calls is") int global_func1(struct __sk_buff *skb) { return f0(1, skb) + f1(skb) + f2(2, skb) + f3(3, skb, 4); diff --git a/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c b/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c index 9a06e5eb1fbe..143c8a4852bf 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c +++ b/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c @@ -26,6 +26,23 @@ int kprobe_typedef_ctx(void *ctx) return kprobe_typedef_ctx_subprog(ctx); } +/* s390x defines: + * + * typedef user_pt_regs bpf_user_pt_regs_t; + * typedef struct { ... } user_pt_regs; + * + * And so "canonical" underlying struct type is anonymous. + * So on s390x only valid ways to have PTR_TO_CTX argument in global subprogs + * are: + * - bpf_user_pt_regs_t *ctx (typedef); + * - struct bpf_user_pt_regs_t *ctx (backwards compatible struct hack); + * - void *ctx __arg_ctx (arg:ctx tag) + * + * Other architectures also allow using underlying struct types (e.g., + * `struct pt_regs *ctx` for x86-64) + */ +#ifndef bpf_target_s390 + #define pt_regs_struct_t typeof(*(__PT_REGS_CAST((struct pt_regs *)NULL))) __weak int kprobe_struct_ctx_subprog(pt_regs_struct_t *ctx) @@ -40,6 +57,8 @@ int kprobe_resolved_ctx(void *ctx) return kprobe_struct_ctx_subprog(ctx); } +#endif + /* this is current hack to make this work on old kernels */ struct bpf_user_pt_regs_t {}; diff --git a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c index 48ff2b2ad5e7..fed66f36adb6 100644 --- a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c +++ b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c @@ -6,6 +6,8 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> +#include "bpf_compiler.h" + /* Packet parsing state machine helpers. */ #define cursor_advance(_cursor, _len) \ ({ void *_tmp = _cursor; _cursor += _len; _tmp; }) @@ -131,7 +133,7 @@ int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh, *pad_off = 0; // we can only go as far as ~10 TLVs due to the BPF max stack size - #pragma clang loop unroll(full) + __pragma_loop_unroll_full for (int i = 0; i < 10; i++) { struct sr6_tlv_t tlv; @@ -302,7 +304,7 @@ int __encap_srh(struct __sk_buff *skb) seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh)); - #pragma clang loop unroll(full) + __pragma_loop_unroll_full for (unsigned long long lo = 0; lo < 4; lo++) { seg->lo = bpf_cpu_to_be64(4 - lo); seg->hi = bpf_cpu_to_be64(hi); diff --git a/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c b/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c index 4bdd65b5aa2d..2fdc44e76624 100644 --- a/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c +++ b/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c @@ -6,13 +6,13 @@ char tp_name[128]; -SEC("lsm/bpf") +SEC("lsm.s/bpf") int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size) { switch (cmd) { case BPF_RAW_TRACEPOINT_OPEN: - bpf_probe_read_user_str(tp_name, sizeof(tp_name) - 1, - (void *)attr->raw_tracepoint.name); + bpf_copy_from_user(tp_name, sizeof(tp_name) - 1, + (void *)attr->raw_tracepoint.name); break; default: break; diff --git a/tools/testing/selftests/bpf/progs/test_seg6_loop.c b/tools/testing/selftests/bpf/progs/test_seg6_loop.c index a7278f064368..5059050f74f6 100644 --- a/tools/testing/selftests/bpf/progs/test_seg6_loop.c +++ b/tools/testing/selftests/bpf/progs/test_seg6_loop.c @@ -6,6 +6,8 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> +#include "bpf_compiler.h" + /* Packet parsing state machine helpers. */ #define cursor_advance(_cursor, _len) \ ({ void *_tmp = _cursor; _cursor += _len; _tmp; }) @@ -134,7 +136,7 @@ static __always_inline int is_valid_tlv_boundary(struct __sk_buff *skb, // we can only go as far as ~10 TLVs due to the BPF max stack size // workaround: define induction variable "i" as "long" instead // of "int" to prevent alu32 sub-register spilling. - #pragma clang loop unroll(disable) + __pragma_loop_no_unroll for (long i = 0; i < 100; i++) { struct sr6_tlv_t tlv; diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c index c482110cfc95..a724a70c6700 100644 --- a/tools/testing/selftests/bpf/progs/test_skb_ctx.c +++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c @@ -3,12 +3,14 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include "bpf_compiler.h" + char _license[] SEC("license") = "GPL"; SEC("tc") int process(struct __sk_buff *skb) { - #pragma clang loop unroll(full) + __pragma_loop_unroll_full for (int i = 0; i < 5; i++) { if (skb->cb[i] != i + 1) return 1; diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock.c b/tools/testing/selftests/bpf/progs/test_spin_lock.c index b2440a0ff422..d8d77bdffd3d 100644 --- a/tools/testing/selftests/bpf/progs/test_spin_lock.c +++ b/tools/testing/selftests/bpf/progs/test_spin_lock.c @@ -101,4 +101,69 @@ int bpf_spin_lock_test(struct __sk_buff *skb) err: return err; } + +struct bpf_spin_lock lockA __hidden SEC(".data.A"); + +__noinline +static int static_subprog(struct __sk_buff *ctx) +{ + volatile int ret = 0; + + if (ctx->protocol) + return ret; + return ret + ctx->len; +} + +__noinline +static int static_subprog_lock(struct __sk_buff *ctx) +{ + volatile int ret = 0; + + ret = static_subprog(ctx); + bpf_spin_lock(&lockA); + return ret + ctx->len; +} + +__noinline +static int static_subprog_unlock(struct __sk_buff *ctx) +{ + volatile int ret = 0; + + ret = static_subprog(ctx); + bpf_spin_unlock(&lockA); + return ret + ctx->len; +} + +SEC("tc") +int lock_static_subprog_call(struct __sk_buff *ctx) +{ + int ret = 0; + + bpf_spin_lock(&lockA); + if (ctx->mark == 42) + ret = static_subprog(ctx); + bpf_spin_unlock(&lockA); + return ret; +} + +SEC("tc") +int lock_static_subprog_lock(struct __sk_buff *ctx) +{ + int ret = 0; + + ret = static_subprog_lock(ctx); + bpf_spin_unlock(&lockA); + return ret; +} + +SEC("tc") +int lock_static_subprog_unlock(struct __sk_buff *ctx) +{ + int ret = 0; + + bpf_spin_lock(&lockA); + ret = static_subprog_unlock(ctx); + return ret; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c index 86cd183ef6dc..43f40c4fe241 100644 --- a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c +++ b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c @@ -201,4 +201,48 @@ CHECK(innermapval_mapval, &iv->lock, &v->lock); #undef CHECK +__noinline +int global_subprog(struct __sk_buff *ctx) +{ + volatile int ret = 0; + + if (ctx->protocol) + ret += ctx->protocol; + return ret + ctx->mark; +} + +__noinline +static int static_subprog_call_global(struct __sk_buff *ctx) +{ + volatile int ret = 0; + + if (ctx->protocol) + return ret; + return ret + ctx->len + global_subprog(ctx); +} + +SEC("?tc") +int lock_global_subprog_call1(struct __sk_buff *ctx) +{ + int ret = 0; + + bpf_spin_lock(&lockA); + if (ctx->mark == 42) + ret = global_subprog(ctx); + bpf_spin_unlock(&lockA); + return ret; +} + +SEC("?tc") +int lock_global_subprog_call2(struct __sk_buff *ctx) +{ + int ret = 0; + + bpf_spin_lock(&lockA); + if (ctx->mark == 42) + ret = static_subprog_call_global(ctx); + bpf_spin_unlock(&lockA); + return ret; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c index 553a282d816a..7f74077d6622 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c @@ -9,6 +9,8 @@ #include <bpf/bpf_helpers.h> +#include "bpf_compiler.h" + #ifndef ARRAY_SIZE #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #endif @@ -30,7 +32,7 @@ static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx) if (ret < 0 || ret != sizeof(tcp_mem_name) - 1) return 0; -#pragma clang loop unroll(disable) + __pragma_loop_no_unroll for (i = 0; i < sizeof(tcp_mem_name); ++i) if (name[i] != tcp_mem_name[i]) return 0; @@ -59,7 +61,7 @@ int sysctl_tcp_mem(struct bpf_sysctl *ctx) if (ret < 0 || ret >= MAX_VALUE_STR_LEN) return 0; -#pragma clang loop unroll(disable) + __pragma_loop_no_unroll for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) { ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0, tcp_mem + i); diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c index 2b64bc563a12..68a75436e8af 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c @@ -9,6 +9,8 @@ #include <bpf/bpf_helpers.h> +#include "bpf_compiler.h" + #ifndef ARRAY_SIZE #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #endif @@ -30,7 +32,7 @@ static __attribute__((noinline)) int is_tcp_mem(struct bpf_sysctl *ctx) if (ret < 0 || ret != sizeof(tcp_mem_name) - 1) return 0; -#pragma clang loop unroll(disable) + __pragma_loop_no_unroll for (i = 0; i < sizeof(tcp_mem_name); ++i) if (name[i] != tcp_mem_name[i]) return 0; @@ -57,7 +59,7 @@ int sysctl_tcp_mem(struct bpf_sysctl *ctx) if (ret < 0 || ret >= MAX_VALUE_STR_LEN) return 0; -#pragma clang loop unroll(disable) + __pragma_loop_no_unroll for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) { ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0, tcp_mem + i); diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c index 5489823c83fc..efc3c61f7852 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c @@ -9,6 +9,8 @@ #include <bpf/bpf_helpers.h> +#include "bpf_compiler.h" + /* Max supported length of a string with unsigned long in base 10 (pow2 - 1). */ #define MAX_ULONG_STR_LEN 0xF @@ -31,7 +33,7 @@ static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx) if (ret < 0 || ret != sizeof(tcp_mem_name) - 1) return 0; -#pragma clang loop unroll(full) + __pragma_loop_unroll_full for (i = 0; i < sizeof(tcp_mem_name); ++i) if (name[i] != tcp_mem_name[i]) return 0; @@ -57,7 +59,7 @@ int sysctl_tcp_mem(struct bpf_sysctl *ctx) if (ret < 0 || ret >= MAX_VALUE_STR_LEN) return 0; -#pragma clang loop unroll(full) + __pragma_loop_unroll_full for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) { ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0, tcp_mem + i); diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c index e6e678aa9874..404124a93892 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c +++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c @@ -19,6 +19,9 @@ #include <bpf/bpf_endian.h> #include <bpf/bpf_helpers.h> +#include "bpf_compiler.h" + +#pragma GCC diagnostic ignored "-Waddress-of-packed-member" static const int cfg_port = 8000; @@ -81,7 +84,7 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph) iph->check = 0; -#pragma clang loop unroll(full) + __pragma_loop_unroll_full for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++) csum += *iph16++; diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c index a5501b29979a..c8e4553648bf 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c @@ -10,6 +10,8 @@ #include "test_siphash.h" #include "test_tcp_custom_syncookie.h" +#define MAX_PACKET_OFF 0xffff + /* Hash is calculated for each client and split into ISN and TS. * * MSB LSB @@ -52,16 +54,15 @@ static siphash_key_t test_key_siphash = { struct tcp_syncookie { struct __sk_buff *skb; + void *data; void *data_end; struct ethhdr *eth; struct iphdr *ipv4; struct ipv6hdr *ipv6; struct tcphdr *tcp; - union { - char *ptr; - __be32 *ptr32; - }; + __be32 *ptr32; struct bpf_tcp_req_attrs attrs; + u32 off; u32 cookie; u64 first; }; @@ -70,6 +71,7 @@ bool handled_syn, handled_ack; static int tcp_load_headers(struct tcp_syncookie *ctx) { + ctx->data = (void *)(long)ctx->skb->data; ctx->data_end = (void *)(long)ctx->skb->data_end; ctx->eth = (struct ethhdr *)(long)ctx->skb->data; @@ -134,6 +136,7 @@ static int tcp_reload_headers(struct tcp_syncookie *ctx) if (bpf_skb_change_tail(ctx->skb, data_len + 60 - ctx->tcp->doff * 4, 0)) goto err; + ctx->data = (void *)(long)ctx->skb->data; ctx->data_end = (void *)(long)ctx->skb->data_end; ctx->eth = (struct ethhdr *)(long)ctx->skb->data; if (ctx->ipv4) { @@ -195,47 +198,68 @@ err: return -1; } -static int tcp_parse_option(__u32 index, struct tcp_syncookie *ctx) +static __always_inline void *next(struct tcp_syncookie *ctx, __u32 sz) { - char opcode, opsize; + __u64 off = ctx->off; + __u8 *data; - if (ctx->ptr + 1 > ctx->data_end) - goto stop; + /* Verifier forbids access to packet when offset exceeds MAX_PACKET_OFF */ + if (off > MAX_PACKET_OFF - sz) + return NULL; + + data = ctx->data + off; + barrier_var(data); + if (data + sz >= ctx->data_end) + return NULL; - opcode = *ctx->ptr++; + ctx->off += sz; + return data; +} - if (opcode == TCPOPT_EOL) +static int tcp_parse_option(__u32 index, struct tcp_syncookie *ctx) +{ + __u8 *opcode, *opsize, *wscale; + __u32 *tsval, *tsecr; + __u16 *mss; + __u32 off; + + off = ctx->off; + opcode = next(ctx, 1); + if (!opcode) goto stop; - if (opcode == TCPOPT_NOP) + if (*opcode == TCPOPT_EOL) + goto stop; + + if (*opcode == TCPOPT_NOP) goto next; - if (ctx->ptr + 1 > ctx->data_end) + opsize = next(ctx, 1); + if (!opsize) goto stop; - opsize = *ctx->ptr++; - - if (opsize < 2) + if (*opsize < 2) goto stop; - switch (opcode) { + switch (*opcode) { case TCPOPT_MSS: - if (opsize == TCPOLEN_MSS && ctx->tcp->syn && - ctx->ptr + (TCPOLEN_MSS - 2) < ctx->data_end) - ctx->attrs.mss = get_unaligned_be16(ctx->ptr); + mss = next(ctx, 2); + if (*opsize == TCPOLEN_MSS && ctx->tcp->syn && mss) + ctx->attrs.mss = get_unaligned_be16(mss); break; case TCPOPT_WINDOW: - if (opsize == TCPOLEN_WINDOW && ctx->tcp->syn && - ctx->ptr + (TCPOLEN_WINDOW - 2) < ctx->data_end) { + wscale = next(ctx, 1); + if (*opsize == TCPOLEN_WINDOW && ctx->tcp->syn && wscale) { ctx->attrs.wscale_ok = 1; - ctx->attrs.snd_wscale = *ctx->ptr; + ctx->attrs.snd_wscale = *wscale; } break; case TCPOPT_TIMESTAMP: - if (opsize == TCPOLEN_TIMESTAMP && - ctx->ptr + (TCPOLEN_TIMESTAMP - 2) < ctx->data_end) { - ctx->attrs.rcv_tsval = get_unaligned_be32(ctx->ptr); - ctx->attrs.rcv_tsecr = get_unaligned_be32(ctx->ptr + 4); + tsval = next(ctx, 4); + tsecr = next(ctx, 4); + if (*opsize == TCPOLEN_TIMESTAMP && tsval && tsecr) { + ctx->attrs.rcv_tsval = get_unaligned_be32(tsval); + ctx->attrs.rcv_tsecr = get_unaligned_be32(tsecr); if (ctx->tcp->syn && ctx->attrs.rcv_tsecr) ctx->attrs.tstamp_ok = 0; @@ -244,13 +268,12 @@ static int tcp_parse_option(__u32 index, struct tcp_syncookie *ctx) } break; case TCPOPT_SACK_PERM: - if (opsize == TCPOLEN_SACK_PERM && ctx->tcp->syn && - ctx->ptr + (TCPOLEN_SACK_PERM - 2) < ctx->data_end) + if (*opsize == TCPOLEN_SACK_PERM && ctx->tcp->syn) ctx->attrs.sack_ok = 1; break; } - ctx->ptr += opsize - 2; + ctx->off = off + *opsize; next: return 0; stop: @@ -259,7 +282,7 @@ stop: static void tcp_parse_options(struct tcp_syncookie *ctx) { - ctx->ptr = (char *)(ctx->tcp + 1); + ctx->off = (__u8 *)(ctx->tcp + 1) - (__u8 *)ctx->data, bpf_loop(40, tcp_parse_option, ctx, 0); } diff --git a/tools/testing/selftests/bpf/progs/test_xdp.c b/tools/testing/selftests/bpf/progs/test_xdp.c index d7a9a74b7245..8caf58be5818 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp.c +++ b/tools/testing/selftests/bpf/progs/test_xdp.c @@ -19,6 +19,7 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> #include "test_iptunnel_common.h" +#include "bpf_compiler.h" struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); @@ -137,7 +138,7 @@ static __always_inline int handle_ipv4(struct xdp_md *xdp) iph->ttl = 8; next_iph = (__u16 *)iph; -#pragma clang loop unroll(full) + __pragma_loop_unroll_full for (i = 0; i < sizeof(*iph) >> 1; i++) csum += *next_iph++; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_loop.c b/tools/testing/selftests/bpf/progs/test_xdp_loop.c index c98fb44156f0..93267a68825b 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_loop.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_loop.c @@ -15,6 +15,7 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> #include "test_iptunnel_common.h" +#include "bpf_compiler.h" struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); @@ -133,7 +134,7 @@ static __always_inline int handle_ipv4(struct xdp_md *xdp) iph->ttl = 8; next_iph = (__u16 *)iph; -#pragma clang loop unroll(disable) + __pragma_loop_no_unroll for (i = 0; i < sizeof(*iph) >> 1; i++) csum += *next_iph++; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c index 42c8f6ded0e4..5c7e4758a0ca 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c @@ -15,6 +15,7 @@ #include <linux/udp.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> +#include "bpf_compiler.h" static __always_inline __u32 rol32(__u32 word, unsigned int shift) { @@ -362,7 +363,7 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, iph->ttl = 4; next_iph_u16 = (__u16 *) iph; -#pragma clang loop unroll(full) + __pragma_loop_unroll_full for (int i = 0; i < sizeof(struct iphdr) >> 1; i++) csum += *next_iph_u16++; iph->check = ~((csum & 0xffff) + (csum >> 16)); @@ -409,7 +410,7 @@ int send_icmp_reply(void *data, void *data_end) iph->saddr = tmp_addr; iph->check = 0; next_iph_u16 = (__u16 *) iph; -#pragma clang loop unroll(full) + __pragma_loop_unroll_full for (int i = 0; i < sizeof(struct iphdr) >> 1; i++) csum += *next_iph_u16++; iph->check = ~((csum & 0xffff) + (csum >> 16)); diff --git a/tools/testing/selftests/bpf/progs/tracing_failure.c b/tools/testing/selftests/bpf/progs/tracing_failure.c new file mode 100644 index 000000000000..d41665d2ec8c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tracing_failure.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +SEC("?fentry/bpf_spin_lock") +int BPF_PROG(test_spin_lock, struct bpf_spin_lock *lock) +{ + return 0; +} + +SEC("?fentry/bpf_spin_unlock") +int BPF_PROG(test_spin_unlock, struct bpf_spin_lock *lock) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/type_cast.c b/tools/testing/selftests/bpf/progs/type_cast.c index a9629ac230fd..9d808b8f4ab0 100644 --- a/tools/testing/selftests/bpf/progs/type_cast.c +++ b/tools/testing/selftests/bpf/progs/type_cast.c @@ -4,6 +4,7 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include <bpf/bpf_core_read.h> +#include "bpf_kfuncs.h" struct { __uint(type, BPF_MAP_TYPE_TASK_STORAGE); @@ -19,9 +20,6 @@ char name[IFNAMSIZ]; unsigned int inum; unsigned int meta_len, frag0_len, kskb_len, kskb2_len; -void *bpf_cast_to_kern_ctx(void *) __ksym; -void *bpf_rdonly_cast(void *, __u32) __ksym; - SEC("?xdp") int md_xdp(struct xdp_md *ctx) { @@ -48,13 +46,12 @@ int md_skb(struct __sk_buff *skb) /* Simulate the following kernel macro: * #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB))) */ - shared_info = bpf_rdonly_cast(kskb->head + kskb->end, - bpf_core_type_id_kernel(struct skb_shared_info)); + shared_info = bpf_core_cast(kskb->head + kskb->end, struct skb_shared_info); meta_len = shared_info->meta_len; frag0_len = shared_info->frag_list->len; /* kskb2 should be equal to kskb */ - kskb2 = bpf_rdonly_cast(kskb, bpf_core_type_id_kernel(struct sk_buff)); + kskb2 = bpf_core_cast(kskb, typeof(*kskb2)); kskb2_len = kskb2->len; return 0; } @@ -65,7 +62,7 @@ int BPF_PROG(untrusted_ptr, struct pt_regs *regs, long id) struct task_struct *task, *task_dup; task = bpf_get_current_task_btf(); - task_dup = bpf_rdonly_cast(task, bpf_core_type_id_kernel(struct task_struct)); + task_dup = bpf_core_cast(task, struct task_struct); (void)bpf_task_storage_get(&enter_id, task_dup, 0, 0); return 0; } @@ -73,7 +70,7 @@ int BPF_PROG(untrusted_ptr, struct pt_regs *regs, long id) SEC("?tracepoint/syscalls/sys_enter_nanosleep") int kctx_u64(void *ctx) { - u64 *kctx = bpf_rdonly_cast(ctx, bpf_core_type_id_kernel(u64)); + u64 *kctx = bpf_core_cast(ctx, u64); (void)kctx; return 0; diff --git a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c new file mode 100644 index 000000000000..4ab0ef18d7eb --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> +#include "bpf_misc.h" +#include "xdp_metadata.h" +#include "bpf_kfuncs.h" + +extern struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym __weak; +extern void bpf_task_release(struct task_struct *p) __ksym __weak; + +__weak int subprog_trusted_task_nullable(struct task_struct *task __arg_trusted __arg_nullable) +{ + if (!task) + return 0; + return task->pid + task->tgid; +} + +__weak int subprog_trusted_task_nullable_extra_layer(struct task_struct *task __arg_trusted __arg_nullable) +{ + return subprog_trusted_task_nullable(task) + subprog_trusted_task_nullable(NULL); +} + +SEC("?tp_btf/task_newtask") +__success __log_level(2) +__msg("Validating subprog_trusted_task_nullable() func#1...") +__msg(": R1=trusted_ptr_or_null_task_struct(") +int trusted_task_arg_nullable(void *ctx) +{ + struct task_struct *t1 = bpf_get_current_task_btf(); + struct task_struct *t2 = bpf_task_acquire(t1); + int res = 0; + + /* known NULL */ + res += subprog_trusted_task_nullable(NULL); + + /* known non-NULL */ + res += subprog_trusted_task_nullable(t1); + res += subprog_trusted_task_nullable_extra_layer(t1); + + /* unknown if NULL or not */ + res += subprog_trusted_task_nullable(t2); + res += subprog_trusted_task_nullable_extra_layer(t2); + + if (t2) { + /* known non-NULL after explicit NULL check, just in case */ + res += subprog_trusted_task_nullable(t2); + res += subprog_trusted_task_nullable_extra_layer(t2); + + bpf_task_release(t2); + } + + return res; +} + +__weak int subprog_trusted_task_nonnull(struct task_struct *task __arg_trusted) +{ + return task->pid + task->tgid; +} + +SEC("?kprobe") +__failure __log_level(2) +__msg("R1 type=scalar expected=ptr_, trusted_ptr_, rcu_ptr_") +__msg("Caller passes invalid args into func#1 ('subprog_trusted_task_nonnull')") +int trusted_task_arg_nonnull_fail1(void *ctx) +{ + return subprog_trusted_task_nonnull(NULL); +} + +SEC("?tp_btf/task_newtask") +__failure __log_level(2) +__msg("R1 type=ptr_or_null_ expected=ptr_, trusted_ptr_, rcu_ptr_") +__msg("Caller passes invalid args into func#1 ('subprog_trusted_task_nonnull')") +int trusted_task_arg_nonnull_fail2(void *ctx) +{ + struct task_struct *t = bpf_get_current_task_btf(); + struct task_struct *nullable; + int res; + + nullable = bpf_task_acquire(t); + + /* should fail, PTR_TO_BTF_ID_OR_NULL */ + res = subprog_trusted_task_nonnull(nullable); + + if (nullable) + bpf_task_release(nullable); + + return res; +} + +SEC("?kprobe") +__success __log_level(2) +__msg("Validating subprog_trusted_task_nonnull() func#1...") +__msg(": R1=trusted_ptr_task_struct(") +int trusted_task_arg_nonnull(void *ctx) +{ + struct task_struct *t = bpf_get_current_task_btf(); + + return subprog_trusted_task_nonnull(t); +} + +struct task_struct___local {} __attribute__((preserve_access_index)); + +__weak int subprog_nullable_task_flavor( + struct task_struct___local *task __arg_trusted __arg_nullable) +{ + char buf[16]; + + if (!task) + return 0; + + return bpf_copy_from_user_task(&buf, sizeof(buf), NULL, (void *)task, 0); +} + +SEC("?uprobe.s") +__success __log_level(2) +__msg("Validating subprog_nullable_task_flavor() func#1...") +__msg(": R1=trusted_ptr_or_null_task_struct(") +int flavor_ptr_nullable(void *ctx) +{ + struct task_struct___local *t = (void *)bpf_get_current_task_btf(); + + return subprog_nullable_task_flavor(t); +} + +__weak int subprog_nonnull_task_flavor(struct task_struct___local *task __arg_trusted) +{ + char buf[16]; + + return bpf_copy_from_user_task(&buf, sizeof(buf), NULL, (void *)task, 0); +} + +SEC("?uprobe.s") +__success __log_level(2) +__msg("Validating subprog_nonnull_task_flavor() func#1...") +__msg(": R1=trusted_ptr_task_struct(") +int flavor_ptr_nonnull(void *ctx) +{ + struct task_struct *t = bpf_get_current_task_btf(); + + return subprog_nonnull_task_flavor((void *)t); +} + +__weak int subprog_trusted_destroy(struct task_struct *task __arg_trusted) +{ + bpf_task_release(task); /* should be rejected */ + + return 0; +} + +SEC("?tp_btf/task_newtask") +__failure __log_level(2) +__msg("release kernel function bpf_task_release expects refcounted PTR_TO_BTF_ID") +int BPF_PROG(trusted_destroy_fail, struct task_struct *task, u64 clone_flags) +{ + return subprog_trusted_destroy(task); +} + +__weak int subprog_trusted_acq_rel(struct task_struct *task __arg_trusted) +{ + struct task_struct *owned; + + owned = bpf_task_acquire(task); + if (!owned) + return 0; + + bpf_task_release(owned); /* this one is OK, we acquired it locally */ + + return 0; +} + +SEC("?tp_btf/task_newtask") +__success __log_level(2) +int BPF_PROG(trusted_acq_rel, struct task_struct *task, u64 clone_flags) +{ + return subprog_trusted_acq_rel(task); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c index 67dddd941891..baff5ffe9405 100644 --- a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c +++ b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c @@ -115,6 +115,35 @@ int arg_tag_nullable_ptr_fail(void *ctx) return subprog_nullable_ptr_bad(&x); } +typedef struct { + int x; +} user_struct_t; + +__noinline __weak int subprog_user_anon_mem(user_struct_t *t) +{ + return t ? t->x : 0; +} + +SEC("?tracepoint") +__failure __log_level(2) +__msg("invalid bpf_context access") +__msg("Caller passes invalid args into func#1 ('subprog_user_anon_mem')") +int anon_user_mem_invalid(void *ctx) +{ + /* can't pass PTR_TO_CTX as user memory */ + return subprog_user_anon_mem(ctx); +} + +SEC("?tracepoint") +__success __log_level(2) +__msg("Func#1 ('subprog_user_anon_mem') is safe for any args that match its prototype") +int anon_user_mem_valid(void *ctx) +{ + user_struct_t t = { .x = 42 }; + + return subprog_user_anon_mem(&t); +} + __noinline __weak int subprog_nonnull_ptr_good(int *p1 __arg_nonnull, int *p2 __arg_nonnull) { return (*p1) * (*p2); /* good, no need for NULL checks */ diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c index 5905e036e0ea..a955a6358206 100644 --- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c +++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c @@ -239,4 +239,74 @@ int bpf_loop_iter_limit_nested(void *unused) return 1000 * a + b + c; } +struct iter_limit_bug_ctx { + __u64 a; + __u64 b; + __u64 c; +}; + +static __naked void iter_limit_bug_cb(void) +{ + /* This is the same as C code below, but written + * in assembly to control which branches are fall-through. + * + * switch (bpf_get_prandom_u32()) { + * case 1: ctx->a = 42; break; + * case 2: ctx->b = 42; break; + * default: ctx->c = 42; break; + * } + */ + asm volatile ( + "r9 = r2;" + "call %[bpf_get_prandom_u32];" + "r1 = r0;" + "r2 = 42;" + "r0 = 0;" + "if r1 == 0x1 goto 1f;" + "if r1 == 0x2 goto 2f;" + "*(u64 *)(r9 + 16) = r2;" + "exit;" + "1: *(u64 *)(r9 + 0) = r2;" + "exit;" + "2: *(u64 *)(r9 + 8) = r2;" + "exit;" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + +SEC("tc") +__failure +__flag(BPF_F_TEST_STATE_FREQ) +int iter_limit_bug(struct __sk_buff *skb) +{ + struct iter_limit_bug_ctx ctx = { 7, 7, 7 }; + + bpf_loop(2, iter_limit_bug_cb, &ctx, 0); + + /* This is the same as C code below, + * written in assembly to guarantee checks order. + * + * if (ctx.a == 42 && ctx.b == 42 && ctx.c == 7) + * asm volatile("r1 /= 0;":::"r1"); + */ + asm volatile ( + "r1 = *(u64 *)%[ctx_a];" + "if r1 != 42 goto 1f;" + "r1 = *(u64 *)%[ctx_b];" + "if r1 != 42 goto 1f;" + "r1 = *(u64 *)%[ctx_c];" + "if r1 != 7 goto 1f;" + "r1 /= 0;" + "1:" + : + : [ctx_a]"m"(ctx.a), + [ctx_b]"m"(ctx.b), + [ctx_c]"m"(ctx.c) + : "r1" + ); + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index 7013a9694163..85e48069c9e6 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -217,7 +217,7 @@ __naked void uninit_u32_from_the_stack(void) SEC("tc") __description("Spill a u32 const scalar. Refill as u16. Offset to skb->data") -__failure __msg("invalid access to packet") +__success __retval(0) __naked void u16_offset_to_skb_data(void) { asm volatile (" \ @@ -225,13 +225,19 @@ __naked void u16_offset_to_skb_data(void) r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \ w4 = 20; \ *(u32*)(r10 - 8) = r4; \ - r4 = *(u16*)(r10 - 8); \ + " +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r4 = *(u16*)(r10 - 8);" +#else + "r4 = *(u16*)(r10 - 6);" +#endif + " \ r0 = r2; \ - /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */\ + /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=20 */\ r0 += r4; \ - /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=umax=65535 */\ + /* if (r0 > r3) R0=pkt,off=20 R2=pkt R3=pkt_end R4=20 */\ if r0 > r3 goto l0_%=; \ - /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=20 */\ + /* r0 = *(u32 *)r2 R0=pkt,off=20 R2=pkt R3=pkt_end R4=20 */\ r0 = *(u32*)(r2 + 0); \ l0_%=: r0 = 0; \ exit; \ @@ -268,7 +274,7 @@ l0_%=: r0 = 0; \ } SEC("tc") -__description("Spill a u32 const scalar. Refill as u16 from fp-6. Offset to skb->data") +__description("Spill a u32 const scalar. Refill as u16 from MSB. Offset to skb->data") __failure __msg("invalid access to packet") __naked void _6_offset_to_skb_data(void) { @@ -277,7 +283,13 @@ __naked void _6_offset_to_skb_data(void) r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \ w4 = 20; \ *(u32*)(r10 - 8) = r4; \ - r4 = *(u16*)(r10 - 6); \ + " +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r4 = *(u16*)(r10 - 6);" +#else + "r4 = *(u16*)(r10 - 8);" +#endif + " \ r0 = r2; \ /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */\ r0 += r4; \ @@ -452,9 +464,9 @@ l0_%=: r1 >>= 16; \ SEC("raw_tp") __log_level(2) __success -__msg("fp-8=0m??mmmm") -__msg("fp-16=00mm??mm") -__msg("fp-24=00mm???m") +__msg("fp-8=0m??scalar()") +__msg("fp-16=00mm??scalar()") +__msg("fp-24=00mm???scalar()") __naked void spill_subregs_preserve_stack_zero(void) { asm volatile ( @@ -940,4 +952,296 @@ l0_%=: r0 = 0; \ : __clobber_all); } +SEC("xdp") +__description("spill unbounded reg, then range check src") +__success __retval(0) +__naked void spill_unbounded(void) +{ + asm volatile (" \ + /* Produce an unbounded scalar. */ \ + call %[bpf_get_prandom_u32]; \ + /* Spill r0 to stack. */ \ + *(u64*)(r10 - 8) = r0; \ + /* Boundary check on r0. */ \ + if r0 > 16 goto l0_%=; \ + /* Fill r0 from stack. */ \ + r0 = *(u64*)(r10 - 8); \ + /* Boundary check on r0 with predetermined result. */\ + if r0 <= 16 goto l0_%=; \ + /* Dead branch: the verifier should prune it. Do an invalid memory\ + * access if the verifier follows it. \ + */ \ + r0 = *(u64*)(r9 + 0); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("xdp") +__description("32-bit fill after 64-bit spill") +__success __retval(0) +__naked void fill_32bit_after_spill_64bit(void) +{ + asm volatile(" \ + /* Randomize the upper 32 bits. */ \ + call %[bpf_get_prandom_u32]; \ + r0 <<= 32; \ + /* 64-bit spill r0 to stack. */ \ + *(u64*)(r10 - 8) = r0; \ + /* 32-bit fill r0 from stack. */ \ + " +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r0 = *(u32*)(r10 - 8);" +#else + "r0 = *(u32*)(r10 - 4);" +#endif + " \ + /* Boundary check on r0 with predetermined result. */\ + if r0 == 0 goto l0_%=; \ + /* Dead branch: the verifier should prune it. Do an invalid memory\ + * access if the verifier follows it. \ + */ \ + r0 = *(u64*)(r9 + 0); \ +l0_%=: exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("xdp") +__description("32-bit fill after 64-bit spill of 32-bit value should preserve ID") +__success __retval(0) +__naked void fill_32bit_after_spill_64bit_preserve_id(void) +{ + asm volatile (" \ + /* Randomize the lower 32 bits. */ \ + call %[bpf_get_prandom_u32]; \ + w0 &= 0xffffffff; \ + /* 64-bit spill r0 to stack - should assign an ID. */\ + *(u64*)(r10 - 8) = r0; \ + /* 32-bit fill r1 from stack - should preserve the ID. */\ + " +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r1 = *(u32*)(r10 - 8);" +#else + "r1 = *(u32*)(r10 - 4);" +#endif + " \ + /* Compare r1 with another register to trigger find_equal_scalars. */\ + r2 = 0; \ + if r1 != r2 goto l0_%=; \ + /* The result of this comparison is predefined. */\ + if r0 == r2 goto l0_%=; \ + /* Dead branch: the verifier should prune it. Do an invalid memory\ + * access if the verifier follows it. \ + */ \ + r0 = *(u64*)(r9 + 0); \ + exit; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("xdp") +__description("32-bit fill after 64-bit spill should clear ID") +__failure __msg("math between ctx pointer and 4294967295 is not allowed") +__naked void fill_32bit_after_spill_64bit_clear_id(void) +{ + asm volatile (" \ + r6 = r1; \ + /* Roll one bit to force the verifier to track both branches. */\ + call %[bpf_get_prandom_u32]; \ + r0 &= 0x8; \ + /* Put a large number into r1. */ \ + r1 = 0xffffffff; \ + r1 <<= 32; \ + r1 += r0; \ + /* 64-bit spill r1 to stack - should assign an ID. */\ + *(u64*)(r10 - 8) = r1; \ + /* 32-bit fill r2 from stack - should clear the ID. */\ + " +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r2 = *(u32*)(r10 - 8);" +#else + "r2 = *(u32*)(r10 - 4);" +#endif + " \ + /* Compare r2 with another register to trigger find_equal_scalars.\ + * Having one random bit is important here, otherwise the verifier cuts\ + * the corners. If the ID was mistakenly preserved on fill, this would\ + * cause the verifier to think that r1 is also equal to zero in one of\ + * the branches, and equal to eight on the other branch.\ + */ \ + r3 = 0; \ + if r2 != r3 goto l0_%=; \ +l0_%=: r1 >>= 32; \ + /* The verifier shouldn't propagate r2's range to r1, so it should\ + * still remember r1 = 0xffffffff and reject the below.\ + */ \ + r6 += r1; \ + r0 = *(u32*)(r6 + 0); \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +/* stacksafe(): check if stack spill of an imprecise scalar in old state + * is considered equivalent to STACK_{MISC,INVALID} in cur state. + */ +SEC("socket") +__success __log_level(2) +__msg("8: (79) r1 = *(u64 *)(r10 -8)") +__msg("8: safe") +__msg("processed 11 insns") +/* STACK_INVALID should prevent verifier in unpriv mode from + * considering states equivalent and force an error on second + * verification path (entry - label 1 - label 2). + */ +__failure_unpriv +__msg_unpriv("8: (79) r1 = *(u64 *)(r10 -8)") +__msg_unpriv("9: (95) exit") +__msg_unpriv("8: (79) r1 = *(u64 *)(r10 -8)") +__msg_unpriv("invalid read from stack off -8+2 size 8") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void old_imprecise_scalar_vs_cur_stack_misc(void) +{ + asm volatile( + /* get a random value for branching */ + "call %[bpf_ktime_get_ns];" + "if r0 == 0 goto 1f;" + /* conjure scalar at fp-8 */ + "r0 = 42;" + "*(u64*)(r10 - 8) = r0;" + "goto 2f;" +"1:" + /* conjure STACK_{MISC,INVALID} at fp-8 */ + "call %[bpf_ktime_get_ns];" + "*(u16*)(r10 - 8) = r0;" + "*(u16*)(r10 - 4) = r0;" +"2:" + /* read fp-8, should be considered safe on second visit */ + "r1 = *(u64*)(r10 - 8);" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +/* stacksafe(): check that stack spill of a precise scalar in old state + * is not considered equivalent to STACK_MISC in cur state. + */ +SEC("socket") +__success __log_level(2) +/* verifier should visit 'if r1 == 0x2a ...' two times: + * - once for path entry - label 2; + * - once for path entry - label 1 - label 2. + */ +__msg("if r1 == 0x2a goto pc+0") +__msg("if r1 == 0x2a goto pc+0") +__msg("processed 15 insns") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void old_precise_scalar_vs_cur_stack_misc(void) +{ + asm volatile( + /* get a random value for branching */ + "call %[bpf_ktime_get_ns];" + "if r0 == 0 goto 1f;" + /* conjure scalar at fp-8 */ + "r0 = 42;" + "*(u64*)(r10 - 8) = r0;" + "goto 2f;" +"1:" + /* conjure STACK_MISC at fp-8 */ + "call %[bpf_ktime_get_ns];" + "*(u64*)(r10 - 8) = r0;" + "*(u32*)(r10 - 4) = r0;" +"2:" + /* read fp-8, should not be considered safe on second visit */ + "r1 = *(u64*)(r10 - 8);" + /* use r1 in precise context */ + "if r1 == 42 goto +0;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +/* stacksafe(): check if STACK_MISC in old state is considered + * equivalent to stack spill of a scalar in cur state. + */ +SEC("socket") +__success __log_level(2) +__msg("8: (79) r0 = *(u64 *)(r10 -8)") +__msg("8: safe") +__msg("processed 11 insns") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void old_stack_misc_vs_cur_scalar(void) +{ + asm volatile( + /* get a random value for branching */ + "call %[bpf_ktime_get_ns];" + "if r0 == 0 goto 1f;" + /* conjure STACK_{MISC,INVALID} at fp-8 */ + "call %[bpf_ktime_get_ns];" + "*(u16*)(r10 - 8) = r0;" + "*(u16*)(r10 - 4) = r0;" + "goto 2f;" +"1:" + /* conjure scalar at fp-8 */ + "r0 = 42;" + "*(u64*)(r10 - 8) = r0;" +"2:" + /* read fp-8, should be considered safe on second visit */ + "r0 = *(u64*)(r10 - 8);" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +/* stacksafe(): check that STACK_MISC in old state is not considered + * equivalent to stack spill of a non-scalar in cur state. + */ +SEC("socket") +__success __log_level(2) +/* verifier should process exit instructions twice: + * - once for path entry - label 2; + * - once for path entry - label 1 - label 2. + */ +__msg("r1 = *(u64 *)(r10 -8)") +__msg("exit") +__msg("r1 = *(u64 *)(r10 -8)") +__msg("exit") +__msg("processed 11 insns") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void old_stack_misc_vs_cur_ctx_ptr(void) +{ + asm volatile( + /* remember context pointer in r9 */ + "r9 = r1;" + /* get a random value for branching */ + "call %[bpf_ktime_get_ns];" + "if r0 == 0 goto 1f;" + /* conjure STACK_MISC at fp-8 */ + "call %[bpf_ktime_get_ns];" + "*(u64*)(r10 - 8) = r0;" + "*(u32*)(r10 - 4) = r0;" + "goto 2f;" +"1:" + /* conjure context pointer in fp-8 */ + "*(u64*)(r10 - 8) = r9;" +"2:" + /* read fp-8, should not be considered safe on second visit */ + "r1 = *(u64*)(r10 - 8);" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_spin_lock.c b/tools/testing/selftests/bpf/progs/verifier_spin_lock.c index 9c1aa69650f8..fb316c080c84 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spin_lock.c +++ b/tools/testing/selftests/bpf/progs/verifier_spin_lock.c @@ -330,7 +330,7 @@ l1_%=: r7 = r0; \ SEC("cgroup/skb") __description("spin_lock: test10 lock in subprog without unlock") -__failure __msg("unlock is missing") +__success __failure_unpriv __msg_unpriv("") __naked void lock_in_subprog_without_unlock(void) { diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c index 518329c666e9..7ea9785738b5 100644 --- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c +++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c @@ -7,6 +7,8 @@ #include <bpf/bpf_endian.h> #include <asm/errno.h> +#include "bpf_compiler.h" + #define TC_ACT_OK 0 #define TC_ACT_SHOT 2 @@ -151,11 +153,11 @@ static __always_inline __u16 csum_ipv6_magic(const struct in6_addr *saddr, __u64 sum = csum; int i; -#pragma unroll + __pragma_loop_unroll for (i = 0; i < 4; i++) sum += (__u32)saddr->in6_u.u6_addr32[i]; -#pragma unroll + __pragma_loop_unroll for (i = 0; i < 4; i++) sum += (__u32)daddr->in6_u.u6_addr32[i]; diff --git a/tools/testing/selftests/bpf/progs/xdping_kern.c b/tools/testing/selftests/bpf/progs/xdping_kern.c index 54cf1765118b..44e2b0ef23ae 100644 --- a/tools/testing/selftests/bpf/progs/xdping_kern.c +++ b/tools/testing/selftests/bpf/progs/xdping_kern.c @@ -15,6 +15,7 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> +#include "bpf_compiler.h" #include "xdping.h" struct { @@ -116,7 +117,7 @@ int xdping_client(struct xdp_md *ctx) return XDP_PASS; if (pinginfo->start) { -#pragma clang loop unroll(full) + __pragma_loop_unroll_full for (i = 0; i < XDPING_MAX_COUNT; i++) { if (pinginfo->times[i] == 0) break; diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c index c028d621c744..d98c72dc563e 100644 --- a/tools/testing/selftests/bpf/test_lpm_map.c +++ b/tools/testing/selftests/bpf/test_lpm_map.c @@ -211,7 +211,7 @@ static void test_lpm_map(int keysize) volatile size_t n_matches, n_matches_after_delete; size_t i, j, n_nodes, n_lookups; struct tlpm_node *t, *list = NULL; - struct bpf_lpm_trie_key *key; + struct bpf_lpm_trie_key_u8 *key; uint8_t *data, *value; int r, map; @@ -331,8 +331,8 @@ static void test_lpm_map(int keysize) static void test_lpm_ipaddr(void) { LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); - struct bpf_lpm_trie_key *key_ipv4; - struct bpf_lpm_trie_key *key_ipv6; + struct bpf_lpm_trie_key_u8 *key_ipv4; + struct bpf_lpm_trie_key_u8 *key_ipv6; size_t key_size_ipv4; size_t key_size_ipv6; int map_fd_ipv4; @@ -423,7 +423,7 @@ static void test_lpm_ipaddr(void) static void test_lpm_delete(void) { LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); - struct bpf_lpm_trie_key *key; + struct bpf_lpm_trie_key_u8 *key; size_t key_size; int map_fd; __u64 value; @@ -532,7 +532,7 @@ static void test_lpm_delete(void) static void test_lpm_get_next_key(void) { LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); - struct bpf_lpm_trie_key *key_p, *next_key_p; + struct bpf_lpm_trie_key_u8 *key_p, *next_key_p; size_t key_size; __u32 value = 0; int map_fd; @@ -693,9 +693,9 @@ static void *lpm_test_command(void *arg) { int i, j, ret, iter, key_size; struct lpm_mt_test_info *info = arg; - struct bpf_lpm_trie_key *key_p; + struct bpf_lpm_trie_key_u8 *key_p; - key_size = sizeof(struct bpf_lpm_trie_key) + sizeof(__u32); + key_size = sizeof(*key_p) + sizeof(__u32); key_p = alloca(key_size); for (iter = 0; iter < info->iter; iter++) for (i = 0; i < MAX_TEST_KEYS; i++) { @@ -717,7 +717,7 @@ static void *lpm_test_command(void *arg) ret = bpf_map_lookup_elem(info->map_fd, key_p, &value); assert(ret == 0 || errno == ENOENT); } else { - struct bpf_lpm_trie_key *next_key_p = alloca(key_size); + struct bpf_lpm_trie_key_u8 *next_key_p = alloca(key_size); ret = bpf_map_get_next_key(info->map_fd, key_p, next_key_p); assert(ret == 0 || errno == ENOENT || errno == ENOMEM); } @@ -752,7 +752,7 @@ static void test_lpm_multi_thread(void) /* create a trie */ value_size = sizeof(__u32); - key_size = sizeof(struct bpf_lpm_trie_key) + value_size; + key_size = sizeof(struct bpf_lpm_trie_key_hdr) + value_size; map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, key_size, value_size, 100, &opts); /* create 4 threads to test update, delete, lookup and get_next_key */ diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 2f9f6f250f17..80df51244886 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -385,10 +385,15 @@ int test__join_cgroup(const char *path); goto goto_label; \ }) +#define ALL_TO_DEV_NULL " >/dev/null 2>&1" + #define SYS_NOFAIL(fmt, ...) \ ({ \ char cmd[1024]; \ - snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + int n; \ + n = snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + if (n < sizeof(cmd) && sizeof(cmd) - n >= sizeof(ALL_TO_DEV_NULL)) \ + strcat(cmd, ALL_TO_DEV_NULL); \ system(cmd); \ }) diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index a59e56d804ee..28b6646662af 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -356,12 +356,12 @@ __u64 read_perf_max_sample_freq(void) return sample_freq; } -static int finit_module(int fd, const char *param_values, int flags) +int finit_module(int fd, const char *param_values, int flags) { return syscall(__NR_finit_module, fd, param_values, flags); } -static int delete_module(const char *name, int flags) +int delete_module(const char *name, int flags) { return syscall(__NR_delete_module, name, flags); } diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h index d14de81727e6..d55f6ab12433 100644 --- a/tools/testing/selftests/bpf/testing_helpers.h +++ b/tools/testing/selftests/bpf/testing_helpers.h @@ -36,6 +36,8 @@ __u64 read_perf_max_sample_freq(void); int load_bpf_testmod(bool verbose); int unload_bpf_testmod(bool verbose); int kern_sync_rcu(void); +int finit_module(int fd, const char *param_values, int flags); +int delete_module(const char *name, int flags); static inline __u64 get_time_ns(void) { diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 4faa898ff7fc..27fd7ed3e4b0 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -271,7 +271,7 @@ ssize_t get_uprobe_offset(const void *addr) * addi r2,r2,XXXX */ { - const u32 *insn = (const u32 *)(uintptr_t)addr; + const __u32 *insn = (const __u32 *)(uintptr_t)addr; if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) || ((*insn & OP_RT_RA_MASK) == LIS_R2)) && diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh index 616d3581419c..31252bc8775e 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh @@ -869,7 +869,7 @@ bloom_simple_test() bloom_complex_test() { # Bloom filter index computation is affected from region ID, eRP - # ID and from the region key size. In order to excercise those parts + # ID and from the region key size. In order to exercise those parts # of the Bloom filter code, use a series of regions, each with a # different key size and send packet that should hit all of them. local index diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile index 7a29a05bea8b..5bace0b7fb57 100644 --- a/tools/testing/selftests/drivers/net/netdevsim/Makefile +++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile @@ -10,6 +10,7 @@ TEST_PROGS = devlink.sh \ fib.sh \ hw_stats_l3.sh \ nexthop.sh \ + peer.sh \ psample.sh \ tc-mq-visibility.sh \ udp_tunnel_nic.sh \ diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh index 7d7829f57550..6c52ce1b0450 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh @@ -49,7 +49,7 @@ for o in llrs rs; do Active FEC encoding: ${o^^}" done -# Test mutliple bits +# Test multiple bits $ETHTOOL --set-fec $NSIM_NETDEV encoding rs llrs check $? s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) diff --git a/tools/testing/selftests/drivers/net/netdevsim/peer.sh b/tools/testing/selftests/drivers/net/netdevsim/peer.sh new file mode 100755 index 000000000000..aed62d9e6c0a --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/peer.sh @@ -0,0 +1,143 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +source ../../../net/net_helper.sh + +NSIM_DEV_1_ID=$((256 + RANDOM % 256)) +NSIM_DEV_1_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_1_ID +NSIM_DEV_2_ID=$((512 + RANDOM % 256)) +NSIM_DEV_2_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_2_ID + +NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device +NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device +NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device +NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device + +socat_check() +{ + if [ ! -x "$(command -v socat)" ]; then + echo "socat command not found. Skipping test" + return 1 + fi + + return 0 +} + +setup_ns() +{ + set -e + ip netns add nssv + ip netns add nscl + + NSIM_DEV_1_NAME=$(find $NSIM_DEV_1_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_DEV_1_SYS/net -exec basename {} \;) + NSIM_DEV_2_NAME=$(find $NSIM_DEV_2_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_DEV_2_SYS/net -exec basename {} \;) + + ip link set $NSIM_DEV_1_NAME netns nssv + ip link set $NSIM_DEV_2_NAME netns nscl + + ip netns exec nssv ip addr add '192.168.1.1/24' dev $NSIM_DEV_1_NAME + ip netns exec nscl ip addr add '192.168.1.2/24' dev $NSIM_DEV_2_NAME + + ip netns exec nssv ip link set dev $NSIM_DEV_1_NAME up + ip netns exec nscl ip link set dev $NSIM_DEV_2_NAME up + set +e +} + +cleanup_ns() +{ + ip netns del nscl + ip netns del nssv +} + +### +### Code start +### + +socat_check || exit 4 + +modprobe netdevsim + +# linking + +echo $NSIM_DEV_1_ID > $NSIM_DEV_SYS_NEW +echo $NSIM_DEV_2_ID > $NSIM_DEV_SYS_NEW +udevadm settle + +setup_ns + +NSIM_DEV_1_FD=$((256 + RANDOM % 256)) +exec {NSIM_DEV_1_FD}</var/run/netns/nssv +NSIM_DEV_1_IFIDX=$(ip netns exec nssv cat /sys/class/net/$NSIM_DEV_1_NAME/ifindex) + +NSIM_DEV_2_FD=$((256 + RANDOM % 256)) +exec {NSIM_DEV_2_FD}</var/run/netns/nscl +NSIM_DEV_2_IFIDX=$(ip netns exec nscl cat /sys/class/net/$NSIM_DEV_2_NAME/ifindex) + +echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:2000" > $NSIM_DEV_SYS_LINK 2>/dev/null +if [ $? -eq 0 ]; then + echo "linking with non-existent netdevsim should fail" + cleanup_ns + exit 1 +fi + +echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX 2000:$NSIM_DEV_2_IFIDX" > $NSIM_DEV_SYS_LINK 2>/dev/null +if [ $? -eq 0 ]; then + echo "linking with non-existent netnsid should fail" + cleanup_ns + exit 1 +fi + +echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX" > $NSIM_DEV_SYS_LINK 2>/dev/null +if [ $? -eq 0 ]; then + echo "linking with self should fail" + cleanup_ns + exit 1 +fi + +echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:$NSIM_DEV_2_IFIDX" > $NSIM_DEV_SYS_LINK +if [ $? -ne 0 ]; then + echo "linking netdevsim1 with netdevsim2 should succeed" + cleanup_ns + exit 1 +fi + +# argument error checking + +echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:a" > $NSIM_DEV_SYS_LINK 2>/dev/null +if [ $? -eq 0 ]; then + echo "invalid arg should fail" + cleanup_ns + exit 1 +fi + +# send/recv packets + +tmp_file=$(mktemp) +ip netns exec nssv socat TCP-LISTEN:1234,fork $tmp_file & +pid=$! +res=0 + +wait_local_port_listen nssv 1234 tcp + +echo "HI" | ip netns exec nscl socat STDIN TCP:192.168.1.1:1234 + +count=$(cat $tmp_file | wc -c) +if [[ $count -ne 3 ]]; then + echo "expected 3 bytes, got $count" + res=1 +fi + +echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX" > $NSIM_DEV_SYS_UNLINK + +echo $NSIM_DEV_2_ID > $NSIM_DEV_SYS_DEL + +kill $pid +echo $NSIM_DEV_1_ID > $NSIM_DEV_SYS_DEL + +cleanup_ns + +modprobe -r netdevsim + +exit $res diff --git a/tools/testing/selftests/iommu/config b/tools/testing/selftests/iommu/config index 6c4f901d6fed..110d73917615 100644 --- a/tools/testing/selftests/iommu/config +++ b/tools/testing/selftests/iommu/config @@ -1,2 +1,3 @@ -CONFIG_IOMMUFD -CONFIG_IOMMUFD_TEST +CONFIG_IOMMUFD=y +CONFIG_FAULT_INJECTION=y +CONFIG_IOMMUFD_TEST=y diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 1a881e7a21d1..edf1c99c9936 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -12,6 +12,7 @@ static unsigned long HUGEPAGE_SIZE; #define MOCK_PAGE_SIZE (PAGE_SIZE / 2) +#define MOCK_HUGE_PAGE_SIZE (512 * MOCK_PAGE_SIZE) static unsigned long get_huge_page_size(void) { @@ -1716,10 +1717,12 @@ FIXTURE(iommufd_dirty_tracking) FIXTURE_VARIANT(iommufd_dirty_tracking) { unsigned long buffer_size; + bool hugepages; }; FIXTURE_SETUP(iommufd_dirty_tracking) { + int mmap_flags; void *vrc; int rc; @@ -1732,25 +1735,41 @@ FIXTURE_SETUP(iommufd_dirty_tracking) variant->buffer_size, rc); } + mmap_flags = MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED; + if (variant->hugepages) { + /* + * MAP_POPULATE will cause the kernel to fail mmap if THPs are + * not available. + */ + mmap_flags |= MAP_HUGETLB | MAP_POPULATE; + } assert((uintptr_t)self->buffer % HUGEPAGE_SIZE == 0); vrc = mmap(self->buffer, variant->buffer_size, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + mmap_flags, -1, 0); assert(vrc == self->buffer); self->page_size = MOCK_PAGE_SIZE; self->bitmap_size = variant->buffer_size / self->page_size / BITS_PER_BYTE; - /* Provision with an extra (MOCK_PAGE_SIZE) for the unaligned case */ + /* Provision with an extra (PAGE_SIZE) for the unaligned case */ rc = posix_memalign(&self->bitmap, PAGE_SIZE, - self->bitmap_size + MOCK_PAGE_SIZE); + self->bitmap_size + PAGE_SIZE); assert(!rc); assert(self->bitmap); assert((uintptr_t)self->bitmap % PAGE_SIZE == 0); test_ioctl_ioas_alloc(&self->ioas_id); - test_cmd_mock_domain(self->ioas_id, &self->stdev_id, &self->hwpt_id, - &self->idev_id); + /* Enable 1M mock IOMMU hugepages */ + if (variant->hugepages) { + test_cmd_mock_domain_flags(self->ioas_id, + MOCK_FLAGS_DEVICE_HUGE_IOVA, + &self->stdev_id, &self->hwpt_id, + &self->idev_id); + } else { + test_cmd_mock_domain(self->ioas_id, &self->stdev_id, + &self->hwpt_id, &self->idev_id); + } } FIXTURE_TEARDOWN(iommufd_dirty_tracking) @@ -1784,12 +1803,26 @@ FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M) .buffer_size = 128UL * 1024UL * 1024UL, }; +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M_huge) +{ + /* 4K bitmap (128M IOVA range) */ + .buffer_size = 128UL * 1024UL * 1024UL, + .hugepages = true, +}; + FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M) { /* 8K bitmap (256M IOVA range) */ .buffer_size = 256UL * 1024UL * 1024UL, }; +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M_huge) +{ + /* 8K bitmap (256M IOVA range) */ + .buffer_size = 256UL * 1024UL * 1024UL, + .hugepages = true, +}; + TEST_F(iommufd_dirty_tracking, enforce_dirty) { uint32_t ioas_id, stddev_id, idev_id; @@ -1849,65 +1882,80 @@ TEST_F(iommufd_dirty_tracking, device_dirty_capability) TEST_F(iommufd_dirty_tracking, get_dirty_bitmap) { - uint32_t stddev_id; + uint32_t page_size = MOCK_PAGE_SIZE; uint32_t hwpt_id; uint32_t ioas_id; + if (variant->hugepages) + page_size = MOCK_HUGE_PAGE_SIZE; + test_ioctl_ioas_alloc(&ioas_id); test_ioctl_ioas_map_fixed_id(ioas_id, self->buffer, variant->buffer_size, MOCK_APERTURE_START); test_cmd_hwpt_alloc(self->idev_id, ioas_id, IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id); - test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL); test_cmd_set_dirty_tracking(hwpt_id, true); test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size, - MOCK_APERTURE_START, self->page_size, + MOCK_APERTURE_START, self->page_size, page_size, self->bitmap, self->bitmap_size, 0, _metadata); /* PAGE_SIZE unaligned bitmap */ test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size, - MOCK_APERTURE_START, self->page_size, + MOCK_APERTURE_START, self->page_size, page_size, self->bitmap + MOCK_PAGE_SIZE, self->bitmap_size, 0, _metadata); - test_ioctl_destroy(stddev_id); + /* u64 unaligned bitmap */ + test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size, + MOCK_APERTURE_START, self->page_size, page_size, + self->bitmap + 0xff1, self->bitmap_size, 0, + _metadata); + test_ioctl_destroy(hwpt_id); } TEST_F(iommufd_dirty_tracking, get_dirty_bitmap_no_clear) { - uint32_t stddev_id; + uint32_t page_size = MOCK_PAGE_SIZE; uint32_t hwpt_id; uint32_t ioas_id; + if (variant->hugepages) + page_size = MOCK_HUGE_PAGE_SIZE; + test_ioctl_ioas_alloc(&ioas_id); test_ioctl_ioas_map_fixed_id(ioas_id, self->buffer, variant->buffer_size, MOCK_APERTURE_START); test_cmd_hwpt_alloc(self->idev_id, ioas_id, IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id); - test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL); test_cmd_set_dirty_tracking(hwpt_id, true); test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size, - MOCK_APERTURE_START, self->page_size, + MOCK_APERTURE_START, self->page_size, page_size, self->bitmap, self->bitmap_size, IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR, _metadata); /* Unaligned bitmap */ test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size, - MOCK_APERTURE_START, self->page_size, + MOCK_APERTURE_START, self->page_size, page_size, self->bitmap + MOCK_PAGE_SIZE, self->bitmap_size, IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR, _metadata); - test_ioctl_destroy(stddev_id); + /* u64 unaligned bitmap */ + test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size, + MOCK_APERTURE_START, self->page_size, page_size, + self->bitmap + 0xff1, self->bitmap_size, + IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR, + _metadata); + test_ioctl_destroy(hwpt_id); } diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index c646264aa41f..8d2b46b2114d 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -344,16 +344,19 @@ static int _test_cmd_mock_domain_set_dirty(int fd, __u32 hwpt_id, size_t length, page_size, bitmap, nr)) static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length, - __u64 iova, size_t page_size, __u64 *bitmap, + __u64 iova, size_t page_size, + size_t pte_page_size, __u64 *bitmap, __u64 bitmap_size, __u32 flags, struct __test_metadata *_metadata) { - unsigned long i, nbits = bitmap_size * BITS_PER_BYTE; - unsigned long nr = nbits / 2; + unsigned long npte = pte_page_size / page_size, pteset = 2 * npte; + unsigned long nbits = bitmap_size * BITS_PER_BYTE; + unsigned long j, i, nr = nbits / pteset ?: 1; __u64 out_dirty = 0; /* Mark all even bits as dirty in the mock domain */ - for (i = 0; i < nbits; i += 2) + memset(bitmap, 0, bitmap_size); + for (i = 0; i < nbits; i += pteset) set_bit(i, (unsigned long *)bitmap); test_cmd_mock_domain_set_dirty(fd, hwpt_id, length, iova, page_size, @@ -365,8 +368,12 @@ static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length, test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, bitmap, flags); /* Beware ASSERT_EQ() is two statements -- braces are not redundant! */ - for (i = 0; i < nbits; i++) { - ASSERT_EQ(!(i % 2), test_bit(i, (unsigned long *)bitmap)); + for (i = 0; i < nbits; i += pteset) { + for (j = 0; j < pteset; j++) { + ASSERT_EQ(j < npte, + test_bit(i + j, (unsigned long *)bitmap)); + } + ASSERT_EQ(!(i % pteset), test_bit(i, (unsigned long *)bitmap)); } memset(bitmap, 0, bitmap_size); @@ -374,19 +381,23 @@ static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length, flags); /* It as read already -- expect all zeroes */ - for (i = 0; i < nbits; i++) { - ASSERT_EQ(!(i % 2) && (flags & - IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR), - test_bit(i, (unsigned long *)bitmap)); + for (i = 0; i < nbits; i += pteset) { + for (j = 0; j < pteset; j++) { + ASSERT_EQ( + (j < npte) && + (flags & + IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR), + test_bit(i + j, (unsigned long *)bitmap)); + } } return 0; } -#define test_mock_dirty_bitmaps(hwpt_id, length, iova, page_size, bitmap, \ - bitmap_size, flags, _metadata) \ +#define test_mock_dirty_bitmaps(hwpt_id, length, iova, page_size, pte_size,\ + bitmap, bitmap_size, flags, _metadata) \ ASSERT_EQ(0, _test_mock_dirty_bitmaps(self->fd, hwpt_id, length, iova, \ - page_size, bitmap, bitmap_size, \ - flags, _metadata)) + page_size, pte_size, bitmap, \ + bitmap_size, flags, _metadata)) static int _test_cmd_create_access(int fd, unsigned int ioas_id, __u32 *access_id, unsigned int flags) diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index a781e6311810..541bf192e30e 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -25,6 +25,7 @@ * ksft_test_result_skip(fmt, ...); * ksft_test_result_xfail(fmt, ...); * ksft_test_result_error(fmt, ...); + * ksft_test_result_code(exit_code, test_name, fmt, ...); * * When all tests are finished, clean up and exit the program with one of: * @@ -254,6 +255,50 @@ static inline __printf(1, 2) void ksft_test_result_error(const char *msg, ...) va_end(args); } +static inline __printf(3, 4) +void ksft_test_result_code(int exit_code, const char *test_name, + const char *msg, ...) +{ + const char *tap_code = "ok"; + const char *directive = ""; + int saved_errno = errno; + va_list args; + + switch (exit_code) { + case KSFT_PASS: + ksft_cnt.ksft_pass++; + break; + case KSFT_XFAIL: + directive = " # XFAIL "; + ksft_cnt.ksft_xfail++; + break; + case KSFT_XPASS: + directive = " # XPASS "; + ksft_cnt.ksft_xpass++; + break; + case KSFT_SKIP: + directive = " # SKIP "; + ksft_cnt.ksft_xskip++; + break; + case KSFT_FAIL: + default: + tap_code = "not ok"; + ksft_cnt.ksft_fail++; + break; + } + + /* Docs seem to call for double space if directive is absent */ + if (!directive[0] && msg[0]) + directive = " # "; + + va_start(args, msg); + printf("%s %u %s%s", tap_code, ksft_test_num(), test_name, directive); + errno = saved_errno; + vprintf(msg, args); + printf("\n"); + va_end(args); +} + static inline int ksft_exit_pass(void) { ksft_print_cnts(); diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index e05ac8261046..4fd735e48ee7 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -56,6 +56,7 @@ #include <asm/types.h> #include <ctype.h> #include <errno.h> +#include <limits.h> #include <stdbool.h> #include <stdint.h> #include <stdio.h> @@ -95,14 +96,6 @@ * E.g., #define TH_LOG_ENABLED 1 * * If no definition is provided, logging is enabled by default. - * - * If there is no way to print an error message for the process running the - * test (e.g. not allowed to write to stderr), it is still possible to get the - * ASSERT_* number for which the test failed. This behavior can be enabled by - * writing `_metadata->no_print = true;` before the check sequence that is - * unable to print. When an error occur, instead of printing an error message - * and calling `abort(3)`, the test process call `_exit(2)` with the assert - * number as argument, which is then printed by the parent process. */ #define TH_LOG(fmt, ...) do { \ if (TH_LOG_ENABLED) \ @@ -135,8 +128,7 @@ fprintf(TH_LOG_STREAM, "# SKIP %s\n", \ _metadata->results->reason); \ } \ - _metadata->passed = 1; \ - _metadata->skip = 1; \ + _metadata->exit_code = KSFT_SKIP; \ _metadata->trigger = 0; \ statement; \ } while (0) @@ -363,6 +355,11 @@ * Defines a test that depends on a fixture (e.g., is part of a test case). * Very similar to TEST() except that *self* is the setup instance of fixture's * datatype exposed for use by the implementation. + * + * The @test_name code is run in a separate process sharing the same memory + * (i.e. vfork), which means that the test process can update its privileges + * without impacting the related FIXTURE_TEARDOWN() (e.g. to remove files from + * a directory where write access was dropped). */ #define TEST_F(fixture_name, test_name) \ __TEST_F_IMPL(fixture_name, test_name, -1, TEST_TIMEOUT_DEFAULT) @@ -384,17 +381,34 @@ { \ /* fixture data is alloced, setup, and torn down per call. */ \ FIXTURE_DATA(fixture_name) self; \ + pid_t child = 1; \ + int status = 0; \ memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \ if (setjmp(_metadata->env) == 0) { \ - fixture_name##_setup(_metadata, &self, variant->data); \ - /* Let setup failure terminate early. */ \ - if (!_metadata->passed || _metadata->skip) \ - return; \ - _metadata->setup_completed = true; \ - fixture_name##_##test_name(_metadata, &self, variant->data); \ + /* Use the same _metadata. */ \ + child = vfork(); \ + if (child == 0) { \ + fixture_name##_setup(_metadata, &self, variant->data); \ + /* Let setup failure terminate early. */ \ + if (_metadata->exit_code) \ + _exit(0); \ + _metadata->setup_completed = true; \ + fixture_name##_##test_name(_metadata, &self, variant->data); \ + } else if (child < 0 || child != waitpid(child, &status, 0)) { \ + ksft_print_msg("ERROR SPAWNING TEST GRANDCHILD\n"); \ + _metadata->exit_code = KSFT_FAIL; \ + } \ + } \ + if (child == 0) { \ + if (_metadata->setup_completed && !_metadata->teardown_parent) \ + fixture_name##_teardown(_metadata, &self, variant->data); \ + _exit(0); \ } \ - if (_metadata->setup_completed) \ + if (_metadata->setup_completed && _metadata->teardown_parent) \ fixture_name##_teardown(_metadata, &self, variant->data); \ + if (!WIFEXITED(status) && WIFSIGNALED(status)) \ + /* Forward signal to __wait_for_test(). */ \ + kill(getpid(), WTERMSIG(status)); \ __test_check_assert(_metadata); \ } \ static struct __test_metadata \ @@ -404,6 +418,7 @@ .fixture = &_##fixture_name##_fixture_object, \ .termsig = signal, \ .timeout = tmout, \ + .teardown_parent = false, \ }; \ static void __attribute__((constructor)) \ _register_##fixture_name##_##test_name(void) \ @@ -694,18 +709,12 @@ for (; _metadata->trigger; _metadata->trigger = \ __bail(_assert, _metadata)) -#define __INC_STEP(_metadata) \ - /* Keep "step" below 255 (which is used for "SKIP" reporting). */ \ - if (_metadata->passed && _metadata->step < 253) \ - _metadata->step++; - #define is_signed_type(var) (!!(((__typeof__(var))(-1)) < (__typeof__(var))1)) #define __EXPECT(_expected, _expected_str, _seen, _seen_str, _t, _assert) do { \ /* Avoid multiple evaluation of the cases */ \ __typeof__(_expected) __exp = (_expected); \ __typeof__(_seen) __seen = (_seen); \ - if (_assert) __INC_STEP(_metadata); \ if (!(__exp _t __seen)) { \ /* Report with actual signedness to avoid weird output. */ \ switch (is_signed_type(__exp) * 2 + is_signed_type(__seen)) { \ @@ -742,7 +751,7 @@ break; \ } \ } \ - _metadata->passed = 0; \ + _metadata->exit_code = KSFT_FAIL; \ /* Ensure the optional handler is triggered */ \ _metadata->trigger = 1; \ } \ @@ -751,10 +760,9 @@ #define __EXPECT_STR(_expected, _seen, _t, _assert) do { \ const char *__exp = (_expected); \ const char *__seen = (_seen); \ - if (_assert) __INC_STEP(_metadata); \ if (!(strcmp(__exp, __seen) _t 0)) { \ __TH_LOG("Expected '%s' %s '%s'.", __exp, #_t, __seen); \ - _metadata->passed = 0; \ + _metadata->exit_code = KSFT_FAIL; \ _metadata->trigger = 1; \ } \ } while (0); OPTIONAL_HANDLER(_assert) @@ -800,6 +808,37 @@ struct __fixture_metadata { .prev = &_fixture_global, }; +struct __test_xfail { + struct __fixture_metadata *fixture; + struct __fixture_variant_metadata *variant; + struct __test_metadata *test; + struct __test_xfail *prev, *next; +}; + +/** + * XFAIL_ADD() - mark variant + test case combination as expected to fail + * @fixture_name: name of the fixture + * @variant_name: name of the variant + * @test_name: name of the test case + * + * Mark a combination of variant + test case for a given fixture as expected + * to fail. Tests marked this way will report XPASS / XFAIL return codes, + * instead of PASS / FAIL,and use respective counters. + */ +#define XFAIL_ADD(fixture_name, variant_name, test_name) \ + static struct __test_xfail \ + _##fixture_name##_##variant_name##_##test_name##_xfail = \ + { \ + .fixture = &_##fixture_name##_fixture_object, \ + .variant = &_##fixture_name##_##variant_name##_object, \ + .test = &_##fixture_name##_##test_name##_object, \ + }; \ + static void __attribute__((constructor)) \ + _register_##fixture_name##_##variant_name##_##test_name##_xfail(void) \ + { \ + __register_xfail(&_##fixture_name##_##variant_name##_##test_name##_xfail); \ + } + static struct __fixture_metadata *__fixture_list = &_fixture_global; static int __constructor_order; @@ -814,6 +853,7 @@ static inline void __register_fixture(struct __fixture_metadata *f) struct __fixture_variant_metadata { const char *name; const void *data; + struct __test_xfail *xfails; struct __fixture_variant_metadata *prev, *next; }; @@ -832,20 +872,24 @@ struct __test_metadata { pid_t pid; /* pid of test when being run */ struct __fixture_metadata *fixture; int termsig; - int passed; - int skip; /* did SKIP get used? */ + int exit_code; int trigger; /* extra handler after the evaluation */ int timeout; /* seconds to wait for test timeout */ bool timed_out; /* did this test timeout instead of exiting? */ - __u8 step; - bool no_print; /* manual trigger when TH_LOG_STREAM is not available */ bool aborted; /* stopped test due to failed ASSERT */ bool setup_completed; /* did setup finish? */ + bool teardown_parent; /* run teardown in a parent process */ jmp_buf env; /* for exiting out of test early */ struct __test_results *results; struct __test_metadata *prev, *next; }; +static inline bool __test_passed(struct __test_metadata *metadata) +{ + return metadata->exit_code != KSFT_FAIL && + metadata->exit_code <= KSFT_SKIP; +} + /* * Since constructors are called in reverse order, reverse the test * list so tests are run in source declaration order. @@ -860,6 +904,11 @@ static inline void __register_test(struct __test_metadata *t) __LIST_APPEND(t->fixture->tests, t); } +static inline void __register_xfail(struct __test_xfail *xf) +{ + __LIST_APPEND(xf->variant->xfails, xf); +} + static inline int __bail(int for_realz, struct __test_metadata *t) { /* if this is ASSERT, return immediately. */ @@ -873,11 +922,8 @@ static inline int __bail(int for_realz, struct __test_metadata *t) static inline void __test_check_assert(struct __test_metadata *t) { - if (t->aborted) { - if (t->no_print) - _exit(t->step); + if (t->aborted) abort(); - } } struct __test_metadata *__active_test; @@ -913,7 +959,7 @@ void __wait_for_test(struct __test_metadata *t) int status; if (sigaction(SIGALRM, &action, &saved_action)) { - t->passed = 0; + t->exit_code = KSFT_FAIL; fprintf(TH_LOG_STREAM, "# %s: unable to install SIGALRM handler\n", t->name); @@ -925,7 +971,7 @@ void __wait_for_test(struct __test_metadata *t) waitpid(t->pid, &status, 0); alarm(0); if (sigaction(SIGALRM, &saved_action, NULL)) { - t->passed = 0; + t->exit_code = KSFT_FAIL; fprintf(TH_LOG_STREAM, "# %s: unable to uninstall SIGALRM handler\n", t->name); @@ -934,16 +980,16 @@ void __wait_for_test(struct __test_metadata *t) __active_test = NULL; if (t->timed_out) { - t->passed = 0; + t->exit_code = KSFT_FAIL; fprintf(TH_LOG_STREAM, "# %s: Test terminated by timeout\n", t->name); } else if (WIFEXITED(status)) { - if (WEXITSTATUS(status) == 255) { - /* SKIP */ - t->passed = 1; - t->skip = 1; + if (WEXITSTATUS(status) == KSFT_SKIP || + WEXITSTATUS(status) == KSFT_XPASS || + WEXITSTATUS(status) == KSFT_XFAIL) { + t->exit_code = WEXITSTATUS(status); } else if (t->termsig != -1) { - t->passed = 0; + t->exit_code = KSFT_FAIL; fprintf(TH_LOG_STREAM, "# %s: Test exited normally instead of by signal (code: %d)\n", t->name, @@ -951,26 +997,25 @@ void __wait_for_test(struct __test_metadata *t) } else { switch (WEXITSTATUS(status)) { /* Success */ - case 0: - t->passed = 1; + case KSFT_PASS: + t->exit_code = KSFT_PASS; break; - /* Other failure, assume step report. */ + /* Failure */ default: - t->passed = 0; + t->exit_code = KSFT_FAIL; fprintf(TH_LOG_STREAM, - "# %s: Test failed at step #%d\n", - t->name, - WEXITSTATUS(status)); + "# %s: Test failed\n", + t->name); } } } else if (WIFSIGNALED(status)) { - t->passed = 0; + t->exit_code = KSFT_FAIL; if (WTERMSIG(status) == SIGABRT) { fprintf(TH_LOG_STREAM, "# %s: Test terminated by assertion\n", t->name); } else if (WTERMSIG(status) == t->termsig) { - t->passed = 1; + t->exit_code = KSFT_PASS; } else { fprintf(TH_LOG_STREAM, "# %s: Test terminated unexpectedly by signal %d\n", @@ -1110,16 +1155,19 @@ void __run_test(struct __fixture_metadata *f, struct __fixture_variant_metadata *variant, struct __test_metadata *t) { + struct __test_xfail *xfail; + char test_name[LINE_MAX]; + const char *diagnostic; + /* reset test struct */ - t->passed = 1; - t->skip = 0; + t->exit_code = KSFT_PASS; t->trigger = 0; - t->step = 1; - t->no_print = 0; memset(t->results->reason, 0, sizeof(t->results->reason)); - ksft_print_msg(" RUN %s%s%s.%s ...\n", - f->name, variant->name[0] ? "." : "", variant->name, t->name); + snprintf(test_name, sizeof(test_name), "%s%s%s.%s", + f->name, variant->name[0] ? "." : "", variant->name, t->name); + + ksft_print_msg(" RUN %s ...\n", test_name); /* Make sure output buffers are flushed before fork */ fflush(stdout); @@ -1128,29 +1176,33 @@ void __run_test(struct __fixture_metadata *f, t->pid = fork(); if (t->pid < 0) { ksft_print_msg("ERROR SPAWNING TEST CHILD\n"); - t->passed = 0; + t->exit_code = KSFT_FAIL; } else if (t->pid == 0) { setpgrp(); t->fn(t, variant); - if (t->skip) - _exit(255); - /* Pass is exit 0 */ - if (t->passed) - _exit(0); - /* Something else happened, report the step. */ - _exit(t->step); + _exit(t->exit_code); } else { __wait_for_test(t); } - ksft_print_msg(" %4s %s%s%s.%s\n", t->passed ? "OK" : "FAIL", - f->name, variant->name[0] ? "." : "", variant->name, t->name); + ksft_print_msg(" %4s %s\n", + __test_passed(t) ? "OK" : "FAIL", test_name); - if (t->skip) - ksft_test_result_skip("%s\n", t->results->reason[0] ? - t->results->reason : "unknown"); + /* Check if we're expecting this test to fail */ + for (xfail = variant->xfails; xfail; xfail = xfail->next) + if (xfail->test == t) + break; + if (xfail) + t->exit_code = __test_passed(t) ? KSFT_XPASS : KSFT_XFAIL; + + if (t->results->reason[0]) + diagnostic = t->results->reason; + else if (t->exit_code == KSFT_PASS || t->exit_code == KSFT_FAIL) + diagnostic = NULL; else - ksft_test_result(t->passed, "%s%s%s.%s\n", - f->name, variant->name[0] ? "." : "", variant->name, t->name); + diagnostic = "unknown"; + + ksft_test_result_code(t->exit_code, test_name, + diagnostic ? "%s" : "", diagnostic); } static int test_harness_run(int argc, char **argv) @@ -1198,7 +1250,7 @@ static int test_harness_run(int argc, char **argv) t->results = results; __run_test(f, v, t); t->results = NULL; - if (t->passed) + if (__test_passed(t)) pass_count++; else ret = 1; diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c index 646f778dfb1e..a6f89aaea77d 100644 --- a/tools/testing/selftests/landlock/base_test.c +++ b/tools/testing/selftests/landlock/base_test.c @@ -307,7 +307,7 @@ TEST(ruleset_fd_transfer) dir_fd = open("/tmp", O_RDONLY | O_DIRECTORY | O_CLOEXEC); ASSERT_LE(0, dir_fd); ASSERT_EQ(0, close(dir_fd)); - _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); + _exit(_metadata->exit_code); return; } diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h index e64bbdf0e86e..401e2eb092a3 100644 --- a/tools/testing/selftests/landlock/common.h +++ b/tools/testing/selftests/landlock/common.h @@ -23,62 +23,8 @@ #define __maybe_unused __attribute__((__unused__)) #endif -/* - * TEST_F_FORK() is useful when a test drop privileges but the corresponding - * FIXTURE_TEARDOWN() requires them (e.g. to remove files from a directory - * where write actions are denied). For convenience, FIXTURE_TEARDOWN() is - * also called when the test failed, but not when FIXTURE_SETUP() failed. For - * this to be possible, we must not call abort() but instead exit smoothly - * (hence the step print). - */ -/* clang-format off */ -#define TEST_F_FORK(fixture_name, test_name) \ - static void fixture_name##_##test_name##_child( \ - struct __test_metadata *_metadata, \ - FIXTURE_DATA(fixture_name) *self, \ - const FIXTURE_VARIANT(fixture_name) *variant); \ - TEST_F(fixture_name, test_name) \ - { \ - int status; \ - const pid_t child = fork(); \ - if (child < 0) \ - abort(); \ - if (child == 0) { \ - _metadata->no_print = 1; \ - fixture_name##_##test_name##_child(_metadata, self, variant); \ - if (_metadata->skip) \ - _exit(255); \ - if (_metadata->passed) \ - _exit(0); \ - _exit(_metadata->step); \ - } \ - if (child != waitpid(child, &status, 0)) \ - abort(); \ - if (WIFSIGNALED(status) || !WIFEXITED(status)) { \ - _metadata->passed = 0; \ - _metadata->step = 1; \ - return; \ - } \ - switch (WEXITSTATUS(status)) { \ - case 0: \ - _metadata->passed = 1; \ - break; \ - case 255: \ - _metadata->passed = 1; \ - _metadata->skip = 1; \ - break; \ - default: \ - _metadata->passed = 0; \ - _metadata->step = WEXITSTATUS(status); \ - break; \ - } \ - } \ - static void fixture_name##_##test_name##_child( \ - struct __test_metadata __attribute__((unused)) *_metadata, \ - FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \ - const FIXTURE_VARIANT(fixture_name) \ - __attribute__((unused)) *variant) -/* clang-format on */ +/* TEST_F_FORK() should not be used for new tests. */ +#define TEST_F_FORK(fixture_name, test_name) TEST_F(fixture_name, test_name) #ifndef landlock_create_ruleset static inline int diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index 2d6d9b43d958..9a6036fbf289 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -285,6 +285,8 @@ static void prepare_layout_opt(struct __test_metadata *const _metadata, static void prepare_layout(struct __test_metadata *const _metadata) { + _metadata->teardown_parent = true; + prepare_layout_opt(_metadata, &mnt_tmp); } @@ -1964,7 +1966,7 @@ static void test_execute(struct __test_metadata *const _metadata, const int err, strerror(errno)); }; ASSERT_EQ(err, errno); - _exit(_metadata->passed ? 2 : 1); + _exit(__test_passed(_metadata) ? 2 : 1); return; } ASSERT_EQ(child, waitpid(child, &status, 0)); @@ -3807,7 +3809,7 @@ TEST_F_FORK(ftruncate, open_and_ftruncate_in_different_processes) ASSERT_EQ(0, close(socket_fds[0])); - _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); + _exit(_metadata->exit_code); return; } @@ -3861,9 +3863,7 @@ FIXTURE_SETUP(layout1_bind) FIXTURE_TEARDOWN(layout1_bind) { - set_cap(_metadata, CAP_SYS_ADMIN); - EXPECT_EQ(0, umount(dir_s2d2)); - clear_cap(_metadata, CAP_SYS_ADMIN); + /* umount(dir_s2d2)) is handled by namespace lifetime. */ remove_layout1(_metadata); @@ -4276,9 +4276,8 @@ FIXTURE_TEARDOWN(layout2_overlay) EXPECT_EQ(0, remove_path(lower_fl1)); EXPECT_EQ(0, remove_path(lower_do1_fo2)); EXPECT_EQ(0, remove_path(lower_fo1)); - set_cap(_metadata, CAP_SYS_ADMIN); - EXPECT_EQ(0, umount(LOWER_BASE)); - clear_cap(_metadata, CAP_SYS_ADMIN); + + /* umount(LOWER_BASE)) is handled by namespace lifetime. */ EXPECT_EQ(0, remove_path(LOWER_BASE)); EXPECT_EQ(0, remove_path(upper_do1_fu3)); @@ -4287,14 +4286,11 @@ FIXTURE_TEARDOWN(layout2_overlay) EXPECT_EQ(0, remove_path(upper_do1_fo2)); EXPECT_EQ(0, remove_path(upper_fo1)); EXPECT_EQ(0, remove_path(UPPER_WORK "/work")); - set_cap(_metadata, CAP_SYS_ADMIN); - EXPECT_EQ(0, umount(UPPER_BASE)); - clear_cap(_metadata, CAP_SYS_ADMIN); + + /* umount(UPPER_BASE)) is handled by namespace lifetime. */ EXPECT_EQ(0, remove_path(UPPER_BASE)); - set_cap(_metadata, CAP_SYS_ADMIN); - EXPECT_EQ(0, umount(MERGE_DATA)); - clear_cap(_metadata, CAP_SYS_ADMIN); + /* umount(MERGE_DATA)) is handled by namespace lifetime. */ EXPECT_EQ(0, remove_path(MERGE_DATA)); cleanup_layout(_metadata); @@ -4691,6 +4687,8 @@ FIXTURE_SETUP(layout3_fs) SKIP(return, "this filesystem is not supported (setup)"); } + _metadata->teardown_parent = true; + slash = strrchr(variant->file_path, '/'); ASSERT_NE(slash, NULL); dir_len = (size_t)slash - (size_t)variant->file_path; diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c index 936cfc879f1d..f21cfbbc3638 100644 --- a/tools/testing/selftests/landlock/net_test.c +++ b/tools/testing/selftests/landlock/net_test.c @@ -539,7 +539,7 @@ static void test_bind_and_connect(struct __test_metadata *const _metadata, } EXPECT_EQ(0, close(connect_fd)); - _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); + _exit(_metadata->exit_code); return; } @@ -834,7 +834,7 @@ TEST_F(protocol, connect_unspec) } EXPECT_EQ(0, close(connect_fd)); - _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); + _exit(_metadata->exit_code); return; } diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c index 55e7871631a1..a19db4d0b3bd 100644 --- a/tools/testing/selftests/landlock/ptrace_test.c +++ b/tools/testing/selftests/landlock/ptrace_test.c @@ -314,7 +314,7 @@ TEST_F(hierarchy, trace) ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC)); if (variant->domain_both) { create_domain(_metadata); - if (!_metadata->passed) + if (!__test_passed(_metadata)) /* Aborts before forking. */ return; } @@ -375,7 +375,7 @@ TEST_F(hierarchy, trace) /* Waits for the parent PTRACE_ATTACH test. */ ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1)); - _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); + _exit(_metadata->exit_code); return; } @@ -430,9 +430,10 @@ TEST_F(hierarchy, trace) /* Signals that the parent PTRACE_ATTACH test is done. */ ASSERT_EQ(1, write(pipe_parent[1], ".", 1)); ASSERT_EQ(child, waitpid(child, &status, 0)); + if (WIFSIGNALED(status) || !WIFEXITED(status) || WEXITSTATUS(status) != EXIT_SUCCESS) - _metadata->passed = 0; + _metadata->exit_code = KSFT_FAIL; } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c index 20294553a5dd..d2cfc9b494a0 100644 --- a/tools/testing/selftests/mm/hmm-tests.c +++ b/tools/testing/selftests/mm/hmm-tests.c @@ -138,7 +138,7 @@ FIXTURE_SETUP(hmm) self->fd = hmm_open(variant->device_number); if (self->fd < 0 && hmm_is_coherent_type(variant->device_number)) - SKIP(exit(0), "DEVICE_COHERENT not available"); + SKIP(return, "DEVICE_COHERENT not available"); ASSERT_GE(self->fd, 0); } @@ -149,7 +149,7 @@ FIXTURE_SETUP(hmm2) self->fd0 = hmm_open(variant->device_number0); if (self->fd0 < 0 && hmm_is_coherent_type(variant->device_number0)) - SKIP(exit(0), "DEVICE_COHERENT not available"); + SKIP(return, "DEVICE_COHERENT not available"); ASSERT_GE(self->fd0, 0); self->fd1 = hmm_open(variant->device_number1); ASSERT_GE(self->fd1, 0); diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index cce90a10515a..2b9f8cc52639 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -1517,6 +1517,12 @@ int main(int argc, char *argv[]) continue; uffd_test_start("%s on %s", test->name, mem_type->name); + if ((mem_type->mem_flag == MEM_HUGETLB || + mem_type->mem_flag == MEM_HUGETLB_PRIVATE) && + (default_huge_page_size() == 0)) { + uffd_test_skip("huge page size is 0, feature missing?"); + continue; + } if (!uffd_feature_supported(test)) { uffd_test_skip("feature missing"); continue; diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 3ec1050e47a2..73895711cdf4 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -789,6 +789,7 @@ fib6_gc_test() set -e EXPIRE=5 + GC_WAIT_TIME=$((EXPIRE * 2 + 2)) # Check expiration of routes every $EXPIRE seconds (GC) $NS_EXEC sysctl -wq net.ipv6.route.gc_interval=$EXPIRE @@ -805,7 +806,7 @@ fib6_gc_test() $IP -6 route add 2001:20::$i \ via 2001:10::2 dev dummy_10 expires $EXPIRE done - sleep $(($EXPIRE * 2 + 1)) + sleep $GC_WAIT_TIME $NS_EXEC sysctl -wq net.ipv6.route.flush=1 check_rt_num 0 $($IP -6 route list |grep expires|wc -l) log_test $ret 0 "ipv6 route garbage collection" @@ -823,7 +824,8 @@ fib6_gc_test() $IP -6 route add 2001:20::$i \ via 2001:10::2 dev dummy_10 expires $EXPIRE done - sleep $(($EXPIRE * 2 + 1)) + # Wait for GC + sleep $GC_WAIT_TIME check_rt_num 0 $($IP -6 route list |grep expires|wc -l) log_test $ret 0 "ipv6 route garbage collection (with permanent routes)" @@ -840,10 +842,8 @@ fib6_gc_test() $IP -6 route replace 2001:20::$i \ via 2001:10::2 dev dummy_10 expires $EXPIRE done - check_rt_num_clean 5 $($IP -6 route list |grep expires|wc -l) || return # Wait for GC - sleep $(($EXPIRE * 2 + 1)) - $NS_EXEC sysctl -wq net.ipv6.route.flush=1 + sleep $GC_WAIT_TIME check_rt_num 0 $($IP -6 route list |grep expires|wc -l) log_test $ret 0 "ipv6 route garbage collection (replace with expires)" @@ -863,8 +863,7 @@ fib6_gc_test() check_rt_num_clean 0 $($IP -6 route list |grep expires|wc -l) || return # Wait for GC - sleep $(($EXPIRE * 2 + 1)) - + sleep $GC_WAIT_TIME check_rt_num 5 $($IP -6 route list |grep -v expires|grep 2001:20::|wc -l) log_test $ret 0 "ipv6 route garbage collection (replace with permanent)" @@ -901,9 +900,7 @@ fib6_gc_test() check_rt_num_clean 1 $($IP -6 route list|grep expires|wc -l) || return # Wait for GC - sleep $(($EXPIRE * 2 + 1)) - - $NS_EXEC sysctl -wq net.ipv6.route.flush=1 + sleep $GC_WAIT_TIME check_rt_num 0 $($IP -6 route list |grep expires|wc -l) log_test $ret 0 "ipv6 route garbage collection (RA message)" diff --git a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh index 56eb83d1a3bd..1783c10215e5 100755 --- a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh @@ -183,42 +183,42 @@ send_src_ipv4() { ip vrf exec v$h1 $MZ $h1 -q -p 64 \ -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \ - -d 1msec -c 50 -t udp "sp=20000,dp=30000" + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" } send_dst_ipv4() { ip vrf exec v$h1 $MZ $h1 -q -p 64 \ -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \ - -d 1msec -c 50 -t udp "sp=20000,dp=30000" + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" } send_src_udp4() { ip vrf exec v$h1 $MZ $h1 -q -p 64 \ -A 198.51.100.2 -B 203.0.113.2 \ - -d 1msec -t udp "sp=0-32768,dp=30000" + -d $MZ_DELAY -t udp "sp=0-32768,dp=30000" } send_dst_udp4() { ip vrf exec v$h1 $MZ $h1 -q -p 64 \ -A 198.51.100.2 -B 203.0.113.2 \ - -d 1msec -t udp "sp=20000,dp=0-32768" + -d $MZ_DELAY -t udp "sp=20000,dp=0-32768" } send_src_ipv6() { ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:4::2 \ - -d 1msec -c 50 -t udp "sp=20000,dp=30000" + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" } send_dst_ipv6() { ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ -A 2001:db8:1::2 -B "2001:db8:4::2-2001:db8:4::fd" \ - -d 1msec -c 50 -t udp "sp=20000,dp=30000" + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" } send_flowlabel() @@ -234,14 +234,14 @@ send_src_udp6() { ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ -A 2001:db8:1::2 -B 2001:db8:4::2 \ - -d 1msec -t udp "sp=0-32768,dp=30000" + -d $MZ_DELAY -t udp "sp=0-32768,dp=30000" } send_dst_udp6() { ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ -A 2001:db8:1::2 -B 2001:db8:4::2 \ - -d 1msec -t udp "sp=20000,dp=0-32768" + -d $MZ_DELAY -t udp "sp=20000,dp=0-32768" } custom_hash_test() diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample index 4a546509de90..1fc4f0242fc5 100644 --- a/tools/testing/selftests/net/forwarding/forwarding.config.sample +++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample @@ -28,6 +28,8 @@ PING=ping PING6=ping6 # Packet generator. Some distributions use 'mz'. MZ=mausezahn +# mausezahn delay between transmissions in microseconds. +MZ_DELAY=0 # Time to wait after interfaces participating in the test are all UP WAIT_TIME=5 # Whether to pause on failure or not. diff --git a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh index 0446db9c6f74..9788bd0f6e8b 100755 --- a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh @@ -278,42 +278,42 @@ send_src_ipv4() { ip vrf exec v$h1 $MZ $h1 -q -p 64 \ -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \ - -d 1msec -c 50 -t udp "sp=20000,dp=30000" + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" } send_dst_ipv4() { ip vrf exec v$h1 $MZ $h1 -q -p 64 \ -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \ - -d 1msec -c 50 -t udp "sp=20000,dp=30000" + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" } send_src_udp4() { ip vrf exec v$h1 $MZ $h1 -q -p 64 \ -A 198.51.100.2 -B 203.0.113.2 \ - -d 1msec -t udp "sp=0-32768,dp=30000" + -d $MZ_DELAY -t udp "sp=0-32768,dp=30000" } send_dst_udp4() { ip vrf exec v$h1 $MZ $h1 -q -p 64 \ -A 198.51.100.2 -B 203.0.113.2 \ - -d 1msec -t udp "sp=20000,dp=0-32768" + -d $MZ_DELAY -t udp "sp=20000,dp=0-32768" } send_src_ipv6() { ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \ - -d 1msec -c 50 -t udp "sp=20000,dp=30000" + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" } send_dst_ipv6() { ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ -A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \ - -d 1msec -c 50 -t udp "sp=20000,dp=30000" + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" } send_flowlabel() @@ -329,14 +329,14 @@ send_src_udp6() { ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ -A 2001:db8:1::2 -B 2001:db8:2::2 \ - -d 1msec -t udp "sp=0-32768,dp=30000" + -d $MZ_DELAY -t udp "sp=0-32768,dp=30000" } send_dst_udp6() { ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ -A 2001:db8:1::2 -B 2001:db8:2::2 \ - -d 1msec -t udp "sp=20000,dp=0-32768" + -d $MZ_DELAY -t udp "sp=20000,dp=0-32768" } custom_hash_test() diff --git a/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh b/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh index e4009f658003..efca6114a3ce 100755 --- a/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh +++ b/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh @@ -267,7 +267,7 @@ multipath4_test() ip vrf exec v$h1 \ $MZ $h1 -q -p 64 -A "192.0.3.2-192.0.3.62" -B "192.0.4.2-192.0.4.62" \ - -d 1msec -c 50 -t udp "sp=1024,dp=1024" + -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024" sleep 1 local t1_111=$(tc_rule_stats_get $ul32 111 ingress) diff --git a/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh b/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh index e449475c4d3e..a71ad39fc0c3 100755 --- a/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh +++ b/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh @@ -266,9 +266,9 @@ multipath6_test() local t0_222=$(tc_rule_stats_get $ul32 222 ingress) ip vrf exec v$h1 \ - $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::1e" \ - -B "2001:db8:2::2-2001:db8:2::1e" \ - -d 1msec -c 50 -t udp "sp=1024,dp=1024" + $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::3e" \ + -B "2001:db8:2::2-2001:db8:2::3e" \ + -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024" sleep 1 local t1_111=$(tc_rule_stats_get $ul32 111 ingress) diff --git a/tools/testing/selftests/net/forwarding/gre_multipath.sh b/tools/testing/selftests/net/forwarding/gre_multipath.sh index a8d8e8b3dc81..57531c1d884d 100755 --- a/tools/testing/selftests/net/forwarding/gre_multipath.sh +++ b/tools/testing/selftests/net/forwarding/gre_multipath.sh @@ -220,7 +220,7 @@ multipath4_test() ip vrf exec v$h1 \ $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \ - -d 1msec -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" local t1_111=$(tc_rule_stats_get $ul2 111 ingress) local t1_222=$(tc_rule_stats_get $ul2 222 ingress) diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh index d03aa2cab9fd..7d5b2b9cc133 100755 --- a/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh +++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh @@ -64,7 +64,6 @@ ALL_TESTS=" ping_ipv6 multipath_ipv4 multipath_ipv6 - multipath_ipv6_l4 " NUM_NETIFS=6 @@ -245,7 +244,7 @@ multipath4_test() ip vrf exec v$h1 \ $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \ - -d 1msec -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" local t1_111=$(tc_rule_stats_get $ul2 111 ingress) local t1_222=$(tc_rule_stats_get $ul2 222 ingress) @@ -264,34 +263,6 @@ multipath6_test() local weight1=$1; shift local weight2=$1; shift - sysctl_set net.ipv6.fib_multipath_hash_policy 0 - ip nexthop replace id 103 group 101,$weight1/102,$weight2 - - local t0_111=$(tc_rule_stats_get $ul2 111 ingress) - local t0_222=$(tc_rule_stats_get $ul2 222 ingress) - - # Generate 16384 echo requests, each with a random flow label. - for ((i=0; i < 16384; ++i)); do - ip vrf exec v$h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null - done - - local t1_111=$(tc_rule_stats_get $ul2 111 ingress) - local t1_222=$(tc_rule_stats_get $ul2 222 ingress) - - local d111=$((t1_111 - t0_111)) - local d222=$((t1_222 - t0_222)) - multipath_eval "$what" $weight1 $weight2 $d111 $d222 - - ip nexthop replace id 103 group 101/102 - sysctl_restore net.ipv6.fib_multipath_hash_policy -} - -multipath6_l4_test() -{ - local what=$1; shift - local weight1=$1; shift - local weight2=$1; shift - sysctl_set net.ipv6.fib_multipath_hash_policy 1 ip nexthop replace id 103 group 101,$weight1/102,$weight2 @@ -300,7 +271,7 @@ multipath6_l4_test() ip vrf exec v$h1 \ $MZ $h1 -6 -q -p 64 -A 2001:db8:1::1 -B 2001:db8:2::2 \ - -d 1msec -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" local t1_111=$(tc_rule_stats_get $ul2 111 ingress) local t1_222=$(tc_rule_stats_get $ul2 222 ingress) @@ -339,14 +310,6 @@ multipath_ipv6() multipath6_test "Weighted MP 11:45" 11 45 } -multipath_ipv6_l4() -{ - log_info "Running IPv6 L4 hash multipath tests" - multipath6_l4_test "ECMP" 1 1 - multipath6_l4_test "Weighted MP 2:1" 2 1 - multipath6_l4_test "Weighted MP 11:45" 11 45 -} - trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh index 088b65e64d66..370f9925302d 100755 --- a/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh +++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh @@ -64,7 +64,6 @@ ALL_TESTS=" ping_ipv6 multipath_ipv4 multipath_ipv6 - multipath_ipv6_l4 " NUM_NETIFS=6 @@ -248,7 +247,7 @@ multipath4_test() ip vrf exec v$h1 \ $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \ - -d 1msec -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" local t1_111=$(tc_rule_stats_get $ul2 111 ingress) local t1_222=$(tc_rule_stats_get $ul2 222 ingress) @@ -267,35 +266,6 @@ multipath6_test() local weight1=$1; shift local weight2=$1; shift - sysctl_set net.ipv6.fib_multipath_hash_policy 0 - ip nexthop replace id 103 group 101,$weight1/102,$weight2 \ - type resilient - - local t0_111=$(tc_rule_stats_get $ul2 111 ingress) - local t0_222=$(tc_rule_stats_get $ul2 222 ingress) - - # Generate 16384 echo requests, each with a random flow label. - for ((i=0; i < 16384; ++i)); do - ip vrf exec v$h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null - done - - local t1_111=$(tc_rule_stats_get $ul2 111 ingress) - local t1_222=$(tc_rule_stats_get $ul2 222 ingress) - - local d111=$((t1_111 - t0_111)) - local d222=$((t1_222 - t0_222)) - multipath_eval "$what" $weight1 $weight2 $d111 $d222 - - ip nexthop replace id 103 group 101/102 type resilient - sysctl_restore net.ipv6.fib_multipath_hash_policy -} - -multipath6_l4_test() -{ - local what=$1; shift - local weight1=$1; shift - local weight2=$1; shift - sysctl_set net.ipv6.fib_multipath_hash_policy 1 ip nexthop replace id 103 group 101,$weight1/102,$weight2 \ type resilient @@ -305,7 +275,7 @@ multipath6_l4_test() ip vrf exec v$h1 \ $MZ $h1 -6 -q -p 64 -A 2001:db8:1::1 -B 2001:db8:2::2 \ - -d 1msec -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" local t1_111=$(tc_rule_stats_get $ul2 111 ingress) local t1_222=$(tc_rule_stats_get $ul2 222 ingress) @@ -344,14 +314,6 @@ multipath_ipv6() multipath6_test "Weighted MP 11:45" 11 45 } -multipath_ipv6_l4() -{ - log_info "Running IPv6 L4 hash multipath tests" - multipath6_l4_test "ECMP" 1 1 - multipath6_l4_test "Weighted MP 2:1" 2 1 - multipath6_l4_test "Weighted MP 11:45" 11 45 -} - trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh index d40183b4eccc..2ab9eaaa5532 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh @@ -280,42 +280,42 @@ send_src_ipv4() { ip vrf exec v$h1 $MZ $h1 -q -p 64 \ -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \ - -d 1msec -c 50 -t udp "sp=20000,dp=30000" + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" } send_dst_ipv4() { ip vrf exec v$h1 $MZ $h1 -q -p 64 \ -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \ - -d 1msec -c 50 -t udp "sp=20000,dp=30000" + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" } send_src_udp4() { ip vrf exec v$h1 $MZ $h1 -q -p 64 \ -A 198.51.100.2 -B 203.0.113.2 \ - -d 1msec -t udp "sp=0-32768,dp=30000" + -d $MZ_DELAY -t udp "sp=0-32768,dp=30000" } send_dst_udp4() { ip vrf exec v$h1 $MZ $h1 -q -p 64 \ -A 198.51.100.2 -B 203.0.113.2 \ - -d 1msec -t udp "sp=20000,dp=0-32768" + -d $MZ_DELAY -t udp "sp=20000,dp=0-32768" } send_src_ipv6() { ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \ - -d 1msec -c 50 -t udp "sp=20000,dp=30000" + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" } send_dst_ipv6() { ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ -A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \ - -d 1msec -c 50 -t udp "sp=20000,dp=30000" + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" } send_flowlabel() @@ -331,14 +331,14 @@ send_src_udp6() { ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ -A 2001:db8:1::2 -B 2001:db8:2::2 \ - -d 1msec -t udp "sp=0-32768,dp=30000" + -d $MZ_DELAY -t udp "sp=0-32768,dp=30000" } send_dst_udp6() { ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ -A 2001:db8:1::2 -B 2001:db8:2::2 \ - -d 1msec -t udp "sp=20000,dp=0-32768" + -d $MZ_DELAY -t udp "sp=20000,dp=0-32768" } custom_hash_test() diff --git a/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh b/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh index a257979d3fc5..32d1461f37b7 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh @@ -266,7 +266,7 @@ multipath4_test() ip vrf exec v$h1 \ $MZ $h1 -q -p 64 -A "192.0.3.2-192.0.3.62" -B "192.0.4.2-192.0.4.62" \ - -d 1msec -c 50 -t udp "sp=1024,dp=1024" + -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024" sleep 1 local t1_111=$(tc_rule_stats_get $ul32 111 ingress) diff --git a/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh b/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh index d208f5243ade..e1a4b50505f5 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh @@ -265,9 +265,9 @@ multipath6_test() local t0_222=$(tc_rule_stats_get $ul32 222 ingress) ip vrf exec v$h1 \ - $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::1e" \ - -B "2001:db8:2::2-2001:db8:2::1e" \ - -d 1msec -c 50 -t udp "sp=1024,dp=1024" + $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::3e" \ + -B "2001:db8:2::2-2001:db8:2::3e" \ + -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024" sleep 1 local t1_111=$(tc_rule_stats_get $ul32 111 ingress) diff --git a/tools/testing/selftests/net/forwarding/ip6gre_lib.sh b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh index 58a3597037b1..24f4ab328bd2 100644 --- a/tools/testing/selftests/net/forwarding/ip6gre_lib.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh @@ -356,7 +356,7 @@ test_traffic_ip4ip6() flower $TC_FLAG dst_ip 203.0.113.1 action pass $MZ $h1 -c 1000 -p 64 -a $h1mac -b $ol1mac -A 198.51.100.1 \ - -B 203.0.113.1 -t ip -q -d 1msec + -B 203.0.113.1 -t ip -q -d $MZ_DELAY # Check ports after encap and after decap. tc_check_at_least_x_packets "dev $ul1 egress" 101 1000 @@ -389,7 +389,7 @@ test_traffic_ip6ip6() flower $TC_FLAG dst_ip 2001:db8:2::1 action pass $MZ -6 $h1 -c 1000 -p 64 -a $h1mac -b $ol1mac -A 2001:db8:1::1 \ - -B 2001:db8:2::1 -t ip -q -d 1msec + -B 2001:db8:2::1 -t ip -q -d $MZ_DELAY # Check ports after encap and after decap. tc_check_at_least_x_packets "dev $ul1 egress" 101 1000 diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index db3688f52888..d1bf39eaf2b3 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -8,6 +8,7 @@ PING=${PING:=ping} PING6=${PING6:=ping6} MZ=${MZ:=mausezahn} +MZ_DELAY=${MZ_DELAY:=0} ARPING=${ARPING:=arping} TEAMD=${TEAMD:=teamd} WAIT_TIME=${WAIT_TIME:=5} diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh index a0d612e04990..982e0d098ea9 100755 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh @@ -204,7 +204,7 @@ multipath4_test() t0_rp13=$(link_stats_tx_packets_get $rp13) ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ - -d 1msec -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -218,7 +218,7 @@ multipath4_test() sysctl_restore net.ipv4.fib_multipath_hash_policy } -multipath6_l4_test() +multipath6_test() { local desc="$1" local weight_rp12=$2 @@ -237,7 +237,7 @@ multipath6_l4_test() t0_rp13=$(link_stats_tx_packets_get $rp13) $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ - -d 1msec -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -251,34 +251,6 @@ multipath6_l4_test() sysctl_restore net.ipv6.fib_multipath_hash_policy } -multipath6_test() -{ - local desc="$1" - local weight_rp12=$2 - local weight_rp13=$3 - local t0_rp12 t0_rp13 t1_rp12 t1_rp13 - local packets_rp12 packets_rp13 - - ip nexthop replace id 106 group 104,$weight_rp12/105,$weight_rp13 - - t0_rp12=$(link_stats_tx_packets_get $rp12) - t0_rp13=$(link_stats_tx_packets_get $rp13) - - # Generate 16384 echo requests, each with a random flow label. - for _ in $(seq 1 16384); do - ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1 - done - - t1_rp12=$(link_stats_tx_packets_get $rp12) - t1_rp13=$(link_stats_tx_packets_get $rp13) - - let "packets_rp12 = $t1_rp12 - $t0_rp12" - let "packets_rp13 = $t1_rp13 - $t0_rp13" - multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13 - - ip nexthop replace id 106 group 104/105 -} - multipath_test() { log_info "Running IPv4 multipath tests" @@ -301,11 +273,6 @@ multipath_test() multipath6_test "ECMP" 1 1 multipath6_test "Weighted MP 2:1" 2 1 multipath6_test "Weighted MP 11:45" 11 45 - - log_info "Running IPv6 L4 hash multipath tests" - multipath6_l4_test "ECMP" 1 1 - multipath6_l4_test "Weighted MP 2:1" 2 1 - multipath6_l4_test "Weighted MP 11:45" 11 45 } ping_ipv4_blackhole() diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh index cb08ffe2356a..a60ff54723b7 100755 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh @@ -205,7 +205,7 @@ multipath4_test() t0_rp13=$(link_stats_tx_packets_get $rp13) ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ - -d 1msec -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -235,7 +235,7 @@ multipath6_l4_test() t0_rp13=$(link_stats_tx_packets_get $rp13) $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ - -d 1msec -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh index 464821c587a5..e2be354167a1 100755 --- a/tools/testing/selftests/net/forwarding/router_multipath.sh +++ b/tools/testing/selftests/net/forwarding/router_multipath.sh @@ -179,7 +179,7 @@ multipath4_test() t0_rp13=$(link_stats_tx_packets_get $rp13) ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ - -d 1msec -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -195,7 +195,7 @@ multipath4_test() sysctl_restore net.ipv4.fib_multipath_hash_policy } -multipath6_l4_test() +multipath6_test() { local desc="$1" local weight_rp12=$2 @@ -216,7 +216,7 @@ multipath6_l4_test() t0_rp13=$(link_stats_tx_packets_get $rp13) $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ - -d 1msec -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -232,38 +232,6 @@ multipath6_l4_test() sysctl_restore net.ipv6.fib_multipath_hash_policy } -multipath6_test() -{ - local desc="$1" - local weight_rp12=$2 - local weight_rp13=$3 - local t0_rp12 t0_rp13 t1_rp12 t1_rp13 - local packets_rp12 packets_rp13 - - ip route replace 2001:db8:2::/64 vrf vrf-r1 \ - nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \ - nexthop via fe80:3::23 dev $rp13 weight $weight_rp13 - - t0_rp12=$(link_stats_tx_packets_get $rp12) - t0_rp13=$(link_stats_tx_packets_get $rp13) - - # Generate 16384 echo requests, each with a random flow label. - for _ in $(seq 1 16384); do - ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null - done - - t1_rp12=$(link_stats_tx_packets_get $rp12) - t1_rp13=$(link_stats_tx_packets_get $rp13) - - let "packets_rp12 = $t1_rp12 - $t0_rp12" - let "packets_rp13 = $t1_rp13 - $t0_rp13" - multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13 - - ip route replace 2001:db8:2::/64 vrf vrf-r1 \ - nexthop via fe80:2::22 dev $rp12 \ - nexthop via fe80:3::23 dev $rp13 -} - multipath_test() { log_info "Running IPv4 multipath tests" @@ -275,11 +243,6 @@ multipath_test() multipath6_test "ECMP" 1 1 multipath6_test "Weighted MP 2:1" 2 1 multipath6_test "Weighted MP 11:45" 11 45 - - log_info "Running IPv6 L4 hash multipath tests" - multipath6_l4_test "ECMP" 1 1 - multipath6_l4_test "Weighted MP 2:1" 2 1 - multipath6_l4_test "Weighted MP 11:45" 11 45 } setup_prepare() diff --git a/tools/testing/selftests/net/forwarding/tc_police.sh b/tools/testing/selftests/net/forwarding/tc_police.sh index 0a51eef21b9e..5103f64a71d6 100755 --- a/tools/testing/selftests/net/forwarding/tc_police.sh +++ b/tools/testing/selftests/net/forwarding/tc_police.sh @@ -140,7 +140,7 @@ police_common_test() sleep 10 local t1=$(tc_rule_stats_get $h2 1 ingress .bytes) - local er=$((80 * 1000 * 1000)) + local er=$((10 * 1000 * 1000)) local nr=$(rate $t0 $t1 10) local nr_pct=$((100 * (nr - er) / er)) ((-10 <= nr_pct && nr_pct <= 10)) @@ -157,7 +157,7 @@ police_rx_test() # Rule to police traffic destined to $h2 on ingress of $rp1 tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ - action police rate 80mbit burst 16k conform-exceed drop/ok + action police rate 10mbit burst 16k conform-exceed drop/ok police_common_test "police on rx" @@ -169,7 +169,7 @@ police_tx_test() # Rule to police traffic destined to $h2 on egress of $rp2 tc filter add dev $rp2 egress protocol ip pref 1 handle 101 flower \ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ - action police rate 80mbit burst 16k conform-exceed drop/ok + action police rate 10mbit burst 16k conform-exceed drop/ok police_common_test "police on tx" @@ -190,7 +190,7 @@ police_shared_common_test() sleep 10 local t1=$(tc_rule_stats_get $h2 1 ingress .bytes) - local er=$((80 * 1000 * 1000)) + local er=$((10 * 1000 * 1000)) local nr=$(rate $t0 $t1 10) local nr_pct=$((100 * (nr - er) / er)) ((-10 <= nr_pct && nr_pct <= 10)) @@ -211,7 +211,7 @@ police_shared_test() # Rule to police traffic destined to $h2 on ingress of $rp1 tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ - action police rate 80mbit burst 16k conform-exceed drop/ok \ + action police rate 10mbit burst 16k conform-exceed drop/ok \ index 10 # Rule to police a different flow destined to $h2 on egress of $rp2 @@ -250,7 +250,7 @@ police_mirror_common_test() # Rule to police traffic destined to $h2 and mirror to $h3 tc filter add dev $pol_if $dir protocol ip pref 1 handle 101 flower \ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ - action police rate 80mbit burst 16k conform-exceed drop/pipe \ + action police rate 10mbit burst 16k conform-exceed drop/pipe \ action mirred egress mirror dev $rp3 mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \ @@ -260,7 +260,7 @@ police_mirror_common_test() sleep 10 local t1=$(tc_rule_stats_get $h2 1 ingress .bytes) - local er=$((80 * 1000 * 1000)) + local er=$((10 * 1000 * 1000)) local nr=$(rate $t0 $t1 10) local nr_pct=$((100 * (nr - er) / er)) ((-10 <= nr_pct && nr_pct <= 10)) @@ -270,7 +270,7 @@ police_mirror_common_test() sleep 10 local t1=$(tc_rule_stats_get $h3 1 ingress .bytes) - local er=$((80 * 1000 * 1000)) + local er=$((10 * 1000 * 1000)) local nr=$(rate $t0 $t1 10) local nr_pct=$((100 * (nr - er) / er)) ((-10 <= nr_pct && nr_pct <= 10)) diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh index eb307ca37bfa..6f0a2e452ba1 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -495,7 +495,7 @@ vxlan_ping_test() local delta=$((t1 - t0)) # Tolerate a couple stray extra packets. - ((expect <= delta && delta <= expect + 2)) + ((expect <= delta && delta <= expect + 5)) check_err $? "$capture_dev: Expected to capture $expect packets, got $delta." } @@ -532,7 +532,7 @@ __test_ecn_encap() RET=0 tc filter add dev v1 egress pref 77 prot ip \ - flower ip_tos $tos action pass + flower ip_tos $tos ip_proto udp dst_port $VXPORT action pass sleep 1 vxlan_ping_test $h1 192.0.2.3 "-Q $q" v1 egress 77 10 tc filter del dev v1 egress pref 77 prot ip diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh index ac97f07e5ce8..a0bb4524e1e9 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh @@ -616,7 +616,7 @@ vxlan_ping_test() local delta=$((t1 - t0)) # Tolerate a couple stray extra packets. - ((expect <= delta && delta <= expect + 2)) + ((expect <= delta && delta <= expect + 5)) check_err $? "$capture_dev: Expected to capture $expect packets, got $delta." } @@ -653,7 +653,7 @@ __test_ecn_encap() RET=0 tc filter add dev v1 egress pref 77 protocol ipv6 \ - flower ip_tos $tos action pass + flower ip_tos $tos ip_proto udp dst_port $VXPORT action pass sleep 1 vxlan_ping_test $h1 2001:db8:1::3 "-Q $q" v1 egress 77 10 tc filter del dev v1 egress pref 77 protocol ipv6 diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh index a596bbf3ed6a..fb9a34cb50c6 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh @@ -750,7 +750,7 @@ __test_learning() expects[0]=0; expects[$idx1]=10; expects[$idx2]=0 vxlan_flood_test $mac $dst $vid "${expects[@]}" - sleep 20 + sleep 60 bridge fdb show brport $vx | grep $mac | grep -q self check_fail $? @@ -796,11 +796,11 @@ test_learning() local dst=192.0.2.100 local vid=10 - # Enable learning on the VxLAN devices and set ageing time to 10 seconds - ip link set dev br1 type bridge ageing_time 1000 - ip link set dev vx10 type vxlan ageing 10 + # Enable learning on the VxLAN devices and set ageing time to 30 seconds + ip link set dev br1 type bridge ageing_time 3000 + ip link set dev vx10 type vxlan ageing 30 ip link set dev vx10 type vxlan learning - ip link set dev vx20 type vxlan ageing 10 + ip link set dev vx20 type vxlan ageing 30 ip link set dev vx20 type vxlan learning reapply_config diff --git a/tools/testing/selftests/net/ip_local_port_range.c b/tools/testing/selftests/net/ip_local_port_range.c index 6ebd58869a63..193b82745fd8 100644 --- a/tools/testing/selftests/net/ip_local_port_range.c +++ b/tools/testing/selftests/net/ip_local_port_range.c @@ -365,9 +365,6 @@ TEST_F(ip_local_port_range, late_bind) __u32 range; __u16 port; - if (variant->so_protocol == IPPROTO_SCTP) - SKIP(return, "SCTP doesn't support IP_BIND_ADDRESS_NO_PORT"); - fd = socket(variant->so_domain, variant->so_type, 0); ASSERT_GE(fd, 0) TH_LOG("socket failed"); @@ -414,6 +411,9 @@ TEST_F(ip_local_port_range, late_bind) ASSERT_TRUE(!err) TH_LOG("close failed"); } +XFAIL_ADD(ip_local_port_range, ip4_stcp, late_bind); +XFAIL_ADD(ip_local_port_range, ip6_stcp, late_bind); + TEST_F(ip_local_port_range, get_port_range) { __u16 lo, hi; diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 8573326d326a..cc8bdf95b104 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -20,7 +20,7 @@ flush_pids() ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGUSR1 &>/dev/null - for _ in $(seq 10); do + for _ in $(seq $((timeout_poll * 10))); do [ -z "$(ip netns pids "${ns}")" ] && break sleep 0.1 done @@ -69,7 +69,7 @@ __chk_nr() else echo "[ fail ] expected $expected found $nr" mptcp_lib_result_fail "${msg}" - ret=$test_cnt + ret=${KSFT_FAIL} fi else echo "[ ok ]" @@ -91,6 +91,15 @@ chk_msk_nr() __chk_msk_nr "grep -c token:" "$@" } +chk_listener_nr() +{ + local expected=$1 + local msg="$2" + + __chk_nr "ss -nlHMON $ns | wc -l" "$expected" "$msg - mptcp" 0 + __chk_nr "ss -nlHtON $ns | wc -l" "$expected" "$msg - subflows" +} + wait_msk_nr() { local condition="grep -c token:" @@ -115,11 +124,11 @@ wait_msk_nr() if [ $i -ge $timeout ]; then echo "[ fail ] timeout while expecting $expected max $max last $nr" mptcp_lib_result_fail "${msg} # timeout" - ret=$test_cnt + ret=${KSFT_FAIL} elif [ $nr != $expected ]; then echo "[ fail ] expected $expected found $nr" mptcp_lib_result_fail "${msg} # unexpected result" - ret=$test_cnt + ret=${KSFT_FAIL} else echo "[ ok ]" mptcp_lib_result_pass "${msg}" @@ -289,5 +298,21 @@ flush_pids chk_msk_inuse 0 "many->0" chk_msk_cestab 0 "many->0" +chk_listener_nr 0 "no listener sockets" +NR_SERVERS=100 +for I in $(seq 1 $NR_SERVERS); do + ip netns exec $ns ./mptcp_connect -p $((I + 20001)) \ + -t ${timeout_poll} -l 0.0.0.0 >/dev/null 2>&1 & +done +mptcp_lib_wait_local_port_listen $ns $((NR_SERVERS + 20001)) + +chk_listener_nr $NR_SERVERS "many listener sockets" + +# graceful termination +for I in $(seq 1 $NR_SERVERS); do + echo a | ip netns exec $ns ./mptcp_connect -p $((I + 20001)) 127.0.0.1 >/dev/null 2>&1 & +done +flush_pids + mptcp_lib_result_print_all_tap exit $ret diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 3f198743cb03..955ee651dcd5 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -21,6 +21,7 @@ cinfail="" cinsent="" tmpfile="" cout="" +err="" capout="" ns1="" ns2="" @@ -161,6 +162,11 @@ check_tools() exit $ksft_skip fi + if ! ss -h | grep -q MPTCP; then + echo "SKIP: ss tool does not support MPTCP" + exit $ksft_skip + fi + # Use the legacy version if available to support old kernel versions if iptables-legacy -V &> /dev/null; then iptables="iptables-legacy" @@ -184,6 +190,7 @@ init() { cin=$(mktemp) cinsent=$(mktemp) cout=$(mktemp) + err=$(mktemp) evts_ns1=$(mktemp) evts_ns2=$(mktemp) @@ -199,6 +206,7 @@ cleanup() rm -f "$sin" "$sout" "$cinsent" "$cinfail" rm -f "$tmpfile" rm -rf $evts_ns1 $evts_ns2 + rm -f "$err" cleanup_partial } @@ -3333,16 +3341,17 @@ userspace_pm_rm_sf() { local evts=$evts_ns1 local t=${3:-1} - local ip=4 + local ip local tk da dp sp local cnt [ "$1" == "$ns2" ] && evts=$evts_ns2 - if mptcp_lib_is_v6 $2; then ip=6; fi + [ -n "$(mptcp_lib_evts_get_info "saddr4" "$evts" $t)" ] && ip=4 + [ -n "$(mptcp_lib_evts_get_info "saddr6" "$evts" $t)" ] && ip=6 tk=$(mptcp_lib_evts_get_info token "$evts") - da=$(mptcp_lib_evts_get_info "daddr$ip" "$evts" $t) - dp=$(mptcp_lib_evts_get_info dport "$evts" $t) - sp=$(mptcp_lib_evts_get_info sport "$evts" $t) + da=$(mptcp_lib_evts_get_info "daddr$ip" "$evts" $t $2) + dp=$(mptcp_lib_evts_get_info dport "$evts" $t $2) + sp=$(mptcp_lib_evts_get_info sport "$evts" $t $2) cnt=$(rm_sf_count ${1}) ip netns exec $1 ./pm_nl_ctl dsf lip $2 lport $sp \ @@ -3350,6 +3359,77 @@ userspace_pm_rm_sf() wait_rm_sf $1 "${cnt}" } +check_output() +{ + local cmd="$1" + local expected="$2" + local msg="$3" + local rc=0 + + mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?} + if [ ${rc} -eq 2 ]; then + fail_test "fail to check output # error ${rc}" + elif [ ${rc} -eq 0 ]; then + print_ok + elif [ ${rc} -eq 1 ]; then + fail_test "fail to check output # different output" + fi +} + +# $1: ns +userspace_pm_dump() +{ + local evts=$evts_ns1 + local tk + + [ "$1" == "$ns2" ] && evts=$evts_ns2 + tk=$(mptcp_lib_evts_get_info token "$evts") + + ip netns exec $1 ./pm_nl_ctl dump token $tk +} + +# $1: ns ; $2: id +userspace_pm_get_addr() +{ + local evts=$evts_ns1 + local tk + + [ "$1" == "$ns2" ] && evts=$evts_ns2 + tk=$(mptcp_lib_evts_get_info token "$evts") + + ip netns exec $1 ./pm_nl_ctl get $2 token $tk +} + +userspace_pm_chk_dump_addr() +{ + local ns="${1}" + local exp="${2}" + local check="${3}" + + print_check "dump addrs ${check}" + + if mptcp_lib_kallsyms_has "mptcp_userspace_pm_dump_addr$"; then + check_output "userspace_pm_dump ${ns}" "${exp}" + else + print_skip + fi +} + +userspace_pm_chk_get_addr() +{ + local ns="${1}" + local id="${2}" + local exp="${3}" + + print_check "get id ${id} addr" + + if mptcp_lib_kallsyms_has "mptcp_userspace_pm_get_addr$"; then + check_output "userspace_pm_get_addr ${ns} ${id}" "${exp}" + else + print_skip + fi +} + userspace_tests() { # userspace pm type prevents add_addr @@ -3429,20 +3509,31 @@ userspace_tests() if reset_with_events "userspace pm add & remove address" && continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 - pm_nl_set_limits $ns2 1 1 + pm_nl_set_limits $ns2 2 2 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & local tests_pid=$! wait_mpj $ns1 userspace_pm_add_addr $ns1 10.0.2.1 10 - chk_join_nr 1 1 1 - chk_add_nr 1 1 - chk_mptcp_info subflows 1 subflows 1 - chk_subflows_total 2 2 - chk_mptcp_info add_addr_signal 1 add_addr_accepted 1 + userspace_pm_add_addr $ns1 10.0.3.1 20 + chk_join_nr 2 2 2 + chk_add_nr 2 2 + chk_mptcp_info subflows 2 subflows 2 + chk_subflows_total 3 3 + chk_mptcp_info add_addr_signal 2 add_addr_accepted 2 + userspace_pm_chk_dump_addr "${ns1}" \ + $'id 10 flags signal 10.0.2.1\nid 20 flags signal 10.0.3.1' \ + "signal" + userspace_pm_chk_get_addr "${ns1}" "10" "id 10 flags signal 10.0.2.1" + userspace_pm_chk_get_addr "${ns1}" "20" "id 20 flags signal 10.0.3.1" userspace_pm_rm_addr $ns1 10 userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $SUB_ESTABLISHED - chk_rm_nr 1 1 invert + userspace_pm_chk_dump_addr "${ns1}" \ + "id 20 flags signal 10.0.3.1" "after rm_addr 10" + userspace_pm_rm_addr $ns1 20 + userspace_pm_rm_sf $ns1 10.0.3.1 $SUB_ESTABLISHED + userspace_pm_chk_dump_addr "${ns1}" "" "after rm_addr 20" + chk_rm_nr 2 2 invert chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids @@ -3462,8 +3553,15 @@ userspace_tests() chk_join_nr 1 1 1 chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 2 2 + userspace_pm_chk_dump_addr "${ns2}" \ + "id 20 flags subflow 10.0.3.2" \ + "subflow" + userspace_pm_chk_get_addr "${ns2}" "20" "id 20 flags subflow 10.0.3.2" userspace_pm_rm_addr $ns2 20 userspace_pm_rm_sf $ns2 10.0.3.2 $SUB_ESTABLISHED + userspace_pm_chk_dump_addr "${ns2}" \ + "" \ + "after rm_addr 20" chk_rm_nr 1 1 chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 @@ -3483,6 +3581,8 @@ userspace_tests() chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 userspace_pm_add_sf $ns2 10.0.3.2 0 + userspace_pm_chk_dump_addr "${ns2}" \ + "id 0 flags subflow 10.0.3.2" "id 0 subflow" chk_join_nr 1 1 1 chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 2 2 diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 037cb3e84330..438f557aac90 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -234,9 +234,9 @@ mptcp_lib_get_info_value() { grep "${2}" | sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q' } -# $1: info name ; $2: evts_ns ; $3: event type +# $1: info name ; $2: evts_ns ; [$3: event type; [$4: addr]] mptcp_lib_evts_get_info() { - mptcp_lib_get_info_value "${1}" "^type:${3:-1}," < "${2}" + grep "${4:-}" "${2}" | mptcp_lib_get_info_value "${1}" "^type:${3:-1}," } # $1: PID @@ -319,3 +319,26 @@ mptcp_lib_wait_local_port_listen() { sleep 0.1 done } + +mptcp_lib_check_output() { + local err="${1}" + local cmd="${2}" + local expected="${3}" + local cmd_ret=0 + local out + + if ! out=$(${cmd} 2>"${err}"); then + cmd_ret=${?} + fi + + if [ ${cmd_ret} -ne 0 ]; then + mptcp_lib_print_err "[FAIL] command execution '${cmd}' stderr" + cat "${err}" + return 2 + elif [ "${out}" = "${expected}" ]; then + return 0 + else + mptcp_lib_print_err "[FAIL] expected '${expected}' got '${out}'" + return 1 + fi +} diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index ebfefae71e13..705106d60db5 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -54,21 +54,17 @@ check() local cmd="$1" local expected="$2" local msg="$3" - local out=`$cmd 2>$err` - local cmd_ret=$? + local rc=0 printf "%-50s" "$msg" - if [ $cmd_ret -ne 0 ]; then - echo "[FAIL] command execution '$cmd' stderr " - cat $err - mptcp_lib_result_fail "${msg} # error ${cmd_ret}" + mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?} + if [ ${rc} -eq 2 ]; then + mptcp_lib_result_fail "${msg} # error ${rc}" ret=1 - elif [ "$out" = "$expected" ]; then - echo "[ OK ]" + elif [ ${rc} -eq 0 ]; then + mptcp_lib_print_ok "[ OK ]" mptcp_lib_result_pass "${msg}" - else - echo -n "[FAIL] " - echo "expected '$expected' got '$out'" + elif [ ${rc} -eq 1 ]; then mptcp_lib_result_fail "${msg} # different output" ret=1 fi diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index 49369c4a5f26..7426a2cbd4a0 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -453,6 +453,7 @@ int csf(int fd, int pm_family, int argc, char *argv[]) char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1024]; + u_int32_t flags = MPTCP_PM_ADDR_FLAG_SUBFLOW; const char *params[5]; struct nlmsghdr *nh; struct rtattr *addr; @@ -558,6 +559,13 @@ int csf(int fd, int pm_family, int argc, char *argv[]) off += NLMSG_ALIGN(rta->rta_len); } + /* addr flags */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &flags, 4); + off += NLMSG_ALIGN(rta->rta_len); + addr->rta_len = off - addr_start; } @@ -1079,6 +1087,7 @@ int get_addr(int fd, int pm_family, int argc, char *argv[]) 1024]; struct rtattr *rta, *nest; struct nlmsghdr *nh; + u_int32_t token = 0; int nest_start; u_int8_t id; int off = 0; @@ -1089,10 +1098,12 @@ int get_addr(int fd, int pm_family, int argc, char *argv[]) MPTCP_PM_VER); /* the only argument is the address id */ - if (argc != 3) + if (argc != 3 && argc != 5) syntax(argv); id = atoi(argv[2]); + if (argc == 5 && !strcmp(argv[3], "token")) + token = strtoul(argv[4], NULL, 10); nest_start = off; nest = (void *)(data + off); @@ -1108,6 +1119,15 @@ int get_addr(int fd, int pm_family, int argc, char *argv[]) off += NLMSG_ALIGN(rta->rta_len); nest->rta_len = off - nest_start; + /* token */ + if (token) { + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ATTR_TOKEN; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &token, 4); + off += NLMSG_ALIGN(rta->rta_len); + } + print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data))); return 0; } @@ -1119,8 +1139,16 @@ int dump_addrs(int fd, int pm_family, int argc, char *argv[]) 1024]; pid_t pid = getpid(); struct nlmsghdr *nh; + u_int32_t token = 0; + struct rtattr *rta; int off = 0; + if (argc != 2 && argc != 4) + syntax(argv); + + if (argc == 4 && !strcmp(argv[2], "token")) + token = strtoul(argv[3], NULL, 10); + memset(data, 0, sizeof(data)); nh = (void *)data; off = init_genl_req(data, pm_family, MPTCP_PM_CMD_GET_ADDR, @@ -1130,6 +1158,15 @@ int dump_addrs(int fd, int pm_family, int argc, char *argv[]) nh->nlmsg_pid = pid; nh->nlmsg_len = off; + /* token */ + if (token) { + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ATTR_TOKEN; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &token, 4); + off += NLMSG_ALIGN(rta->rta_len); + } + print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data))); return 0; } diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 874a2952aa8e..bdf6f10d0558 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -801,6 +801,8 @@ kci_test_ipsec_offload() end_test "FAIL: ipsec_offload SA offload missing from list output" fi + # we didn't create a peer, make sure we can Tx + ip neigh add $dstip dev $dev lladdr 00:11:22:33:44:55 # use ping to exercise the Tx path ping -I $dev -c 3 -W 1 -i 0 $dstip >/dev/null diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index b95c249f81c2..c6eda21cefb6 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -1927,7 +1927,7 @@ TEST_F(tls_err, poll_partial_rec_async) pfd.events = POLLIN; EXPECT_EQ(poll(&pfd, 1, 20), 1); - exit(!_metadata->passed); + exit(!__test_passed(_metadata)); } } diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh index 27574bbf2d63..5ae85def0739 100755 --- a/tools/testing/selftests/net/veth.sh +++ b/tools/testing/selftests/net/veth.sh @@ -247,6 +247,20 @@ chk_gro " - aggregation with TSO off" 1 cleanup create_ns +ip -n $NS_DST link set dev veth$DST up +ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} section xdp +chk_gro_flag "gro vs xdp while down - gro flag on" $DST on +ip -n $NS_DST link set dev veth$DST down +chk_gro_flag " - after down" $DST on +ip -n $NS_DST link set dev veth$DST xdp off +chk_gro_flag " - after xdp off" $DST off +ip -n $NS_DST link set dev veth$DST up +chk_gro_flag " - after up" $DST off +ip -n $NS_SRC link set dev veth$SRC xdp object ${BPF_FILE} section xdp +chk_gro_flag " - after peer xdp" $DST off +cleanup + +create_ns chk_channels "default channels" $DST 1 1 ip -n $NS_DST link set dev veth$DST down diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index db27153eb4a0..936c3085bb83 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -7,7 +7,8 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \ nft_queue.sh nft_meta.sh nf_nat_edemux.sh \ ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \ conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh \ - conntrack_sctp_collision.sh xt_string.sh + conntrack_sctp_collision.sh xt_string.sh \ + bridge_netfilter.sh HOSTPKG_CONFIG := pkg-config diff --git a/tools/testing/selftests/netfilter/bridge_netfilter.sh b/tools/testing/selftests/netfilter/bridge_netfilter.sh new file mode 100644 index 000000000000..659b3ab02c8b --- /dev/null +++ b/tools/testing/selftests/netfilter/bridge_netfilter.sh @@ -0,0 +1,188 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test bridge netfilter + conntrack, a combination that doesn't really work, +# with multicast/broadcast packets racing for hash table insertion. + +# eth0 br0 eth0 +# setup is: ns1 <->,ns0 <-> ns3 +# ns2 <-' `'-> ns4 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +sfx=$(mktemp -u "XXXXXXXX") +ns0="ns0-$sfx" +ns1="ns1-$sfx" +ns2="ns2-$sfx" +ns3="ns3-$sfx" +ns4="ns4-$sfx" + +ebtables -V > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ebtables" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +for i in $(seq 0 4); do + eval ip netns add \$ns$i +done + +cleanup() { + for i in $(seq 0 4); do eval ip netns del \$ns$i;done +} + +trap cleanup EXIT + +do_ping() +{ + fromns="$1" + dstip="$2" + + ip netns exec $fromns ping -c 1 -q $dstip > /dev/null + if [ $? -ne 0 ]; then + echo "ERROR: ping from $fromns to $dstip" + ip netns exec ${ns0} nft list ruleset + ret=1 + fi +} + +bcast_ping() +{ + fromns="$1" + dstip="$2" + + for i in $(seq 1 1000); do + ip netns exec $fromns ping -q -f -b -c 1 -q $dstip > /dev/null 2>&1 + if [ $? -ne 0 ]; then + echo "ERROR: ping -b from $fromns to $dstip" + ip netns exec ${ns0} nft list ruleset + fi + done +} + +ip link add veth1 netns ${ns0} type veth peer name eth0 netns ${ns1} +if [ $? -ne 0 ]; then + echo "SKIP: Can't create veth device" + exit $ksft_skip +fi + +ip link add veth2 netns ${ns0} type veth peer name eth0 netns $ns2 +ip link add veth3 netns ${ns0} type veth peer name eth0 netns $ns3 +ip link add veth4 netns ${ns0} type veth peer name eth0 netns $ns4 + +ip -net ${ns0} link set lo up + +for i in $(seq 1 4); do + ip -net ${ns0} link set veth$i up +done + +ip -net ${ns0} link add br0 type bridge stp_state 0 forward_delay 0 nf_call_iptables 1 nf_call_ip6tables 1 nf_call_arptables 1 +if [ $? -ne 0 ]; then + echo "SKIP: Can't create bridge br0" + exit $ksft_skip +fi + +# make veth0,1,2 part of bridge. +for i in $(seq 1 3); do + ip -net ${ns0} link set veth$i master br0 +done + +# add a macvlan on top of the bridge. +MACVLAN_ADDR=ba:f3:13:37:42:23 +ip -net ${ns0} link add link br0 name macvlan0 type macvlan mode private +ip -net ${ns0} link set macvlan0 address ${MACVLAN_ADDR} +ip -net ${ns0} link set macvlan0 up +ip -net ${ns0} addr add 10.23.0.1/24 dev macvlan0 + +# add a macvlan on top of veth4. +MACVLAN_ADDR=ba:f3:13:37:42:24 +ip -net ${ns0} link add link veth4 name macvlan4 type macvlan mode vepa +ip -net ${ns0} link set macvlan4 address ${MACVLAN_ADDR} +ip -net ${ns0} link set macvlan4 up + +# make the macvlan part of the bridge. +# veth4 is not a bridge port, only the macvlan on top of it. +ip -net ${ns0} link set macvlan4 master br0 + +ip -net ${ns0} link set br0 up +ip -net ${ns0} addr add 10.0.0.1/24 dev br0 +ip netns exec ${ns0} sysctl -q net.bridge.bridge-nf-call-iptables=1 +ret=$? +if [ $ret -ne 0 ] ; then + echo "SKIP: bridge netfilter not available" + ret=$ksft_skip +fi + +# for testing, so namespaces will reply to ping -b probes. +ip netns exec ${ns0} sysctl -q net.ipv4.icmp_echo_ignore_broadcasts=0 + +# enable conntrack in ns0 and drop broadcast packets in forward to +# avoid them from getting confirmed in the postrouting hook before +# the cloned skb is passed up the stack. +ip netns exec ${ns0} nft -f - <<EOF +table ip filter { + chain input { + type filter hook input priority 1; policy accept + iifname br0 counter + ct state new accept + } +} + +table bridge filter { + chain forward { + type filter hook forward priority 0; policy accept + meta pkttype broadcast ip protocol icmp counter drop + } +} +EOF + +# place 1, 2 & 3 in same subnet, connected via ns0:br0. +# ns4 is placed in same subnet as well, but its not +# part of the bridge: the corresponding veth4 is not +# part of the bridge, only its macvlan interface. +for i in $(seq 1 4); do + eval ip -net \$ns$i link set lo up + eval ip -net \$ns$i link set eth0 up +done +for i in $(seq 1 2); do + eval ip -net \$ns$i addr add 10.0.0.1$i/24 dev eth0 +done + +ip -net ${ns3} addr add 10.23.0.13/24 dev eth0 +ip -net ${ns4} addr add 10.23.0.14/24 dev eth0 + +# test basic connectivity +do_ping ${ns1} 10.0.0.12 +do_ping ${ns3} 10.23.0.1 +do_ping ${ns4} 10.23.0.1 + +if [ $ret -eq 0 ];then + echo "PASS: netns connectivity: ns1 can reach ns2, ns3 and ns4 can reach ns0" +fi + +bcast_ping ${ns1} 10.0.0.255 + +# This should deliver broadcast to macvlan0, which is on top of ns0:br0. +bcast_ping ${ns3} 10.23.0.255 + +# same, this time via veth4:macvlan4. +bcast_ping ${ns4} 10.23.0.255 + +read t < /proc/sys/kernel/tainted + +if [ $t -eq 0 ];then + echo PASS: kernel not tainted +else + echo ERROR: kernel is tainted + ret=1 +fi + +exit $ret diff --git a/tools/testing/selftests/powerpc/math/fpu_signal.c b/tools/testing/selftests/powerpc/math/fpu_signal.c index 7b1addd50420..8a64f63e37ce 100644 --- a/tools/testing/selftests/powerpc/math/fpu_signal.c +++ b/tools/testing/selftests/powerpc/math/fpu_signal.c @@ -18,6 +18,7 @@ #include <pthread.h> #include "utils.h" +#include "fpu.h" /* Number of times each thread should receive the signal */ #define ITERATIONS 10 @@ -27,9 +28,7 @@ */ #define THREAD_FACTOR 8 -__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, - 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, - 2.1}; +__thread double darray[32]; bool bad_context; int threads_starting; @@ -43,9 +42,9 @@ void signal_fpu_sig(int sig, siginfo_t *info, void *context) ucontext_t *uc = context; mcontext_t *mc = &uc->uc_mcontext; - /* Only the non volatiles were loaded up */ - for (i = 14; i < 32; i++) { - if (mc->fp_regs[i] != darray[i - 14]) { + // Don't check f30/f31, they're used as scratches in check_all_fprs() + for (i = 0; i < 30; i++) { + if (mc->fp_regs[i] != darray[i]) { bad_context = true; break; } @@ -54,7 +53,6 @@ void signal_fpu_sig(int sig, siginfo_t *info, void *context) void *signal_fpu_c(void *p) { - int i; long rc; struct sigaction act; act.sa_sigaction = signal_fpu_sig; @@ -64,9 +62,7 @@ void *signal_fpu_c(void *p) return p; srand(pthread_self()); - for (i = 0; i < 21; i++) - darray[i] = rand(); - + randomise_darray(darray, ARRAY_SIZE(darray)); rc = preempt_fpu(darray, &threads_starting, &running); return (void *) rc; diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 38f651469968..1027e6170186 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1576,7 +1576,7 @@ void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee, ASSERT_EQ(0, ret); } /* Directly report the status of our test harness results. */ - syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); + syscall(__NR_exit, _metadata->exit_code); } /* Common tracer setup/teardown functions. */ @@ -1623,7 +1623,7 @@ void teardown_trace_fixture(struct __test_metadata *_metadata, ASSERT_EQ(0, kill(tracer, SIGUSR1)); ASSERT_EQ(tracer, waitpid(tracer, &status, 0)); if (WEXITSTATUS(status)) - _metadata->passed = 0; + _metadata->exit_code = KSFT_FAIL; } } @@ -3088,8 +3088,7 @@ TEST(syscall_restart) } /* Directly report the status of our test harness results. */ - syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS - : EXIT_FAILURE); + syscall(__NR_exit, _metadata->exit_code); } EXPECT_EQ(0, close(pipefd[0])); @@ -3174,7 +3173,7 @@ TEST(syscall_restart) ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); if (WIFSIGNALED(status) || WEXITSTATUS(status)) - _metadata->passed = 0; + _metadata->exit_code = KSFT_FAIL; } TEST_SIGNAL(filter_flag_log, SIGSYS) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json index 795cf1ce8af0..b73bd255ea36 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json @@ -657,6 +657,7 @@ "actions", "mirred" ], + "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid", "plugins": { "requires": "nsPlugin" }, @@ -711,6 +712,7 @@ "actions", "mirred" ], + "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid", "plugins": { "requires": "nsPlugin" }, @@ -765,6 +767,7 @@ "actions", "mirred" ], + "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid", "plugins": { "requires": "nsPlugin" }, @@ -819,6 +822,7 @@ "actions", "mirred" ], + "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid", "plugins": { "requires": "nsPlugin" }, @@ -873,6 +877,7 @@ "actions", "mirred" ], + "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid", "plugins": { "requires": "nsPlugin" }, @@ -937,6 +942,7 @@ "actions", "mirred" ], + "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid", "plugins": { "requires": "nsPlugin" }, @@ -995,6 +1001,7 @@ "actions", "mirred" ], + "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid", "plugins": { "requires": "nsPlugin" }, diff --git a/tools/virtio/.gitignore b/tools/virtio/.gitignore index 9934d48d9a55..7e47b281c442 100644 --- a/tools/virtio/.gitignore +++ b/tools/virtio/.gitignore @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only *.d virtio_test +vhost_net_test vringh_test virtio-trace/trace-agent diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile index d128925980e0..e25e99c1c3b7 100644 --- a/tools/virtio/Makefile +++ b/tools/virtio/Makefile @@ -1,8 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 all: test mod -test: virtio_test vringh_test +test: virtio_test vringh_test vhost_net_test virtio_test: virtio_ring.o virtio_test.o vringh_test: vringh_test.o vringh.o virtio_ring.o +vhost_net_test: virtio_ring.o vhost_net_test.o try-run = $(shell set -e; \ if ($(1)) >/dev/null 2>&1; \ @@ -49,6 +50,7 @@ oot-clean: OOT_BUILD+=clean .PHONY: all test mod clean vhost oot oot-clean oot-build clean: - ${RM} *.o vringh_test virtio_test vhost_test/*.o vhost_test/.*.cmd \ - vhost_test/Module.symvers vhost_test/modules.order *.d + ${RM} *.o vringh_test virtio_test vhost_net_test vhost_test/*.o \ + vhost_test/.*.cmd vhost_test/Module.symvers \ + vhost_test/modules.order *.d -include *.d diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h index 2a8a70e2a950..42a564f22f2d 100644 --- a/tools/virtio/linux/virtio_config.h +++ b/tools/virtio/linux/virtio_config.h @@ -1,4 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef LINUX_VIRTIO_CONFIG_H +#define LINUX_VIRTIO_CONFIG_H #include <linux/virtio_byteorder.h> #include <linux/virtio.h> #include <uapi/linux/virtio_config.h> @@ -95,3 +97,5 @@ static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val) { return __cpu_to_virtio64(virtio_is_little_endian(vdev), val); } + +#endif diff --git a/tools/virtio/vhost_net_test.c b/tools/virtio/vhost_net_test.c new file mode 100644 index 000000000000..389d99a6d7c7 --- /dev/null +++ b/tools/virtio/vhost_net_test.c @@ -0,0 +1,532 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <getopt.h> +#include <limits.h> +#include <string.h> +#include <poll.h> +#include <sys/eventfd.h> +#include <stdlib.h> +#include <assert.h> +#include <unistd.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <fcntl.h> +#include <stdbool.h> +#include <linux/vhost.h> +#include <linux/if.h> +#include <linux/if_tun.h> +#include <linux/in.h> +#include <linux/if_packet.h> +#include <linux/virtio_net.h> +#include <netinet/ether.h> + +#define HDR_LEN sizeof(struct virtio_net_hdr_mrg_rxbuf) +#define TEST_BUF_LEN 256 +#define TEST_PTYPE ETH_P_LOOPBACK +#define DESC_NUM 256 + +/* Used by implementation of kmalloc() in tools/virtio/linux/kernel.h */ +void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end; + +struct vq_info { + int kick; + int call; + int idx; + long started; + long completed; + struct pollfd fds; + void *ring; + /* copy used for control */ + struct vring vring; + struct virtqueue *vq; +}; + +struct vdev_info { + struct virtio_device vdev; + int control; + struct vq_info vqs[2]; + int nvqs; + void *buf; + size_t buf_size; + char *test_buf; + char *res_buf; + struct vhost_memory *mem; + int sock; + int ifindex; + unsigned char mac[ETHER_ADDR_LEN]; +}; + +static int tun_alloc(struct vdev_info *dev, char *tun_name) +{ + struct ifreq ifr; + int len = HDR_LEN; + int fd, e; + + fd = open("/dev/net/tun", O_RDWR); + if (fd < 0) { + perror("Cannot open /dev/net/tun"); + return fd; + } + + memset(&ifr, 0, sizeof(ifr)); + + ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; + strncpy(ifr.ifr_name, tun_name, IFNAMSIZ); + + e = ioctl(fd, TUNSETIFF, &ifr); + if (e < 0) { + perror("ioctl[TUNSETIFF]"); + close(fd); + return e; + } + + e = ioctl(fd, TUNSETVNETHDRSZ, &len); + if (e < 0) { + perror("ioctl[TUNSETVNETHDRSZ]"); + close(fd); + return e; + } + + e = ioctl(fd, SIOCGIFHWADDR, &ifr); + if (e < 0) { + perror("ioctl[SIOCGIFHWADDR]"); + close(fd); + return e; + } + + memcpy(dev->mac, &ifr.ifr_hwaddr.sa_data, ETHER_ADDR_LEN); + return fd; +} + +static void vdev_create_socket(struct vdev_info *dev, char *tun_name) +{ + struct ifreq ifr; + + dev->sock = socket(AF_PACKET, SOCK_RAW, htons(TEST_PTYPE)); + assert(dev->sock != -1); + + strncpy(ifr.ifr_name, tun_name, IFNAMSIZ); + assert(ioctl(dev->sock, SIOCGIFINDEX, &ifr) >= 0); + + dev->ifindex = ifr.ifr_ifindex; + + /* Set the flags that bring the device up */ + assert(ioctl(dev->sock, SIOCGIFFLAGS, &ifr) >= 0); + ifr.ifr_flags |= (IFF_UP | IFF_RUNNING); + assert(ioctl(dev->sock, SIOCSIFFLAGS, &ifr) >= 0); +} + +static void vdev_send_packet(struct vdev_info *dev) +{ + char *sendbuf = dev->test_buf + HDR_LEN; + struct sockaddr_ll saddrll = {0}; + int sockfd = dev->sock; + int ret; + + saddrll.sll_family = PF_PACKET; + saddrll.sll_ifindex = dev->ifindex; + saddrll.sll_halen = ETH_ALEN; + saddrll.sll_protocol = htons(TEST_PTYPE); + + ret = sendto(sockfd, sendbuf, TEST_BUF_LEN, 0, + (struct sockaddr *)&saddrll, + sizeof(struct sockaddr_ll)); + assert(ret >= 0); +} + +static bool vq_notify(struct virtqueue *vq) +{ + struct vq_info *info = vq->priv; + unsigned long long v = 1; + int r; + + r = write(info->kick, &v, sizeof(v)); + assert(r == sizeof(v)); + + return true; +} + +static void vhost_vq_setup(struct vdev_info *dev, struct vq_info *info) +{ + struct vhost_vring_addr addr = { + .index = info->idx, + .desc_user_addr = (uint64_t)(unsigned long)info->vring.desc, + .avail_user_addr = (uint64_t)(unsigned long)info->vring.avail, + .used_user_addr = (uint64_t)(unsigned long)info->vring.used, + }; + struct vhost_vring_state state = { .index = info->idx }; + struct vhost_vring_file file = { .index = info->idx }; + int r; + + state.num = info->vring.num; + r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state); + assert(r >= 0); + + state.num = 0; + r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state); + assert(r >= 0); + + r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr); + assert(r >= 0); + + file.fd = info->kick; + r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file); + assert(r >= 0); +} + +static void vq_reset(struct vq_info *info, int num, struct virtio_device *vdev) +{ + if (info->vq) + vring_del_virtqueue(info->vq); + + memset(info->ring, 0, vring_size(num, 4096)); + vring_init(&info->vring, num, info->ring, 4096); + info->vq = vring_new_virtqueue(info->idx, num, 4096, vdev, true, false, + info->ring, vq_notify, NULL, "test"); + assert(info->vq); + info->vq->priv = info; +} + +static void vq_info_add(struct vdev_info *dev, int idx, int num, int fd) +{ + struct vhost_vring_file backend = { .index = idx, .fd = fd }; + struct vq_info *info = &dev->vqs[idx]; + int r; + + info->idx = idx; + info->kick = eventfd(0, EFD_NONBLOCK); + r = posix_memalign(&info->ring, 4096, vring_size(num, 4096)); + assert(r >= 0); + vq_reset(info, num, &dev->vdev); + vhost_vq_setup(dev, info); + + r = ioctl(dev->control, VHOST_NET_SET_BACKEND, &backend); + assert(!r); +} + +static void vdev_info_init(struct vdev_info *dev, unsigned long long features) +{ + struct ether_header *eh; + int i, r; + + dev->vdev.features = features; + INIT_LIST_HEAD(&dev->vdev.vqs); + spin_lock_init(&dev->vdev.vqs_list_lock); + + dev->buf_size = (HDR_LEN + TEST_BUF_LEN) * 2; + dev->buf = malloc(dev->buf_size); + assert(dev->buf); + dev->test_buf = dev->buf; + dev->res_buf = dev->test_buf + HDR_LEN + TEST_BUF_LEN; + + memset(dev->test_buf, 0, HDR_LEN + TEST_BUF_LEN); + eh = (struct ether_header *)(dev->test_buf + HDR_LEN); + eh->ether_type = htons(TEST_PTYPE); + memcpy(eh->ether_dhost, dev->mac, ETHER_ADDR_LEN); + memcpy(eh->ether_shost, dev->mac, ETHER_ADDR_LEN); + + for (i = sizeof(*eh); i < TEST_BUF_LEN; i++) + dev->test_buf[i + HDR_LEN] = (char)i; + + dev->control = open("/dev/vhost-net", O_RDWR); + assert(dev->control >= 0); + + r = ioctl(dev->control, VHOST_SET_OWNER, NULL); + assert(r >= 0); + + dev->mem = malloc(offsetof(struct vhost_memory, regions) + + sizeof(dev->mem->regions[0])); + assert(dev->mem); + memset(dev->mem, 0, offsetof(struct vhost_memory, regions) + + sizeof(dev->mem->regions[0])); + dev->mem->nregions = 1; + dev->mem->regions[0].guest_phys_addr = (long)dev->buf; + dev->mem->regions[0].userspace_addr = (long)dev->buf; + dev->mem->regions[0].memory_size = dev->buf_size; + + r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem); + assert(r >= 0); + + r = ioctl(dev->control, VHOST_SET_FEATURES, &features); + assert(r >= 0); + + dev->nvqs = 2; +} + +static void wait_for_interrupt(struct vq_info *vq) +{ + unsigned long long val; + + poll(&vq->fds, 1, 100); + + if (vq->fds.revents & POLLIN) + read(vq->fds.fd, &val, sizeof(val)); +} + +static void verify_res_buf(char *res_buf) +{ + int i; + + for (i = ETHER_HDR_LEN; i < TEST_BUF_LEN; i++) + assert(res_buf[i] == (char)i); +} + +static void run_tx_test(struct vdev_info *dev, struct vq_info *vq, + bool delayed, int bufs) +{ + long long spurious = 0; + struct scatterlist sl; + unsigned int len; + int r; + + for (;;) { + long started_before = vq->started; + long completed_before = vq->completed; + + virtqueue_disable_cb(vq->vq); + do { + while (vq->started < bufs && + (vq->started - vq->completed) < 1) { + sg_init_one(&sl, dev->test_buf, HDR_LEN + TEST_BUF_LEN); + r = virtqueue_add_outbuf(vq->vq, &sl, 1, + dev->test_buf + vq->started, + GFP_ATOMIC); + if (unlikely(r != 0)) + break; + + ++vq->started; + + if (unlikely(!virtqueue_kick(vq->vq))) { + r = -1; + break; + } + } + + if (vq->started >= bufs) + r = -1; + + /* Flush out completed bufs if any */ + while (virtqueue_get_buf(vq->vq, &len)) { + int n; + + n = recvfrom(dev->sock, dev->res_buf, TEST_BUF_LEN, 0, NULL, NULL); + assert(n == TEST_BUF_LEN); + verify_res_buf(dev->res_buf); + + ++vq->completed; + r = 0; + } + } while (r == 0); + + if (vq->completed == completed_before && vq->started == started_before) + ++spurious; + + assert(vq->completed <= bufs); + assert(vq->started <= bufs); + if (vq->completed == bufs) + break; + + if (delayed) { + if (virtqueue_enable_cb_delayed(vq->vq)) + wait_for_interrupt(vq); + } else { + if (virtqueue_enable_cb(vq->vq)) + wait_for_interrupt(vq); + } + } + printf("TX spurious wakeups: 0x%llx started=0x%lx completed=0x%lx\n", + spurious, vq->started, vq->completed); +} + +static void run_rx_test(struct vdev_info *dev, struct vq_info *vq, + bool delayed, int bufs) +{ + long long spurious = 0; + struct scatterlist sl; + unsigned int len; + int r; + + for (;;) { + long started_before = vq->started; + long completed_before = vq->completed; + + do { + while (vq->started < bufs && + (vq->started - vq->completed) < 1) { + sg_init_one(&sl, dev->res_buf, HDR_LEN + TEST_BUF_LEN); + + r = virtqueue_add_inbuf(vq->vq, &sl, 1, + dev->res_buf + vq->started, + GFP_ATOMIC); + if (unlikely(r != 0)) + break; + + ++vq->started; + + vdev_send_packet(dev); + + if (unlikely(!virtqueue_kick(vq->vq))) { + r = -1; + break; + } + } + + if (vq->started >= bufs) + r = -1; + + /* Flush out completed bufs if any */ + while (virtqueue_get_buf(vq->vq, &len)) { + struct ether_header *eh; + + eh = (struct ether_header *)(dev->res_buf + HDR_LEN); + + /* tun netdev is up and running, only handle the + * TEST_PTYPE packet. + */ + if (eh->ether_type == htons(TEST_PTYPE)) { + assert(len == TEST_BUF_LEN + HDR_LEN); + verify_res_buf(dev->res_buf + HDR_LEN); + } + + ++vq->completed; + r = 0; + } + } while (r == 0); + + if (vq->completed == completed_before && vq->started == started_before) + ++spurious; + + assert(vq->completed <= bufs); + assert(vq->started <= bufs); + if (vq->completed == bufs) + break; + } + + printf("RX spurious wakeups: 0x%llx started=0x%lx completed=0x%lx\n", + spurious, vq->started, vq->completed); +} + +static const char optstring[] = "h"; +static const struct option longopts[] = { + { + .name = "help", + .val = 'h', + }, + { + .name = "event-idx", + .val = 'E', + }, + { + .name = "no-event-idx", + .val = 'e', + }, + { + .name = "indirect", + .val = 'I', + }, + { + .name = "no-indirect", + .val = 'i', + }, + { + .name = "virtio-1", + .val = '1', + }, + { + .name = "no-virtio-1", + .val = '0', + }, + { + .name = "delayed-interrupt", + .val = 'D', + }, + { + .name = "no-delayed-interrupt", + .val = 'd', + }, + { + .name = "buf-num", + .val = 'n', + .has_arg = required_argument, + }, + { + .name = "batch", + .val = 'b', + .has_arg = required_argument, + }, + { + } +}; + +static void help(int status) +{ + fprintf(stderr, "Usage: vhost_net_test [--help]" + " [--no-indirect]" + " [--no-event-idx]" + " [--no-virtio-1]" + " [--delayed-interrupt]" + " [--buf-num]" + "\n"); + + exit(status); +} + +int main(int argc, char **argv) +{ + unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | + (1ULL << VIRTIO_RING_F_EVENT_IDX) | (1ULL << VIRTIO_F_VERSION_1); + char tun_name[IFNAMSIZ]; + long nbufs = 0x100000; + struct vdev_info dev; + bool delayed = false; + int o, fd; + + for (;;) { + o = getopt_long(argc, argv, optstring, longopts, NULL); + switch (o) { + case -1: + goto done; + case '?': + help(2); + case 'e': + features &= ~(1ULL << VIRTIO_RING_F_EVENT_IDX); + break; + case 'h': + help(0); + case 'i': + features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC); + break; + case '0': + features &= ~(1ULL << VIRTIO_F_VERSION_1); + break; + case 'D': + delayed = true; + break; + case 'n': + nbufs = strtol(optarg, NULL, 10); + assert(nbufs > 0); + break; + default: + assert(0); + break; + } + } + +done: + memset(&dev, 0, sizeof(dev)); + snprintf(tun_name, IFNAMSIZ, "tun_%d", getpid()); + + fd = tun_alloc(&dev, tun_name); + assert(fd >= 0); + + vdev_info_init(&dev, features); + vq_info_add(&dev, 0, DESC_NUM, fd); + vq_info_add(&dev, 1, DESC_NUM, fd); + vdev_create_socket(&dev, tun_name); + + run_rx_test(&dev, &dev.vqs[0], delayed, nbufs); + run_tx_test(&dev, &dev.vqs[1], delayed, nbufs); + + return 0; +} |