aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.mailmap1
-rw-r--r--Documentation/admin-guide/pm/amd-pstate.rst4
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl,sec-v4.0.yaml10
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/fixed-partitions.yaml2
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/fsl,qman-portal.yaml2
-rw-r--r--Documentation/mm/process_addrs.rst850
-rw-r--r--MAINTAINERS6
-rw-r--r--Makefile2
-rw-r--r--arch/arc/Kconfig1
-rw-r--r--arch/arc/include/asm/cachetype.h8
-rw-r--r--arch/arm64/boot/dts/arm/fvp-base-revc.dts2
-rw-r--r--arch/arm64/boot/dts/broadcom/bcm2712.dtsi8
-rw-r--r--arch/arm64/kernel/signal.c35
-rw-r--r--arch/hexagon/Makefile6
-rw-r--r--arch/powerpc/configs/pmac32_defconfig1
-rw-r--r--arch/powerpc/configs/ppc6xx_defconfig1
-rw-r--r--arch/s390/boot/startup.c2
-rw-r--r--arch/s390/boot/vmem.c6
-rw-r--r--arch/s390/kernel/ipl.c2
-rw-r--r--arch/x86/include/asm/cpufeatures.h1
-rw-r--r--arch/x86/include/asm/processor.h2
-rw-r--r--arch/x86/include/asm/static_call.h15
-rw-r--r--arch/x86/include/asm/sync_core.h6
-rw-r--r--arch/x86/include/asm/xen/hypercall.h36
-rw-r--r--arch/x86/kernel/callthunks.c5
-rw-r--r--arch/x86/kernel/cpu/common.c38
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c58
-rw-r--r--arch/x86/kernel/static_call.c9
-rw-r--r--arch/x86/kernel/vmlinux.lds.S4
-rw-r--r--arch/x86/kvm/mmu/mmu.c12
-rw-r--r--arch/x86/kvm/mmu/spte.h17
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c5
-rw-r--r--arch/x86/kvm/svm/avic.c6
-rw-r--r--arch/x86/kvm/svm/svm.c9
-rw-r--r--arch/x86/kvm/vmx/posted_intr.h2
-rw-r--r--arch/x86/kvm/x86.c9
-rw-r--r--arch/x86/xen/enlighten.c65
-rw-r--r--arch/x86/xen/enlighten_hvm.c13
-rw-r--r--arch/x86/xen/enlighten_pv.c4
-rw-r--r--arch/x86/xen/enlighten_pvh.c7
-rw-r--r--arch/x86/xen/xen-asm.S50
-rw-r--r--arch/x86/xen/xen-head.S107
-rw-r--r--arch/x86/xen/xen-ops.h9
-rw-r--r--block/bdev.c3
-rw-r--r--block/blk-mq-sysfs.c16
-rw-r--r--block/blk-mq.c40
-rw-r--r--block/blk-sysfs.c4
-rw-r--r--drivers/accel/ivpu/ivpu_gem.c2
-rw-r--r--drivers/accel/ivpu/ivpu_mmu_context.c10
-rw-r--r--drivers/accel/ivpu/ivpu_pm.c2
-rw-r--r--drivers/acpi/Kconfig4
-rw-r--r--drivers/auxdisplay/Kconfig2
-rw-r--r--drivers/block/zram/zram_drv.c15
-rw-r--r--drivers/clocksource/hyperv_timer.c14
-rw-r--r--drivers/cpufreq/amd-pstate.c50
-rw-r--r--drivers/cxl/core/region.c25
-rw-r--r--drivers/cxl/pci.c6
-rw-r--r--drivers/dma-buf/dma-buf.c2
-rw-r--r--drivers/dma-buf/udmabuf.c43
-rw-r--r--drivers/dma/amd/qdma/qdma.c28
-rw-r--r--drivers/dma/apple-admac.c7
-rw-r--r--drivers/dma/at_xdmac.c2
-rw-r--r--drivers/dma/dw/acpi.c6
-rw-r--r--drivers/dma/dw/internal.h8
-rw-r--r--drivers/dma/dw/pci.c4
-rw-r--r--drivers/dma/fsl-edma-common.h1
-rw-r--r--drivers/dma/fsl-edma-main.c41
-rw-r--r--drivers/dma/loongson2-apb-dma.c2
-rw-r--r--drivers/dma/mv_xor.c2
-rw-r--r--drivers/dma/tegra186-gpc-dma.c10
-rw-r--r--drivers/firmware/arm_ffa/bus.c15
-rw-r--r--drivers/firmware/arm_ffa/driver.c7
-rw-r--r--drivers/firmware/arm_scmi/vendors/imx/Kconfig1
-rw-r--r--drivers/firmware/imx/Kconfig1
-rw-r--r--drivers/firmware/microchip/mpfs-auto-update.c4
-rw-r--r--drivers/gpu/drm/Kconfig4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c2
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c2
-rw-r--r--drivers/gpu/drm/display/drm_dp_tunnel.c10
-rw-r--r--drivers/gpu/drm/drm_modes.c11
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_types.h5
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c41
-rw-r--r--drivers/gpu/drm/panel/panel-himax-hx83102.c2
-rw-r--r--drivers/gpu/drm/panel/panel-novatek-nt35950.c4
-rw-r--r--drivers/gpu/drm/panel/panel-sitronix-st7701.c1
-rw-r--r--drivers/gpu/drm/panel/panel-synaptics-r63353.c2
-rw-r--r--drivers/gpu/drm/scheduler/sched_main.c3
-rw-r--r--drivers/hv/hv_balloon.c9
-rw-r--r--drivers/hv/hv_kvp.c10
-rw-r--r--drivers/hv/hv_snapshot.c9
-rw-r--r--drivers/hv/hv_util.c13
-rw-r--r--drivers/hv/hyperv_vmbus.h2
-rw-r--r--drivers/hv/vmbus_drv.c2
-rw-r--r--drivers/hwmon/tmp513.c10
-rw-r--r--drivers/macintosh/Kconfig1
-rw-r--r--drivers/media/dvb-frontends/dib3000mb.c2
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c3
-rw-r--r--drivers/mmc/host/mtk-sd.c2
-rw-r--r--drivers/mmc/host/sdhci-tegra.c1
-rw-r--r--drivers/mtd/nand/raw/arasan-nand-controller.c11
-rw-r--r--drivers/mtd/nand/raw/atmel/pmecc.c4
-rw-r--r--drivers/mtd/nand/raw/diskonchip.c2
-rw-r--r--drivers/mtd/nand/raw/omap2.c16
-rw-r--r--drivers/net/can/m_can/m_can.c36
-rw-r--r--drivers/net/can/m_can/m_can.h1
-rw-r--r--drivers/net/can/m_can/m_can_pci.c1
-rw-r--r--drivers/net/ethernet/broadcom/bgmac-platform.c5
-rw-r--r--drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c5
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_main.c2
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_dev.c3
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_txrx.c29
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_txrx.h8
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_vf_dev.c3
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/rep.c5
-rw-r--r--drivers/net/ethernet/mscc/ocelot.c2
-rw-r--r--drivers/net/ethernet/oa_tc6.c11
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_dev.c5
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_ethtool.c4
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.c4
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_mcp.c1
-rw-r--r--drivers/net/ethernet/renesas/rswitch.c74
-rw-r--r--drivers/net/ethernet/renesas/rswitch.h13
-rw-r--r--drivers/net/mdio/fwnode_mdio.c13
-rw-r--r--drivers/net/netdevsim/health.c2
-rw-r--r--drivers/net/netdevsim/netdev.c4
-rw-r--r--drivers/net/phy/aquantia/aquantia_leds.c2
-rw-r--r--drivers/net/phy/intel-xway.c2
-rw-r--r--drivers/net/phy/mxl-gpy.c2
-rw-r--r--drivers/net/team/team_core.c8
-rw-r--r--drivers/net/tun.c2
-rw-r--r--drivers/net/usb/qmi_wwan.c1
-rw-r--r--drivers/net/xen-netfront.c5
-rw-r--r--drivers/nvme/host/core.c2
-rw-r--r--drivers/of/address.c5
-rw-r--r--drivers/of/base.c18
-rw-r--r--drivers/of/empty_root.dts9
-rw-r--r--drivers/of/irq.c2
-rw-r--r--drivers/of/property.c2
-rw-r--r--drivers/of/unittest-data/tests-address.dtsi2
-rw-r--r--drivers/of/unittest.c39
-rw-r--r--drivers/pci/pci.c6
-rw-r--r--drivers/pci/pcie/portdrv.c4
-rw-r--r--drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c6
-rw-r--r--drivers/phy/freescale/phy-fsl-samsung-hdmi.c3
-rw-r--r--drivers/phy/mediatek/Kconfig1
-rw-r--r--drivers/phy/phy-core.c21
-rw-r--r--drivers/phy/qualcomm/phy-qcom-qmp-usb.c2
-rw-r--r--drivers/phy/rockchip/phy-rockchip-naneng-combphy.c2
-rw-r--r--drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c3
-rw-r--r--drivers/phy/st/phy-stm32-combophy.c21
-rw-r--r--drivers/platform/chrome/cros_ec_lpc.c4
-rw-r--r--drivers/platform/loongarch/Kconfig2
-rw-r--r--drivers/platform/x86/dell/alienware-wmi.c24
-rw-r--r--drivers/platform/x86/intel/ifs/core.c1
-rw-r--r--drivers/platform/x86/intel/vsec.c2
-rw-r--r--drivers/platform/x86/p2sb.c77
-rw-r--r--drivers/platform/x86/touchscreen_dmi.c26
-rw-r--r--drivers/pwm/pwm-stm32.c2
-rw-r--r--drivers/regulator/of_regulator.c2
-rw-r--r--drivers/spi/spi-rockchip-sfc.c4
-rw-r--r--drivers/staging/fbtft/Kconfig1
-rw-r--r--drivers/staging/gpib/common/Makefile2
-rw-r--r--drivers/staging/gpib/nec7210/Makefile2
-rw-r--r--drivers/thermal/thermal_thresholds.c68
-rw-r--r--drivers/thunderbolt/nhi.c8
-rw-r--r--drivers/thunderbolt/nhi.h4
-rw-r--r--drivers/thunderbolt/retimer.c19
-rw-r--r--drivers/thunderbolt/tb.c41
-rw-r--r--drivers/usb/host/xhci-mem.c2
-rw-r--r--drivers/usb/host/xhci-ring.c2
-rw-r--r--drivers/usb/serial/option.c27
-rw-r--r--drivers/video/fbdev/Kconfig18
-rw-r--r--drivers/video/fbdev/core/Kconfig3
-rw-r--r--fs/btrfs/bio.c16
-rw-r--r--fs/btrfs/ctree.h19
-rw-r--r--fs/btrfs/extent-tree.c6
-rw-r--r--fs/btrfs/tree-checker.c27
-rw-r--r--fs/ceph/file.c77
-rw-r--r--fs/ceph/mds_client.c9
-rw-r--r--fs/ceph/super.c2
-rw-r--r--fs/erofs/data.c36
-rw-r--r--fs/erofs/fileio.c9
-rw-r--r--fs/erofs/fscache.c10
-rw-r--r--fs/erofs/internal.h15
-rw-r--r--fs/erofs/super.c78
-rw-r--r--fs/erofs/zdata.c4
-rw-r--r--fs/erofs/zutil.c7
-rw-r--r--fs/hugetlbfs/inode.c2
-rw-r--r--fs/nfs/pnfs.c2
-rw-r--r--fs/nfs/super.c1
-rw-r--r--fs/nfsd/export.c31
-rw-r--r--fs/nfsd/export.h4
-rw-r--r--fs/nfsd/nfs4callback.c4
-rw-r--r--fs/nfsd/nfs4proc.c13
-rw-r--r--fs/nilfs2/btnode.c1
-rw-r--r--fs/nilfs2/gcinode.c2
-rw-r--r--fs/nilfs2/inode.c13
-rw-r--r--fs/nilfs2/namei.c5
-rw-r--r--fs/nilfs2/nilfs.h1
-rw-r--r--fs/ocfs2/localalloc.c27
-rw-r--r--fs/smb/client/Kconfig1
-rw-r--r--fs/smb/client/cifsfs.c2
-rw-r--r--fs/smb/client/connect.c36
-rw-r--r--fs/smb/client/smb2pdu.c5
-rw-r--r--fs/smb/server/connection.c18
-rw-r--r--fs/smb/server/connection.h1
-rw-r--r--fs/smb/server/server.c7
-rw-r--r--fs/smb/server/server.h1
-rw-r--r--fs/smb/server/smb2pdu.c2
-rw-r--r--fs/smb/server/transport_ipc.c5
-rw-r--r--include/clocksource/hyperv_timer.h2
-rw-r--r--include/linux/alloc_tag.h9
-rw-r--r--include/linux/arm_ffa.h13
-rw-r--r--include/linux/cacheinfo.h6
-rw-r--r--include/linux/compiler.h37
-rw-r--r--include/linux/dmaengine.h13
-rw-r--r--include/linux/fortify-string.h14
-rw-r--r--include/linux/highmem.h8
-rw-r--r--include/linux/hyperv.h1
-rw-r--r--include/linux/io_uring.h4
-rw-r--r--include/linux/io_uring_types.h2
-rw-r--r--include/linux/mm.h31
-rw-r--r--include/linux/page-flags.h12
-rw-r--r--include/linux/platform_data/amd_qdma.h2
-rw-r--r--include/linux/skmsg.h11
-rw-r--r--include/linux/static_call.h6
-rw-r--r--include/linux/trace_events.h6
-rw-r--r--include/linux/vermagic.h6
-rw-r--r--include/linux/vmstat.h2
-rw-r--r--include/net/sock.h10
-rw-r--r--include/uapi/linux/thermal.h4
-rw-r--r--io_uring/io_uring.c17
-rw-r--r--io_uring/register.c3
-rw-r--r--io_uring/timeout.c40
-rw-r--r--kernel/bpf/verifier.c6
-rw-r--r--kernel/fork.c13
-rw-r--r--kernel/static_call_inline.c2
-rw-r--r--kernel/trace/fgraph.c8
-rw-r--r--kernel/trace/ring_buffer.c6
-rw-r--r--kernel/trace/trace.c264
-rw-r--r--kernel/trace/trace.h6
-rw-r--r--kernel/trace/trace_events.c225
-rw-r--r--kernel/trace/trace_functions.c3
-rw-r--r--kernel/trace/trace_output.c6
-rw-r--r--lib/alloc_tag.c41
-rw-r--r--mm/huge_memory.c19
-rw-r--r--mm/hugetlb.c5
-rw-r--r--mm/internal.h6
-rw-r--r--mm/memory.c18
-rw-r--r--mm/page_alloc.c6
-rw-r--r--mm/pgtable-generic.c2
-rw-r--r--mm/shmem.c22
-rw-r--r--mm/vma.c5
-rw-r--r--mm/vmalloc.c6
-rw-r--r--net/ceph/osd_client.c2
-rw-r--r--net/core/filter.c21
-rw-r--r--net/core/netdev-genl.c19
-rw-r--r--net/core/rtnetlink.c5
-rw-r--r--net/core/skmsg.c11
-rw-r--r--net/dsa/tag.h16
-rw-r--r--net/ipv4/tcp_bpf.c14
-rw-r--r--net/mctp/route.c36
-rw-r--r--net/mctp/test/route-test.c86
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c4
-rw-r--r--net/psample/psample.c9
-rw-r--r--net/smc/af_smc.c18
-rw-r--r--net/smc/smc_clc.c17
-rw-r--r--net/smc/smc_clc.h22
-rw-r--r--net/smc/smc_core.c9
-rw-r--r--rust/kernel/net/phy.rs4
-rw-r--r--scripts/mod/modpost.c17
-rw-r--r--scripts/mod/modpost.h3
-rwxr-xr-xscripts/package/builddeb6
-rwxr-xr-xscripts/package/mkdebian7
-rw-r--r--security/selinux/ss/services.c8
-rw-r--r--sound/soc/fsl/Kconfig2
-rw-r--r--tools/hv/.gitignore3
-rw-r--r--tools/hv/hv_fcopy_uio_daemon.c12
-rwxr-xr-xtools/hv/hv_get_dns_info.sh4
-rw-r--r--tools/hv/hv_kvp_daemon.c9
-rwxr-xr-xtools/hv/hv_set_ifconfig.sh2
-rw-r--r--tools/net/ynl/lib/ynl.py6
-rw-r--r--tools/objtool/check.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/socket_helpers.h394
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c51
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h385
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_change_tail.c62
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c40
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_change_tail.c106
-rw-r--r--tools/testing/selftests/bpf/sdt.h2
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c4
-rwxr-xr-xtools/testing/selftests/drivers/net/queues.py23
-rwxr-xr-xtools/testing/selftests/drivers/net/stats.py19
-rw-r--r--tools/testing/selftests/memfd/memfd_test.c14
-rw-r--r--tools/testing/selftests/net/lib/py/ynl.py16
-rwxr-xr-xtools/testing/selftests/net/openvswitch/openvswitch.sh6
-rw-r--r--usr/include/Makefile2
-rwxr-xr-xusr/include/headers_check.pl9
307 files changed, 4054 insertions, 1666 deletions
diff --git a/.mailmap b/.mailmap
index 5ff0e5d681e7..7efe43237ca8 100644
--- a/.mailmap
+++ b/.mailmap
@@ -735,6 +735,7 @@ Wolfram Sang <wsa@kernel.org> <w.sang@pengutronix.de>
Wolfram Sang <wsa@kernel.org> <wsa@the-dreams.de>
Yakir Yang <kuankuan.y@gmail.com> <ykk@rock-chips.com>
Yanteng Si <si.yanteng@linux.dev> <siyanteng@loongson.cn>
+Ying Huang <huang.ying.caritas@gmail.com> <ying.huang@intel.com>
Yusuke Goda <goda.yusuke@renesas.com>
Zack Rusin <zack.rusin@broadcom.com> <zackr@vmware.com>
Zhu Yanjun <zyjzyj2000@gmail.com> <yanjunz@nvidia.com>
diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst
index 210a808b74ec..412423c54f25 100644
--- a/Documentation/admin-guide/pm/amd-pstate.rst
+++ b/Documentation/admin-guide/pm/amd-pstate.rst
@@ -251,9 +251,7 @@ performance supported in `AMD CPPC Performance Capability <perf_cap_>`_).
In some ASICs, the highest CPPC performance is not the one in the ``_CPC``
table, so we need to expose it to sysfs. If boost is not active, but
still supported, this maximum frequency will be larger than the one in
-``cpuinfo``. On systems that support preferred core, the driver will have
-different values for some cores than others and this will reflect the values
-advertised by the platform at bootup.
+``cpuinfo``.
This attribute is read-only.
``amd_pstate_lowest_nonlinear_freq``
diff --git a/Documentation/devicetree/bindings/crypto/fsl,sec-v4.0.yaml b/Documentation/devicetree/bindings/crypto/fsl,sec-v4.0.yaml
index 9c8c9991f29a..f0c4a7c83568 100644
--- a/Documentation/devicetree/bindings/crypto/fsl,sec-v4.0.yaml
+++ b/Documentation/devicetree/bindings/crypto/fsl,sec-v4.0.yaml
@@ -114,8 +114,9 @@ patternProperties:
table that specifies the PPID to LIODN mapping. Needed if the PAMU is
used. Value is a 12 bit value where value is a LIODN ID for this JR.
This property is normally set by boot firmware.
- $ref: /schemas/types.yaml#/definitions/uint32
- maximum: 0xfff
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ items:
+ - maximum: 0xfff
'^rtic@[0-9a-f]+$':
type: object
@@ -186,8 +187,9 @@ patternProperties:
Needed if the PAMU is used. Value is a 12 bit value where value
is a LIODN ID for this JR. This property is normally set by boot
firmware.
- $ref: /schemas/types.yaml#/definitions/uint32
- maximum: 0xfff
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ items:
+ - maximum: 0xfff
fsl,rtic-region:
description:
diff --git a/Documentation/devicetree/bindings/mtd/partitions/fixed-partitions.yaml b/Documentation/devicetree/bindings/mtd/partitions/fixed-partitions.yaml
index 058253d6d889..62086366837c 100644
--- a/Documentation/devicetree/bindings/mtd/partitions/fixed-partitions.yaml
+++ b/Documentation/devicetree/bindings/mtd/partitions/fixed-partitions.yaml
@@ -82,7 +82,7 @@ examples:
uimage@100000 {
reg = <0x0100000 0x200000>;
- compress = "lzma";
+ compression = "lzma";
};
};
diff --git a/Documentation/devicetree/bindings/soc/fsl/fsl,qman-portal.yaml b/Documentation/devicetree/bindings/soc/fsl/fsl,qman-portal.yaml
index 17016184143f..e459fec02ba8 100644
--- a/Documentation/devicetree/bindings/soc/fsl/fsl,qman-portal.yaml
+++ b/Documentation/devicetree/bindings/soc/fsl/fsl,qman-portal.yaml
@@ -35,6 +35,7 @@ properties:
fsl,liodn:
$ref: /schemas/types.yaml#/definitions/uint32-array
+ maxItems: 2
description: See pamu.txt. Two LIODN(s). DQRR LIODN (DLIODN) and Frame LIODN
(FLIODN)
@@ -69,6 +70,7 @@ patternProperties:
type: object
properties:
fsl,liodn:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
description: See pamu.txt, PAMU property used for static LIODN assignment
fsl,iommu-parent:
diff --git a/Documentation/mm/process_addrs.rst b/Documentation/mm/process_addrs.rst
index e8618fbc62c9..1d416658d7f5 100644
--- a/Documentation/mm/process_addrs.rst
+++ b/Documentation/mm/process_addrs.rst
@@ -3,3 +3,853 @@
=================
Process Addresses
=================
+
+.. toctree::
+ :maxdepth: 3
+
+
+Userland memory ranges are tracked by the kernel via Virtual Memory Areas or
+'VMA's of type :c:struct:`!struct vm_area_struct`.
+
+Each VMA describes a virtually contiguous memory range with identical
+attributes, each described by a :c:struct:`!struct vm_area_struct`
+object. Userland access outside of VMAs is invalid except in the case where an
+adjacent stack VMA could be extended to contain the accessed address.
+
+All VMAs are contained within one and only one virtual address space, described
+by a :c:struct:`!struct mm_struct` object which is referenced by all tasks (that is,
+threads) which share the virtual address space. We refer to this as the
+:c:struct:`!mm`.
+
+Each mm object contains a maple tree data structure which describes all VMAs
+within the virtual address space.
+
+.. note:: An exception to this is the 'gate' VMA which is provided by
+ architectures which use :c:struct:`!vsyscall` and is a global static
+ object which does not belong to any specific mm.
+
+-------
+Locking
+-------
+
+The kernel is designed to be highly scalable against concurrent read operations
+on VMA **metadata** so a complicated set of locks are required to ensure memory
+corruption does not occur.
+
+.. note:: Locking VMAs for their metadata does not have any impact on the memory
+ they describe nor the page tables that map them.
+
+Terminology
+-----------
+
+* **mmap locks** - Each MM has a read/write semaphore :c:member:`!mmap_lock`
+ which locks at a process address space granularity which can be acquired via
+ :c:func:`!mmap_read_lock`, :c:func:`!mmap_write_lock` and variants.
+* **VMA locks** - The VMA lock is at VMA granularity (of course) which behaves
+ as a read/write semaphore in practice. A VMA read lock is obtained via
+ :c:func:`!lock_vma_under_rcu` (and unlocked via :c:func:`!vma_end_read`) and a
+ write lock via :c:func:`!vma_start_write` (all VMA write locks are unlocked
+ automatically when the mmap write lock is released). To take a VMA write lock
+ you **must** have already acquired an :c:func:`!mmap_write_lock`.
+* **rmap locks** - When trying to access VMAs through the reverse mapping via a
+ :c:struct:`!struct address_space` or :c:struct:`!struct anon_vma` object
+ (reachable from a folio via :c:member:`!folio->mapping`). VMAs must be stabilised via
+ :c:func:`!anon_vma_[try]lock_read` or :c:func:`!anon_vma_[try]lock_write` for
+ anonymous memory and :c:func:`!i_mmap_[try]lock_read` or
+ :c:func:`!i_mmap_[try]lock_write` for file-backed memory. We refer to these
+ locks as the reverse mapping locks, or 'rmap locks' for brevity.
+
+We discuss page table locks separately in the dedicated section below.
+
+The first thing **any** of these locks achieve is to **stabilise** the VMA
+within the MM tree. That is, guaranteeing that the VMA object will not be
+deleted from under you nor modified (except for some specific fields
+described below).
+
+Stabilising a VMA also keeps the address space described by it around.
+
+Lock usage
+----------
+
+If you want to **read** VMA metadata fields or just keep the VMA stable, you
+must do one of the following:
+
+* Obtain an mmap read lock at the MM granularity via :c:func:`!mmap_read_lock` (or a
+ suitable variant), unlocking it with a matching :c:func:`!mmap_read_unlock` when
+ you're done with the VMA, *or*
+* Try to obtain a VMA read lock via :c:func:`!lock_vma_under_rcu`. This tries to
+ acquire the lock atomically so might fail, in which case fall-back logic is
+ required to instead obtain an mmap read lock if this returns :c:macro:`!NULL`,
+ *or*
+* Acquire an rmap lock before traversing the locked interval tree (whether
+ anonymous or file-backed) to obtain the required VMA.
+
+If you want to **write** VMA metadata fields, then things vary depending on the
+field (we explore each VMA field in detail below). For the majority you must:
+
+* Obtain an mmap write lock at the MM granularity via :c:func:`!mmap_write_lock` (or a
+ suitable variant), unlocking it with a matching :c:func:`!mmap_write_unlock` when
+ you're done with the VMA, *and*
+* Obtain a VMA write lock via :c:func:`!vma_start_write` for each VMA you wish to
+ modify, which will be released automatically when :c:func:`!mmap_write_unlock` is
+ called.
+* If you want to be able to write to **any** field, you must also hide the VMA
+ from the reverse mapping by obtaining an **rmap write lock**.
+
+VMA locks are special in that you must obtain an mmap **write** lock **first**
+in order to obtain a VMA **write** lock. A VMA **read** lock however can be
+obtained without any other lock (:c:func:`!lock_vma_under_rcu` will acquire then
+release an RCU lock to lookup the VMA for you).
+
+This constrains the impact of writers on readers, as a writer can interact with
+one VMA while a reader interacts with another simultaneously.
+
+.. note:: The primary users of VMA read locks are page fault handlers, which
+ means that without a VMA write lock, page faults will run concurrent with
+ whatever you are doing.
+
+Examining all valid lock states:
+
+.. table::
+
+ ========= ======== ========= ======= ===== =========== ==========
+ mmap lock VMA lock rmap lock Stable? Read? Write most? Write all?
+ ========= ======== ========= ======= ===== =========== ==========
+ \- \- \- N N N N
+ \- R \- Y Y N N
+ \- \- R/W Y Y N N
+ R/W \-/R \-/R/W Y Y N N
+ W W \-/R Y Y Y N
+ W W W Y Y Y Y
+ ========= ======== ========= ======= ===== =========== ==========
+
+.. warning:: While it's possible to obtain a VMA lock while holding an mmap read lock,
+ attempting to do the reverse is invalid as it can result in deadlock - if
+ another task already holds an mmap write lock and attempts to acquire a VMA
+ write lock that will deadlock on the VMA read lock.
+
+All of these locks behave as read/write semaphores in practice, so you can
+obtain either a read or a write lock for each of these.
+
+.. note:: Generally speaking, a read/write semaphore is a class of lock which
+ permits concurrent readers. However a write lock can only be obtained
+ once all readers have left the critical region (and pending readers
+ made to wait).
+
+ This renders read locks on a read/write semaphore concurrent with other
+ readers and write locks exclusive against all others holding the semaphore.
+
+VMA fields
+^^^^^^^^^^
+
+We can subdivide :c:struct:`!struct vm_area_struct` fields by their purpose, which makes it
+easier to explore their locking characteristics:
+
+.. note:: We exclude VMA lock-specific fields here to avoid confusion, as these
+ are in effect an internal implementation detail.
+
+.. table:: Virtual layout fields
+
+ ===================== ======================================== ===========
+ Field Description Write lock
+ ===================== ======================================== ===========
+ :c:member:`!vm_start` Inclusive start virtual address of range mmap write,
+ VMA describes. VMA write,
+ rmap write.
+ :c:member:`!vm_end` Exclusive end virtual address of range mmap write,
+ VMA describes. VMA write,
+ rmap write.
+ :c:member:`!vm_pgoff` Describes the page offset into the file, mmap write,
+ the original page offset within the VMA write,
+ virtual address space (prior to any rmap write.
+ :c:func:`!mremap`), or PFN if a PFN map
+ and the architecture does not support
+ :c:macro:`!CONFIG_ARCH_HAS_PTE_SPECIAL`.
+ ===================== ======================================== ===========
+
+These fields describes the size, start and end of the VMA, and as such cannot be
+modified without first being hidden from the reverse mapping since these fields
+are used to locate VMAs within the reverse mapping interval trees.
+
+.. table:: Core fields
+
+ ============================ ======================================== =========================
+ Field Description Write lock
+ ============================ ======================================== =========================
+ :c:member:`!vm_mm` Containing mm_struct. None - written once on
+ initial map.
+ :c:member:`!vm_page_prot` Architecture-specific page table mmap write, VMA write.
+ protection bits determined from VMA
+ flags.
+ :c:member:`!vm_flags` Read-only access to VMA flags describing N/A
+ attributes of the VMA, in union with
+ private writable
+ :c:member:`!__vm_flags`.
+ :c:member:`!__vm_flags` Private, writable access to VMA flags mmap write, VMA write.
+ field, updated by
+ :c:func:`!vm_flags_*` functions.
+ :c:member:`!vm_file` If the VMA is file-backed, points to a None - written once on
+ struct file object describing the initial map.
+ underlying file, if anonymous then
+ :c:macro:`!NULL`.
+ :c:member:`!vm_ops` If the VMA is file-backed, then either None - Written once on
+ the driver or file-system provides a initial map by
+ :c:struct:`!struct vm_operations_struct` :c:func:`!f_ops->mmap()`.
+ object describing callbacks to be
+ invoked on VMA lifetime events.
+ :c:member:`!vm_private_data` A :c:member:`!void *` field for Handled by driver.
+ driver-specific metadata.
+ ============================ ======================================== =========================
+
+These are the core fields which describe the MM the VMA belongs to and its attributes.
+
+.. table:: Config-specific fields
+
+ ================================= ===================== ======================================== ===============
+ Field Configuration option Description Write lock
+ ================================= ===================== ======================================== ===============
+ :c:member:`!anon_name` CONFIG_ANON_VMA_NAME A field for storing a mmap write,
+ :c:struct:`!struct anon_vma_name` VMA write.
+ object providing a name for anonymous
+ mappings, or :c:macro:`!NULL` if none
+ is set or the VMA is file-backed. The
+ underlying object is reference counted
+ and can be shared across multiple VMAs
+ for scalability.
+ :c:member:`!swap_readahead_info` CONFIG_SWAP Metadata used by the swap mechanism mmap read,
+ to perform readahead. This field is swap-specific
+ accessed atomically. lock.
+ :c:member:`!vm_policy` CONFIG_NUMA :c:type:`!mempolicy` object which mmap write,
+ describes the NUMA behaviour of the VMA write.
+ VMA. The underlying object is reference
+ counted.
+ :c:member:`!numab_state` CONFIG_NUMA_BALANCING :c:type:`!vma_numab_state` object which mmap read,
+ describes the current state of numab-specific
+ NUMA balancing in relation to this VMA. lock.
+ Updated under mmap read lock by
+ :c:func:`!task_numa_work`.
+ :c:member:`!vm_userfaultfd_ctx` CONFIG_USERFAULTFD Userfaultfd context wrapper object of mmap write,
+ type :c:type:`!vm_userfaultfd_ctx`, VMA write.
+ either of zero size if userfaultfd is
+ disabled, or containing a pointer
+ to an underlying
+ :c:type:`!userfaultfd_ctx` object which
+ describes userfaultfd metadata.
+ ================================= ===================== ======================================== ===============
+
+These fields are present or not depending on whether the relevant kernel
+configuration option is set.
+
+.. table:: Reverse mapping fields
+
+ =================================== ========================================= ============================
+ Field Description Write lock
+ =================================== ========================================= ============================
+ :c:member:`!shared.rb` A red/black tree node used, if the mmap write, VMA write,
+ mapping is file-backed, to place the VMA i_mmap write.
+ in the
+ :c:member:`!struct address_space->i_mmap`
+ red/black interval tree.
+ :c:member:`!shared.rb_subtree_last` Metadata used for management of the mmap write, VMA write,
+ interval tree if the VMA is file-backed. i_mmap write.
+ :c:member:`!anon_vma_chain` List of pointers to both forked/CoW’d mmap read, anon_vma write.
+ :c:type:`!anon_vma` objects and
+ :c:member:`!vma->anon_vma` if it is
+ non-:c:macro:`!NULL`.
+ :c:member:`!anon_vma` :c:type:`!anon_vma` object used by When :c:macro:`NULL` and
+ anonymous folios mapped exclusively to setting non-:c:macro:`NULL`:
+ this VMA. Initially set by mmap read, page_table_lock.
+ :c:func:`!anon_vma_prepare` serialised
+ by the :c:macro:`!page_table_lock`. This When non-:c:macro:`NULL` and
+ is set as soon as any page is faulted in. setting :c:macro:`NULL`:
+ mmap write, VMA write,
+ anon_vma write.
+ =================================== ========================================= ============================
+
+These fields are used to both place the VMA within the reverse mapping, and for
+anonymous mappings, to be able to access both related :c:struct:`!struct anon_vma` objects
+and the :c:struct:`!struct anon_vma` in which folios mapped exclusively to this VMA should
+reside.
+
+.. note:: If a file-backed mapping is mapped with :c:macro:`!MAP_PRIVATE` set
+ then it can be in both the :c:type:`!anon_vma` and :c:type:`!i_mmap`
+ trees at the same time, so all of these fields might be utilised at
+ once.
+
+Page tables
+-----------
+
+We won't speak exhaustively on the subject but broadly speaking, page tables map
+virtual addresses to physical ones through a series of page tables, each of
+which contain entries with physical addresses for the next page table level
+(along with flags), and at the leaf level the physical addresses of the
+underlying physical data pages or a special entry such as a swap entry,
+migration entry or other special marker. Offsets into these pages are provided
+by the virtual address itself.
+
+In Linux these are divided into five levels - PGD, P4D, PUD, PMD and PTE. Huge
+pages might eliminate one or two of these levels, but when this is the case we
+typically refer to the leaf level as the PTE level regardless.
+
+.. note:: In instances where the architecture supports fewer page tables than
+ five the kernel cleverly 'folds' page table levels, that is stubbing
+ out functions related to the skipped levels. This allows us to
+ conceptually act as if there were always five levels, even if the
+ compiler might, in practice, eliminate any code relating to missing
+ ones.
+
+There are four key operations typically performed on page tables:
+
+1. **Traversing** page tables - Simply reading page tables in order to traverse
+ them. This only requires that the VMA is kept stable, so a lock which
+ establishes this suffices for traversal (there are also lockless variants
+ which eliminate even this requirement, such as :c:func:`!gup_fast`).
+2. **Installing** page table mappings - Whether creating a new mapping or
+ modifying an existing one in such a way as to change its identity. This
+ requires that the VMA is kept stable via an mmap or VMA lock (explicitly not
+ rmap locks).
+3. **Zapping/unmapping** page table entries - This is what the kernel calls
+ clearing page table mappings at the leaf level only, whilst leaving all page
+ tables in place. This is a very common operation in the kernel performed on
+ file truncation, the :c:macro:`!MADV_DONTNEED` operation via
+ :c:func:`!madvise`, and others. This is performed by a number of functions
+ including :c:func:`!unmap_mapping_range` and :c:func:`!unmap_mapping_pages`.
+ The VMA need only be kept stable for this operation.
+4. **Freeing** page tables - When finally the kernel removes page tables from a
+ userland process (typically via :c:func:`!free_pgtables`) extreme care must
+ be taken to ensure this is done safely, as this logic finally frees all page
+ tables in the specified range, ignoring existing leaf entries (it assumes the
+ caller has both zapped the range and prevented any further faults or
+ modifications within it).
+
+.. note:: Modifying mappings for reclaim or migration is performed under rmap
+ lock as it, like zapping, does not fundamentally modify the identity
+ of what is being mapped.
+
+**Traversing** and **zapping** ranges can be performed holding any one of the
+locks described in the terminology section above - that is the mmap lock, the
+VMA lock or either of the reverse mapping locks.
+
+That is - as long as you keep the relevant VMA **stable** - you are good to go
+ahead and perform these operations on page tables (though internally, kernel
+operations that perform writes also acquire internal page table locks to
+serialise - see the page table implementation detail section for more details).
+
+When **installing** page table entries, the mmap or VMA lock must be held to
+keep the VMA stable. We explore why this is in the page table locking details
+section below.
+
+.. warning:: Page tables are normally only traversed in regions covered by VMAs.
+ If you want to traverse page tables in areas that might not be
+ covered by VMAs, heavier locking is required.
+ See :c:func:`!walk_page_range_novma` for details.
+
+**Freeing** page tables is an entirely internal memory management operation and
+has special requirements (see the page freeing section below for more details).
+
+.. warning:: When **freeing** page tables, it must not be possible for VMAs
+ containing the ranges those page tables map to be accessible via
+ the reverse mapping.
+
+ The :c:func:`!free_pgtables` function removes the relevant VMAs
+ from the reverse mappings, but no other VMAs can be permitted to be
+ accessible and span the specified range.
+
+Lock ordering
+-------------
+
+As we have multiple locks across the kernel which may or may not be taken at the
+same time as explicit mm or VMA locks, we have to be wary of lock inversion, and
+the **order** in which locks are acquired and released becomes very important.
+
+.. note:: Lock inversion occurs when two threads need to acquire multiple locks,
+ but in doing so inadvertently cause a mutual deadlock.
+
+ For example, consider thread 1 which holds lock A and tries to acquire lock B,
+ while thread 2 holds lock B and tries to acquire lock A.
+
+ Both threads are now deadlocked on each other. However, had they attempted to
+ acquire locks in the same order, one would have waited for the other to
+ complete its work and no deadlock would have occurred.
+
+The opening comment in :c:macro:`!mm/rmap.c` describes in detail the required
+ordering of locks within memory management code:
+
+.. code-block::
+
+ inode->i_rwsem (while writing or truncating, not reading or faulting)
+ mm->mmap_lock
+ mapping->invalidate_lock (in filemap_fault)
+ folio_lock
+ hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share, see hugetlbfs below)
+ vma_start_write
+ mapping->i_mmap_rwsem
+ anon_vma->rwsem
+ mm->page_table_lock or pte_lock
+ swap_lock (in swap_duplicate, swap_info_get)
+ mmlist_lock (in mmput, drain_mmlist and others)
+ mapping->private_lock (in block_dirty_folio)
+ i_pages lock (widely used)
+ lruvec->lru_lock (in folio_lruvec_lock_irq)
+ inode->i_lock (in set_page_dirty's __mark_inode_dirty)
+ bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
+ sb_lock (within inode_lock in fs/fs-writeback.c)
+ i_pages lock (widely used, in set_page_dirty,
+ in arch-dependent flush_dcache_mmap_lock,
+ within bdi.wb->list_lock in __sync_single_inode)
+
+There is also a file-system specific lock ordering comment located at the top of
+:c:macro:`!mm/filemap.c`:
+
+.. code-block::
+
+ ->i_mmap_rwsem (truncate_pagecache)
+ ->private_lock (__free_pte->block_dirty_folio)
+ ->swap_lock (exclusive_swap_page, others)
+ ->i_pages lock
+
+ ->i_rwsem
+ ->invalidate_lock (acquired by fs in truncate path)
+ ->i_mmap_rwsem (truncate->unmap_mapping_range)
+
+ ->mmap_lock
+ ->i_mmap_rwsem
+ ->page_table_lock or pte_lock (various, mainly in memory.c)
+ ->i_pages lock (arch-dependent flush_dcache_mmap_lock)
+
+ ->mmap_lock
+ ->invalidate_lock (filemap_fault)
+ ->lock_page (filemap_fault, access_process_vm)
+
+ ->i_rwsem (generic_perform_write)
+ ->mmap_lock (fault_in_readable->do_page_fault)
+
+ bdi->wb.list_lock
+ sb_lock (fs/fs-writeback.c)
+ ->i_pages lock (__sync_single_inode)
+
+ ->i_mmap_rwsem
+ ->anon_vma.lock (vma_merge)
+
+ ->anon_vma.lock
+ ->page_table_lock or pte_lock (anon_vma_prepare and various)
+
+ ->page_table_lock or pte_lock
+ ->swap_lock (try_to_unmap_one)
+ ->private_lock (try_to_unmap_one)
+ ->i_pages lock (try_to_unmap_one)
+ ->lruvec->lru_lock (follow_page_mask->mark_page_accessed)
+ ->lruvec->lru_lock (check_pte_range->folio_isolate_lru)
+ ->private_lock (folio_remove_rmap_pte->set_page_dirty)
+ ->i_pages lock (folio_remove_rmap_pte->set_page_dirty)
+ bdi.wb->list_lock (folio_remove_rmap_pte->set_page_dirty)
+ ->inode->i_lock (folio_remove_rmap_pte->set_page_dirty)
+ bdi.wb->list_lock (zap_pte_range->set_page_dirty)
+ ->inode->i_lock (zap_pte_range->set_page_dirty)
+ ->private_lock (zap_pte_range->block_dirty_folio)
+
+Please check the current state of these comments which may have changed since
+the time of writing of this document.
+
+------------------------------
+Locking Implementation Details
+------------------------------
+
+.. warning:: Locking rules for PTE-level page tables are very different from
+ locking rules for page tables at other levels.
+
+Page table locking details
+--------------------------
+
+In addition to the locks described in the terminology section above, we have
+additional locks dedicated to page tables:
+
+* **Higher level page table locks** - Higher level page tables, that is PGD, P4D
+ and PUD each make use of the process address space granularity
+ :c:member:`!mm->page_table_lock` lock when modified.
+
+* **Fine-grained page table locks** - PMDs and PTEs each have fine-grained locks
+ either kept within the folios describing the page tables or allocated
+ separated and pointed at by the folios if :c:macro:`!ALLOC_SPLIT_PTLOCKS` is
+ set. The PMD spin lock is obtained via :c:func:`!pmd_lock`, however PTEs are
+ mapped into higher memory (if a 32-bit system) and carefully locked via
+ :c:func:`!pte_offset_map_lock`.
+
+These locks represent the minimum required to interact with each page table
+level, but there are further requirements.
+
+Importantly, note that on a **traversal** of page tables, sometimes no such
+locks are taken. However, at the PTE level, at least concurrent page table
+deletion must be prevented (using RCU) and the page table must be mapped into
+high memory, see below.
+
+Whether care is taken on reading the page table entries depends on the
+architecture, see the section on atomicity below.
+
+Locking rules
+^^^^^^^^^^^^^
+
+We establish basic locking rules when interacting with page tables:
+
+* When changing a page table entry the page table lock for that page table
+ **must** be held, except if you can safely assume nobody can access the page
+ tables concurrently (such as on invocation of :c:func:`!free_pgtables`).
+* Reads from and writes to page table entries must be *appropriately*
+ atomic. See the section on atomicity below for details.
+* Populating previously empty entries requires that the mmap or VMA locks are
+ held (read or write), doing so with only rmap locks would be dangerous (see
+ the warning below).
+* As mentioned previously, zapping can be performed while simply keeping the VMA
+ stable, that is holding any one of the mmap, VMA or rmap locks.
+
+.. warning:: Populating previously empty entries is dangerous as, when unmapping
+ VMAs, :c:func:`!vms_clear_ptes` has a window of time between
+ zapping (via :c:func:`!unmap_vmas`) and freeing page tables (via
+ :c:func:`!free_pgtables`), where the VMA is still visible in the
+ rmap tree. :c:func:`!free_pgtables` assumes that the zap has
+ already been performed and removes PTEs unconditionally (along with
+ all other page tables in the freed range), so installing new PTE
+ entries could leak memory and also cause other unexpected and
+ dangerous behaviour.
+
+There are additional rules applicable when moving page tables, which we discuss
+in the section on this topic below.
+
+PTE-level page tables are different from page tables at other levels, and there
+are extra requirements for accessing them:
+
+* On 32-bit architectures, they may be in high memory (meaning they need to be
+ mapped into kernel memory to be accessible).
+* When empty, they can be unlinked and RCU-freed while holding an mmap lock or
+ rmap lock for reading in combination with the PTE and PMD page table locks.
+ In particular, this happens in :c:func:`!retract_page_tables` when handling
+ :c:macro:`!MADV_COLLAPSE`.
+ So accessing PTE-level page tables requires at least holding an RCU read lock;
+ but that only suffices for readers that can tolerate racing with concurrent
+ page table updates such that an empty PTE is observed (in a page table that
+ has actually already been detached and marked for RCU freeing) while another
+ new page table has been installed in the same location and filled with
+ entries. Writers normally need to take the PTE lock and revalidate that the
+ PMD entry still refers to the same PTE-level page table.
+
+To access PTE-level page tables, a helper like :c:func:`!pte_offset_map_lock` or
+:c:func:`!pte_offset_map` can be used depending on stability requirements.
+These map the page table into kernel memory if required, take the RCU lock, and
+depending on variant, may also look up or acquire the PTE lock.
+See the comment on :c:func:`!__pte_offset_map_lock`.
+
+Atomicity
+^^^^^^^^^
+
+Regardless of page table locks, the MMU hardware concurrently updates accessed
+and dirty bits (perhaps more, depending on architecture). Additionally, page
+table traversal operations in parallel (though holding the VMA stable) and
+functionality like GUP-fast locklessly traverses (that is reads) page tables,
+without even keeping the VMA stable at all.
+
+When performing a page table traversal and keeping the VMA stable, whether a
+read must be performed once and only once or not depends on the architecture
+(for instance x86-64 does not require any special precautions).
+
+If a write is being performed, or if a read informs whether a write takes place
+(on an installation of a page table entry say, for instance in
+:c:func:`!__pud_install`), special care must always be taken. In these cases we
+can never assume that page table locks give us entirely exclusive access, and
+must retrieve page table entries once and only once.
+
+If we are reading page table entries, then we need only ensure that the compiler
+does not rearrange our loads. This is achieved via :c:func:`!pXXp_get`
+functions - :c:func:`!pgdp_get`, :c:func:`!p4dp_get`, :c:func:`!pudp_get`,
+:c:func:`!pmdp_get`, and :c:func:`!ptep_get`.
+
+Each of these uses :c:func:`!READ_ONCE` to guarantee that the compiler reads
+the page table entry only once.
+
+However, if we wish to manipulate an existing page table entry and care about
+the previously stored data, we must go further and use an hardware atomic
+operation as, for example, in :c:func:`!ptep_get_and_clear`.
+
+Equally, operations that do not rely on the VMA being held stable, such as
+GUP-fast (see :c:func:`!gup_fast` and its various page table level handlers like
+:c:func:`!gup_fast_pte_range`), must very carefully interact with page table
+entries, using functions such as :c:func:`!ptep_get_lockless` and equivalent for
+higher level page table levels.
+
+Writes to page table entries must also be appropriately atomic, as established
+by :c:func:`!set_pXX` functions - :c:func:`!set_pgd`, :c:func:`!set_p4d`,
+:c:func:`!set_pud`, :c:func:`!set_pmd`, and :c:func:`!set_pte`.
+
+Equally functions which clear page table entries must be appropriately atomic,
+as in :c:func:`!pXX_clear` functions - :c:func:`!pgd_clear`,
+:c:func:`!p4d_clear`, :c:func:`!pud_clear`, :c:func:`!pmd_clear`, and
+:c:func:`!pte_clear`.
+
+Page table installation
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Page table installation is performed with the VMA held stable explicitly by an
+mmap or VMA lock in read or write mode (see the warning in the locking rules
+section for details as to why).
+
+When allocating a P4D, PUD or PMD and setting the relevant entry in the above
+PGD, P4D or PUD, the :c:member:`!mm->page_table_lock` must be held. This is
+acquired in :c:func:`!__p4d_alloc`, :c:func:`!__pud_alloc` and
+:c:func:`!__pmd_alloc` respectively.
+
+.. note:: :c:func:`!__pmd_alloc` actually invokes :c:func:`!pud_lock` and
+ :c:func:`!pud_lockptr` in turn, however at the time of writing it ultimately
+ references the :c:member:`!mm->page_table_lock`.
+
+Allocating a PTE will either use the :c:member:`!mm->page_table_lock` or, if
+:c:macro:`!USE_SPLIT_PMD_PTLOCKS` is defined, a lock embedded in the PMD
+physical page metadata in the form of a :c:struct:`!struct ptdesc`, acquired by
+:c:func:`!pmd_ptdesc` called from :c:func:`!pmd_lock` and ultimately
+:c:func:`!__pte_alloc`.
+
+Finally, modifying the contents of the PTE requires special treatment, as the
+PTE page table lock must be acquired whenever we want stable and exclusive
+access to entries contained within a PTE, especially when we wish to modify
+them.
+
+This is performed via :c:func:`!pte_offset_map_lock` which carefully checks to
+ensure that the PTE hasn't changed from under us, ultimately invoking
+:c:func:`!pte_lockptr` to obtain a spin lock at PTE granularity contained within
+the :c:struct:`!struct ptdesc` associated with the physical PTE page. The lock
+must be released via :c:func:`!pte_unmap_unlock`.
+
+.. note:: There are some variants on this, such as
+ :c:func:`!pte_offset_map_rw_nolock` when we know we hold the PTE stable but
+ for brevity we do not explore this. See the comment for
+ :c:func:`!__pte_offset_map_lock` for more details.
+
+When modifying data in ranges we typically only wish to allocate higher page
+tables as necessary, using these locks to avoid races or overwriting anything,
+and set/clear data at the PTE level as required (for instance when page faulting
+or zapping).
+
+A typical pattern taken when traversing page table entries to install a new
+mapping is to optimistically determine whether the page table entry in the table
+above is empty, if so, only then acquiring the page table lock and checking
+again to see if it was allocated underneath us.
+
+This allows for a traversal with page table locks only being taken when
+required. An example of this is :c:func:`!__pud_alloc`.
+
+At the leaf page table, that is the PTE, we can't entirely rely on this pattern
+as we have separate PMD and PTE locks and a THP collapse for instance might have
+eliminated the PMD entry as well as the PTE from under us.
+
+This is why :c:func:`!__pte_offset_map_lock` locklessly retrieves the PMD entry
+for the PTE, carefully checking it is as expected, before acquiring the
+PTE-specific lock, and then *again* checking that the PMD entry is as expected.
+
+If a THP collapse (or similar) were to occur then the lock on both pages would
+be acquired, so we can ensure this is prevented while the PTE lock is held.
+
+Installing entries this way ensures mutual exclusion on write.
+
+Page table freeing
+^^^^^^^^^^^^^^^^^^
+
+Tearing down page tables themselves is something that requires significant
+care. There must be no way that page tables designated for removal can be
+traversed or referenced by concurrent tasks.
+
+It is insufficient to simply hold an mmap write lock and VMA lock (which will
+prevent racing faults, and rmap operations), as a file-backed mapping can be
+truncated under the :c:struct:`!struct address_space->i_mmap_rwsem` alone.
+
+As a result, no VMA which can be accessed via the reverse mapping (either
+through the :c:struct:`!struct anon_vma->rb_root` or the :c:member:`!struct
+address_space->i_mmap` interval trees) can have its page tables torn down.
+
+The operation is typically performed via :c:func:`!free_pgtables`, which assumes
+either the mmap write lock has been taken (as specified by its
+:c:member:`!mm_wr_locked` parameter), or that the VMA is already unreachable.
+
+It carefully removes the VMA from all reverse mappings, however it's important
+that no new ones overlap these or any route remain to permit access to addresses
+within the range whose page tables are being torn down.
+
+Additionally, it assumes that a zap has already been performed and steps have
+been taken to ensure that no further page table entries can be installed between
+the zap and the invocation of :c:func:`!free_pgtables`.
+
+Since it is assumed that all such steps have been taken, page table entries are
+cleared without page table locks (in the :c:func:`!pgd_clear`, :c:func:`!p4d_clear`,
+:c:func:`!pud_clear`, and :c:func:`!pmd_clear` functions.
+
+.. note:: It is possible for leaf page tables to be torn down independent of
+ the page tables above it as is done by
+ :c:func:`!retract_page_tables`, which is performed under the i_mmap
+ read lock, PMD, and PTE page table locks, without this level of care.
+
+Page table moving
+^^^^^^^^^^^^^^^^^
+
+Some functions manipulate page table levels above PMD (that is PUD, P4D and PGD
+page tables). Most notable of these is :c:func:`!mremap`, which is capable of
+moving higher level page tables.
+
+In these instances, it is required that **all** locks are taken, that is
+the mmap lock, the VMA lock and the relevant rmap locks.
+
+You can observe this in the :c:func:`!mremap` implementation in the functions
+:c:func:`!take_rmap_locks` and :c:func:`!drop_rmap_locks` which perform the rmap
+side of lock acquisition, invoked ultimately by :c:func:`!move_page_tables`.
+
+VMA lock internals
+------------------
+
+Overview
+^^^^^^^^
+
+VMA read locking is entirely optimistic - if the lock is contended or a competing
+write has started, then we do not obtain a read lock.
+
+A VMA **read** lock is obtained by :c:func:`!lock_vma_under_rcu`, which first
+calls :c:func:`!rcu_read_lock` to ensure that the VMA is looked up in an RCU
+critical section, then attempts to VMA lock it via :c:func:`!vma_start_read`,
+before releasing the RCU lock via :c:func:`!rcu_read_unlock`.
+
+VMA read locks hold the read lock on the :c:member:`!vma->vm_lock` semaphore for
+their duration and the caller of :c:func:`!lock_vma_under_rcu` must release it
+via :c:func:`!vma_end_read`.
+
+VMA **write** locks are acquired via :c:func:`!vma_start_write` in instances where a
+VMA is about to be modified, unlike :c:func:`!vma_start_read` the lock is always
+acquired. An mmap write lock **must** be held for the duration of the VMA write
+lock, releasing or downgrading the mmap write lock also releases the VMA write
+lock so there is no :c:func:`!vma_end_write` function.
+
+Note that a semaphore write lock is not held across a VMA lock. Rather, a
+sequence number is used for serialisation, and the write semaphore is only
+acquired at the point of write lock to update this.
+
+This ensures the semantics we require - VMA write locks provide exclusive write
+access to the VMA.
+
+Implementation details
+^^^^^^^^^^^^^^^^^^^^^^
+
+The VMA lock mechanism is designed to be a lightweight means of avoiding the use
+of the heavily contended mmap lock. It is implemented using a combination of a
+read/write semaphore and sequence numbers belonging to the containing
+:c:struct:`!struct mm_struct` and the VMA.
+
+Read locks are acquired via :c:func:`!vma_start_read`, which is an optimistic
+operation, i.e. it tries to acquire a read lock but returns false if it is
+unable to do so. At the end of the read operation, :c:func:`!vma_end_read` is
+called to release the VMA read lock.
+
+Invoking :c:func:`!vma_start_read` requires that :c:func:`!rcu_read_lock` has
+been called first, establishing that we are in an RCU critical section upon VMA
+read lock acquisition. Once acquired, the RCU lock can be released as it is only
+required for lookup. This is abstracted by :c:func:`!lock_vma_under_rcu` which
+is the interface a user should use.
+
+Writing requires the mmap to be write-locked and the VMA lock to be acquired via
+:c:func:`!vma_start_write`, however the write lock is released by the termination or
+downgrade of the mmap write lock so no :c:func:`!vma_end_write` is required.
+
+All this is achieved by the use of per-mm and per-VMA sequence counts, which are
+used in order to reduce complexity, especially for operations which write-lock
+multiple VMAs at once.
+
+If the mm sequence count, :c:member:`!mm->mm_lock_seq` is equal to the VMA
+sequence count :c:member:`!vma->vm_lock_seq` then the VMA is write-locked. If
+they differ, then it is not.
+
+Each time the mmap write lock is released in :c:func:`!mmap_write_unlock` or
+:c:func:`!mmap_write_downgrade`, :c:func:`!vma_end_write_all` is invoked which
+also increments :c:member:`!mm->mm_lock_seq` via
+:c:func:`!mm_lock_seqcount_end`.
+
+This way, we ensure that, regardless of the VMA's sequence number, a write lock
+is never incorrectly indicated and that when we release an mmap write lock we
+efficiently release **all** VMA write locks contained within the mmap at the
+same time.
+
+Since the mmap write lock is exclusive against others who hold it, the automatic
+release of any VMA locks on its release makes sense, as you would never want to
+keep VMAs locked across entirely separate write operations. It also maintains
+correct lock ordering.
+
+Each time a VMA read lock is acquired, we acquire a read lock on the
+:c:member:`!vma->vm_lock` read/write semaphore and hold it, while checking that
+the sequence count of the VMA does not match that of the mm.
+
+If it does, the read lock fails. If it does not, we hold the lock, excluding
+writers, but permitting other readers, who will also obtain this lock under RCU.
+
+Importantly, maple tree operations performed in :c:func:`!lock_vma_under_rcu`
+are also RCU safe, so the whole read lock operation is guaranteed to function
+correctly.
+
+On the write side, we acquire a write lock on the :c:member:`!vma->vm_lock`
+read/write semaphore, before setting the VMA's sequence number under this lock,
+also simultaneously holding the mmap write lock.
+
+This way, if any read locks are in effect, :c:func:`!vma_start_write` will sleep
+until these are finished and mutual exclusion is achieved.
+
+After setting the VMA's sequence number, the lock is released, avoiding
+complexity with a long-term held write lock.
+
+This clever combination of a read/write semaphore and sequence count allows for
+fast RCU-based per-VMA lock acquisition (especially on page fault, though
+utilised elsewhere) with minimal complexity around lock ordering.
+
+mmap write lock downgrading
+---------------------------
+
+When an mmap write lock is held one has exclusive access to resources within the
+mmap (with the usual caveats about requiring VMA write locks to avoid races with
+tasks holding VMA read locks).
+
+It is then possible to **downgrade** from a write lock to a read lock via
+:c:func:`!mmap_write_downgrade` which, similar to :c:func:`!mmap_write_unlock`,
+implicitly terminates all VMA write locks via :c:func:`!vma_end_write_all`, but
+importantly does not relinquish the mmap lock while downgrading, therefore
+keeping the locked virtual address space stable.
+
+An interesting consequence of this is that downgraded locks are exclusive
+against any other task possessing a downgraded lock (since a racing task would
+have to acquire a write lock first to downgrade it, and the downgraded lock
+prevents a new write lock from being obtained until the original lock is
+released).
+
+For clarity, we map read (R)/downgraded write (D)/write (W) locks against one
+another showing which locks exclude the others:
+
+.. list-table:: Lock exclusivity
+ :widths: 5 5 5 5
+ :header-rows: 1
+ :stub-columns: 1
+
+ * -
+ - R
+ - D
+ - W
+ * - R
+ - N
+ - N
+ - Y
+ * - D
+ - N
+ - Y
+ - Y
+ * - W
+ - Y
+ - Y
+ - Y
+
+Here a Y indicates the locks in the matching row/column are mutually exclusive,
+and N indicates that they are not.
+
+Stack expansion
+---------------
+
+Stack expansion throws up additional complexities in that we cannot permit there
+to be racing page faults, as a result we invoke :c:func:`!vma_start_write` to
+prevent this in :c:func:`!expand_downwards` or :c:func:`!expand_upwards`.
diff --git a/MAINTAINERS b/MAINTAINERS
index baf0eeb9a355..910305c11e8a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7347,7 +7347,7 @@ F: drivers/gpu/drm/panel/panel-novatek-nt36672a.c
DRM DRIVER FOR NVIDIA GEFORCE/QUADRO GPUS
M: Karol Herbst <kherbst@redhat.com>
M: Lyude Paul <lyude@redhat.com>
-M: Danilo Krummrich <dakr@redhat.com>
+M: Danilo Krummrich <dakr@kernel.org>
L: dri-devel@lists.freedesktop.org
L: nouveau@lists.freedesktop.org
S: Supported
@@ -8453,7 +8453,7 @@ F: include/video/s1d13xxxfb.h
EROFS FILE SYSTEM
M: Gao Xiang <xiang@kernel.org>
M: Chao Yu <chao@kernel.org>
-R: Yue Hu <huyue2@coolpad.com>
+R: Yue Hu <zbestahu@gmail.com>
R: Jeffle Xu <jefflexu@linux.alibaba.com>
R: Sandeep Dhavale <dhavale@google.com>
L: linux-erofs@lists.ozlabs.org
@@ -8924,7 +8924,7 @@ F: include/linux/arm_ffa.h
FIRMWARE LOADER (request_firmware)
M: Luis Chamberlain <mcgrof@kernel.org>
M: Russ Weight <russ.weight@linux.dev>
-M: Danilo Krummrich <dakr@redhat.com>
+M: Danilo Krummrich <dakr@kernel.org>
L: linux-kernel@vger.kernel.org
S: Maintained
F: Documentation/firmware_class/
diff --git a/Makefile b/Makefile
index e5b8a8832c0c..5c9b1d2d59b4 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 6
PATCHLEVEL = 13
SUBLEVEL = 0
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc4
NAME = Baby Opossum Posse
# *DOCUMENTATION*
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index ea5a1dcb133b..4f2eeda907ec 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -6,6 +6,7 @@
config ARC
def_bool y
select ARC_TIMERS
+ select ARCH_HAS_CPU_CACHE_ALIASING
select ARCH_HAS_CACHE_LINE_SIZE
select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DMA_PREP_COHERENT
diff --git a/arch/arc/include/asm/cachetype.h b/arch/arc/include/asm/cachetype.h
new file mode 100644
index 000000000000..acd3b6cb4bf5
--- /dev/null
+++ b/arch/arc/include/asm/cachetype.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_ARC_CACHETYPE_H
+#define __ASM_ARC_CACHETYPE_H
+
+#define cpu_dcache_is_aliasing() false
+#define cpu_icache_is_aliasing() true
+
+#endif
diff --git a/arch/arm64/boot/dts/arm/fvp-base-revc.dts b/arch/arm64/boot/dts/arm/fvp-base-revc.dts
index 19973ab4ea6b..9e10d7a6b5a2 100644
--- a/arch/arm64/boot/dts/arm/fvp-base-revc.dts
+++ b/arch/arm64/boot/dts/arm/fvp-base-revc.dts
@@ -233,7 +233,7 @@
#interrupt-cells = <0x1>;
compatible = "pci-host-ecam-generic";
device_type = "pci";
- bus-range = <0x0 0x1>;
+ bus-range = <0x0 0xff>;
reg = <0x0 0x40000000 0x0 0x10000000>;
ranges = <0x2000000 0x0 0x50000000 0x0 0x50000000 0x0 0x10000000>;
interrupt-map = <0 0 0 1 &gic 0 0 GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm64/boot/dts/broadcom/bcm2712.dtsi b/arch/arm64/boot/dts/broadcom/bcm2712.dtsi
index 6e5a984c1d4e..26a29e5e5078 100644
--- a/arch/arm64/boot/dts/broadcom/bcm2712.dtsi
+++ b/arch/arm64/boot/dts/broadcom/bcm2712.dtsi
@@ -67,7 +67,7 @@
l2_cache_l0: l2-cache-l0 {
compatible = "cache";
cache-size = <0x80000>;
- cache-line-size = <128>;
+ cache-line-size = <64>;
cache-sets = <1024>; //512KiB(size)/64(line-size)=8192ways/8-way set
cache-level = <2>;
cache-unified;
@@ -91,7 +91,7 @@
l2_cache_l1: l2-cache-l1 {
compatible = "cache";
cache-size = <0x80000>;
- cache-line-size = <128>;
+ cache-line-size = <64>;
cache-sets = <1024>; //512KiB(size)/64(line-size)=8192ways/8-way set
cache-level = <2>;
cache-unified;
@@ -115,7 +115,7 @@
l2_cache_l2: l2-cache-l2 {
compatible = "cache";
cache-size = <0x80000>;
- cache-line-size = <128>;
+ cache-line-size = <64>;
cache-sets = <1024>; //512KiB(size)/64(line-size)=8192ways/8-way set
cache-level = <2>;
cache-unified;
@@ -139,7 +139,7 @@
l2_cache_l3: l2-cache-l3 {
compatible = "cache";
cache-size = <0x80000>;
- cache-line-size = <128>;
+ cache-line-size = <64>;
cache-sets = <1024>; //512KiB(size)/64(line-size)=8192ways/8-way set
cache-level = <2>;
cache-unified;
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 37e24f1bd227..99ea26d400ff 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -36,15 +36,8 @@
#include <asm/traps.h>
#include <asm/vdso.h>
-#ifdef CONFIG_ARM64_GCS
#define GCS_SIGNAL_CAP(addr) (((unsigned long)addr) & GCS_CAP_ADDR_MASK)
-static bool gcs_signal_cap_valid(u64 addr, u64 val)
-{
- return val == GCS_SIGNAL_CAP(addr);
-}
-#endif
-
/*
* Do a signal return; undo the signal stack. These are aligned to 128-bit.
*/
@@ -1062,8 +1055,7 @@ static int restore_sigframe(struct pt_regs *regs,
#ifdef CONFIG_ARM64_GCS
static int gcs_restore_signal(void)
{
- unsigned long __user *gcspr_el0;
- u64 cap;
+ u64 gcspr_el0, cap;
int ret;
if (!system_supports_gcs())
@@ -1072,7 +1064,7 @@ static int gcs_restore_signal(void)
if (!(current->thread.gcs_el0_mode & PR_SHADOW_STACK_ENABLE))
return 0;
- gcspr_el0 = (unsigned long __user *)read_sysreg_s(SYS_GCSPR_EL0);
+ gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0);
/*
* Ensure that any changes to the GCS done via GCS operations
@@ -1087,22 +1079,23 @@ static int gcs_restore_signal(void)
* then faults will be generated on GCS operations - the main
* concern is to protect GCS pages.
*/
- ret = copy_from_user(&cap, gcspr_el0, sizeof(cap));
+ ret = copy_from_user(&cap, (unsigned long __user *)gcspr_el0,
+ sizeof(cap));
if (ret)
return -EFAULT;
/*
* Check that the cap is the actual GCS before replacing it.
*/
- if (!gcs_signal_cap_valid((u64)gcspr_el0, cap))
+ if (cap != GCS_SIGNAL_CAP(gcspr_el0))
return -EINVAL;
/* Invalidate the token to prevent reuse */
- put_user_gcs(0, (__user void*)gcspr_el0, &ret);
+ put_user_gcs(0, (unsigned long __user *)gcspr_el0, &ret);
if (ret != 0)
return -EFAULT;
- write_sysreg_s(gcspr_el0 + 1, SYS_GCSPR_EL0);
+ write_sysreg_s(gcspr_el0 + 8, SYS_GCSPR_EL0);
return 0;
}
@@ -1421,7 +1414,7 @@ static int get_sigframe(struct rt_sigframe_user_layout *user,
static int gcs_signal_entry(__sigrestore_t sigtramp, struct ksignal *ksig)
{
- unsigned long __user *gcspr_el0;
+ u64 gcspr_el0;
int ret = 0;
if (!system_supports_gcs())
@@ -1434,18 +1427,20 @@ static int gcs_signal_entry(__sigrestore_t sigtramp, struct ksignal *ksig)
* We are entering a signal handler, current register state is
* active.
*/
- gcspr_el0 = (unsigned long __user *)read_sysreg_s(SYS_GCSPR_EL0);
+ gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0);
/*
* Push a cap and the GCS entry for the trampoline onto the GCS.
*/
- put_user_gcs((unsigned long)sigtramp, gcspr_el0 - 2, &ret);
- put_user_gcs(GCS_SIGNAL_CAP(gcspr_el0 - 1), gcspr_el0 - 1, &ret);
+ put_user_gcs((unsigned long)sigtramp,
+ (unsigned long __user *)(gcspr_el0 - 16), &ret);
+ put_user_gcs(GCS_SIGNAL_CAP(gcspr_el0 - 8),
+ (unsigned long __user *)(gcspr_el0 - 8), &ret);
if (ret != 0)
return ret;
- gcspr_el0 -= 2;
- write_sysreg_s((unsigned long)gcspr_el0, SYS_GCSPR_EL0);
+ gcspr_el0 -= 16;
+ write_sysreg_s(gcspr_el0, SYS_GCSPR_EL0);
return 0;
}
diff --git a/arch/hexagon/Makefile b/arch/hexagon/Makefile
index 92d005958dfb..ff172cbe5881 100644
--- a/arch/hexagon/Makefile
+++ b/arch/hexagon/Makefile
@@ -32,3 +32,9 @@ KBUILD_LDFLAGS += $(ldflags-y)
TIR_NAME := r19
KBUILD_CFLAGS += -ffixed-$(TIR_NAME) -DTHREADINFO_REG=$(TIR_NAME) -D__linux__
KBUILD_AFLAGS += -DTHREADINFO_REG=$(TIR_NAME)
+
+# Disable HexagonConstExtenders pass for LLVM versions prior to 19.1.0
+# https://github.com/llvm/llvm-project/issues/99714
+ifneq ($(call clang-min-version, 190100),y)
+KBUILD_CFLAGS += -mllvm -hexagon-cext=false
+endif
diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig
index 57ded82c2840..e8b3f67bf3f5 100644
--- a/arch/powerpc/configs/pmac32_defconfig
+++ b/arch/powerpc/configs/pmac32_defconfig
@@ -208,6 +208,7 @@ CONFIG_FB_ATY=y
CONFIG_FB_ATY_CT=y
CONFIG_FB_ATY_GX=y
CONFIG_FB_3DFX=y
+CONFIG_BACKLIGHT_CLASS_DEVICE=y
# CONFIG_VGA_CONSOLE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index 4d77e17541e9..ca0c90e95837 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -716,6 +716,7 @@ CONFIG_FB_TRIDENT=m
CONFIG_FB_SM501=m
CONFIG_FB_IBM_GXT4500=y
CONFIG_LCD_PLATFORM=m
+CONFIG_BACKLIGHT_CLASS_DEVICE=y
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y
CONFIG_LOGO=y
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index abe6e6c0ab98..6087d38c7235 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -234,6 +234,8 @@ static unsigned long get_vmem_size(unsigned long identity_size,
vsize = round_up(SZ_2G + max_mappable, rte_size) +
round_up(vmemmap_size, rte_size) +
FIXMAP_SIZE + MODULES_LEN + KASLR_LEN;
+ if (IS_ENABLED(CONFIG_KMSAN))
+ vsize += MODULES_LEN * 2;
return size_add(vsize, vmalloc_size);
}
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
index 145035f84a0e..3fa28db2fe59 100644
--- a/arch/s390/boot/vmem.c
+++ b/arch/s390/boot/vmem.c
@@ -306,7 +306,7 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e
pages++;
}
}
- if (mode == POPULATE_DIRECT)
+ if (mode == POPULATE_IDENTITY)
update_page_count(PG_DIRECT_MAP_4K, pages);
}
@@ -339,7 +339,7 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e
}
pgtable_pte_populate(pmd, addr, next, mode);
}
- if (mode == POPULATE_DIRECT)
+ if (mode == POPULATE_IDENTITY)
update_page_count(PG_DIRECT_MAP_1M, pages);
}
@@ -372,7 +372,7 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e
}
pgtable_pmd_populate(pud, addr, next, mode);
}
- if (mode == POPULATE_DIRECT)
+ if (mode == POPULATE_IDENTITY)
update_page_count(PG_DIRECT_MAP_2G, pages);
}
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index edbb52ce3f1e..7d12a1305fc9 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -270,7 +270,7 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \
if (len >= sizeof(_value)) \
return -E2BIG; \
len = strscpy(_value, buf, sizeof(_value)); \
- if (len < 0) \
+ if ((ssize_t)len < 0) \
return len; \
strim(_value); \
return len; \
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 17b6590748c0..645aa360628d 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -452,6 +452,7 @@
#define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */
#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */
#define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */
+#define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */
/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c0975815980c..20e6009381ed 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -230,6 +230,8 @@ static inline unsigned long long l1tf_pfn_limit(void)
return BIT_ULL(boot_cpu_data.x86_cache_bits - 1 - PAGE_SHIFT);
}
+void init_cpu_devs(void);
+void get_cpu_vendor(struct cpuinfo_x86 *c);
extern void early_cpu_init(void);
extern void identify_secondary_cpu(struct cpuinfo_x86 *);
extern void print_cpu_info(struct cpuinfo_x86 *);
diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h
index 125c407e2abe..41502bd2afd6 100644
--- a/arch/x86/include/asm/static_call.h
+++ b/arch/x86/include/asm/static_call.h
@@ -65,4 +65,19 @@
extern bool __static_call_fixup(void *tramp, u8 op, void *dest);
+extern void __static_call_update_early(void *tramp, void *func);
+
+#define static_call_update_early(name, _func) \
+({ \
+ typeof(&STATIC_CALL_TRAMP(name)) __F = (_func); \
+ if (static_call_initialized) { \
+ __static_call_update(&STATIC_CALL_KEY(name), \
+ STATIC_CALL_TRAMP_ADDR(name), __F);\
+ } else { \
+ WRITE_ONCE(STATIC_CALL_KEY(name).func, _func); \
+ __static_call_update_early(STATIC_CALL_TRAMP_ADDR(name),\
+ __F); \
+ } \
+})
+
#endif /* _ASM_STATIC_CALL_H */
diff --git a/arch/x86/include/asm/sync_core.h b/arch/x86/include/asm/sync_core.h
index ab7382f92aff..96bda43538ee 100644
--- a/arch/x86/include/asm/sync_core.h
+++ b/arch/x86/include/asm/sync_core.h
@@ -8,7 +8,7 @@
#include <asm/special_insns.h>
#ifdef CONFIG_X86_32
-static inline void iret_to_self(void)
+static __always_inline void iret_to_self(void)
{
asm volatile (
"pushfl\n\t"
@@ -19,7 +19,7 @@ static inline void iret_to_self(void)
: ASM_CALL_CONSTRAINT : : "memory");
}
#else
-static inline void iret_to_self(void)
+static __always_inline void iret_to_self(void)
{
unsigned int tmp;
@@ -55,7 +55,7 @@ static inline void iret_to_self(void)
* Like all of Linux's memory ordering operations, this is a
* compiler barrier as well.
*/
-static inline void sync_core(void)
+static __always_inline void sync_core(void)
{
/*
* The SERIALIZE instruction is the most straightforward way to
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index a2dd24947eb8..97771b9d33af 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -39,9 +39,11 @@
#include <linux/string.h>
#include <linux/types.h>
#include <linux/pgtable.h>
+#include <linux/instrumentation.h>
#include <trace/events/xen.h>
+#include <asm/alternative.h>
#include <asm/page.h>
#include <asm/smap.h>
#include <asm/nospec-branch.h>
@@ -86,11 +88,20 @@ struct xen_dm_op_buf;
* there aren't more than 5 arguments...)
*/
-extern struct { char _entry[32]; } hypercall_page[];
+void xen_hypercall_func(void);
+DECLARE_STATIC_CALL(xen_hypercall, xen_hypercall_func);
-#define __HYPERCALL "call hypercall_page+%c[offset]"
-#define __HYPERCALL_ENTRY(x) \
- [offset] "i" (__HYPERVISOR_##x * sizeof(hypercall_page[0]))
+#ifdef MODULE
+#define __ADDRESSABLE_xen_hypercall
+#else
+#define __ADDRESSABLE_xen_hypercall __ADDRESSABLE_ASM_STR(__SCK__xen_hypercall)
+#endif
+
+#define __HYPERCALL \
+ __ADDRESSABLE_xen_hypercall \
+ "call __SCT__xen_hypercall"
+
+#define __HYPERCALL_ENTRY(x) "a" (x)
#ifdef CONFIG_X86_32
#define __HYPERCALL_RETREG "eax"
@@ -148,7 +159,7 @@ extern struct { char _entry[32]; } hypercall_page[];
__HYPERCALL_0ARG(); \
asm volatile (__HYPERCALL \
: __HYPERCALL_0PARAM \
- : __HYPERCALL_ENTRY(name) \
+ : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \
: __HYPERCALL_CLOBBER0); \
(type)__res; \
})
@@ -159,7 +170,7 @@ extern struct { char _entry[32]; } hypercall_page[];
__HYPERCALL_1ARG(a1); \
asm volatile (__HYPERCALL \
: __HYPERCALL_1PARAM \
- : __HYPERCALL_ENTRY(name) \
+ : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \
: __HYPERCALL_CLOBBER1); \
(type)__res; \
})
@@ -170,7 +181,7 @@ extern struct { char _entry[32]; } hypercall_page[];
__HYPERCALL_2ARG(a1, a2); \
asm volatile (__HYPERCALL \
: __HYPERCALL_2PARAM \
- : __HYPERCALL_ENTRY(name) \
+ : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \
: __HYPERCALL_CLOBBER2); \
(type)__res; \
})
@@ -181,7 +192,7 @@ extern struct { char _entry[32]; } hypercall_page[];
__HYPERCALL_3ARG(a1, a2, a3); \
asm volatile (__HYPERCALL \
: __HYPERCALL_3PARAM \
- : __HYPERCALL_ENTRY(name) \
+ : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \
: __HYPERCALL_CLOBBER3); \
(type)__res; \
})
@@ -192,7 +203,7 @@ extern struct { char _entry[32]; } hypercall_page[];
__HYPERCALL_4ARG(a1, a2, a3, a4); \
asm volatile (__HYPERCALL \
: __HYPERCALL_4PARAM \
- : __HYPERCALL_ENTRY(name) \
+ : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \
: __HYPERCALL_CLOBBER4); \
(type)__res; \
})
@@ -206,12 +217,9 @@ xen_single_call(unsigned int call,
__HYPERCALL_DECLS;
__HYPERCALL_5ARG(a1, a2, a3, a4, a5);
- if (call >= PAGE_SIZE / sizeof(hypercall_page[0]))
- return -EINVAL;
-
- asm volatile(CALL_NOSPEC
+ asm volatile(__HYPERCALL
: __HYPERCALL_5PARAM
- : [thunk_target] "a" (&hypercall_page[call])
+ : __HYPERCALL_ENTRY(call)
: __HYPERCALL_CLOBBER5);
return (long)__res;
diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
index 465647456753..f17d16607882 100644
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -143,11 +143,6 @@ static bool skip_addr(void *dest)
dest < (void*)relocate_kernel + KEXEC_CONTROL_CODE_MAX_SIZE)
return true;
#endif
-#ifdef CONFIG_XEN
- if (dest >= (void *)hypercall_page &&
- dest < (void*)hypercall_page + PAGE_SIZE)
- return true;
-#endif
return false;
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index a5c28975c608..3e9037690814 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -867,7 +867,7 @@ static void cpu_detect_tlb(struct cpuinfo_x86 *c)
tlb_lld_4m[ENTRIES], tlb_lld_1g[ENTRIES]);
}
-static void get_cpu_vendor(struct cpuinfo_x86 *c)
+void get_cpu_vendor(struct cpuinfo_x86 *c)
{
char *v = c->x86_vendor_id;
int i;
@@ -1649,15 +1649,11 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
detect_nopl();
}
-void __init early_cpu_init(void)
+void __init init_cpu_devs(void)
{
const struct cpu_dev *const *cdev;
int count = 0;
-#ifdef CONFIG_PROCESSOR_SELECT
- pr_info("KERNEL supported cpus:\n");
-#endif
-
for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
const struct cpu_dev *cpudev = *cdev;
@@ -1665,20 +1661,30 @@ void __init early_cpu_init(void)
break;
cpu_devs[count] = cpudev;
count++;
+ }
+}
+void __init early_cpu_init(void)
+{
#ifdef CONFIG_PROCESSOR_SELECT
- {
- unsigned int j;
-
- for (j = 0; j < 2; j++) {
- if (!cpudev->c_ident[j])
- continue;
- pr_info(" %s %s\n", cpudev->c_vendor,
- cpudev->c_ident[j]);
- }
- }
+ unsigned int i, j;
+
+ pr_info("KERNEL supported cpus:\n");
#endif
+
+ init_cpu_devs();
+
+#ifdef CONFIG_PROCESSOR_SELECT
+ for (i = 0; i < X86_VENDOR_NUM && cpu_devs[i]; i++) {
+ for (j = 0; j < 2; j++) {
+ if (!cpu_devs[i]->c_ident[j])
+ continue;
+ pr_info(" %s %s\n", cpu_devs[i]->c_vendor,
+ cpu_devs[i]->c_ident[j]);
+ }
}
+#endif
+
early_identify_cpu(&boot_cpu_data);
}
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index d18078834ded..dc12fe5ef3ca 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -223,6 +223,63 @@ static void hv_machine_crash_shutdown(struct pt_regs *regs)
hyperv_cleanup();
}
#endif /* CONFIG_CRASH_DUMP */
+
+static u64 hv_ref_counter_at_suspend;
+static void (*old_save_sched_clock_state)(void);
+static void (*old_restore_sched_clock_state)(void);
+
+/*
+ * Hyper-V clock counter resets during hibernation. Save and restore clock
+ * offset during suspend/resume, while also considering the time passed
+ * before suspend. This is to make sure that sched_clock using hv tsc page
+ * based clocksource, proceeds from where it left off during suspend and
+ * it shows correct time for the timestamps of kernel messages after resume.
+ */
+static void save_hv_clock_tsc_state(void)
+{
+ hv_ref_counter_at_suspend = hv_read_reference_counter();
+}
+
+static void restore_hv_clock_tsc_state(void)
+{
+ /*
+ * Adjust the offsets used by hv tsc clocksource to
+ * account for the time spent before hibernation.
+ * adjusted value = reference counter (time) at suspend
+ * - reference counter (time) now.
+ */
+ hv_adj_sched_clock_offset(hv_ref_counter_at_suspend - hv_read_reference_counter());
+}
+
+/*
+ * Functions to override save_sched_clock_state and restore_sched_clock_state
+ * functions of x86_platform. The Hyper-V clock counter is reset during
+ * suspend-resume and the offset used to measure time needs to be
+ * corrected, post resume.
+ */
+static void hv_save_sched_clock_state(void)
+{
+ old_save_sched_clock_state();
+ save_hv_clock_tsc_state();
+}
+
+static void hv_restore_sched_clock_state(void)
+{
+ restore_hv_clock_tsc_state();
+ old_restore_sched_clock_state();
+}
+
+static void __init x86_setup_ops_for_tsc_pg_clock(void)
+{
+ if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
+ return;
+
+ old_save_sched_clock_state = x86_platform.save_sched_clock_state;
+ x86_platform.save_sched_clock_state = hv_save_sched_clock_state;
+
+ old_restore_sched_clock_state = x86_platform.restore_sched_clock_state;
+ x86_platform.restore_sched_clock_state = hv_restore_sched_clock_state;
+}
#endif /* CONFIG_HYPERV */
static uint32_t __init ms_hyperv_platform(void)
@@ -579,6 +636,7 @@ static void __init ms_hyperv_init_platform(void)
/* Register Hyper-V specific clocksource */
hv_init_clocksource();
+ x86_setup_ops_for_tsc_pg_clock();
hv_vtl_init_platform();
#endif
/*
diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
index 4eefaac64c6c..9eed0c144dad 100644
--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -172,6 +172,15 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
}
EXPORT_SYMBOL_GPL(arch_static_call_transform);
+noinstr void __static_call_update_early(void *tramp, void *func)
+{
+ BUG_ON(system_state != SYSTEM_BOOTING);
+ BUG_ON(!early_boot_irqs_disabled);
+ BUG_ON(static_call_initialized);
+ __text_gen_insn(tramp, JMP32_INSN_OPCODE, tramp, func, JMP32_INSN_SIZE);
+ sync_core();
+}
+
#ifdef CONFIG_MITIGATION_RETHUNK
/*
* This is called by apply_returns() to fix up static call trampolines,
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index fab3ac9a4574..6a17396c8174 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -519,14 +519,10 @@ INIT_PER_CPU(irq_stack_backing_store);
* linker will never mark as relocatable. (Using just ABSOLUTE() is not
* sufficient for that).
*/
-#ifdef CONFIG_XEN
#ifdef CONFIG_XEN_PV
xen_elfnote_entry_value =
ABSOLUTE(xen_elfnote_entry) + ABSOLUTE(startup_xen);
#endif
-xen_elfnote_hypercall_page_value =
- ABSOLUTE(xen_elfnote_hypercall_page) + ABSOLUTE(hypercall_page);
-#endif
#ifdef CONFIG_PVH
xen_elfnote_phys32_entry_value =
ABSOLUTE(xen_elfnote_phys32_entry) + ABSOLUTE(pvh_start_xen - LOAD_OFFSET);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 22e7ad235123..2401606db260 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3364,18 +3364,6 @@ static bool fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu,
return true;
}
-static bool is_access_allowed(struct kvm_page_fault *fault, u64 spte)
-{
- if (fault->exec)
- return is_executable_pte(spte);
-
- if (fault->write)
- return is_writable_pte(spte);
-
- /* Fault was on Read access */
- return spte & PT_PRESENT_MASK;
-}
-
/*
* Returns the last level spte pointer of the shadow page walk for the given
* gpa, and sets *spte to the spte value. This spte may be non-preset. If no
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index f332b33bc817..af10bc0380a3 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -462,6 +462,23 @@ static inline bool is_mmu_writable_spte(u64 spte)
}
/*
+ * Returns true if the access indicated by @fault is allowed by the existing
+ * SPTE protections. Note, the caller is responsible for checking that the
+ * SPTE is a shadow-present, leaf SPTE (either before or after).
+ */
+static inline bool is_access_allowed(struct kvm_page_fault *fault, u64 spte)
+{
+ if (fault->exec)
+ return is_executable_pte(spte);
+
+ if (fault->write)
+ return is_writable_pte(spte);
+
+ /* Fault was on Read access */
+ return spte & PT_PRESENT_MASK;
+}
+
+/*
* If the MMU-writable flag is cleared, i.e. the SPTE is write-protected for
* write-tracking, remote TLBs must be flushed, even if the SPTE was read-only,
* as KVM allows stale Writable TLB entries to exist. When dirty logging, KVM
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 4508d868f1cd..2f15e0e33903 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -985,6 +985,11 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
if (fault->prefetch && is_shadow_present_pte(iter->old_spte))
return RET_PF_SPURIOUS;
+ if (is_shadow_present_pte(iter->old_spte) &&
+ is_access_allowed(fault, iter->old_spte) &&
+ is_last_spte(iter->old_spte, iter->level))
+ return RET_PF_SPURIOUS;
+
if (unlikely(!fault->slot))
new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL);
else
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 4b74ea91f4e6..65fd245a9953 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -1199,6 +1199,12 @@ bool avic_hardware_setup(void)
return false;
}
+ if (cc_platform_has(CC_ATTR_HOST_SEV_SNP) &&
+ !boot_cpu_has(X86_FEATURE_HV_INUSE_WR_ALLOWED)) {
+ pr_warn("AVIC disabled: missing HvInUseWrAllowed on SNP-enabled system\n");
+ return false;
+ }
+
if (boot_cpu_has(X86_FEATURE_AVIC)) {
pr_info("AVIC enabled\n");
} else if (force_avic) {
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index dd15cc635655..21dacd312779 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3201,15 +3201,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
if (data & ~supported_de_cfg)
return 1;
- /*
- * Don't let the guest change the host-programmed value. The
- * MSR is very model specific, i.e. contains multiple bits that
- * are completely unknown to KVM, and the one bit known to KVM
- * is simply a reflection of hardware capabilities.
- */
- if (!msr->host_initiated && data != svm->msr_decfg)
- return 1;
-
svm->msr_decfg = data;
break;
}
diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h
index 1715d2ab07be..ad9116a99bcc 100644
--- a/arch/x86/kvm/vmx/posted_intr.h
+++ b/arch/x86/kvm/vmx/posted_intr.h
@@ -2,7 +2,7 @@
#ifndef __KVM_X86_VMX_POSTED_INTR_H
#define __KVM_X86_VMX_POSTED_INTR_H
-#include <linux/find.h>
+#include <linux/bitmap.h>
#include <asm/posted_intr.h>
void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c8160baf3838..c79a8cc57ba4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9976,7 +9976,7 @@ static int complete_hypercall_exit(struct kvm_vcpu *vcpu)
{
u64 ret = vcpu->run->hypercall.ret;
- if (!is_64_bit_mode(vcpu))
+ if (!is_64_bit_hypercall(vcpu))
ret = (u32)ret;
kvm_rax_write(vcpu, ret);
++vcpu->stat.hypercalls;
@@ -12724,6 +12724,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_hv_init_vm(kvm);
kvm_xen_init_vm(kvm);
+ if (ignore_msrs && !report_ignored_msrs) {
+ pr_warn_once("Running KVM with ignore_msrs=1 and report_ignored_msrs=0 is not a\n"
+ "a supported configuration. Lying to the guest about the existence of MSRs\n"
+ "may cause the guest operating system to hang or produce errors. If a guest\n"
+ "does not run without ignore_msrs=1, please report it to kvm@vger.kernel.org.\n");
+ }
+
return 0;
out_uninit_mmu:
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 84e5adbd0925..43dcd8c7badc 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -2,6 +2,7 @@
#include <linux/console.h>
#include <linux/cpu.h>
+#include <linux/instrumentation.h>
#include <linux/kexec.h>
#include <linux/memblock.h>
#include <linux/slab.h>
@@ -21,7 +22,8 @@
#include "xen-ops.h"
-EXPORT_SYMBOL_GPL(hypercall_page);
+DEFINE_STATIC_CALL(xen_hypercall, xen_hypercall_hvm);
+EXPORT_STATIC_CALL_TRAMP(xen_hypercall);
/*
* Pointer to the xen_vcpu_info structure or
@@ -68,6 +70,67 @@ EXPORT_SYMBOL(xen_start_flags);
*/
struct shared_info *HYPERVISOR_shared_info = &xen_dummy_shared_info;
+static __ref void xen_get_vendor(void)
+{
+ init_cpu_devs();
+ cpu_detect(&boot_cpu_data);
+ get_cpu_vendor(&boot_cpu_data);
+}
+
+void xen_hypercall_setfunc(void)
+{
+ if (static_call_query(xen_hypercall) != xen_hypercall_hvm)
+ return;
+
+ if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON))
+ static_call_update(xen_hypercall, xen_hypercall_amd);
+ else
+ static_call_update(xen_hypercall, xen_hypercall_intel);
+}
+
+/*
+ * Evaluate processor vendor in order to select the correct hypercall
+ * function for HVM/PVH guests.
+ * Might be called very early in boot before vendor has been set by
+ * early_cpu_init().
+ */
+noinstr void *__xen_hypercall_setfunc(void)
+{
+ void (*func)(void);
+
+ /*
+ * Xen is supported only on CPUs with CPUID, so testing for
+ * X86_FEATURE_CPUID is a test for early_cpu_init() having been
+ * run.
+ *
+ * Note that __xen_hypercall_setfunc() is noinstr only due to a nasty
+ * dependency chain: it is being called via the xen_hypercall static
+ * call when running as a PVH or HVM guest. Hypercalls need to be
+ * noinstr due to PV guests using hypercalls in noinstr code. So we
+ * can safely tag the function body as "instrumentation ok", since
+ * the PV guest requirement is not of interest here (xen_get_vendor()
+ * calls noinstr functions, and static_call_update_early() might do
+ * so, too).
+ */
+ instrumentation_begin();
+
+ if (!boot_cpu_has(X86_FEATURE_CPUID))
+ xen_get_vendor();
+
+ if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON))
+ func = xen_hypercall_amd;
+ else
+ func = xen_hypercall_intel;
+
+ static_call_update_early(xen_hypercall, func);
+
+ instrumentation_end();
+
+ return func;
+}
+
static int xen_cpu_up_online(unsigned int cpu)
{
xen_init_lock_cpu(cpu);
diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index 24d2957a4726..fe57ff85d004 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -106,15 +106,8 @@ static void __init init_hvm_pv_info(void)
/* PVH set up hypercall page in xen_prepare_pvh(). */
if (xen_pvh_domain())
pv_info.name = "Xen PVH";
- else {
- u64 pfn;
- uint32_t msr;
-
+ else
pv_info.name = "Xen HVM";
- msr = cpuid_ebx(base + 2);
- pfn = __pa(hypercall_page);
- wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
- }
xen_setup_features();
@@ -300,6 +293,10 @@ static uint32_t __init xen_platform_hvm(void)
if (xen_pv_domain())
return 0;
+ /* Set correct hypercall function. */
+ if (xen_domain)
+ xen_hypercall_setfunc();
+
if (xen_pvh_domain() && nopv) {
/* Guest booting via the Xen-PVH boot entry goes here */
pr_info("\"nopv\" parameter is ignored in PVH guest\n");
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index d6818c6cafda..a8eb7e0c473c 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1341,6 +1341,9 @@ asmlinkage __visible void __init xen_start_kernel(struct start_info *si)
xen_domain_type = XEN_PV_DOMAIN;
xen_start_flags = xen_start_info->flags;
+ /* Interrupts are guaranteed to be off initially. */
+ early_boot_irqs_disabled = true;
+ static_call_update_early(xen_hypercall, xen_hypercall_pv);
xen_setup_features();
@@ -1431,7 +1434,6 @@ asmlinkage __visible void __init xen_start_kernel(struct start_info *si)
WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv));
local_irq_disable();
- early_boot_irqs_disabled = true;
xen_raw_console_write("mapping kernel into physical memory\n");
xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base,
diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
index bf68c329fc01..0e3d930bcb89 100644
--- a/arch/x86/xen/enlighten_pvh.c
+++ b/arch/x86/xen/enlighten_pvh.c
@@ -129,17 +129,10 @@ static void __init pvh_arch_setup(void)
void __init xen_pvh_init(struct boot_params *boot_params)
{
- u32 msr;
- u64 pfn;
-
xen_pvh = 1;
xen_domain_type = XEN_HVM_DOMAIN;
xen_start_flags = pvh_start_info.flags;
- msr = cpuid_ebx(xen_cpuid_base() + 2);
- pfn = __pa(hypercall_page);
- wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
-
x86_init.oem.arch_setup = pvh_arch_setup;
x86_init.oem.banner = xen_banner;
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 83189cf5cdce..b518f36d1ca2 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -20,10 +20,33 @@
#include <linux/init.h>
#include <linux/linkage.h>
+#include <linux/objtool.h>
#include <../entry/calling.h>
.pushsection .noinstr.text, "ax"
/*
+ * PV hypercall interface to the hypervisor.
+ *
+ * Called via inline asm(), so better preserve %rcx and %r11.
+ *
+ * Input:
+ * %eax: hypercall number
+ * %rdi, %rsi, %rdx, %r10, %r8: args 1..5 for the hypercall
+ * Output: %rax
+ */
+SYM_FUNC_START(xen_hypercall_pv)
+ ANNOTATE_NOENDBR
+ push %rcx
+ push %r11
+ UNWIND_HINT_SAVE
+ syscall
+ UNWIND_HINT_RESTORE
+ pop %r11
+ pop %rcx
+ RET
+SYM_FUNC_END(xen_hypercall_pv)
+
+/*
* Disabling events is simply a matter of making the event mask
* non-zero.
*/
@@ -176,7 +199,6 @@ SYM_CODE_START(xen_early_idt_handler_array)
SYM_CODE_END(xen_early_idt_handler_array)
__FINIT
-hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
/*
* Xen64 iret frame:
*
@@ -186,17 +208,28 @@ hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
* cs
* rip <-- standard iret frame
*
- * flags
+ * flags <-- xen_iret must push from here on
*
- * rcx }
- * r11 }<-- pushed by hypercall page
- * rsp->rax }
+ * rcx
+ * r11
+ * rsp->rax
*/
+.macro xen_hypercall_iret
+ pushq $0 /* Flags */
+ push %rcx
+ push %r11
+ push %rax
+ mov $__HYPERVISOR_iret, %eax
+ syscall /* Do the IRET. */
+#ifdef CONFIG_MITIGATION_SLS
+ int3
+#endif
+.endm
+
SYM_CODE_START(xen_iret)
UNWIND_HINT_UNDEFINED
ANNOTATE_NOENDBR
- pushq $0
- jmp hypercall_iret
+ xen_hypercall_iret
SYM_CODE_END(xen_iret)
/*
@@ -301,8 +334,7 @@ SYM_CODE_START(xen_entry_SYSENTER_compat)
ENDBR
lea 16(%rsp), %rsp /* strip %rcx, %r11 */
mov $-ENOSYS, %rax
- pushq $0
- jmp hypercall_iret
+ xen_hypercall_iret
SYM_CODE_END(xen_entry_SYSENTER_compat)
SYM_CODE_END(xen_entry_SYSCALL_compat)
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 7f6c69dbb816..9252652afe59 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -6,9 +6,11 @@
#include <linux/elfnote.h>
#include <linux/init.h>
+#include <linux/instrumentation.h>
#include <asm/boot.h>
#include <asm/asm.h>
+#include <asm/frame.h>
#include <asm/msr.h>
#include <asm/page_types.h>
#include <asm/percpu.h>
@@ -20,28 +22,6 @@
#include <xen/interface/xen-mca.h>
#include <asm/xen/interface.h>
-.pushsection .noinstr.text, "ax"
- .balign PAGE_SIZE
-SYM_CODE_START(hypercall_page)
- .rept (PAGE_SIZE / 32)
- UNWIND_HINT_FUNC
- ANNOTATE_NOENDBR
- ANNOTATE_UNRET_SAFE
- ret
- /*
- * Xen will write the hypercall page, and sort out ENDBR.
- */
- .skip 31, 0xcc
- .endr
-
-#define HYPERCALL(n) \
- .equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \
- .type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32
-#include <asm/xen-hypercalls.h>
-#undef HYPERCALL
-SYM_CODE_END(hypercall_page)
-.popsection
-
#ifdef CONFIG_XEN_PV
__INIT
SYM_CODE_START(startup_xen)
@@ -87,6 +67,87 @@ SYM_CODE_END(xen_cpu_bringup_again)
#endif
#endif
+ .pushsection .noinstr.text, "ax"
+/*
+ * Xen hypercall interface to the hypervisor.
+ *
+ * Input:
+ * %eax: hypercall number
+ * 32-bit:
+ * %ebx, %ecx, %edx, %esi, %edi: args 1..5 for the hypercall
+ * 64-bit:
+ * %rdi, %rsi, %rdx, %r10, %r8: args 1..5 for the hypercall
+ * Output: %[er]ax
+ */
+SYM_FUNC_START(xen_hypercall_hvm)
+ ENDBR
+ FRAME_BEGIN
+ /* Save all relevant registers (caller save and arguments). */
+#ifdef CONFIG_X86_32
+ push %eax
+ push %ebx
+ push %ecx
+ push %edx
+ push %esi
+ push %edi
+#else
+ push %rax
+ push %rcx
+ push %rdx
+ push %rdi
+ push %rsi
+ push %r11
+ push %r10
+ push %r9
+ push %r8
+#ifdef CONFIG_FRAME_POINTER
+ pushq $0 /* Dummy push for stack alignment. */
+#endif
+#endif
+ /* Set the vendor specific function. */
+ call __xen_hypercall_setfunc
+ /* Set ZF = 1 if AMD, Restore saved registers. */
+#ifdef CONFIG_X86_32
+ lea xen_hypercall_amd, %ebx
+ cmp %eax, %ebx
+ pop %edi
+ pop %esi
+ pop %edx
+ pop %ecx
+ pop %ebx
+ pop %eax
+#else
+ lea xen_hypercall_amd(%rip), %rbx
+ cmp %rax, %rbx
+#ifdef CONFIG_FRAME_POINTER
+ pop %rax /* Dummy pop. */
+#endif
+ pop %r8
+ pop %r9
+ pop %r10
+ pop %r11
+ pop %rsi
+ pop %rdi
+ pop %rdx
+ pop %rcx
+ pop %rax
+#endif
+ /* Use correct hypercall function. */
+ jz xen_hypercall_amd
+ jmp xen_hypercall_intel
+SYM_FUNC_END(xen_hypercall_hvm)
+
+SYM_FUNC_START(xen_hypercall_amd)
+ vmmcall
+ RET
+SYM_FUNC_END(xen_hypercall_amd)
+
+SYM_FUNC_START(xen_hypercall_intel)
+ vmcall
+ RET
+SYM_FUNC_END(xen_hypercall_intel)
+ .popsection
+
ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz "2.6")
ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0")
@@ -116,8 +177,6 @@ SYM_CODE_END(xen_cpu_bringup_again)
#else
# define FEATURES_DOM0 0
#endif
- ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .globl xen_elfnote_hypercall_page;
- xen_elfnote_hypercall_page: _ASM_PTR xen_elfnote_hypercall_page_value - .)
ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES,
.long FEATURES_PV | FEATURES_PVH | FEATURES_DOM0)
ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index e1b782e823e6..63c13a2ccf55 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -326,4 +326,13 @@ static inline void xen_smp_intr_free_pv(unsigned int cpu) {}
static inline void xen_smp_count_cpus(void) { }
#endif /* CONFIG_SMP */
+#ifdef CONFIG_XEN_PV
+void xen_hypercall_pv(void);
+#endif
+void xen_hypercall_hvm(void);
+void xen_hypercall_amd(void);
+void xen_hypercall_intel(void);
+void xen_hypercall_setfunc(void);
+void *__xen_hypercall_setfunc(void);
+
#endif /* XEN_OPS_H */
diff --git a/block/bdev.c b/block/bdev.c
index 738e3c8457e7..9d73a8fbf7f9 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -155,8 +155,7 @@ int set_blocksize(struct file *file, int size)
struct inode *inode = file->f_mapping->host;
struct block_device *bdev = I_BDEV(inode);
- /* Size must be a power of two, and between 512 and PAGE_SIZE */
- if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
+ if (blk_validate_block_size(size))
return -EINVAL;
/* Size cannot be smaller than the size supported by the device */
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index cd5ea6eaa76b..156e9bb07abf 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -275,13 +275,15 @@ void blk_mq_sysfs_unregister_hctxs(struct request_queue *q)
struct blk_mq_hw_ctx *hctx;
unsigned long i;
- lockdep_assert_held(&q->sysfs_dir_lock);
-
+ mutex_lock(&q->sysfs_dir_lock);
if (!q->mq_sysfs_init_done)
- return;
+ goto unlock;
queue_for_each_hw_ctx(q, hctx, i)
blk_mq_unregister_hctx(hctx);
+
+unlock:
+ mutex_unlock(&q->sysfs_dir_lock);
}
int blk_mq_sysfs_register_hctxs(struct request_queue *q)
@@ -290,10 +292,9 @@ int blk_mq_sysfs_register_hctxs(struct request_queue *q)
unsigned long i;
int ret = 0;
- lockdep_assert_held(&q->sysfs_dir_lock);
-
+ mutex_lock(&q->sysfs_dir_lock);
if (!q->mq_sysfs_init_done)
- return ret;
+ goto unlock;
queue_for_each_hw_ctx(q, hctx, i) {
ret = blk_mq_register_hctx(hctx);
@@ -301,5 +302,8 @@ int blk_mq_sysfs_register_hctxs(struct request_queue *q)
break;
}
+unlock:
+ mutex_unlock(&q->sysfs_dir_lock);
+
return ret;
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 6b6111513986..8ac19d4ae3c0 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -4412,6 +4412,15 @@ struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q,
}
EXPORT_SYMBOL(blk_mq_alloc_disk_for_queue);
+/*
+ * Only hctx removed from cpuhp list can be reused
+ */
+static bool blk_mq_hctx_is_reusable(struct blk_mq_hw_ctx *hctx)
+{
+ return hlist_unhashed(&hctx->cpuhp_online) &&
+ hlist_unhashed(&hctx->cpuhp_dead);
+}
+
static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
struct blk_mq_tag_set *set, struct request_queue *q,
int hctx_idx, int node)
@@ -4421,7 +4430,7 @@ static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
/* reuse dead hctx first */
spin_lock(&q->unused_hctx_lock);
list_for_each_entry(tmp, &q->unused_hctx_list, hctx_list) {
- if (tmp->numa_node == node) {
+ if (tmp->numa_node == node && blk_mq_hctx_is_reusable(tmp)) {
hctx = tmp;
break;
}
@@ -4453,8 +4462,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
unsigned long i, j;
/* protect against switching io scheduler */
- lockdep_assert_held(&q->sysfs_lock);
-
+ mutex_lock(&q->sysfs_lock);
for (i = 0; i < set->nr_hw_queues; i++) {
int old_node;
int node = blk_mq_get_hctx_node(set, i);
@@ -4487,6 +4495,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
xa_for_each_start(&q->hctx_table, j, hctx, j)
blk_mq_exit_hctx(q, set, hctx, j);
+ mutex_unlock(&q->sysfs_lock);
/* unregister cpuhp callbacks for exited hctxs */
blk_mq_remove_hw_queues_cpuhp(q);
@@ -4518,14 +4527,10 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
xa_init(&q->hctx_table);
- mutex_lock(&q->sysfs_lock);
-
blk_mq_realloc_hw_ctxs(set, q);
if (!q->nr_hw_queues)
goto err_hctxs;
- mutex_unlock(&q->sysfs_lock);
-
INIT_WORK(&q->timeout_work, blk_mq_timeout_work);
blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ);
@@ -4544,7 +4549,6 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
return 0;
err_hctxs:
- mutex_unlock(&q->sysfs_lock);
blk_mq_release(q);
err_exit:
q->mq_ops = NULL;
@@ -4925,12 +4929,12 @@ static bool blk_mq_elv_switch_none(struct list_head *head,
return false;
/* q->elevator needs protection from ->sysfs_lock */
- lockdep_assert_held(&q->sysfs_lock);
+ mutex_lock(&q->sysfs_lock);
/* the check has to be done with holding sysfs_lock */
if (!q->elevator) {
kfree(qe);
- goto out;
+ goto unlock;
}
INIT_LIST_HEAD(&qe->node);
@@ -4940,7 +4944,9 @@ static bool blk_mq_elv_switch_none(struct list_head *head,
__elevator_get(qe->type);
list_add(&qe->node, head);
elevator_disable(q);
-out:
+unlock:
+ mutex_unlock(&q->sysfs_lock);
+
return true;
}
@@ -4969,9 +4975,11 @@ static void blk_mq_elv_switch_back(struct list_head *head,
list_del(&qe->node);
kfree(qe);
+ mutex_lock(&q->sysfs_lock);
elevator_switch(q, t);
/* drop the reference acquired in blk_mq_elv_switch_none */
elevator_put(t);
+ mutex_unlock(&q->sysfs_lock);
}
static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
@@ -4991,11 +4999,8 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
if (set->nr_maps == 1 && nr_hw_queues == set->nr_hw_queues)
return;
- list_for_each_entry(q, &set->tag_list, tag_set_list) {
- mutex_lock(&q->sysfs_dir_lock);
- mutex_lock(&q->sysfs_lock);
+ list_for_each_entry(q, &set->tag_list, tag_set_list)
blk_mq_freeze_queue(q);
- }
/*
* Switch IO scheduler to 'none', cleaning up the data associated
* with the previous scheduler. We will switch back once we are done
@@ -5051,11 +5056,8 @@ switch_back:
list_for_each_entry(q, &set->tag_list, tag_set_list)
blk_mq_elv_switch_back(&head, q);
- list_for_each_entry(q, &set->tag_list, tag_set_list) {
+ list_for_each_entry(q, &set->tag_list, tag_set_list)
blk_mq_unfreeze_queue(q);
- mutex_unlock(&q->sysfs_lock);
- mutex_unlock(&q->sysfs_dir_lock);
- }
/* Free the excess tags when nr_hw_queues shrink. */
for (i = set->nr_hw_queues; i < prev_nr_hw_queues; i++)
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 64f70c713d2f..767598e719ab 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -706,11 +706,11 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
if (entry->load_module)
entry->load_module(disk, page, length);
- mutex_lock(&q->sysfs_lock);
blk_mq_freeze_queue(q);
+ mutex_lock(&q->sysfs_lock);
res = entry->store(disk, page, length);
- blk_mq_unfreeze_queue(q);
mutex_unlock(&q->sysfs_lock);
+ blk_mq_unfreeze_queue(q);
return res;
}
diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c
index d8e97a760fbc..16178054e629 100644
--- a/drivers/accel/ivpu/ivpu_gem.c
+++ b/drivers/accel/ivpu/ivpu_gem.c
@@ -409,7 +409,7 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
mutex_lock(&bo->lock);
drm_printf(p, "%-9p %-3u 0x%-12llx %-10lu 0x%-8x %-4u",
- bo, bo->ctx->id, bo->vpu_addr, bo->base.base.size,
+ bo, bo->ctx ? bo->ctx->id : 0, bo->vpu_addr, bo->base.base.size,
bo->flags, kref_read(&bo->base.base.refcount));
if (bo->base.pages)
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c
index 891967a95bc3..0af614dfb6f9 100644
--- a/drivers/accel/ivpu/ivpu_mmu_context.c
+++ b/drivers/accel/ivpu/ivpu_mmu_context.c
@@ -612,18 +612,22 @@ int ivpu_mmu_reserved_context_init(struct ivpu_device *vdev)
if (!ivpu_mmu_ensure_pgd(vdev, &vdev->rctx.pgtable)) {
ivpu_err(vdev, "Failed to allocate root page table for reserved context\n");
ret = -ENOMEM;
- goto unlock;
+ goto err_ctx_fini;
}
ret = ivpu_mmu_cd_set(vdev, vdev->rctx.id, &vdev->rctx.pgtable);
if (ret) {
ivpu_err(vdev, "Failed to set context descriptor for reserved context\n");
- goto unlock;
+ goto err_ctx_fini;
}
-unlock:
mutex_unlock(&vdev->rctx.lock);
return ret;
+
+err_ctx_fini:
+ mutex_unlock(&vdev->rctx.lock);
+ ivpu_mmu_context_fini(vdev, &vdev->rctx);
+ return ret;
}
void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev)
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index dbc0711e28d1..949f4233946c 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -378,6 +378,7 @@ void ivpu_pm_init(struct ivpu_device *vdev)
pm_runtime_use_autosuspend(dev);
pm_runtime_set_autosuspend_delay(dev, delay);
+ pm_runtime_set_active(dev);
ivpu_dbg(vdev, PM, "Autosuspend delay = %d\n", delay);
}
@@ -392,7 +393,6 @@ void ivpu_pm_enable(struct ivpu_device *vdev)
{
struct device *dev = vdev->drm.dev;
- pm_runtime_set_active(dev);
pm_runtime_allow(dev);
pm_runtime_mark_last_busy(dev);
pm_runtime_put_autosuspend(dev);
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index d65cd08ba8e1..d81b55f5068c 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -135,10 +135,10 @@ config ACPI_REV_OVERRIDE_POSSIBLE
config ACPI_EC
bool "Embedded Controller"
depends on HAS_IOPORT
- default X86
+ default X86 || LOONGARCH
help
This driver handles communication with the microcontroller
- on many x86 laptops and other machines.
+ on many x86/LoongArch laptops and other machines.
config ACPI_EC_DEBUGFS
tristate "EC read/write access through /sys/kernel/debug/ec"
diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig
index 21545ffba065..8934e6ad5772 100644
--- a/drivers/auxdisplay/Kconfig
+++ b/drivers/auxdisplay/Kconfig
@@ -489,7 +489,7 @@ config IMG_ASCII_LCD
config HT16K33
tristate "Holtek Ht16K33 LED controller with keyscan"
- depends on FB && I2C && INPUT
+ depends on FB && I2C && INPUT && BACKLIGHT_CLASS_DEVICE
select FB_SYSMEM_HELPERS
select INPUT_MATRIXKMAP
select FB_BACKLIGHT
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 3dee026988dc..45df5eeabc5e 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -614,6 +614,12 @@ static ssize_t backing_dev_store(struct device *dev,
}
nr_pages = i_size_read(inode) >> PAGE_SHIFT;
+ /* Refuse to use zero sized device (also prevents self reference) */
+ if (!nr_pages) {
+ err = -EINVAL;
+ goto out;
+ }
+
bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
if (!bitmap) {
@@ -1438,12 +1444,16 @@ static void zram_meta_free(struct zram *zram, u64 disksize)
size_t num_pages = disksize >> PAGE_SHIFT;
size_t index;
+ if (!zram->table)
+ return;
+
/* Free all pages that are still in this zram device */
for (index = 0; index < num_pages; index++)
zram_free_page(zram, index);
zs_destroy_pool(zram->mem_pool);
vfree(zram->table);
+ zram->table = NULL;
}
static bool zram_meta_alloc(struct zram *zram, u64 disksize)
@@ -2320,11 +2330,6 @@ static void zram_reset_device(struct zram *zram)
zram->limit_pages = 0;
- if (!init_done(zram)) {
- up_write(&zram->init_lock);
- return;
- }
-
set_capacity_and_notify(zram->disk, 0);
part_stat_set_all(zram->disk->part0, 0);
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
index 99177835cade..b39dee7b93af 100644
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -27,7 +27,8 @@
#include <asm/mshyperv.h>
static struct clock_event_device __percpu *hv_clock_event;
-static u64 hv_sched_clock_offset __ro_after_init;
+/* Note: offset can hold negative values after hibernation. */
+static u64 hv_sched_clock_offset __read_mostly;
/*
* If false, we're using the old mechanism for stimer0 interrupts
@@ -470,6 +471,17 @@ static void resume_hv_clock_tsc(struct clocksource *arg)
hv_set_msr(HV_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
}
+/*
+ * Called during resume from hibernation, from overridden
+ * x86_platform.restore_sched_clock_state routine. This is to adjust offsets
+ * used to calculate time for hv tsc page based sched_clock, to account for
+ * time spent before hibernation.
+ */
+void hv_adj_sched_clock_offset(u64 offset)
+{
+ hv_sched_clock_offset -= offset;
+}
+
#ifdef HAVE_VDSO_CLOCKMODE_HVCLOCK
static int hv_cs_enable(struct clocksource *cs)
{
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index d7630bab2516..66e5dfc711c0 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -374,15 +374,19 @@ static inline int amd_pstate_cppc_enable(bool enable)
static int msr_init_perf(struct amd_cpudata *cpudata)
{
- u64 cap1;
+ u64 cap1, numerator;
int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
&cap1);
if (ret)
return ret;
- WRITE_ONCE(cpudata->highest_perf, AMD_CPPC_HIGHEST_PERF(cap1));
- WRITE_ONCE(cpudata->max_limit_perf, AMD_CPPC_HIGHEST_PERF(cap1));
+ ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator);
+ if (ret)
+ return ret;
+
+ WRITE_ONCE(cpudata->highest_perf, numerator);
+ WRITE_ONCE(cpudata->max_limit_perf, numerator);
WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
@@ -394,13 +398,18 @@ static int msr_init_perf(struct amd_cpudata *cpudata)
static int shmem_init_perf(struct amd_cpudata *cpudata)
{
struct cppc_perf_caps cppc_perf;
+ u64 numerator;
int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
if (ret)
return ret;
- WRITE_ONCE(cpudata->highest_perf, cppc_perf.highest_perf);
- WRITE_ONCE(cpudata->max_limit_perf, cppc_perf.highest_perf);
+ ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator);
+ if (ret)
+ return ret;
+
+ WRITE_ONCE(cpudata->highest_perf, numerator);
+ WRITE_ONCE(cpudata->max_limit_perf, numerator);
WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf);
WRITE_ONCE(cpudata->lowest_nonlinear_perf,
cppc_perf.lowest_nonlinear_perf);
@@ -561,16 +570,13 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
{
- u32 max_limit_perf, min_limit_perf, lowest_perf, max_perf;
+ u32 max_limit_perf, min_limit_perf, lowest_perf, max_perf, max_freq;
struct amd_cpudata *cpudata = policy->driver_data;
- if (cpudata->boost_supported && !policy->boost_enabled)
- max_perf = READ_ONCE(cpudata->nominal_perf);
- else
- max_perf = READ_ONCE(cpudata->highest_perf);
-
- max_limit_perf = div_u64(policy->max * max_perf, policy->cpuinfo.max_freq);
- min_limit_perf = div_u64(policy->min * max_perf, policy->cpuinfo.max_freq);
+ max_perf = READ_ONCE(cpudata->highest_perf);
+ max_freq = READ_ONCE(cpudata->max_freq);
+ max_limit_perf = div_u64(policy->max * max_perf, max_freq);
+ min_limit_perf = div_u64(policy->min * max_perf, max_freq);
lowest_perf = READ_ONCE(cpudata->lowest_perf);
if (min_limit_perf < lowest_perf)
@@ -889,7 +895,6 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
{
int ret;
u32 min_freq, max_freq;
- u64 numerator;
u32 nominal_perf, nominal_freq;
u32 lowest_nonlinear_perf, lowest_nonlinear_freq;
u32 boost_ratio, lowest_nonlinear_ratio;
@@ -911,10 +916,7 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
nominal_perf = READ_ONCE(cpudata->nominal_perf);
- ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator);
- if (ret)
- return ret;
- boost_ratio = div_u64(numerator << SCHED_CAPACITY_SHIFT, nominal_perf);
+ boost_ratio = div_u64(cpudata->highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT) * 1000;
lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
@@ -1869,18 +1871,18 @@ static int __init amd_pstate_init(void)
static_call_update(amd_pstate_update_perf, shmem_update_perf);
}
- ret = amd_pstate_register_driver(cppc_state);
- if (ret) {
- pr_err("failed to register with return %d\n", ret);
- return ret;
- }
-
if (amd_pstate_prefcore) {
ret = amd_detect_prefcore(&amd_pstate_prefcore);
if (ret)
return ret;
}
+ ret = amd_pstate_register_driver(cppc_state);
+ if (ret) {
+ pr_err("failed to register with return %d\n", ret);
+ return ret;
+ }
+
dev_root = bus_get_dev_root(&cpu_subsys);
if (dev_root) {
ret = sysfs_create_group(&dev_root->kobj, &amd_pstate_global_attr_group);
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index d77899650798..b98b1ccffd1c 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -1295,6 +1295,7 @@ static int cxl_port_setup_targets(struct cxl_port *port,
struct cxl_region_params *p = &cxlr->params;
struct cxl_decoder *cxld = cxl_rr->decoder;
struct cxl_switch_decoder *cxlsd;
+ struct cxl_port *iter = port;
u16 eig, peig;
u8 eiw, peiw;
@@ -1311,16 +1312,26 @@ static int cxl_port_setup_targets(struct cxl_port *port,
cxlsd = to_cxl_switch_decoder(&cxld->dev);
if (cxl_rr->nr_targets_set) {
- int i, distance;
+ int i, distance = 1;
+ struct cxl_region_ref *cxl_rr_iter;
/*
- * Passthrough decoders impose no distance requirements between
- * peers
+ * The "distance" between peer downstream ports represents which
+ * endpoint positions in the region interleave a given port can
+ * host.
+ *
+ * For example, at the root of a hierarchy the distance is
+ * always 1 as every index targets a different host-bridge. At
+ * each subsequent switch level those ports map every Nth region
+ * position where N is the width of the switch == distance.
*/
- if (cxl_rr->nr_targets == 1)
- distance = 0;
- else
- distance = p->nr_targets / cxl_rr->nr_targets;
+ do {
+ cxl_rr_iter = cxl_rr_load(iter, cxlr);
+ distance *= cxl_rr_iter->nr_targets;
+ iter = to_cxl_port(iter->dev.parent);
+ } while (!is_cxl_root(iter));
+ distance *= cxlrd->cxlsd.cxld.interleave_ways;
+
for (i = 0; i < cxl_rr->nr_targets_set; i++)
if (ep->dport == cxlsd->target[i]) {
rc = check_last_peer(cxled, ep, cxl_rr,
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 0241d1d7133a..6d94ff4a4f1a 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -836,6 +836,9 @@ static ssize_t rcd_pcie_cap_emit(struct device *dev, u16 offset, char *buf, size
if (!root_dev)
return -ENXIO;
+ if (!dport->regs.rcd_pcie_cap)
+ return -ENXIO;
+
guard(device)(root_dev);
if (!root_dev->driver)
return -ENXIO;
@@ -1032,8 +1035,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
return rc;
- rc = cxl_pci_ras_unmask(pdev);
- if (rc)
+ if (cxl_pci_ras_unmask(pdev))
dev_dbg(&pdev->dev, "No RAS reporting unmasked\n");
pci_save_state(pdev);
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 7eeee3a38202..5baa83b85515 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -60,7 +60,7 @@ static void __dma_buf_debugfs_list_add(struct dma_buf *dmabuf)
{
}
-static void __dma_buf_debugfs_list_del(struct file *file)
+static void __dma_buf_debugfs_list_del(struct dma_buf *dmabuf)
{
}
#endif
diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
index 8ce1f074c2d3..cc7398cc17d6 100644
--- a/drivers/dma-buf/udmabuf.c
+++ b/drivers/dma-buf/udmabuf.c
@@ -297,7 +297,7 @@ static const struct dma_buf_ops udmabuf_ops = {
};
#define SEALS_WANTED (F_SEAL_SHRINK)
-#define SEALS_DENIED (F_SEAL_WRITE)
+#define SEALS_DENIED (F_SEAL_WRITE|F_SEAL_FUTURE_WRITE)
static int check_memfd_seals(struct file *memfd)
{
@@ -317,12 +317,10 @@ static int check_memfd_seals(struct file *memfd)
return 0;
}
-static int export_udmabuf(struct udmabuf *ubuf,
- struct miscdevice *device,
- u32 flags)
+static struct dma_buf *export_udmabuf(struct udmabuf *ubuf,
+ struct miscdevice *device)
{
DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
- struct dma_buf *buf;
ubuf->device = device;
exp_info.ops = &udmabuf_ops;
@@ -330,11 +328,7 @@ static int export_udmabuf(struct udmabuf *ubuf,
exp_info.priv = ubuf;
exp_info.flags = O_RDWR;
- buf = dma_buf_export(&exp_info);
- if (IS_ERR(buf))
- return PTR_ERR(buf);
-
- return dma_buf_fd(buf, flags);
+ return dma_buf_export(&exp_info);
}
static long udmabuf_pin_folios(struct udmabuf *ubuf, struct file *memfd,
@@ -391,6 +385,7 @@ static long udmabuf_create(struct miscdevice *device,
struct folio **folios = NULL;
pgoff_t pgcnt = 0, pglimit;
struct udmabuf *ubuf;
+ struct dma_buf *dmabuf;
long ret = -EINVAL;
u32 i, flags;
@@ -436,23 +431,39 @@ static long udmabuf_create(struct miscdevice *device,
goto err;
}
+ /*
+ * Take the inode lock to protect against concurrent
+ * memfd_add_seals(), which takes this lock in write mode.
+ */
+ inode_lock_shared(file_inode(memfd));
ret = check_memfd_seals(memfd);
- if (ret < 0) {
- fput(memfd);
- goto err;
- }
+ if (ret)
+ goto out_unlock;
ret = udmabuf_pin_folios(ubuf, memfd, list[i].offset,
list[i].size, folios);
+out_unlock:
+ inode_unlock_shared(file_inode(memfd));
fput(memfd);
if (ret)
goto err;
}
flags = head->flags & UDMABUF_FLAGS_CLOEXEC ? O_CLOEXEC : 0;
- ret = export_udmabuf(ubuf, device, flags);
- if (ret < 0)
+ dmabuf = export_udmabuf(ubuf, device);
+ if (IS_ERR(dmabuf)) {
+ ret = PTR_ERR(dmabuf);
goto err;
+ }
+ /*
+ * Ownership of ubuf is held by the dmabuf from here.
+ * If the following dma_buf_fd() fails, dma_buf_put() cleans up both the
+ * dmabuf and the ubuf (through udmabuf_ops.release).
+ */
+
+ ret = dma_buf_fd(dmabuf, flags);
+ if (ret < 0)
+ dma_buf_put(dmabuf);
kvfree(folios);
return ret;
diff --git a/drivers/dma/amd/qdma/qdma.c b/drivers/dma/amd/qdma/qdma.c
index 6d9079458fe9..66f00ad67351 100644
--- a/drivers/dma/amd/qdma/qdma.c
+++ b/drivers/dma/amd/qdma/qdma.c
@@ -7,9 +7,9 @@
#include <linux/bitfield.h>
#include <linux/bitops.h>
#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/mod_devicetable.h>
-#include <linux/dma-map-ops.h>
#include <linux/platform_device.h>
#include <linux/platform_data/amd_qdma.h>
#include <linux/regmap.h>
@@ -492,18 +492,9 @@ static int qdma_device_verify(struct qdma_device *qdev)
static int qdma_device_setup(struct qdma_device *qdev)
{
- struct device *dev = &qdev->pdev->dev;
u32 ring_sz = QDMA_DEFAULT_RING_SIZE;
int ret = 0;
- while (dev && get_dma_ops(dev))
- dev = dev->parent;
- if (!dev) {
- qdma_err(qdev, "dma device not found");
- return -EINVAL;
- }
- set_dma_ops(&qdev->pdev->dev, get_dma_ops(dev));
-
ret = qdma_setup_fmap_context(qdev);
if (ret) {
qdma_err(qdev, "Failed setup fmap context");
@@ -548,11 +539,12 @@ static void qdma_free_queue_resources(struct dma_chan *chan)
{
struct qdma_queue *queue = to_qdma_queue(chan);
struct qdma_device *qdev = queue->qdev;
- struct device *dev = qdev->dma_dev.dev;
+ struct qdma_platdata *pdata;
qdma_clear_queue_context(queue);
vchan_free_chan_resources(&queue->vchan);
- dma_free_coherent(dev, queue->ring_size * QDMA_MM_DESC_SIZE,
+ pdata = dev_get_platdata(&qdev->pdev->dev);
+ dma_free_coherent(pdata->dma_dev, queue->ring_size * QDMA_MM_DESC_SIZE,
queue->desc_base, queue->dma_desc_base);
}
@@ -565,6 +557,7 @@ static int qdma_alloc_queue_resources(struct dma_chan *chan)
struct qdma_queue *queue = to_qdma_queue(chan);
struct qdma_device *qdev = queue->qdev;
struct qdma_ctxt_sw_desc desc;
+ struct qdma_platdata *pdata;
size_t size;
int ret;
@@ -572,8 +565,9 @@ static int qdma_alloc_queue_resources(struct dma_chan *chan)
if (ret)
return ret;
+ pdata = dev_get_platdata(&qdev->pdev->dev);
size = queue->ring_size * QDMA_MM_DESC_SIZE;
- queue->desc_base = dma_alloc_coherent(qdev->dma_dev.dev, size,
+ queue->desc_base = dma_alloc_coherent(pdata->dma_dev, size,
&queue->dma_desc_base,
GFP_KERNEL);
if (!queue->desc_base) {
@@ -588,7 +582,7 @@ static int qdma_alloc_queue_resources(struct dma_chan *chan)
if (ret) {
qdma_err(qdev, "Failed to setup SW desc ctxt for %s",
chan->name);
- dma_free_coherent(qdev->dma_dev.dev, size, queue->desc_base,
+ dma_free_coherent(pdata->dma_dev, size, queue->desc_base,
queue->dma_desc_base);
return ret;
}
@@ -948,8 +942,9 @@ static int qdma_init_error_irq(struct qdma_device *qdev)
static int qdmam_alloc_qintr_rings(struct qdma_device *qdev)
{
- u32 ctxt[QDMA_CTXT_REGMAP_LEN];
+ struct qdma_platdata *pdata = dev_get_platdata(&qdev->pdev->dev);
struct device *dev = &qdev->pdev->dev;
+ u32 ctxt[QDMA_CTXT_REGMAP_LEN];
struct qdma_intr_ring *ring;
struct qdma_ctxt_intr intr_ctxt;
u32 vector;
@@ -969,7 +964,8 @@ static int qdmam_alloc_qintr_rings(struct qdma_device *qdev)
ring->msix_id = qdev->err_irq_idx + i + 1;
ring->ridx = i;
ring->color = 1;
- ring->base = dmam_alloc_coherent(dev, QDMA_INTR_RING_SIZE,
+ ring->base = dmam_alloc_coherent(pdata->dma_dev,
+ QDMA_INTR_RING_SIZE,
&ring->dev_base, GFP_KERNEL);
if (!ring->base) {
qdma_err(qdev, "Failed to alloc intr ring %d", i);
diff --git a/drivers/dma/apple-admac.c b/drivers/dma/apple-admac.c
index c499173d80b2..bd49f0374291 100644
--- a/drivers/dma/apple-admac.c
+++ b/drivers/dma/apple-admac.c
@@ -153,6 +153,8 @@ static int admac_alloc_sram_carveout(struct admac_data *ad,
{
struct admac_sram *sram;
int i, ret = 0, nblocks;
+ ad->txcache.size = readl_relaxed(ad->base + REG_TX_SRAM_SIZE);
+ ad->rxcache.size = readl_relaxed(ad->base + REG_RX_SRAM_SIZE);
if (dir == DMA_MEM_TO_DEV)
sram = &ad->txcache;
@@ -912,12 +914,7 @@ static int admac_probe(struct platform_device *pdev)
goto free_irq;
}
- ad->txcache.size = readl_relaxed(ad->base + REG_TX_SRAM_SIZE);
- ad->rxcache.size = readl_relaxed(ad->base + REG_RX_SRAM_SIZE);
-
dev_info(&pdev->dev, "Audio DMA Controller\n");
- dev_info(&pdev->dev, "imprint %x TX cache %u RX cache %u\n",
- readl_relaxed(ad->base + REG_IMPRINT), ad->txcache.size, ad->rxcache.size);
return 0;
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
index 9c7b40220004..ba25c23164e7 100644
--- a/drivers/dma/at_xdmac.c
+++ b/drivers/dma/at_xdmac.c
@@ -1363,6 +1363,8 @@ at_xdmac_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value,
return NULL;
desc = at_xdmac_memset_create_desc(chan, atchan, dest, len, value);
+ if (!desc)
+ return NULL;
list_add_tail(&desc->desc_node, &desc->descs_list);
desc->tx_dma_desc.cookie = -EBUSY;
diff --git a/drivers/dma/dw/acpi.c b/drivers/dma/dw/acpi.c
index c510c109d2c3..b6452fffa657 100644
--- a/drivers/dma/dw/acpi.c
+++ b/drivers/dma/dw/acpi.c
@@ -8,13 +8,15 @@
static bool dw_dma_acpi_filter(struct dma_chan *chan, void *param)
{
+ struct dw_dma *dw = to_dw_dma(chan->device);
+ struct dw_dma_chip_pdata *data = dev_get_drvdata(dw->dma.dev);
struct acpi_dma_spec *dma_spec = param;
struct dw_dma_slave slave = {
.dma_dev = dma_spec->dev,
.src_id = dma_spec->slave_id,
.dst_id = dma_spec->slave_id,
- .m_master = 0,
- .p_master = 1,
+ .m_master = data->m_master,
+ .p_master = data->p_master,
};
return dw_dma_filter(chan, &slave);
diff --git a/drivers/dma/dw/internal.h b/drivers/dma/dw/internal.h
index 563ce73488db..f1bd06a20cd6 100644
--- a/drivers/dma/dw/internal.h
+++ b/drivers/dma/dw/internal.h
@@ -51,11 +51,15 @@ struct dw_dma_chip_pdata {
int (*probe)(struct dw_dma_chip *chip);
int (*remove)(struct dw_dma_chip *chip);
struct dw_dma_chip *chip;
+ u8 m_master;
+ u8 p_master;
};
static __maybe_unused const struct dw_dma_chip_pdata dw_dma_chip_pdata = {
.probe = dw_dma_probe,
.remove = dw_dma_remove,
+ .m_master = 0,
+ .p_master = 1,
};
static const struct dw_dma_platform_data idma32_pdata = {
@@ -72,6 +76,8 @@ static __maybe_unused const struct dw_dma_chip_pdata idma32_chip_pdata = {
.pdata = &idma32_pdata,
.probe = idma32_dma_probe,
.remove = idma32_dma_remove,
+ .m_master = 0,
+ .p_master = 0,
};
static const struct dw_dma_platform_data xbar_pdata = {
@@ -88,6 +94,8 @@ static __maybe_unused const struct dw_dma_chip_pdata xbar_chip_pdata = {
.pdata = &xbar_pdata,
.probe = idma32_dma_probe,
.remove = idma32_dma_remove,
+ .m_master = 0,
+ .p_master = 0,
};
#endif /* _DMA_DW_INTERNAL_H */
diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c
index ad2d4d012cf7..e8a0eb81726a 100644
--- a/drivers/dma/dw/pci.c
+++ b/drivers/dma/dw/pci.c
@@ -56,10 +56,10 @@ static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid)
if (ret)
return ret;
- dw_dma_acpi_controller_register(chip->dw);
-
pci_set_drvdata(pdev, data);
+ dw_dma_acpi_controller_register(chip->dw);
+
return 0;
}
diff --git a/drivers/dma/fsl-edma-common.h b/drivers/dma/fsl-edma-common.h
index ce37e1ee9c46..fe8f103d4a63 100644
--- a/drivers/dma/fsl-edma-common.h
+++ b/drivers/dma/fsl-edma-common.h
@@ -166,6 +166,7 @@ struct fsl_edma_chan {
struct work_struct issue_worker;
struct platform_device *pdev;
struct device *pd_dev;
+ struct device_link *pd_dev_link;
u32 srcid;
struct clk *clk;
int priority;
diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c
index 60de1003193a..1a613236b3e4 100644
--- a/drivers/dma/fsl-edma-main.c
+++ b/drivers/dma/fsl-edma-main.c
@@ -417,10 +417,33 @@ static const struct of_device_id fsl_edma_dt_ids[] = {
};
MODULE_DEVICE_TABLE(of, fsl_edma_dt_ids);
+static void fsl_edma3_detach_pd(struct fsl_edma_engine *fsl_edma)
+{
+ struct fsl_edma_chan *fsl_chan;
+ int i;
+
+ for (i = 0; i < fsl_edma->n_chans; i++) {
+ if (fsl_edma->chan_masked & BIT(i))
+ continue;
+ fsl_chan = &fsl_edma->chans[i];
+ if (fsl_chan->pd_dev_link)
+ device_link_del(fsl_chan->pd_dev_link);
+ if (fsl_chan->pd_dev) {
+ dev_pm_domain_detach(fsl_chan->pd_dev, false);
+ pm_runtime_dont_use_autosuspend(fsl_chan->pd_dev);
+ pm_runtime_set_suspended(fsl_chan->pd_dev);
+ }
+ }
+}
+
+static void devm_fsl_edma3_detach_pd(void *data)
+{
+ fsl_edma3_detach_pd(data);
+}
+
static int fsl_edma3_attach_pd(struct platform_device *pdev, struct fsl_edma_engine *fsl_edma)
{
struct fsl_edma_chan *fsl_chan;
- struct device_link *link;
struct device *pd_chan;
struct device *dev;
int i;
@@ -436,15 +459,16 @@ static int fsl_edma3_attach_pd(struct platform_device *pdev, struct fsl_edma_eng
pd_chan = dev_pm_domain_attach_by_id(dev, i);
if (IS_ERR_OR_NULL(pd_chan)) {
dev_err(dev, "Failed attach pd %d\n", i);
- return -EINVAL;
+ goto detach;
}
- link = device_link_add(dev, pd_chan, DL_FLAG_STATELESS |
+ fsl_chan->pd_dev_link = device_link_add(dev, pd_chan, DL_FLAG_STATELESS |
DL_FLAG_PM_RUNTIME |
DL_FLAG_RPM_ACTIVE);
- if (!link) {
+ if (!fsl_chan->pd_dev_link) {
dev_err(dev, "Failed to add device_link to %d\n", i);
- return -EINVAL;
+ dev_pm_domain_detach(pd_chan, false);
+ goto detach;
}
fsl_chan->pd_dev = pd_chan;
@@ -455,6 +479,10 @@ static int fsl_edma3_attach_pd(struct platform_device *pdev, struct fsl_edma_eng
}
return 0;
+
+detach:
+ fsl_edma3_detach_pd(fsl_edma);
+ return -EINVAL;
}
static int fsl_edma_probe(struct platform_device *pdev)
@@ -544,6 +572,9 @@ static int fsl_edma_probe(struct platform_device *pdev)
ret = fsl_edma3_attach_pd(pdev, fsl_edma);
if (ret)
return ret;
+ ret = devm_add_action_or_reset(&pdev->dev, devm_fsl_edma3_detach_pd, fsl_edma);
+ if (ret)
+ return ret;
}
if (drvdata->flags & FSL_EDMA_DRV_TCD64)
diff --git a/drivers/dma/loongson2-apb-dma.c b/drivers/dma/loongson2-apb-dma.c
index 367ed34ce4da..c528f02b9f84 100644
--- a/drivers/dma/loongson2-apb-dma.c
+++ b/drivers/dma/loongson2-apb-dma.c
@@ -31,7 +31,7 @@
#define LDMA_ASK_VALID BIT(2)
#define LDMA_START BIT(3) /* DMA start operation */
#define LDMA_STOP BIT(4) /* DMA stop operation */
-#define LDMA_CONFIG_MASK GENMASK(4, 0) /* DMA controller config bits mask */
+#define LDMA_CONFIG_MASK GENMASK_ULL(4, 0) /* DMA controller config bits mask */
/* Bitfields in ndesc_addr field of HW descriptor */
#define LDMA_DESC_EN BIT(0) /*1: The next descriptor is valid */
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index 43efce77bb57..40b76b40bc30 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -1388,6 +1388,7 @@ static int mv_xor_probe(struct platform_device *pdev)
irq = irq_of_parse_and_map(np, 0);
if (!irq) {
ret = -ENODEV;
+ of_node_put(np);
goto err_channel_add;
}
@@ -1396,6 +1397,7 @@ static int mv_xor_probe(struct platform_device *pdev)
if (IS_ERR(chan)) {
ret = PTR_ERR(chan);
irq_dispose_mapping(irq);
+ of_node_put(np);
goto err_channel_add;
}
diff --git a/drivers/dma/tegra186-gpc-dma.c b/drivers/dma/tegra186-gpc-dma.c
index cacf3757adc2..4d6fe0efa76e 100644
--- a/drivers/dma/tegra186-gpc-dma.c
+++ b/drivers/dma/tegra186-gpc-dma.c
@@ -231,6 +231,7 @@ struct tegra_dma_channel {
bool config_init;
char name[30];
enum dma_transfer_direction sid_dir;
+ enum dma_status status;
int id;
int irq;
int slave_id;
@@ -393,6 +394,8 @@ static int tegra_dma_pause(struct tegra_dma_channel *tdc)
tegra_dma_dump_chan_regs(tdc);
}
+ tdc->status = DMA_PAUSED;
+
return ret;
}
@@ -419,6 +422,8 @@ static void tegra_dma_resume(struct tegra_dma_channel *tdc)
val = tdc_read(tdc, TEGRA_GPCDMA_CHAN_CSRE);
val &= ~TEGRA_GPCDMA_CHAN_CSRE_PAUSE;
tdc_write(tdc, TEGRA_GPCDMA_CHAN_CSRE, val);
+
+ tdc->status = DMA_IN_PROGRESS;
}
static int tegra_dma_device_resume(struct dma_chan *dc)
@@ -544,6 +549,7 @@ static void tegra_dma_xfer_complete(struct tegra_dma_channel *tdc)
tegra_dma_sid_free(tdc);
tdc->dma_desc = NULL;
+ tdc->status = DMA_COMPLETE;
}
static void tegra_dma_chan_decode_error(struct tegra_dma_channel *tdc,
@@ -716,6 +722,7 @@ static int tegra_dma_terminate_all(struct dma_chan *dc)
tdc->dma_desc = NULL;
}
+ tdc->status = DMA_COMPLETE;
tegra_dma_sid_free(tdc);
vchan_get_all_descriptors(&tdc->vc, &head);
spin_unlock_irqrestore(&tdc->vc.lock, flags);
@@ -769,6 +776,9 @@ static enum dma_status tegra_dma_tx_status(struct dma_chan *dc,
if (ret == DMA_COMPLETE)
return ret;
+ if (tdc->status == DMA_PAUSED)
+ ret = DMA_PAUSED;
+
spin_lock_irqsave(&tdc->vc.lock, flags);
vd = vchan_find_desc(&tdc->vc, cookie);
if (vd) {
diff --git a/drivers/firmware/arm_ffa/bus.c b/drivers/firmware/arm_ffa/bus.c
index eb17d03b66fe..dfda5ffc14db 100644
--- a/drivers/firmware/arm_ffa/bus.c
+++ b/drivers/firmware/arm_ffa/bus.c
@@ -187,13 +187,18 @@ bool ffa_device_is_valid(struct ffa_device *ffa_dev)
return valid;
}
-struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id,
- const struct ffa_ops *ops)
+struct ffa_device *
+ffa_device_register(const struct ffa_partition_info *part_info,
+ const struct ffa_ops *ops)
{
int id, ret;
+ uuid_t uuid;
struct device *dev;
struct ffa_device *ffa_dev;
+ if (!part_info)
+ return NULL;
+
id = ida_alloc_min(&ffa_bus_id, 1, GFP_KERNEL);
if (id < 0)
return NULL;
@@ -210,9 +215,11 @@ struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id,
dev_set_name(&ffa_dev->dev, "arm-ffa-%d", id);
ffa_dev->id = id;
- ffa_dev->vm_id = vm_id;
+ ffa_dev->vm_id = part_info->id;
+ ffa_dev->properties = part_info->properties;
ffa_dev->ops = ops;
- uuid_copy(&ffa_dev->uuid, uuid);
+ import_uuid(&uuid, (u8 *)part_info->uuid);
+ uuid_copy(&ffa_dev->uuid, &uuid);
ret = device_register(&ffa_dev->dev);
if (ret) {
diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c
index b14cbdae94e8..2c2ec3c35f15 100644
--- a/drivers/firmware/arm_ffa/driver.c
+++ b/drivers/firmware/arm_ffa/driver.c
@@ -1387,7 +1387,6 @@ static struct notifier_block ffa_bus_nb = {
static int ffa_setup_partitions(void)
{
int count, idx, ret;
- uuid_t uuid;
struct ffa_device *ffa_dev;
struct ffa_dev_part_info *info;
struct ffa_partition_info *pbuf, *tpbuf;
@@ -1406,23 +1405,19 @@ static int ffa_setup_partitions(void)
xa_init(&drv_info->partition_info);
for (idx = 0, tpbuf = pbuf; idx < count; idx++, tpbuf++) {
- import_uuid(&uuid, (u8 *)tpbuf->uuid);
-
/* Note that if the UUID will be uuid_null, that will require
* ffa_bus_notifier() to find the UUID of this partition id
* with help of ffa_device_match_uuid(). FF-A v1.1 and above
* provides UUID here for each partition as part of the
* discovery API and the same is passed.
*/
- ffa_dev = ffa_device_register(&uuid, tpbuf->id, &ffa_drv_ops);
+ ffa_dev = ffa_device_register(tpbuf, &ffa_drv_ops);
if (!ffa_dev) {
pr_err("%s: failed to register partition ID 0x%x\n",
__func__, tpbuf->id);
continue;
}
- ffa_dev->properties = tpbuf->properties;
-
if (drv_info->version > FFA_VERSION_1_0 &&
!(tpbuf->properties & FFA_PARTITION_AARCH64_EXEC))
ffa_mode_32bit_set(ffa_dev);
diff --git a/drivers/firmware/arm_scmi/vendors/imx/Kconfig b/drivers/firmware/arm_scmi/vendors/imx/Kconfig
index 2883ed24a84d..a01bf5e47301 100644
--- a/drivers/firmware/arm_scmi/vendors/imx/Kconfig
+++ b/drivers/firmware/arm_scmi/vendors/imx/Kconfig
@@ -15,6 +15,7 @@ config IMX_SCMI_BBM_EXT
config IMX_SCMI_MISC_EXT
tristate "i.MX SCMI MISC EXTENSION"
depends on ARM_SCMI_PROTOCOL || (COMPILE_TEST && OF)
+ depends on IMX_SCMI_MISC_DRV
default y if ARCH_MXC
help
This enables i.MX System MISC control logic such as gpio expander
diff --git a/drivers/firmware/imx/Kconfig b/drivers/firmware/imx/Kconfig
index 477d3f32d99a..907cd149c40a 100644
--- a/drivers/firmware/imx/Kconfig
+++ b/drivers/firmware/imx/Kconfig
@@ -25,7 +25,6 @@ config IMX_SCU
config IMX_SCMI_MISC_DRV
tristate "IMX SCMI MISC Protocol driver"
- depends on IMX_SCMI_MISC_EXT || COMPILE_TEST
default y if ARCH_MXC
help
The System Controller Management Interface firmware (SCMI FW) is
diff --git a/drivers/firmware/microchip/mpfs-auto-update.c b/drivers/firmware/microchip/mpfs-auto-update.c
index 38a03698cec9..e194f7acb2a9 100644
--- a/drivers/firmware/microchip/mpfs-auto-update.c
+++ b/drivers/firmware/microchip/mpfs-auto-update.c
@@ -402,10 +402,10 @@ static int mpfs_auto_update_available(struct mpfs_auto_update_priv *priv)
return -EIO;
/*
- * Bit 5 of byte 1 is "UL_Auto Update" & if it is set, Auto Update is
+ * Bit 5 of byte 1 is "UL_IAP" & if it is set, Auto Update is
* not possible.
*/
- if (response_msg[1] & AUTO_UPDATE_FEATURE_ENABLED)
+ if ((((u8 *)response_msg)[1] & AUTO_UPDATE_FEATURE_ENABLED))
return -EPERM;
return 0;
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 5504721007cc..772fc7625639 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -99,6 +99,7 @@ config DRM_KUNIT_TEST
config DRM_KMS_HELPER
tristate
depends on DRM
+ select FB_CORE if DRM_FBDEV_EMULATION
help
CRTC helpers for KMS drivers.
@@ -358,6 +359,7 @@ config DRM_TTM_HELPER
tristate
depends on DRM
select DRM_TTM
+ select FB_CORE if DRM_FBDEV_EMULATION
select FB_SYSMEM_HELPERS_DEFERRED if DRM_FBDEV_EMULATION
help
Helpers for ttm-based gem objects
@@ -365,6 +367,7 @@ config DRM_TTM_HELPER
config DRM_GEM_DMA_HELPER
tristate
depends on DRM
+ select FB_CORE if DRM_FBDEV_EMULATION
select FB_DMAMEM_HELPERS_DEFERRED if DRM_FBDEV_EMULATION
help
Choose this if you need the GEM DMA helper functions
@@ -372,6 +375,7 @@ config DRM_GEM_DMA_HELPER
config DRM_GEM_SHMEM_HELPER
tristate
depends on DRM && MMU
+ select FB_CORE if DRM_FBDEV_EMULATION
select FB_SYSMEM_HELPERS_DEFERRED if DRM_FBDEV_EMULATION
help
Choose this if you need the GEM shmem helper functions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
index 946c48829f19..824f9da5b6ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
@@ -343,11 +343,10 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
coredump->skip_vram_check = skip_vram_check;
coredump->reset_vram_lost = vram_lost;
- if (job && job->vm) {
- struct amdgpu_vm *vm = job->vm;
+ if (job && job->pasid) {
struct amdgpu_task_info *ti;
- ti = amdgpu_vm_get_task_info_vm(vm);
+ ti = amdgpu_vm_get_task_info_pasid(adev, job->pasid);
if (ti) {
coredump->reset_task_info = *ti;
amdgpu_vm_put_task_info(ti);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index d272d95dd5b2..cd4fac120834 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -417,6 +417,9 @@ bool amdgpu_device_supports_boco(struct drm_device *dev)
{
struct amdgpu_device *adev = drm_to_adev(dev);
+ if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE))
+ return false;
+
if (adev->has_pr3 ||
((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
return true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index b9d08bc96581..a21c510c408e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -255,7 +255,6 @@ void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds,
void amdgpu_job_free_resources(struct amdgpu_job *job)
{
- struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
struct dma_fence *f;
unsigned i;
@@ -268,7 +267,7 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
f = NULL;
for (i = 0; i < job->num_ibs; ++i)
- amdgpu_ib_free(ring->adev, &job->ibs[i], f);
+ amdgpu_ib_free(NULL, &job->ibs[i], f);
}
static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index ddd7f05e4db9..c9c48b782ec1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1266,10 +1266,9 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
* next command submission.
*/
if (amdgpu_vm_is_bo_always_valid(vm, bo)) {
- uint32_t mem_type = bo->tbo.resource->mem_type;
-
- if (!(bo->preferred_domains &
- amdgpu_mem_type_to_domain(mem_type)))
+ if (bo->tbo.resource &&
+ !(bo->preferred_domains &
+ amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type)))
amdgpu_vm_bo_evicted(&bo_va->base);
else
amdgpu_vm_bo_idle(&bo_va->base);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index fe7c48f2fb2a..da327ab48a57 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -4123,7 +4123,7 @@ static int gfx_v12_0_set_clockgating_state(void *handle,
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
gfx_v12_0_update_gfx_clock_gating(adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c
index 0fbc3be81f14..f2ab5001b492 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c
@@ -108,7 +108,7 @@ mmhub_v4_1_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
dev_err(adev->dev,
"MMVM_L2_PROTECTION_FAULT_STATUS_LO32:0x%08X\n",
status);
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(4, 1, 0):
mmhub_cid = mmhub_client_ids_v4_1_0[cid][rw];
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
index b1b57dcc5a73..d1032e9992b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
@@ -271,8 +271,19 @@ const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg = {
.ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK,
};
+#define regRCC_DEV0_EPF6_STRAP4 0xd304
+#define regRCC_DEV0_EPF6_STRAP4_BASE_IDX 5
+
static void nbio_v7_0_init_registers(struct amdgpu_device *adev)
{
+ uint32_t data;
+
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(2, 5, 0):
+ data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF6_STRAP4) & ~BIT(23);
+ WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF6_STRAP4, data);
+ break;
+ }
}
#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
index 814ab59fdd4a..41421da63a08 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
@@ -275,7 +275,7 @@ static void nbio_v7_11_init_registers(struct amdgpu_device *adev)
if (def != data)
WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, data);
- switch (adev->ip_versions[NBIO_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
case IP_VERSION(7, 11, 0):
case IP_VERSION(7, 11, 1):
case IP_VERSION(7, 11, 2):
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
index 1ac730328516..3fb6d2aa7e3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
@@ -247,7 +247,7 @@ static void nbio_v7_7_init_registers(struct amdgpu_device *adev)
if (def != data)
WREG32_SOC15(NBIO, 0, regBIF0_PCIE_MST_CTRL_3, data);
- switch (adev->ip_versions[NBIO_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
case IP_VERSION(7, 7, 0):
data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4) & ~BIT(23);
WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4, data);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
index 6a565ce74d5b..5cad09c5f2ff 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
@@ -2096,7 +2096,7 @@ static int smu_v14_0_2_enable_gfx_features(struct smu_context *smu)
{
struct amdgpu_device *adev = smu->adev;
- if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(14, 0, 2))
+ if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 2))
return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_EnableAllSmuFeatures,
FEATURE_PWR_GFX, NULL);
else
diff --git a/drivers/gpu/drm/display/drm_dp_tunnel.c b/drivers/gpu/drm/display/drm_dp_tunnel.c
index 48b2df120086..90fe07a89260 100644
--- a/drivers/gpu/drm/display/drm_dp_tunnel.c
+++ b/drivers/gpu/drm/display/drm_dp_tunnel.c
@@ -1896,8 +1896,8 @@ static void destroy_mgr(struct drm_dp_tunnel_mgr *mgr)
*
* Creates a DP tunnel manager for @dev.
*
- * Returns a pointer to the tunnel manager if created successfully or NULL in
- * case of an error.
+ * Returns a pointer to the tunnel manager if created successfully or error
+ * pointer in case of failure.
*/
struct drm_dp_tunnel_mgr *
drm_dp_tunnel_mgr_create(struct drm_device *dev, int max_group_count)
@@ -1907,7 +1907,7 @@ drm_dp_tunnel_mgr_create(struct drm_device *dev, int max_group_count)
mgr = kzalloc(sizeof(*mgr), GFP_KERNEL);
if (!mgr)
- return NULL;
+ return ERR_PTR(-ENOMEM);
mgr->dev = dev;
init_waitqueue_head(&mgr->bw_req_queue);
@@ -1916,7 +1916,7 @@ drm_dp_tunnel_mgr_create(struct drm_device *dev, int max_group_count)
if (!mgr->groups) {
kfree(mgr);
- return NULL;
+ return ERR_PTR(-ENOMEM);
}
#ifdef CONFIG_DRM_DISPLAY_DP_TUNNEL_STATE_DEBUG
@@ -1927,7 +1927,7 @@ drm_dp_tunnel_mgr_create(struct drm_device *dev, int max_group_count)
if (!init_group(mgr, &mgr->groups[i])) {
destroy_mgr(mgr);
- return NULL;
+ return ERR_PTR(-ENOMEM);
}
mgr->group_count++;
diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
index 6ba167a33461..71573b85d924 100644
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c
@@ -1287,14 +1287,11 @@ EXPORT_SYMBOL(drm_mode_set_name);
*/
int drm_mode_vrefresh(const struct drm_display_mode *mode)
{
- unsigned int num, den;
+ unsigned int num = 1, den = 1;
if (mode->htotal == 0 || mode->vtotal == 0)
return 0;
- num = mode->clock;
- den = mode->htotal * mode->vtotal;
-
if (mode->flags & DRM_MODE_FLAG_INTERLACE)
num *= 2;
if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
@@ -1302,6 +1299,12 @@ int drm_mode_vrefresh(const struct drm_display_mode *mode)
if (mode->vscan > 1)
den *= mode->vscan;
+ if (check_mul_overflow(mode->clock, num, &num))
+ return 0;
+
+ if (check_mul_overflow(mode->htotal * mode->vtotal, den, &den))
+ return 0;
+
return DIV_ROUND_CLOSEST_ULL(mul_u32_u32(num, 1000), den);
}
EXPORT_SYMBOL(drm_mode_vrefresh);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index ba55c059063d..fe1f85e5dda3 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -343,6 +343,11 @@ struct intel_engine_guc_stats {
* @start_gt_clk: GT clock time of last idle to active transition.
*/
u64 start_gt_clk;
+
+ /**
+ * @total: The last value of total returned
+ */
+ u64 total;
};
union intel_engine_tlb_inv_reg {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 9ede6f240d79..c0bd730383f2 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1243,6 +1243,21 @@ static void __get_engine_usage_record(struct intel_engine_cs *engine,
} while (++i < 6);
}
+static void __set_engine_usage_record(struct intel_engine_cs *engine,
+ u32 last_in, u32 id, u32 total)
+{
+ struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
+
+#define record_write(map_, field_, val_) \
+ iosys_map_wr_field(map_, 0, struct guc_engine_usage_record, field_, val_)
+
+ record_write(&rec_map, last_switch_in_stamp, last_in);
+ record_write(&rec_map, current_context_index, id);
+ record_write(&rec_map, total_runtime, total);
+
+#undef record_write
+}
+
static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
{
struct intel_engine_guc_stats *stats = &engine->stats.guc;
@@ -1363,9 +1378,12 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
total += intel_gt_clock_interval_to_ns(gt, clk);
}
+ if (total > stats->total)
+ stats->total = total;
+
spin_unlock_irqrestore(&guc->timestamp.lock, flags);
- return ns_to_ktime(total);
+ return ns_to_ktime(stats->total);
}
static void guc_enable_busyness_worker(struct intel_guc *guc)
@@ -1431,8 +1449,21 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc)
guc_update_pm_timestamp(guc, &unused);
for_each_engine(engine, gt, id) {
+ struct intel_engine_guc_stats *stats = &engine->stats.guc;
+
guc_update_engine_gt_clks(engine);
- engine->stats.guc.prev_total = 0;
+
+ /*
+ * If resetting a running context, accumulate the active
+ * time as well since there will be no context switch.
+ */
+ if (stats->running) {
+ u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;
+
+ stats->total_gt_clks += clk;
+ }
+ stats->prev_total = 0;
+ stats->running = 0;
}
spin_unlock_irqrestore(&guc->timestamp.lock, flags);
@@ -1543,6 +1574,9 @@ err_trylock:
static int guc_action_enable_usage_stats(struct intel_guc *guc)
{
+ struct intel_gt *gt = guc_to_gt(guc);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
u32 offset = intel_guc_engine_usage_offset(guc);
u32 action[] = {
INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF,
@@ -1550,6 +1584,9 @@ static int guc_action_enable_usage_stats(struct intel_guc *guc)
0,
};
+ for_each_engine(engine, gt, id)
+ __set_engine_usage_record(engine, 0, 0xffffffff, 0);
+
return intel_guc_send(guc, action, ARRAY_SIZE(action));
}
diff --git a/drivers/gpu/drm/panel/panel-himax-hx83102.c b/drivers/gpu/drm/panel/panel-himax-hx83102.c
index 8b48bba18131..3644a7544b93 100644
--- a/drivers/gpu/drm/panel/panel-himax-hx83102.c
+++ b/drivers/gpu/drm/panel/panel-himax-hx83102.c
@@ -565,6 +565,8 @@ static int hx83102_get_modes(struct drm_panel *panel,
struct drm_display_mode *mode;
mode = drm_mode_duplicate(connector->dev, m);
+ if (!mode)
+ return -ENOMEM;
mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED;
drm_mode_set_name(mode);
diff --git a/drivers/gpu/drm/panel/panel-novatek-nt35950.c b/drivers/gpu/drm/panel/panel-novatek-nt35950.c
index b036208f9356..08b22b592ab0 100644
--- a/drivers/gpu/drm/panel/panel-novatek-nt35950.c
+++ b/drivers/gpu/drm/panel/panel-novatek-nt35950.c
@@ -481,9 +481,9 @@ static int nt35950_probe(struct mipi_dsi_device *dsi)
return dev_err_probe(dev, -EPROBE_DEFER, "Cannot get secondary DSI host\n");
nt->dsi[1] = mipi_dsi_device_register_full(dsi_r_host, info);
- if (!nt->dsi[1]) {
+ if (IS_ERR(nt->dsi[1])) {
dev_err(dev, "Cannot get secondary DSI node\n");
- return -ENODEV;
+ return PTR_ERR(nt->dsi[1]);
}
num_dsis++;
}
diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7701.c b/drivers/gpu/drm/panel/panel-sitronix-st7701.c
index eef03d04e0cd..1f72ef7ca74c 100644
--- a/drivers/gpu/drm/panel/panel-sitronix-st7701.c
+++ b/drivers/gpu/drm/panel/panel-sitronix-st7701.c
@@ -1177,6 +1177,7 @@ static int st7701_probe(struct device *dev, int connector_type)
return dev_err_probe(dev, ret, "Failed to get orientation\n");
drm_panel_init(&st7701->panel, dev, &st7701_funcs, connector_type);
+ st7701->panel.prepare_prev_first = true;
/**
* Once sleep out has been issued, ST7701 IC required to wait 120ms
diff --git a/drivers/gpu/drm/panel/panel-synaptics-r63353.c b/drivers/gpu/drm/panel/panel-synaptics-r63353.c
index 169c629746c7..17349825543f 100644
--- a/drivers/gpu/drm/panel/panel-synaptics-r63353.c
+++ b/drivers/gpu/drm/panel/panel-synaptics-r63353.c
@@ -325,7 +325,7 @@ static void r63353_panel_shutdown(struct mipi_dsi_device *dsi)
{
struct r63353_panel *rpanel = mipi_dsi_get_drvdata(dsi);
- r63353_panel_unprepare(&rpanel->base);
+ drm_panel_unprepare(&rpanel->base);
}
static const struct r63353_desc sharp_ls068b3sx02_data = {
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 7ce25281c74c..57da84908752 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -1355,7 +1355,8 @@ EXPORT_SYMBOL(drm_sched_init);
* drm_sched_backend_ops.run_job(). Consequently, drm_sched_backend_ops.free_job()
* will not be called for all jobs still in drm_gpu_scheduler.pending_list.
* There is no solution for this currently. Thus, it is up to the driver to make
- * sure that
+ * sure that:
+ *
* a) drm_sched_fini() is only called after for all submitted jobs
* drm_sched_backend_ops.free_job() has been called or that
* b) the jobs for which drm_sched_backend_ops.free_job() has not been called
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index c38dcdfcb914..a99112e6f0b8 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -756,7 +756,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
* adding succeeded, it is ok to proceed even if the memory was
* not onlined in time.
*/
- wait_for_completion_timeout(&dm_device.ol_waitevent, 5 * HZ);
+ wait_for_completion_timeout(&dm_device.ol_waitevent, secs_to_jiffies(5));
post_status(&dm_device);
}
}
@@ -1373,7 +1373,8 @@ static int dm_thread_func(void *dm_dev)
struct hv_dynmem_device *dm = dm_dev;
while (!kthread_should_stop()) {
- wait_for_completion_interruptible_timeout(&dm_device.config_event, 1 * HZ);
+ wait_for_completion_interruptible_timeout(&dm_device.config_event,
+ secs_to_jiffies(1));
/*
* The host expects us to post information on the memory
* pressure every second.
@@ -1748,7 +1749,7 @@ static int balloon_connect_vsp(struct hv_device *dev)
if (ret)
goto out;
- t = wait_for_completion_timeout(&dm_device.host_event, 5 * HZ);
+ t = wait_for_completion_timeout(&dm_device.host_event, secs_to_jiffies(5));
if (t == 0) {
ret = -ETIMEDOUT;
goto out;
@@ -1806,7 +1807,7 @@ static int balloon_connect_vsp(struct hv_device *dev)
if (ret)
goto out;
- t = wait_for_completion_timeout(&dm_device.host_event, 5 * HZ);
+ t = wait_for_completion_timeout(&dm_device.host_event, secs_to_jiffies(5));
if (t == 0) {
ret = -ETIMEDOUT;
goto out;
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index d35b60c06114..7400a5a4d2bd 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -655,7 +655,7 @@ void hv_kvp_onchannelcallback(void *context)
if (host_negotiatied == NEGO_NOT_STARTED) {
host_negotiatied = NEGO_IN_PROGRESS;
schedule_delayed_work(&kvp_host_handshake_work,
- HV_UTIL_NEGO_TIMEOUT * HZ);
+ secs_to_jiffies(HV_UTIL_NEGO_TIMEOUT));
}
return;
}
@@ -724,7 +724,7 @@ void hv_kvp_onchannelcallback(void *context)
*/
schedule_work(&kvp_sendkey_work);
schedule_delayed_work(&kvp_timeout_work,
- HV_UTIL_TIMEOUT * HZ);
+ secs_to_jiffies(HV_UTIL_TIMEOUT));
return;
@@ -767,6 +767,12 @@ hv_kvp_init(struct hv_util_service *srv)
*/
kvp_transaction.state = HVUTIL_DEVICE_INIT;
+ return 0;
+}
+
+int
+hv_kvp_init_transport(void)
+{
hvt = hvutil_transport_init(kvp_devname, CN_KVP_IDX, CN_KVP_VAL,
kvp_on_msg, kvp_on_reset);
if (!hvt)
diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
index 0d2184be1691..bde637a96c37 100644
--- a/drivers/hv/hv_snapshot.c
+++ b/drivers/hv/hv_snapshot.c
@@ -193,7 +193,8 @@ static void vss_send_op(void)
vss_transaction.state = HVUTIL_USERSPACE_REQ;
schedule_delayed_work(&vss_timeout_work, op == VSS_OP_FREEZE ?
- VSS_FREEZE_TIMEOUT * HZ : HV_UTIL_TIMEOUT * HZ);
+ secs_to_jiffies(VSS_FREEZE_TIMEOUT) :
+ secs_to_jiffies(HV_UTIL_TIMEOUT));
rc = hvutil_transport_send(hvt, vss_msg, sizeof(*vss_msg), NULL);
if (rc) {
@@ -388,6 +389,12 @@ hv_vss_init(struct hv_util_service *srv)
*/
vss_transaction.state = HVUTIL_DEVICE_INIT;
+ return 0;
+}
+
+int
+hv_vss_init_transport(void)
+{
hvt = hvutil_transport_init(vss_devname, CN_VSS_IDX, CN_VSS_VAL,
vss_on_msg, vss_on_reset);
if (!hvt) {
diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
index c4f525325790..36ee89c0358b 100644
--- a/drivers/hv/hv_util.c
+++ b/drivers/hv/hv_util.c
@@ -141,6 +141,7 @@ static struct hv_util_service util_heartbeat = {
static struct hv_util_service util_kvp = {
.util_cb = hv_kvp_onchannelcallback,
.util_init = hv_kvp_init,
+ .util_init_transport = hv_kvp_init_transport,
.util_pre_suspend = hv_kvp_pre_suspend,
.util_pre_resume = hv_kvp_pre_resume,
.util_deinit = hv_kvp_deinit,
@@ -149,6 +150,7 @@ static struct hv_util_service util_kvp = {
static struct hv_util_service util_vss = {
.util_cb = hv_vss_onchannelcallback,
.util_init = hv_vss_init,
+ .util_init_transport = hv_vss_init_transport,
.util_pre_suspend = hv_vss_pre_suspend,
.util_pre_resume = hv_vss_pre_resume,
.util_deinit = hv_vss_deinit,
@@ -590,10 +592,8 @@ static int util_probe(struct hv_device *dev,
srv->channel = dev->channel;
if (srv->util_init) {
ret = srv->util_init(srv);
- if (ret) {
- ret = -ENODEV;
+ if (ret)
goto error1;
- }
}
/*
@@ -613,6 +613,13 @@ static int util_probe(struct hv_device *dev,
if (ret)
goto error;
+ if (srv->util_init_transport) {
+ ret = srv->util_init_transport();
+ if (ret) {
+ vmbus_close(dev->channel);
+ goto error;
+ }
+ }
return 0;
error:
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index d2856023d53c..52cb744b4d7f 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -370,12 +370,14 @@ void vmbus_on_event(unsigned long data);
void vmbus_on_msg_dpc(unsigned long data);
int hv_kvp_init(struct hv_util_service *srv);
+int hv_kvp_init_transport(void);
void hv_kvp_deinit(void);
int hv_kvp_pre_suspend(void);
int hv_kvp_pre_resume(void);
void hv_kvp_onchannelcallback(void *context);
int hv_vss_init(struct hv_util_service *srv);
+int hv_vss_init_transport(void);
void hv_vss_deinit(void);
int hv_vss_pre_suspend(void);
int hv_vss_pre_resume(void);
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 6d89d37b069a..2892b8da20a5 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2507,7 +2507,7 @@ static int vmbus_bus_resume(struct device *dev)
vmbus_request_offers();
if (wait_for_completion_timeout(
- &vmbus_connection.ready_for_resume_event, 10 * HZ) == 0)
+ &vmbus_connection.ready_for_resume_event, secs_to_jiffies(10)) == 0)
pr_err("Some vmbus device is missing after suspending?\n");
/* Reset the event for the next suspend. */
diff --git a/drivers/hwmon/tmp513.c b/drivers/hwmon/tmp513.c
index 926d28cd3fab..1c2cb12071b8 100644
--- a/drivers/hwmon/tmp513.c
+++ b/drivers/hwmon/tmp513.c
@@ -182,7 +182,7 @@ struct tmp51x_data {
struct regmap *regmap;
};
-// Set the shift based on the gain 8=4, 4=3, 2=2, 1=1
+// Set the shift based on the gain: 8 -> 1, 4 -> 2, 2 -> 3, 1 -> 4
static inline u8 tmp51x_get_pga_shift(struct tmp51x_data *data)
{
return 5 - ffs(data->pga_gain);
@@ -204,7 +204,9 @@ static int tmp51x_get_value(struct tmp51x_data *data, u8 reg, u8 pos,
* 2's complement number shifted by one to four depending
* on the pga gain setting. 1lsb = 10uV
*/
- *val = sign_extend32(regval, 17 - tmp51x_get_pga_shift(data));
+ *val = sign_extend32(regval,
+ reg == TMP51X_SHUNT_CURRENT_RESULT ?
+ 16 - tmp51x_get_pga_shift(data) : 15);
*val = DIV_ROUND_CLOSEST(*val * 10 * MILLI, data->shunt_uohms);
break;
case TMP51X_BUS_VOLTAGE_RESULT:
@@ -220,7 +222,7 @@ static int tmp51x_get_value(struct tmp51x_data *data, u8 reg, u8 pos,
break;
case TMP51X_BUS_CURRENT_RESULT:
// Current = (ShuntVoltage * CalibrationRegister) / 4096
- *val = sign_extend32(regval, 16) * data->curr_lsb_ua;
+ *val = sign_extend32(regval, 15) * (long)data->curr_lsb_ua;
*val = DIV_ROUND_CLOSEST(*val, MILLI);
break;
case TMP51X_LOCAL_TEMP_RESULT:
@@ -232,7 +234,7 @@ static int tmp51x_get_value(struct tmp51x_data *data, u8 reg, u8 pos,
case TMP51X_REMOTE_TEMP_LIMIT_2:
case TMP513_REMOTE_TEMP_LIMIT_3:
// 1lsb = 0.0625 degrees centigrade
- *val = sign_extend32(regval, 16) >> TMP51X_TEMP_SHIFT;
+ *val = sign_extend32(regval, 15) >> TMP51X_TEMP_SHIFT;
*val = DIV_ROUND_CLOSEST(*val * 625, 10);
break;
case TMP51X_N_FACTOR_AND_HYST_1:
diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig
index fb38f684444f..d00e713c1092 100644
--- a/drivers/macintosh/Kconfig
+++ b/drivers/macintosh/Kconfig
@@ -120,6 +120,7 @@ config PMAC_MEDIABAY
config PMAC_BACKLIGHT
bool "Backlight control for LCD screens"
depends on PPC_PMAC && ADB_PMU && FB = y && (BROKEN || !PPC64)
+ depends on BACKLIGHT_CLASS_DEVICE=y
select FB_BACKLIGHT
help
Say Y here to enable Macintosh specific extensions of the generic
diff --git a/drivers/media/dvb-frontends/dib3000mb.c b/drivers/media/dvb-frontends/dib3000mb.c
index 822639f11c04..63bc7b74bc8b 100644
--- a/drivers/media/dvb-frontends/dib3000mb.c
+++ b/drivers/media/dvb-frontends/dib3000mb.c
@@ -51,7 +51,7 @@ MODULE_PARM_DESC(debug, "set debugging level (1=info,2=xfer,4=setfe,8=getfe (|-a
static int dib3000_read_reg(struct dib3000_state *state, u16 reg)
{
u8 wb[] = { ((reg >> 8) | 0x80) & 0xff, reg & 0xff };
- u8 rb[2];
+ u8 rb[2] = {};
struct i2c_msg msg[] = {
{ .addr = state->config.demod_address, .flags = 0, .buf = wb, .len = 2 },
{ .addr = state->config.demod_address, .flags = I2C_M_RD, .buf = rb, .len = 2 },
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c
index eea709d93820..47c302745c1d 100644
--- a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c
+++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c
@@ -1188,7 +1188,8 @@ err:
return ret;
}
-static
+/* clang stack usage explodes if this is inlined */
+static noinline_for_stack
void vdec_vp9_slice_map_counts_eob_coef(unsigned int i, unsigned int j, unsigned int k,
struct vdec_vp9_slice_frame_counts *counts,
struct v4l2_vp9_frame_symbol_counts *counts_helper)
diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
index efb0d2d5716b..af445d3f8e2a 100644
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -3070,6 +3070,7 @@ release_clk:
msdc_gate_clock(host);
platform_set_drvdata(pdev, NULL);
release_mem:
+ device_init_wakeup(&pdev->dev, false);
if (host->dma.gpd)
dma_free_coherent(&pdev->dev,
2 * sizeof(struct mt_gpdma_desc),
@@ -3103,6 +3104,7 @@ static void msdc_drv_remove(struct platform_device *pdev)
host->dma.gpd, host->dma.gpd_addr);
dma_free_coherent(&pdev->dev, MAX_BD_NUM * sizeof(struct mt_bdma_desc),
host->dma.bd, host->dma.bd_addr);
+ device_init_wakeup(&pdev->dev, false);
}
static void msdc_save_reg(struct msdc_host *host)
diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index 4d402b601883..b2f5c3f8b839 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -1525,7 +1525,6 @@ static const struct sdhci_pltfm_data sdhci_tegra186_pdata = {
.quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
SDHCI_QUIRK_SINGLE_POWER_WRITE |
SDHCI_QUIRK_NO_HISPD_BIT |
- SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC |
SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
.quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
SDHCI_QUIRK2_ISSUE_CMD_DAT_RESET_TOGETHER,
diff --git a/drivers/mtd/nand/raw/arasan-nand-controller.c b/drivers/mtd/nand/raw/arasan-nand-controller.c
index db42aa0c7b6b..865754737f5f 100644
--- a/drivers/mtd/nand/raw/arasan-nand-controller.c
+++ b/drivers/mtd/nand/raw/arasan-nand-controller.c
@@ -1409,8 +1409,8 @@ static int anfc_parse_cs(struct arasan_nfc *nfc)
* case, the "not" chosen CS is assigned to nfc->spare_cs and selected
* whenever a GPIO CS must be asserted.
*/
- if (nfc->cs_array && nfc->ncs > 2) {
- if (!nfc->cs_array[0] && !nfc->cs_array[1]) {
+ if (nfc->cs_array) {
+ if (nfc->ncs > 2 && !nfc->cs_array[0] && !nfc->cs_array[1]) {
dev_err(nfc->dev,
"Assign a single native CS when using GPIOs\n");
return -EINVAL;
@@ -1478,8 +1478,15 @@ static int anfc_probe(struct platform_device *pdev)
static void anfc_remove(struct platform_device *pdev)
{
+ int i;
struct arasan_nfc *nfc = platform_get_drvdata(pdev);
+ for (i = 0; i < nfc->ncs; i++) {
+ if (nfc->cs_array[i]) {
+ gpiod_put(nfc->cs_array[i]);
+ }
+ }
+
anfc_chips_cleanup(nfc);
}
diff --git a/drivers/mtd/nand/raw/atmel/pmecc.c b/drivers/mtd/nand/raw/atmel/pmecc.c
index a22aab4ed4e8..3c7dee1be21d 100644
--- a/drivers/mtd/nand/raw/atmel/pmecc.c
+++ b/drivers/mtd/nand/raw/atmel/pmecc.c
@@ -380,10 +380,8 @@ atmel_pmecc_create_user(struct atmel_pmecc *pmecc,
user->delta = user->dmu + req->ecc.strength + 1;
gf_tables = atmel_pmecc_get_gf_tables(req);
- if (IS_ERR(gf_tables)) {
- kfree(user);
+ if (IS_ERR(gf_tables))
return ERR_CAST(gf_tables);
- }
user->gf_tables = gf_tables;
diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c
index 8db7fc424571..70d6c2250f32 100644
--- a/drivers/mtd/nand/raw/diskonchip.c
+++ b/drivers/mtd/nand/raw/diskonchip.c
@@ -1098,7 +1098,7 @@ static inline int __init inftl_partscan(struct mtd_info *mtd, struct mtd_partiti
(i == 0) && (ip->firstUnit > 0)) {
parts[0].name = " DiskOnChip IPL / Media Header partition";
parts[0].offset = 0;
- parts[0].size = mtd->erasesize * ip->firstUnit;
+ parts[0].size = (uint64_t)mtd->erasesize * ip->firstUnit;
numparts = 1;
}
diff --git a/drivers/mtd/nand/raw/omap2.c b/drivers/mtd/nand/raw/omap2.c
index d9141f3c0dd1..b8af3a3533fc 100644
--- a/drivers/mtd/nand/raw/omap2.c
+++ b/drivers/mtd/nand/raw/omap2.c
@@ -254,6 +254,10 @@ static int omap_prefetch_reset(int cs, struct omap_nand_info *info)
/**
* omap_nand_data_in_pref - NAND data in using prefetch engine
+ * @chip: NAND chip
+ * @buf: output buffer where NAND data is placed into
+ * @len: length of transfer
+ * @force_8bit: force 8-bit transfers
*/
static void omap_nand_data_in_pref(struct nand_chip *chip, void *buf,
unsigned int len, bool force_8bit)
@@ -297,6 +301,10 @@ static void omap_nand_data_in_pref(struct nand_chip *chip, void *buf,
/**
* omap_nand_data_out_pref - NAND data out using Write Posting engine
+ * @chip: NAND chip
+ * @buf: input buffer that is sent to NAND
+ * @len: length of transfer
+ * @force_8bit: force 8-bit transfers
*/
static void omap_nand_data_out_pref(struct nand_chip *chip,
const void *buf, unsigned int len,
@@ -440,6 +448,10 @@ out_copy:
/**
* omap_nand_data_in_dma_pref - NAND data in using DMA and Prefetch
+ * @chip: NAND chip
+ * @buf: output buffer where NAND data is placed into
+ * @len: length of transfer
+ * @force_8bit: force 8-bit transfers
*/
static void omap_nand_data_in_dma_pref(struct nand_chip *chip, void *buf,
unsigned int len, bool force_8bit)
@@ -460,6 +472,10 @@ static void omap_nand_data_in_dma_pref(struct nand_chip *chip, void *buf,
/**
* omap_nand_data_out_dma_pref - NAND data out using DMA and write posting
+ * @chip: NAND chip
+ * @buf: input buffer that is sent to NAND
+ * @len: length of transfer
+ * @force_8bit: force 8-bit transfers
*/
static void omap_nand_data_out_dma_pref(struct nand_chip *chip,
const void *buf, unsigned int len,
diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index 533bcb77c9f9..97cd8bbf2e32 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -1220,20 +1220,32 @@ static void m_can_coalescing_update(struct m_can_classdev *cdev, u32 ir)
static int m_can_interrupt_handler(struct m_can_classdev *cdev)
{
struct net_device *dev = cdev->net;
- u32 ir;
+ u32 ir = 0, ir_read;
int ret;
if (pm_runtime_suspended(cdev->dev))
return IRQ_NONE;
- ir = m_can_read(cdev, M_CAN_IR);
+ /* The m_can controller signals its interrupt status as a level, but
+ * depending in the integration the CPU may interpret the signal as
+ * edge-triggered (for example with m_can_pci). For these
+ * edge-triggered integrations, we must observe that IR is 0 at least
+ * once to be sure that the next interrupt will generate an edge.
+ */
+ while ((ir_read = m_can_read(cdev, M_CAN_IR)) != 0) {
+ ir |= ir_read;
+
+ /* ACK all irqs */
+ m_can_write(cdev, M_CAN_IR, ir);
+
+ if (!cdev->irq_edge_triggered)
+ break;
+ }
+
m_can_coalescing_update(cdev, ir);
if (!ir)
return IRQ_NONE;
- /* ACK all irqs */
- m_can_write(cdev, M_CAN_IR, ir);
-
if (cdev->ops->clear_interrupts)
cdev->ops->clear_interrupts(cdev);
@@ -1695,6 +1707,14 @@ static int m_can_dev_setup(struct m_can_classdev *cdev)
return -EINVAL;
}
+ /* Write the INIT bit, in case no hardware reset has happened before
+ * the probe (for example, it was observed that the Intel Elkhart Lake
+ * SoCs do not properly reset the CAN controllers on reboot)
+ */
+ err = m_can_cccr_update_bits(cdev, CCCR_INIT, CCCR_INIT);
+ if (err)
+ return err;
+
if (!cdev->is_peripheral)
netif_napi_add(dev, &cdev->napi, m_can_poll);
@@ -1746,11 +1766,7 @@ static int m_can_dev_setup(struct m_can_classdev *cdev)
return -EINVAL;
}
- /* Forcing standby mode should be redundant, as the chip should be in
- * standby after a reset. Write the INIT bit anyways, should the chip
- * be configured by previous stage.
- */
- return m_can_cccr_update_bits(cdev, CCCR_INIT, CCCR_INIT);
+ return 0;
}
static void m_can_stop(struct net_device *dev)
diff --git a/drivers/net/can/m_can/m_can.h b/drivers/net/can/m_can/m_can.h
index 92b2bd8628e6..ef39e8e527ab 100644
--- a/drivers/net/can/m_can/m_can.h
+++ b/drivers/net/can/m_can/m_can.h
@@ -99,6 +99,7 @@ struct m_can_classdev {
int pm_clock_support;
int pm_wake_source;
int is_peripheral;
+ bool irq_edge_triggered;
// Cached M_CAN_IE register content
u32 active_interrupts;
diff --git a/drivers/net/can/m_can/m_can_pci.c b/drivers/net/can/m_can/m_can_pci.c
index d72fe771dfc7..9ad7419f88f8 100644
--- a/drivers/net/can/m_can/m_can_pci.c
+++ b/drivers/net/can/m_can/m_can_pci.c
@@ -127,6 +127,7 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
mcan_class->pm_clock_support = 1;
mcan_class->pm_wake_source = 0;
mcan_class->can.clock.freq = id->driver_data;
+ mcan_class->irq_edge_triggered = true;
mcan_class->ops = &m_can_pci_ops;
pci_set_drvdata(pci, mcan_class);
diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c
index ecce23cecbea..4e266ce41180 100644
--- a/drivers/net/ethernet/broadcom/bgmac-platform.c
+++ b/drivers/net/ethernet/broadcom/bgmac-platform.c
@@ -171,6 +171,7 @@ static int platform_phy_connect(struct bgmac *bgmac)
static int bgmac_probe(struct platform_device *pdev)
{
struct device_node *np = pdev->dev.of_node;
+ struct device_node *phy_node;
struct bgmac *bgmac;
struct resource *regs;
int ret;
@@ -236,7 +237,9 @@ static int bgmac_probe(struct platform_device *pdev)
bgmac->cco_ctl_maskset = platform_bgmac_cco_ctl_maskset;
bgmac->get_bus_clock = platform_bgmac_get_bus_clock;
bgmac->cmn_maskset32 = platform_bgmac_cmn_maskset32;
- if (of_parse_phandle(np, "phy-handle", 0)) {
+ phy_node = of_parse_phandle(np, "phy-handle", 0);
+ if (phy_node) {
+ of_node_put(phy_node);
bgmac->phy_connect = platform_phy_connect;
} else {
bgmac->phy_connect = bgmac_phy_connect_direct;
diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c
index 96fd31d75dfd..daa1ebaef511 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c
@@ -346,8 +346,9 @@ static struct sk_buff *copy_gl_to_skb_pkt(const struct pkt_gl *gl,
* driver. Once driver synthesizes cpl_pass_accept_req the skb will go
* through the regular cpl_pass_accept_req processing in TOM.
*/
- skb = alloc_skb(gl->tot_len + sizeof(struct cpl_pass_accept_req)
- - pktshift, GFP_ATOMIC);
+ skb = alloc_skb(size_add(gl->tot_len,
+ sizeof(struct cpl_pass_accept_req)) -
+ pktshift, GFP_ATOMIC);
if (unlikely(!skb))
return NULL;
__skb_put(skb, gl->tot_len + sizeof(struct cpl_pass_accept_req)
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
index 890f213da8d1..ae1f523d6841 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -172,6 +172,7 @@ err_init_txq:
hinic_sq_dbgfs_uninit(nic_dev);
devm_kfree(&netdev->dev, nic_dev->txqs);
+ nic_dev->txqs = NULL;
return err;
}
@@ -268,6 +269,7 @@ err_init_rxq:
hinic_rq_dbgfs_uninit(nic_dev);
devm_kfree(&netdev->dev, nic_dev->rxqs);
+ nic_dev->rxqs = NULL;
return err;
}
diff --git a/drivers/net/ethernet/intel/idpf/idpf_dev.c b/drivers/net/ethernet/intel/idpf/idpf_dev.c
index 6c913a703df6..41e4bd49402a 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_dev.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_dev.c
@@ -101,6 +101,9 @@ static int idpf_intr_reg_init(struct idpf_vport *vport)
intr->dyn_ctl_itridx_s = PF_GLINT_DYN_CTL_ITR_INDX_S;
intr->dyn_ctl_intrvl_s = PF_GLINT_DYN_CTL_INTERVAL_S;
intr->dyn_ctl_wb_on_itr_m = PF_GLINT_DYN_CTL_WB_ON_ITR_M;
+ intr->dyn_ctl_swint_trig_m = PF_GLINT_DYN_CTL_SWINT_TRIG_M;
+ intr->dyn_ctl_sw_itridx_ena_m =
+ PF_GLINT_DYN_CTL_SW_ITR_INDX_ENA_M;
spacing = IDPF_ITR_IDX_SPACING(reg_vals[vec_id].itrn_index_spacing,
IDPF_PF_ITR_IDX_SPACING);
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
index 34f4118c7bc0..2fa9c36e33c9 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -3604,21 +3604,31 @@ static void idpf_vport_intr_dis_irq_all(struct idpf_vport *vport)
/**
* idpf_vport_intr_buildreg_itr - Enable default interrupt generation settings
* @q_vector: pointer to q_vector
- * @type: itr index
- * @itr: itr value
*/
-static u32 idpf_vport_intr_buildreg_itr(struct idpf_q_vector *q_vector,
- const int type, u16 itr)
+static u32 idpf_vport_intr_buildreg_itr(struct idpf_q_vector *q_vector)
{
- u32 itr_val;
+ u32 itr_val = q_vector->intr_reg.dyn_ctl_intena_m;
+ int type = IDPF_NO_ITR_UPDATE_IDX;
+ u16 itr = 0;
+
+ if (q_vector->wb_on_itr) {
+ /*
+ * Trigger a software interrupt when exiting wb_on_itr, to make
+ * sure we catch any pending write backs that might have been
+ * missed due to interrupt state transition.
+ */
+ itr_val |= q_vector->intr_reg.dyn_ctl_swint_trig_m |
+ q_vector->intr_reg.dyn_ctl_sw_itridx_ena_m;
+ type = IDPF_SW_ITR_UPDATE_IDX;
+ itr = IDPF_ITR_20K;
+ }
itr &= IDPF_ITR_MASK;
/* Don't clear PBA because that can cause lost interrupts that
* came in while we were cleaning/polling
*/
- itr_val = q_vector->intr_reg.dyn_ctl_intena_m |
- (type << q_vector->intr_reg.dyn_ctl_itridx_s) |
- (itr << (q_vector->intr_reg.dyn_ctl_intrvl_s - 1));
+ itr_val |= (type << q_vector->intr_reg.dyn_ctl_itridx_s) |
+ (itr << (q_vector->intr_reg.dyn_ctl_intrvl_s - 1));
return itr_val;
}
@@ -3716,9 +3726,8 @@ void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector)
/* net_dim() updates ITR out-of-band using a work item */
idpf_net_dim(q_vector);
+ intval = idpf_vport_intr_buildreg_itr(q_vector);
q_vector->wb_on_itr = false;
- intval = idpf_vport_intr_buildreg_itr(q_vector,
- IDPF_NO_ITR_UPDATE_IDX, 0);
writel(intval, q_vector->intr_reg.dyn_ctl);
}
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
index 9c1fe84108ed..0f71a6f5557b 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
@@ -354,6 +354,8 @@ struct idpf_vec_regs {
* @dyn_ctl_itridx_m: Mask for ITR index
* @dyn_ctl_intrvl_s: Register bit offset for ITR interval
* @dyn_ctl_wb_on_itr_m: Mask for WB on ITR feature
+ * @dyn_ctl_sw_itridx_ena_m: Mask for SW ITR index
+ * @dyn_ctl_swint_trig_m: Mask for dyn_ctl SW triggered interrupt enable
* @rx_itr: RX ITR register
* @tx_itr: TX ITR register
* @icr_ena: Interrupt cause register offset
@@ -367,6 +369,8 @@ struct idpf_intr_reg {
u32 dyn_ctl_itridx_m;
u32 dyn_ctl_intrvl_s;
u32 dyn_ctl_wb_on_itr_m;
+ u32 dyn_ctl_sw_itridx_ena_m;
+ u32 dyn_ctl_swint_trig_m;
void __iomem *rx_itr;
void __iomem *tx_itr;
void __iomem *icr_ena;
@@ -437,7 +441,7 @@ struct idpf_q_vector {
cpumask_var_t affinity_mask;
__cacheline_group_end_aligned(cold);
};
-libeth_cacheline_set_assert(struct idpf_q_vector, 112,
+libeth_cacheline_set_assert(struct idpf_q_vector, 120,
24 + sizeof(struct napi_struct) +
2 * sizeof(struct dim),
8 + sizeof(cpumask_var_t));
@@ -471,6 +475,8 @@ struct idpf_tx_queue_stats {
#define IDPF_ITR_IS_DYNAMIC(itr_mode) (itr_mode)
#define IDPF_ITR_TX_DEF IDPF_ITR_20K
#define IDPF_ITR_RX_DEF IDPF_ITR_20K
+/* Index used for 'SW ITR' update in DYN_CTL register */
+#define IDPF_SW_ITR_UPDATE_IDX 2
/* Index used for 'No ITR' update in DYN_CTL register */
#define IDPF_NO_ITR_UPDATE_IDX 3
#define IDPF_ITR_IDX_SPACING(spacing, dflt) (spacing ? spacing : dflt)
diff --git a/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c b/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c
index aad62e270ae4..aba828abcb17 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c
@@ -101,6 +101,9 @@ static int idpf_vf_intr_reg_init(struct idpf_vport *vport)
intr->dyn_ctl_itridx_s = VF_INT_DYN_CTLN_ITR_INDX_S;
intr->dyn_ctl_intrvl_s = VF_INT_DYN_CTLN_INTERVAL_S;
intr->dyn_ctl_wb_on_itr_m = VF_INT_DYN_CTLN_WB_ON_ITR_M;
+ intr->dyn_ctl_swint_trig_m = VF_INT_DYN_CTLN_SWINT_TRIG_M;
+ intr->dyn_ctl_sw_itridx_ena_m =
+ VF_INT_DYN_CTLN_SW_ITR_INDX_ENA_M;
spacing = IDPF_ITR_IDX_SPACING(reg_vals[vec_id].itrn_index_spacing,
IDPF_VF_ITR_IDX_SPACING);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/rep.c b/drivers/net/ethernet/marvell/octeontx2/nic/rep.c
index 232b10740c13..04e08e06f30f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/rep.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/rep.c
@@ -680,14 +680,17 @@ int rvu_rep_create(struct otx2_nic *priv, struct netlink_ext_ack *extack)
ndev->features |= ndev->hw_features;
eth_hw_addr_random(ndev);
err = rvu_rep_devlink_port_register(rep);
- if (err)
+ if (err) {
+ free_netdev(ndev);
goto exit;
+ }
SET_NETDEV_DEVLINK_PORT(ndev, &rep->dl_port);
err = register_netdev(ndev);
if (err) {
NL_SET_ERR_MSG_MOD(extack,
"PFVF representor registration failed");
+ rvu_rep_devlink_port_unregister(rep);
free_netdev(ndev);
goto exit;
}
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 3d72aa7b1305..ef93df520887 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -1432,7 +1432,7 @@ void ocelot_ifh_set_basic(void *ifh, struct ocelot *ocelot, int port,
memset(ifh, 0, OCELOT_TAG_LEN);
ocelot_ifh_set_bypass(ifh, 1);
- ocelot_ifh_set_src(ifh, BIT_ULL(ocelot->num_phys_ports));
+ ocelot_ifh_set_src(ifh, ocelot->num_phys_ports);
ocelot_ifh_set_dest(ifh, BIT_ULL(port));
ocelot_ifh_set_qos_class(ifh, qos_class);
ocelot_ifh_set_tag_type(ifh, tag_type);
diff --git a/drivers/net/ethernet/oa_tc6.c b/drivers/net/ethernet/oa_tc6.c
index f9c0dcd965c2..db200e4ec284 100644
--- a/drivers/net/ethernet/oa_tc6.c
+++ b/drivers/net/ethernet/oa_tc6.c
@@ -113,6 +113,7 @@ struct oa_tc6 {
struct mii_bus *mdiobus;
struct spi_device *spi;
struct mutex spi_ctrl_lock; /* Protects spi control transfer */
+ spinlock_t tx_skb_lock; /* Protects tx skb handling */
void *spi_ctrl_tx_buf;
void *spi_ctrl_rx_buf;
void *spi_data_tx_buf;
@@ -1004,8 +1005,10 @@ static u16 oa_tc6_prepare_spi_tx_buf_for_tx_skbs(struct oa_tc6 *tc6)
for (used_tx_credits = 0; used_tx_credits < tc6->tx_credits;
used_tx_credits++) {
if (!tc6->ongoing_tx_skb) {
+ spin_lock_bh(&tc6->tx_skb_lock);
tc6->ongoing_tx_skb = tc6->waiting_tx_skb;
tc6->waiting_tx_skb = NULL;
+ spin_unlock_bh(&tc6->tx_skb_lock);
}
if (!tc6->ongoing_tx_skb)
break;
@@ -1111,8 +1114,9 @@ static int oa_tc6_spi_thread_handler(void *data)
/* This kthread will be waken up if there is a tx skb or mac-phy
* interrupt to perform spi transfer with tx chunks.
*/
- wait_event_interruptible(tc6->spi_wq, tc6->waiting_tx_skb ||
- tc6->int_flag ||
+ wait_event_interruptible(tc6->spi_wq, tc6->int_flag ||
+ (tc6->waiting_tx_skb &&
+ tc6->tx_credits) ||
kthread_should_stop());
if (kthread_should_stop())
@@ -1209,7 +1213,9 @@ netdev_tx_t oa_tc6_start_xmit(struct oa_tc6 *tc6, struct sk_buff *skb)
return NETDEV_TX_OK;
}
+ spin_lock_bh(&tc6->tx_skb_lock);
tc6->waiting_tx_skb = skb;
+ spin_unlock_bh(&tc6->tx_skb_lock);
/* Wake spi kthread to perform spi transfer */
wake_up_interruptible(&tc6->spi_wq);
@@ -1239,6 +1245,7 @@ struct oa_tc6 *oa_tc6_init(struct spi_device *spi, struct net_device *netdev)
tc6->netdev = netdev;
SET_NETDEV_DEV(netdev, &spi->dev);
mutex_init(&tc6->spi_ctrl_lock);
+ spin_lock_init(&tc6->tx_skb_lock);
/* Set the SPI controller to pump at realtime priority */
tc6->spi->rt = true;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
index 9e42d599840d..57edcde9e6f8 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
@@ -277,7 +277,10 @@ void ionic_dev_teardown(struct ionic *ionic)
idev->phy_cmb_pages = 0;
idev->cmb_npages = 0;
- destroy_workqueue(ionic->wq);
+ if (ionic->wq) {
+ destroy_workqueue(ionic->wq);
+ ionic->wq = NULL;
+ }
mutex_destroy(&idev->cmb_inuse_lock);
}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
index dda22fa4448c..9b7f78b6cdb1 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
@@ -961,8 +961,8 @@ static int ionic_get_module_eeprom(struct net_device *netdev,
len = min_t(u32, sizeof(xcvr->sprom), ee->len);
do {
- memcpy(data, xcvr->sprom, len);
- memcpy(tbuf, xcvr->sprom, len);
+ memcpy(data, &xcvr->sprom[ee->offset], len);
+ memcpy(tbuf, &xcvr->sprom[ee->offset], len);
/* Let's make sure we got a consistent copy */
if (!memcmp(data, tbuf, len))
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 40496587b2b3..3d3f936779f7 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -3869,8 +3869,8 @@ int ionic_lif_register(struct ionic_lif *lif)
/* only register LIF0 for now */
err = register_netdev(lif->netdev);
if (err) {
- dev_err(lif->ionic->dev, "Cannot register net device, aborting\n");
- ionic_lif_unregister_phc(lif);
+ dev_err(lif->ionic->dev, "Cannot register net device: %d, aborting\n", err);
+ ionic_lif_unregister(lif);
return err;
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index b45efc272fdb..c7f497c36f66 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -3358,6 +3358,7 @@ int qed_mcp_nvm_info_populate(struct qed_hwfn *p_hwfn)
p_ptt, &nvm_info.num_images);
if (rc == -EOPNOTSUPP) {
DP_INFO(p_hwfn, "DRV_MSG_CODE_BIST_TEST is not supported\n");
+ nvm_info.num_images = 0;
goto out;
} else if (rc || !nvm_info.num_images) {
DP_ERR(p_hwfn, "Failed getting number of images\n");
diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c
index dbbbf024e7ab..9ac6e2aad18f 100644
--- a/drivers/net/ethernet/renesas/rswitch.c
+++ b/drivers/net/ethernet/renesas/rswitch.c
@@ -547,7 +547,6 @@ static int rswitch_gwca_ts_queue_alloc(struct rswitch_private *priv)
desc = &gq->ts_ring[gq->ring_size];
desc->desc.die_dt = DT_LINKFIX;
rswitch_desc_set_dptr(&desc->desc, gq->ring_dma);
- INIT_LIST_HEAD(&priv->gwca.ts_info_list);
return 0;
}
@@ -1003,9 +1002,10 @@ static int rswitch_gwca_request_irqs(struct rswitch_private *priv)
static void rswitch_ts(struct rswitch_private *priv)
{
struct rswitch_gwca_queue *gq = &priv->gwca.ts_queue;
- struct rswitch_gwca_ts_info *ts_info, *ts_info2;
struct skb_shared_hwtstamps shhwtstamps;
struct rswitch_ts_desc *desc;
+ struct rswitch_device *rdev;
+ struct sk_buff *ts_skb;
struct timespec64 ts;
unsigned int num;
u32 tag, port;
@@ -1015,23 +1015,28 @@ static void rswitch_ts(struct rswitch_private *priv)
dma_rmb();
port = TS_DESC_DPN(__le32_to_cpu(desc->desc.dptrl));
- tag = TS_DESC_TSUN(__le32_to_cpu(desc->desc.dptrl));
-
- list_for_each_entry_safe(ts_info, ts_info2, &priv->gwca.ts_info_list, list) {
- if (!(ts_info->port == port && ts_info->tag == tag))
- continue;
-
- memset(&shhwtstamps, 0, sizeof(shhwtstamps));
- ts.tv_sec = __le32_to_cpu(desc->ts_sec);
- ts.tv_nsec = __le32_to_cpu(desc->ts_nsec & cpu_to_le32(0x3fffffff));
- shhwtstamps.hwtstamp = timespec64_to_ktime(ts);
- skb_tstamp_tx(ts_info->skb, &shhwtstamps);
- dev_consume_skb_irq(ts_info->skb);
- list_del(&ts_info->list);
- kfree(ts_info);
- break;
- }
+ if (unlikely(port >= RSWITCH_NUM_PORTS))
+ goto next;
+ rdev = priv->rdev[port];
+ tag = TS_DESC_TSUN(__le32_to_cpu(desc->desc.dptrl));
+ if (unlikely(tag >= TS_TAGS_PER_PORT))
+ goto next;
+ ts_skb = xchg(&rdev->ts_skb[tag], NULL);
+ smp_mb(); /* order rdev->ts_skb[] read before bitmap update */
+ clear_bit(tag, rdev->ts_skb_used);
+
+ if (unlikely(!ts_skb))
+ goto next;
+
+ memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+ ts.tv_sec = __le32_to_cpu(desc->ts_sec);
+ ts.tv_nsec = __le32_to_cpu(desc->ts_nsec & cpu_to_le32(0x3fffffff));
+ shhwtstamps.hwtstamp = timespec64_to_ktime(ts);
+ skb_tstamp_tx(ts_skb, &shhwtstamps);
+ dev_consume_skb_irq(ts_skb);
+
+next:
gq->cur = rswitch_next_queue_index(gq, true, 1);
desc = &gq->ts_ring[gq->cur];
}
@@ -1576,8 +1581,9 @@ static int rswitch_open(struct net_device *ndev)
static int rswitch_stop(struct net_device *ndev)
{
struct rswitch_device *rdev = netdev_priv(ndev);
- struct rswitch_gwca_ts_info *ts_info, *ts_info2;
+ struct sk_buff *ts_skb;
unsigned long flags;
+ unsigned int tag;
netif_tx_stop_all_queues(ndev);
@@ -1594,12 +1600,13 @@ static int rswitch_stop(struct net_device *ndev)
if (bitmap_empty(rdev->priv->opened_ports, RSWITCH_NUM_PORTS))
iowrite32(GWCA_TS_IRQ_BIT, rdev->priv->addr + GWTSDID);
- list_for_each_entry_safe(ts_info, ts_info2, &rdev->priv->gwca.ts_info_list, list) {
- if (ts_info->port != rdev->port)
- continue;
- dev_kfree_skb_irq(ts_info->skb);
- list_del(&ts_info->list);
- kfree(ts_info);
+ for (tag = find_first_bit(rdev->ts_skb_used, TS_TAGS_PER_PORT);
+ tag < TS_TAGS_PER_PORT;
+ tag = find_next_bit(rdev->ts_skb_used, TS_TAGS_PER_PORT, tag + 1)) {
+ ts_skb = xchg(&rdev->ts_skb[tag], NULL);
+ clear_bit(tag, rdev->ts_skb_used);
+ if (ts_skb)
+ dev_kfree_skb(ts_skb);
}
return 0;
@@ -1612,20 +1619,17 @@ static bool rswitch_ext_desc_set_info1(struct rswitch_device *rdev,
desc->info1 = cpu_to_le64(INFO1_DV(BIT(rdev->etha->index)) |
INFO1_IPV(GWCA_IPV_NUM) | INFO1_FMT);
if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
- struct rswitch_gwca_ts_info *ts_info;
+ unsigned int tag;
- ts_info = kzalloc(sizeof(*ts_info), GFP_ATOMIC);
- if (!ts_info)
+ tag = find_first_zero_bit(rdev->ts_skb_used, TS_TAGS_PER_PORT);
+ if (tag == TS_TAGS_PER_PORT)
return false;
+ smp_mb(); /* order bitmap read before rdev->ts_skb[] write */
+ rdev->ts_skb[tag] = skb_get(skb);
+ set_bit(tag, rdev->ts_skb_used);
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
- rdev->ts_tag++;
- desc->info1 |= cpu_to_le64(INFO1_TSUN(rdev->ts_tag) | INFO1_TXC);
-
- ts_info->skb = skb_get(skb);
- ts_info->port = rdev->port;
- ts_info->tag = rdev->ts_tag;
- list_add_tail(&ts_info->list, &rdev->priv->gwca.ts_info_list);
+ desc->info1 |= cpu_to_le64(INFO1_TSUN(tag) | INFO1_TXC);
skb_tx_timestamp(skb);
}
diff --git a/drivers/net/ethernet/renesas/rswitch.h b/drivers/net/ethernet/renesas/rswitch.h
index e020800dcc57..d8d4ed7d7f8b 100644
--- a/drivers/net/ethernet/renesas/rswitch.h
+++ b/drivers/net/ethernet/renesas/rswitch.h
@@ -972,14 +972,6 @@ struct rswitch_gwca_queue {
};
};
-struct rswitch_gwca_ts_info {
- struct sk_buff *skb;
- struct list_head list;
-
- int port;
- u8 tag;
-};
-
#define RSWITCH_NUM_IRQ_REGS (RSWITCH_MAX_NUM_QUEUES / BITS_PER_TYPE(u32))
struct rswitch_gwca {
unsigned int index;
@@ -989,7 +981,6 @@ struct rswitch_gwca {
struct rswitch_gwca_queue *queues;
int num_queues;
struct rswitch_gwca_queue ts_queue;
- struct list_head ts_info_list;
DECLARE_BITMAP(used, RSWITCH_MAX_NUM_QUEUES);
u32 tx_irq_bits[RSWITCH_NUM_IRQ_REGS];
u32 rx_irq_bits[RSWITCH_NUM_IRQ_REGS];
@@ -997,6 +988,7 @@ struct rswitch_gwca {
};
#define NUM_QUEUES_PER_NDEV 2
+#define TS_TAGS_PER_PORT 256
struct rswitch_device {
struct rswitch_private *priv;
struct net_device *ndev;
@@ -1004,7 +996,8 @@ struct rswitch_device {
void __iomem *addr;
struct rswitch_gwca_queue *tx_queue;
struct rswitch_gwca_queue *rx_queue;
- u8 ts_tag;
+ struct sk_buff *ts_skb[TS_TAGS_PER_PORT];
+ DECLARE_BITMAP(ts_skb_used, TS_TAGS_PER_PORT);
bool disabled;
int port;
diff --git a/drivers/net/mdio/fwnode_mdio.c b/drivers/net/mdio/fwnode_mdio.c
index b156493d7084..aea0f0357568 100644
--- a/drivers/net/mdio/fwnode_mdio.c
+++ b/drivers/net/mdio/fwnode_mdio.c
@@ -40,6 +40,7 @@ fwnode_find_pse_control(struct fwnode_handle *fwnode)
static struct mii_timestamper *
fwnode_find_mii_timestamper(struct fwnode_handle *fwnode)
{
+ struct mii_timestamper *mii_ts;
struct of_phandle_args arg;
int err;
@@ -53,10 +54,16 @@ fwnode_find_mii_timestamper(struct fwnode_handle *fwnode)
else if (err)
return ERR_PTR(err);
- if (arg.args_count != 1)
- return ERR_PTR(-EINVAL);
+ if (arg.args_count != 1) {
+ mii_ts = ERR_PTR(-EINVAL);
+ goto put_node;
+ }
+
+ mii_ts = register_mii_timestamper(arg.np, arg.args[0]);
- return register_mii_timestamper(arg.np, arg.args[0]);
+put_node:
+ of_node_put(arg.np);
+ return mii_ts;
}
int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio,
diff --git a/drivers/net/netdevsim/health.c b/drivers/net/netdevsim/health.c
index 70e8bdf34be9..688f05316b5e 100644
--- a/drivers/net/netdevsim/health.c
+++ b/drivers/net/netdevsim/health.c
@@ -149,6 +149,8 @@ static ssize_t nsim_dev_health_break_write(struct file *file,
char *break_msg;
int err;
+ if (count == 0 || count > PAGE_SIZE)
+ return -EINVAL;
break_msg = memdup_user_nul(data, count);
if (IS_ERR(break_msg))
return PTR_ERR(break_msg);
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index 0be47fed4efc..e068a9761c09 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -635,10 +635,10 @@ nsim_pp_hold_write(struct file *file, const char __user *data,
page_pool_put_full_page(ns->page->pp, ns->page, false);
ns->page = NULL;
}
- rtnl_unlock();
exit:
- return count;
+ rtnl_unlock();
+ return ret;
}
static const struct file_operations nsim_pp_hold_fops = {
diff --git a/drivers/net/phy/aquantia/aquantia_leds.c b/drivers/net/phy/aquantia/aquantia_leds.c
index 00ad2313fed3..951f46104eff 100644
--- a/drivers/net/phy/aquantia/aquantia_leds.c
+++ b/drivers/net/phy/aquantia/aquantia_leds.c
@@ -156,5 +156,5 @@ int aqr_phy_led_polarity_set(struct phy_device *phydev, int index, unsigned long
if (force_active_high || force_active_low)
return aqr_phy_led_active_low_set(phydev, index, force_active_low);
- unreachable();
+ return -EINVAL;
}
diff --git a/drivers/net/phy/intel-xway.c b/drivers/net/phy/intel-xway.c
index b672c55a7a4e..e6ed2413e514 100644
--- a/drivers/net/phy/intel-xway.c
+++ b/drivers/net/phy/intel-xway.c
@@ -529,7 +529,7 @@ static int xway_gphy_led_polarity_set(struct phy_device *phydev, int index,
if (force_active_high)
return phy_clear_bits(phydev, XWAY_MDIO_LED, XWAY_GPHY_LED_INV(index));
- unreachable();
+ return -EINVAL;
}
static struct phy_driver xway_gphy[] = {
diff --git a/drivers/net/phy/mxl-gpy.c b/drivers/net/phy/mxl-gpy.c
index db3c1f72b407..a8ccf257c109 100644
--- a/drivers/net/phy/mxl-gpy.c
+++ b/drivers/net/phy/mxl-gpy.c
@@ -1014,7 +1014,7 @@ static int gpy_led_polarity_set(struct phy_device *phydev, int index,
if (force_active_high)
return phy_clear_bits(phydev, PHY_LED, PHY_LED_POLARITY(index));
- unreachable();
+ return -EINVAL;
}
static struct phy_driver gpy_drivers[] = {
diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c
index 69ea2c3c76bf..c7690adec8db 100644
--- a/drivers/net/team/team_core.c
+++ b/drivers/net/team/team_core.c
@@ -998,9 +998,13 @@ static void __team_compute_features(struct team *team)
unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE |
IFF_XMIT_DST_RELEASE_PERM;
+ rcu_read_lock();
+ if (list_empty(&team->port_list))
+ goto done;
+
vlan_features = netdev_base_features(vlan_features);
+ enc_features = netdev_base_features(enc_features);
- rcu_read_lock();
list_for_each_entry_rcu(port, &team->port_list, list) {
vlan_features = netdev_increment_features(vlan_features,
port->dev->vlan_features,
@@ -1010,11 +1014,11 @@ static void __team_compute_features(struct team *team)
port->dev->hw_enc_features,
TEAM_ENC_FEATURES);
-
dst_release_flag &= port->dev->priv_flags;
if (port->dev->hard_header_len > max_hard_header_len)
max_hard_header_len = port->dev->hard_header_len;
}
+done:
rcu_read_unlock();
team->dev->vlan_features = vlan_features;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index d7a865ef370b..e816aaba8e5f 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1481,7 +1481,7 @@ static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile,
skb->truesize += skb->data_len;
for (i = 1; i < it->nr_segs; i++) {
- const struct iovec *iov = iter_iov(it);
+ const struct iovec *iov = iter_iov(it) + i;
size_t fragsz = iov->iov_len;
struct page *page;
void *frag;
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 9fe7f704a2f7..e9208a8d2bfa 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1429,6 +1429,7 @@ static const struct usb_device_id products[] = {
{QMI_QUIRK_SET_DTR(0x2c7c, 0x0195, 4)}, /* Quectel EG95 */
{QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */
{QMI_QUIRK_SET_DTR(0x2c7c, 0x030e, 4)}, /* Quectel EM05GV2 */
+ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0316, 3)}, /* Quectel RG255C */
{QMI_QUIRK_SET_DTR(0x2cb7, 0x0104, 4)}, /* Fibocom NL678 series */
{QMI_QUIRK_SET_DTR(0x2cb7, 0x0112, 0)}, /* Fibocom FG132 */
{QMI_FIXED_INTF(0x0489, 0xe0b4, 0)}, /* Foxconn T77W968 LTE */
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 4265c1cd0ff7..63fe51d0e64d 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -867,7 +867,7 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev
static int xennet_close(struct net_device *dev)
{
struct netfront_info *np = netdev_priv(dev);
- unsigned int num_queues = dev->real_num_tx_queues;
+ unsigned int num_queues = np->queues ? dev->real_num_tx_queues : 0;
unsigned int i;
struct netfront_queue *queue;
netif_tx_stop_all_queues(np->netdev);
@@ -882,6 +882,9 @@ static void xennet_destroy_queues(struct netfront_info *info)
{
unsigned int i;
+ if (!info->queues)
+ return;
+
for (i = 0; i < info->netdev->real_num_tx_queues; i++) {
struct netfront_queue *queue = &info->queues[i];
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index d169a30eb935..a970168a3014 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2034,7 +2034,7 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id,
* or smaller than a sector size yet, so catch this early and don't
* allow block I/O.
*/
- if (head->lba_shift > PAGE_SHIFT || head->lba_shift < SECTOR_SHIFT) {
+ if (blk_validate_block_size(bs)) {
bs = (1 << 9);
valid = false;
}
diff --git a/drivers/of/address.c b/drivers/of/address.c
index c5b925ac469f..c1f1c810e810 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -459,7 +459,8 @@ static int of_translate_one(const struct device_node *parent, const struct of_bu
}
if (ranges == NULL || rlen == 0) {
offset = of_read_number(addr, na);
- memset(addr, 0, pna * 4);
+ /* set address to zero, pass flags through */
+ memset(addr + pbus->flag_cells, 0, (pna - pbus->flag_cells) * 4);
pr_debug("empty ranges; 1:1 translation\n");
goto finish;
}
@@ -619,7 +620,7 @@ struct device_node *__of_get_dma_parent(const struct device_node *np)
if (ret < 0)
return of_get_parent(np);
- return of_node_get(args.np);
+ return args.np;
}
#endif
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 7dc394255a0a..6f5abea2462a 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -88,7 +88,8 @@ static bool __of_node_is_type(const struct device_node *np, const char *type)
}
#define EXCLUDED_DEFAULT_CELLS_PLATFORMS ( \
- IS_ENABLED(CONFIG_SPARC) \
+ IS_ENABLED(CONFIG_SPARC) || \
+ of_find_compatible_node(NULL, NULL, "coreboot") \
)
int of_bus_n_addr_cells(struct device_node *np)
@@ -1507,8 +1508,10 @@ int of_parse_phandle_with_args_map(const struct device_node *np,
map_len--;
/* Check if not found */
- if (!new)
+ if (!new) {
+ ret = -EINVAL;
goto put;
+ }
if (!of_device_is_available(new))
match = 0;
@@ -1518,17 +1521,20 @@ int of_parse_phandle_with_args_map(const struct device_node *np,
goto put;
/* Check for malformed properties */
- if (WARN_ON(new_size > MAX_PHANDLE_ARGS))
- goto put;
- if (map_len < new_size)
+ if (WARN_ON(new_size > MAX_PHANDLE_ARGS) ||
+ map_len < new_size) {
+ ret = -EINVAL;
goto put;
+ }
/* Move forward by new node's #<list>-cells amount */
map += new_size;
map_len -= new_size;
}
- if (!match)
+ if (!match) {
+ ret = -ENOENT;
goto put;
+ }
/* Get the <list>-map-pass-thru property (optional) */
pass = of_get_property(cur, pass_name, NULL);
diff --git a/drivers/of/empty_root.dts b/drivers/of/empty_root.dts
index cf9e97a60f48..cbe169ba3db5 100644
--- a/drivers/of/empty_root.dts
+++ b/drivers/of/empty_root.dts
@@ -2,5 +2,12 @@
/dts-v1/;
/ {
-
+ /*
+ * #address-cells/#size-cells are required properties at root node.
+ * Use 2 cells for both address cells and size cells in order to fully
+ * support 64-bit addresses and sizes on systems using this empty root
+ * node.
+ */
+ #address-cells = <0x02>;
+ #size-cells = <0x02>;
};
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 67fc0ceaa5f5..98b1cf78ecac 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -111,6 +111,7 @@ const __be32 *of_irq_parse_imap_parent(const __be32 *imap, int len, struct of_ph
else
np = of_find_node_by_phandle(be32_to_cpup(imap));
imap++;
+ len--;
/* Check if not found */
if (!np) {
@@ -354,6 +355,7 @@ int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_ar
return of_irq_parse_oldworld(device, index, out_irq);
/* Get the reg property (if any) */
+ addr_len = 0;
addr = of_get_property(device, "reg", &addr_len);
/* Prevent out-of-bounds read in case of longer interrupt parent address size */
diff --git a/drivers/of/property.c b/drivers/of/property.c
index 519bf9229e61..cfc8aea002e4 100644
--- a/drivers/of/property.c
+++ b/drivers/of/property.c
@@ -1286,7 +1286,6 @@ DEFINE_SIMPLE_PROP(iommus, "iommus", "#iommu-cells")
DEFINE_SIMPLE_PROP(mboxes, "mboxes", "#mbox-cells")
DEFINE_SIMPLE_PROP(io_channels, "io-channels", "#io-channel-cells")
DEFINE_SIMPLE_PROP(io_backends, "io-backends", "#io-backend-cells")
-DEFINE_SIMPLE_PROP(interrupt_parent, "interrupt-parent", NULL)
DEFINE_SIMPLE_PROP(dmas, "dmas", "#dma-cells")
DEFINE_SIMPLE_PROP(power_domains, "power-domains", "#power-domain-cells")
DEFINE_SIMPLE_PROP(hwlocks, "hwlocks", "#hwlock-cells")
@@ -1432,7 +1431,6 @@ static const struct supplier_bindings of_supplier_bindings[] = {
{ .parse_prop = parse_mboxes, },
{ .parse_prop = parse_io_channels, },
{ .parse_prop = parse_io_backends, },
- { .parse_prop = parse_interrupt_parent, },
{ .parse_prop = parse_dmas, .optional = true, },
{ .parse_prop = parse_power_domains, },
{ .parse_prop = parse_hwlocks, },
diff --git a/drivers/of/unittest-data/tests-address.dtsi b/drivers/of/unittest-data/tests-address.dtsi
index 3344f15c3755..f02a181bb125 100644
--- a/drivers/of/unittest-data/tests-address.dtsi
+++ b/drivers/of/unittest-data/tests-address.dtsi
@@ -114,6 +114,7 @@
device_type = "pci";
ranges = <0x82000000 0 0xe8000000 0 0xe8000000 0 0x7f00000>,
<0x81000000 0 0x00000000 0 0xefff0000 0 0x0010000>;
+ dma-ranges = <0x43000000 0x10 0x00 0x00 0x00 0x00 0x10000000>;
reg = <0x00000000 0xd1070000 0x20000>;
pci@0,0 {
@@ -142,6 +143,7 @@
#size-cells = <0x01>;
ranges = <0xa0000000 0 0 0 0x2000000>,
<0xb0000000 1 0 0 0x1000000>;
+ dma-ranges = <0xc0000000 0x43000000 0x10 0x00 0x10000000>;
dev@e0000000 {
reg = <0xa0001000 0x1000>,
diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index 42b411f32b1b..438fd70fa995 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -1213,6 +1213,44 @@ static void __init of_unittest_pci_dma_ranges(void)
of_node_put(np);
}
+static void __init of_unittest_pci_empty_dma_ranges(void)
+{
+ struct device_node *np;
+ struct of_pci_range range;
+ struct of_pci_range_parser parser;
+
+ if (!IS_ENABLED(CONFIG_PCI))
+ return;
+
+ np = of_find_node_by_path("/testcase-data/address-tests2/pcie@d1070000/pci@0,0/dev@0,0/local-bus@0");
+ if (!np) {
+ pr_err("missing testcase data\n");
+ return;
+ }
+
+ if (of_pci_dma_range_parser_init(&parser, np)) {
+ pr_err("missing dma-ranges property\n");
+ return;
+ }
+
+ /*
+ * Get the dma-ranges from the device tree
+ */
+ for_each_of_pci_range(&parser, &range) {
+ unittest(range.size == 0x10000000,
+ "for_each_of_pci_range wrong size on node %pOF size=%llx\n",
+ np, range.size);
+ unittest(range.cpu_addr == 0x00000000,
+ "for_each_of_pci_range wrong CPU addr (%llx) on node %pOF",
+ range.cpu_addr, np);
+ unittest(range.pci_addr == 0xc0000000,
+ "for_each_of_pci_range wrong DMA addr (%llx) on node %pOF",
+ range.pci_addr, np);
+ }
+
+ of_node_put(np);
+}
+
static void __init of_unittest_bus_ranges(void)
{
struct device_node *np;
@@ -4272,6 +4310,7 @@ static int __init of_unittest(void)
of_unittest_dma_get_max_cpu_address();
of_unittest_parse_dma_ranges();
of_unittest_pci_dma_ranges();
+ of_unittest_pci_empty_dma_ranges();
of_unittest_bus_ranges();
of_unittest_bus_3cell_ranges();
of_unittest_reg();
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 0b29ec6e8e5e..661f98c6c63a 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -6232,12 +6232,14 @@ u8 pcie_get_supported_speeds(struct pci_dev *dev)
pcie_capability_read_dword(dev, PCI_EXP_LNKCAP2, &lnkcap2);
speeds = lnkcap2 & PCI_EXP_LNKCAP2_SLS;
+ /* Ignore speeds higher than Max Link Speed */
+ pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &lnkcap);
+ speeds &= GENMASK(lnkcap & PCI_EXP_LNKCAP_SLS, 0);
+
/* PCIe r3.0-compliant */
if (speeds)
return speeds;
- pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &lnkcap);
-
/* Synthesize from the Max Link Speed field */
if ((lnkcap & PCI_EXP_LNKCAP_SLS) == PCI_EXP_LNKCAP_SLS_5_0GB)
speeds = PCI_EXP_LNKCAP2_SLS_5_0GB | PCI_EXP_LNKCAP2_SLS_2_5GB;
diff --git a/drivers/pci/pcie/portdrv.c b/drivers/pci/pcie/portdrv.c
index 5e10306b6308..02e73099bad0 100644
--- a/drivers/pci/pcie/portdrv.c
+++ b/drivers/pci/pcie/portdrv.c
@@ -265,12 +265,14 @@ static int get_port_device_capability(struct pci_dev *dev)
(pcie_ports_dpc_native || (services & PCIE_PORT_SERVICE_AER)))
services |= PCIE_PORT_SERVICE_DPC;
+ /* Enable bandwidth control if more than one speed is supported. */
if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM ||
pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) {
u32 linkcap;
pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &linkcap);
- if (linkcap & PCI_EXP_LNKCAP_LBNC)
+ if (linkcap & PCI_EXP_LNKCAP_LBNC &&
+ hweight8(dev->supported_speeds) > 1)
services |= PCIE_PORT_SERVICE_BWCTRL;
}
diff --git a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c
index 950b7ae1d1a8..dc452610934a 100644
--- a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c
+++ b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c
@@ -325,6 +325,12 @@ static void usb_init_common_7216(struct brcm_usb_init_params *params)
void __iomem *ctrl = params->regs[BRCM_REGS_CTRL];
USB_CTRL_UNSET(ctrl, USB_PM, XHC_S2_CLK_SWITCH_EN);
+
+ /*
+ * The PHY might be in a bad state if it is already powered
+ * up. Toggle the power just in case.
+ */
+ USB_CTRL_SET(ctrl, USB_PM, USB_PWRDN);
USB_CTRL_UNSET(ctrl, USB_PM, USB_PWRDN);
/* 1 millisecond - for USB clocks to settle down */
diff --git a/drivers/phy/freescale/phy-fsl-samsung-hdmi.c b/drivers/phy/freescale/phy-fsl-samsung-hdmi.c
index 2c8038864357..d3ccf547ba1c 100644
--- a/drivers/phy/freescale/phy-fsl-samsung-hdmi.c
+++ b/drivers/phy/freescale/phy-fsl-samsung-hdmi.c
@@ -424,8 +424,7 @@ static unsigned long fsl_samsung_hdmi_phy_find_pms(unsigned long fout, u8 *p, u1
* Fvco = (M * f_ref) / P,
* where f_ref is 24MHz.
*/
- tmp = (u64)_m * 24 * MHZ;
- do_div(tmp, _p);
+ tmp = div64_ul((u64)_m * 24 * MHZ, _p);
if (tmp < 750 * MHZ ||
tmp > 3000 * MHZ)
continue;
diff --git a/drivers/phy/mediatek/Kconfig b/drivers/phy/mediatek/Kconfig
index 60e00057e8bc..ba6461350951 100644
--- a/drivers/phy/mediatek/Kconfig
+++ b/drivers/phy/mediatek/Kconfig
@@ -65,6 +65,7 @@ config PHY_MTK_HDMI
depends on ARCH_MEDIATEK || COMPILE_TEST
depends on COMMON_CLK
depends on OF
+ depends on REGULATOR
select GENERIC_PHY
help
Support HDMI PHY for Mediatek SoCs.
diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c
index f053b525ccff..413f76e2d174 100644
--- a/drivers/phy/phy-core.c
+++ b/drivers/phy/phy-core.c
@@ -145,8 +145,10 @@ static struct phy_provider *of_phy_provider_lookup(struct device_node *node)
return phy_provider;
for_each_child_of_node(phy_provider->children, child)
- if (child == node)
+ if (child == node) {
+ of_node_put(child);
return phy_provider;
+ }
}
return ERR_PTR(-EPROBE_DEFER);
@@ -629,8 +631,10 @@ static struct phy *_of_phy_get(struct device_node *np, int index)
return ERR_PTR(-ENODEV);
/* This phy type handled by the usb-phy subsystem for now */
- if (of_device_is_compatible(args.np, "usb-nop-xceiv"))
- return ERR_PTR(-ENODEV);
+ if (of_device_is_compatible(args.np, "usb-nop-xceiv")) {
+ phy = ERR_PTR(-ENODEV);
+ goto out_put_node;
+ }
mutex_lock(&phy_provider_mutex);
phy_provider = of_phy_provider_lookup(args.np);
@@ -652,6 +656,7 @@ out_put_module:
out_unlock:
mutex_unlock(&phy_provider_mutex);
+out_put_node:
of_node_put(args.np);
return phy;
@@ -737,7 +742,7 @@ void devm_phy_put(struct device *dev, struct phy *phy)
if (!phy)
return;
- r = devres_destroy(dev, devm_phy_release, devm_phy_match, phy);
+ r = devres_release(dev, devm_phy_release, devm_phy_match, phy);
dev_WARN_ONCE(dev, r, "couldn't find PHY resource\n");
}
EXPORT_SYMBOL_GPL(devm_phy_put);
@@ -1121,7 +1126,7 @@ void devm_phy_destroy(struct device *dev, struct phy *phy)
{
int r;
- r = devres_destroy(dev, devm_phy_consume, devm_phy_match, phy);
+ r = devres_release(dev, devm_phy_consume, devm_phy_match, phy);
dev_WARN_ONCE(dev, r, "couldn't find PHY resource\n");
}
EXPORT_SYMBOL_GPL(devm_phy_destroy);
@@ -1259,12 +1264,12 @@ EXPORT_SYMBOL_GPL(of_phy_provider_unregister);
* of_phy_provider_unregister to unregister the phy provider.
*/
void devm_of_phy_provider_unregister(struct device *dev,
- struct phy_provider *phy_provider)
+ struct phy_provider *phy_provider)
{
int r;
- r = devres_destroy(dev, devm_phy_provider_release, devm_phy_match,
- phy_provider);
+ r = devres_release(dev, devm_phy_provider_release, devm_phy_match,
+ phy_provider);
dev_WARN_ONCE(dev, r, "couldn't find PHY provider device resource\n");
}
EXPORT_SYMBOL_GPL(devm_of_phy_provider_unregister);
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
index acd6075bf6d9..c9c337840715 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -1052,7 +1052,7 @@ static const struct qmp_phy_init_tbl sc8280xp_usb3_uniphy_rx_tbl[] = {
QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_FASTLOCK_FO_GAIN, 0x2f),
QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_FASTLOCK_COUNT_LOW, 0xff),
QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_FASTLOCK_COUNT_HIGH, 0x0f),
- QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SO_GAIN, 0x0a),
+ QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_FO_GAIN, 0x0a),
QMP_PHY_INIT_CFG(QSERDES_V5_RX_VGA_CAL_CNTRL1, 0x54),
QMP_PHY_INIT_CFG(QSERDES_V5_RX_VGA_CAL_CNTRL2, 0x0f),
QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL2, 0x0f),
diff --git a/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c b/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c
index 0a9989e41237..2eb3329ca23f 100644
--- a/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c
+++ b/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c
@@ -309,7 +309,7 @@ static int rockchip_combphy_parse_dt(struct device *dev, struct rockchip_combphy
priv->ext_refclk = device_property_present(dev, "rockchip,ext-refclk");
- priv->phy_rst = devm_reset_control_array_get_exclusive(dev);
+ priv->phy_rst = devm_reset_control_get(dev, "phy");
if (IS_ERR(priv->phy_rst))
return dev_err_probe(dev, PTR_ERR(priv->phy_rst), "failed to get phy reset\n");
diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c
index ceab9c71d3b5..0965b9d4f9cf 100644
--- a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c
+++ b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c
@@ -1101,6 +1101,8 @@ static int rk_hdptx_phy_probe(struct platform_device *pdev)
return dev_err_probe(dev, PTR_ERR(hdptx->grf),
"Could not get GRF syscon\n");
+ platform_set_drvdata(pdev, hdptx);
+
ret = devm_pm_runtime_enable(dev);
if (ret)
return dev_err_probe(dev, ret, "Failed to enable runtime PM\n");
@@ -1110,7 +1112,6 @@ static int rk_hdptx_phy_probe(struct platform_device *pdev)
return dev_err_probe(dev, PTR_ERR(hdptx->phy),
"Failed to create HDMI PHY\n");
- platform_set_drvdata(pdev, hdptx);
phy_set_drvdata(hdptx->phy, hdptx);
phy_set_bus_width(hdptx->phy, 8);
diff --git a/drivers/phy/st/phy-stm32-combophy.c b/drivers/phy/st/phy-stm32-combophy.c
index 765bb34fe358..49e9fa90a681 100644
--- a/drivers/phy/st/phy-stm32-combophy.c
+++ b/drivers/phy/st/phy-stm32-combophy.c
@@ -122,6 +122,7 @@ static int stm32_impedance_tune(struct stm32_combophy *combophy)
u32 max_vswing = imp_lookup[imp_size - 1].vswing[vswing_size - 1];
u32 min_vswing = imp_lookup[0].vswing[0];
u32 val;
+ u32 regval;
if (!of_property_read_u32(combophy->dev->of_node, "st,output-micro-ohms", &val)) {
if (val < min_imp || val > max_imp) {
@@ -129,16 +130,20 @@ static int stm32_impedance_tune(struct stm32_combophy *combophy)
return -EINVAL;
}
- for (imp_of = 0; imp_of < ARRAY_SIZE(imp_lookup); imp_of++)
- if (imp_lookup[imp_of].microohm <= val)
+ regval = 0;
+ for (imp_of = 0; imp_of < ARRAY_SIZE(imp_lookup); imp_of++) {
+ if (imp_lookup[imp_of].microohm <= val) {
+ regval = FIELD_PREP(STM32MP25_PCIEPRG_IMPCTRL_OHM, imp_of);
break;
+ }
+ }
dev_dbg(combophy->dev, "Set %u micro-ohms output impedance\n",
imp_lookup[imp_of].microohm);
regmap_update_bits(combophy->regmap, SYSCFG_PCIEPRGCR,
STM32MP25_PCIEPRG_IMPCTRL_OHM,
- FIELD_PREP(STM32MP25_PCIEPRG_IMPCTRL_OHM, imp_of));
+ regval);
} else {
regmap_read(combophy->regmap, SYSCFG_PCIEPRGCR, &val);
imp_of = FIELD_GET(STM32MP25_PCIEPRG_IMPCTRL_OHM, val);
@@ -150,16 +155,20 @@ static int stm32_impedance_tune(struct stm32_combophy *combophy)
return -EINVAL;
}
- for (vswing_of = 0; vswing_of < ARRAY_SIZE(imp_lookup[imp_of].vswing); vswing_of++)
- if (imp_lookup[imp_of].vswing[vswing_of] >= val)
+ regval = 0;
+ for (vswing_of = 0; vswing_of < ARRAY_SIZE(imp_lookup[imp_of].vswing); vswing_of++) {
+ if (imp_lookup[imp_of].vswing[vswing_of] >= val) {
+ regval = FIELD_PREP(STM32MP25_PCIEPRG_IMPCTRL_VSWING, vswing_of);
break;
+ }
+ }
dev_dbg(combophy->dev, "Set %u microvolt swing\n",
imp_lookup[imp_of].vswing[vswing_of]);
regmap_update_bits(combophy->regmap, SYSCFG_PCIEPRGCR,
STM32MP25_PCIEPRG_IMPCTRL_VSWING,
- FIELD_PREP(STM32MP25_PCIEPRG_IMPCTRL_VSWING, vswing_of));
+ regval);
}
return 0;
diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
index 924bf4d3cc77..8470b7f2b135 100644
--- a/drivers/platform/chrome/cros_ec_lpc.c
+++ b/drivers/platform/chrome/cros_ec_lpc.c
@@ -707,7 +707,7 @@ static const struct dmi_system_id cros_ec_lpc_dmi_table[] __initconst = {
/* Framework Laptop (12th Gen Intel Core) */
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Framework"),
- DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "12th Gen Intel Core"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Laptop (12th Gen Intel Core)"),
},
.driver_data = (void *)&framework_laptop_mec_lpc_driver_data,
},
@@ -715,7 +715,7 @@ static const struct dmi_system_id cros_ec_lpc_dmi_table[] __initconst = {
/* Framework Laptop (13th Gen Intel Core) */
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Framework"),
- DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "13th Gen Intel Core"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Laptop (13th Gen Intel Core)"),
},
.driver_data = (void *)&framework_laptop_mec_lpc_driver_data,
},
diff --git a/drivers/platform/loongarch/Kconfig b/drivers/platform/loongarch/Kconfig
index 5633e4d73991..447528797d07 100644
--- a/drivers/platform/loongarch/Kconfig
+++ b/drivers/platform/loongarch/Kconfig
@@ -18,7 +18,7 @@ if LOONGARCH_PLATFORM_DEVICES
config LOONGSON_LAPTOP
tristate "Generic Loongson-3 Laptop Driver"
- depends on ACPI
+ depends on ACPI_EC
depends on BACKLIGHT_CLASS_DEVICE
depends on INPUT
depends on MACH_LOONGSON64
diff --git a/drivers/platform/x86/dell/alienware-wmi.c b/drivers/platform/x86/dell/alienware-wmi.c
index 77465ed9b449..341d01d3e3e4 100644
--- a/drivers/platform/x86/dell/alienware-wmi.c
+++ b/drivers/platform/x86/dell/alienware-wmi.c
@@ -190,7 +190,7 @@ static struct quirk_entry quirk_asm201 = {
};
static struct quirk_entry quirk_g_series = {
- .num_zones = 2,
+ .num_zones = 0,
.hdmi_mux = 0,
.amplifier = 0,
.deepslp = 0,
@@ -199,7 +199,7 @@ static struct quirk_entry quirk_g_series = {
};
static struct quirk_entry quirk_x_series = {
- .num_zones = 2,
+ .num_zones = 0,
.hdmi_mux = 0,
.amplifier = 0,
.deepslp = 0,
@@ -243,6 +243,15 @@ static const struct dmi_system_id alienware_quirks[] __initconst = {
},
{
.callback = dmi_matched,
+ .ident = "Alienware m16 R1 AMD",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Alienware"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m16 R1 AMD"),
+ },
+ .driver_data = &quirk_x_series,
+ },
+ {
+ .callback = dmi_matched,
.ident = "Alienware m17 R5",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Alienware"),
@@ -687,6 +696,9 @@ static void alienware_zone_exit(struct platform_device *dev)
{
u8 zone;
+ if (!quirks->num_zones)
+ return;
+
sysfs_remove_group(&dev->dev.kobj, &zone_attribute_group);
led_classdev_unregister(&global_led);
if (zone_dev_attrs) {
@@ -1229,9 +1241,11 @@ static int __init alienware_wmi_init(void)
goto fail_prep_thermal_profile;
}
- ret = alienware_zone_init(platform_device);
- if (ret)
- goto fail_prep_zones;
+ if (quirks->num_zones > 0) {
+ ret = alienware_zone_init(platform_device);
+ if (ret)
+ goto fail_prep_zones;
+ }
return 0;
diff --git a/drivers/platform/x86/intel/ifs/core.c b/drivers/platform/x86/intel/ifs/core.c
index bc252b883210..1ae50702bdb7 100644
--- a/drivers/platform/x86/intel/ifs/core.c
+++ b/drivers/platform/x86/intel/ifs/core.c
@@ -20,6 +20,7 @@ static const struct x86_cpu_id ifs_cpu_ids[] __initconst = {
X86_MATCH(INTEL_GRANITERAPIDS_X, ARRAY_GEN0),
X86_MATCH(INTEL_GRANITERAPIDS_D, ARRAY_GEN0),
X86_MATCH(INTEL_ATOM_CRESTMONT_X, ARRAY_GEN1),
+ X86_MATCH(INTEL_ATOM_DARKMONT_X, ARRAY_GEN1),
{}
};
MODULE_DEVICE_TABLE(x86cpu, ifs_cpu_ids);
diff --git a/drivers/platform/x86/intel/vsec.c b/drivers/platform/x86/intel/vsec.c
index 4a85aad2475a..8272f1dd0fbc 100644
--- a/drivers/platform/x86/intel/vsec.c
+++ b/drivers/platform/x86/intel/vsec.c
@@ -423,6 +423,7 @@ static const struct intel_vsec_platform_info lnl_info = {
#define PCI_DEVICE_ID_INTEL_VSEC_RPL 0xa77d
#define PCI_DEVICE_ID_INTEL_VSEC_TGL 0x9a0d
#define PCI_DEVICE_ID_INTEL_VSEC_LNL_M 0x647d
+#define PCI_DEVICE_ID_INTEL_VSEC_PTL 0xb07d
static const struct pci_device_id intel_vsec_pci_ids[] = {
{ PCI_DEVICE_DATA(INTEL, VSEC_ADL, &tgl_info) },
{ PCI_DEVICE_DATA(INTEL, VSEC_DG1, &dg1_info) },
@@ -432,6 +433,7 @@ static const struct pci_device_id intel_vsec_pci_ids[] = {
{ PCI_DEVICE_DATA(INTEL, VSEC_RPL, &tgl_info) },
{ PCI_DEVICE_DATA(INTEL, VSEC_TGL, &tgl_info) },
{ PCI_DEVICE_DATA(INTEL, VSEC_LNL_M, &lnl_info) },
+ { PCI_DEVICE_DATA(INTEL, VSEC_PTL, &mtl_info) },
{ }
};
MODULE_DEVICE_TABLE(pci, intel_vsec_pci_ids);
diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c
index d51eb0db0626..cbbb0f809704 100644
--- a/drivers/platform/x86/p2sb.c
+++ b/drivers/platform/x86/p2sb.c
@@ -43,6 +43,7 @@ struct p2sb_res_cache {
};
static struct p2sb_res_cache p2sb_resources[NR_P2SB_RES_CACHE];
+static bool p2sb_hidden_by_bios;
static void p2sb_get_devfn(unsigned int *devfn)
{
@@ -97,6 +98,12 @@ static void p2sb_scan_and_cache_devfn(struct pci_bus *bus, unsigned int devfn)
static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn)
{
+ /*
+ * The BIOS prevents the P2SB device from being enumerated by the PCI
+ * subsystem, so we need to unhide and hide it back to lookup the BAR.
+ */
+ pci_bus_write_config_dword(bus, devfn, P2SBC, 0);
+
/* Scan the P2SB device and cache its BAR0 */
p2sb_scan_and_cache_devfn(bus, devfn);
@@ -104,6 +111,8 @@ static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn)
if (devfn == P2SB_DEVFN_GOLDMONT)
p2sb_scan_and_cache_devfn(bus, SPI_DEVFN_GOLDMONT);
+ pci_bus_write_config_dword(bus, devfn, P2SBC, P2SBC_HIDE);
+
if (!p2sb_valid_resource(&p2sb_resources[PCI_FUNC(devfn)].res))
return -ENOENT;
@@ -129,7 +138,7 @@ static int p2sb_cache_resources(void)
u32 value = P2SBC_HIDE;
struct pci_bus *bus;
u16 class;
- int ret;
+ int ret = 0;
/* Get devfn for P2SB device itself */
p2sb_get_devfn(&devfn_p2sb);
@@ -152,22 +161,53 @@ static int p2sb_cache_resources(void)
*/
pci_lock_rescan_remove();
+ pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value);
+ p2sb_hidden_by_bios = value & P2SBC_HIDE;
+
/*
- * The BIOS prevents the P2SB device from being enumerated by the PCI
- * subsystem, so we need to unhide and hide it back to lookup the BAR.
- * Unhide the P2SB device here, if needed.
+ * If the BIOS does not hide the P2SB device then its resources
+ * are accesilble. Cache them only if the P2SB device is hidden.
*/
- pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value);
- if (value & P2SBC_HIDE)
- pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, 0);
+ if (p2sb_hidden_by_bios)
+ ret = p2sb_scan_and_cache(bus, devfn_p2sb);
- ret = p2sb_scan_and_cache(bus, devfn_p2sb);
+ pci_unlock_rescan_remove();
- /* Hide the P2SB device, if it was hidden */
- if (value & P2SBC_HIDE)
- pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, P2SBC_HIDE);
+ return ret;
+}
- pci_unlock_rescan_remove();
+static int p2sb_read_from_cache(struct pci_bus *bus, unsigned int devfn,
+ struct resource *mem)
+{
+ struct p2sb_res_cache *cache = &p2sb_resources[PCI_FUNC(devfn)];
+
+ if (cache->bus_dev_id != bus->dev.id)
+ return -ENODEV;
+
+ if (!p2sb_valid_resource(&cache->res))
+ return -ENOENT;
+
+ memcpy(mem, &cache->res, sizeof(*mem));
+
+ return 0;
+}
+
+static int p2sb_read_from_dev(struct pci_bus *bus, unsigned int devfn,
+ struct resource *mem)
+{
+ struct pci_dev *pdev;
+ int ret = 0;
+
+ pdev = pci_get_slot(bus, devfn);
+ if (!pdev)
+ return -ENODEV;
+
+ if (p2sb_valid_resource(pci_resource_n(pdev, 0)))
+ p2sb_read_bar0(pdev, mem);
+ else
+ ret = -ENOENT;
+
+ pci_dev_put(pdev);
return ret;
}
@@ -188,8 +228,6 @@ static int p2sb_cache_resources(void)
*/
int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem)
{
- struct p2sb_res_cache *cache;
-
bus = p2sb_get_bus(bus);
if (!bus)
return -ENODEV;
@@ -197,15 +235,10 @@ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem)
if (!devfn)
p2sb_get_devfn(&devfn);
- cache = &p2sb_resources[PCI_FUNC(devfn)];
- if (cache->bus_dev_id != bus->dev.id)
- return -ENODEV;
+ if (p2sb_hidden_by_bios)
+ return p2sb_read_from_cache(bus, devfn, mem);
- if (!p2sb_valid_resource(&cache->res))
- return -ENOENT;
-
- memcpy(mem, &cache->res, sizeof(*mem));
- return 0;
+ return p2sb_read_from_dev(bus, devfn, mem);
}
EXPORT_SYMBOL_GPL(p2sb_bar);
diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c
index 0a39f68c641d..bdc19cd8d3ed 100644
--- a/drivers/platform/x86/touchscreen_dmi.c
+++ b/drivers/platform/x86/touchscreen_dmi.c
@@ -855,6 +855,23 @@ static const struct ts_dmi_data rwc_nanote_next_data = {
.properties = rwc_nanote_next_props,
};
+static const struct property_entry sary_tab_3_props[] = {
+ PROPERTY_ENTRY_U32("touchscreen-size-x", 1730),
+ PROPERTY_ENTRY_U32("touchscreen-size-y", 1151),
+ PROPERTY_ENTRY_BOOL("touchscreen-inverted-x"),
+ PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
+ PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
+ PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-sary-tab-3.fw"),
+ PROPERTY_ENTRY_U32("silead,max-fingers", 10),
+ PROPERTY_ENTRY_BOOL("silead,home-button"),
+ { }
+};
+
+static const struct ts_dmi_data sary_tab_3_data = {
+ .acpi_name = "MSSL1680:00",
+ .properties = sary_tab_3_props,
+};
+
static const struct property_entry schneider_sct101ctm_props[] = {
PROPERTY_ENTRY_U32("touchscreen-size-x", 1715),
PROPERTY_ENTRY_U32("touchscreen-size-y", 1140),
@@ -1616,6 +1633,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
},
},
{
+ /* SARY Tab 3 */
+ .driver_data = (void *)&sary_tab_3_data,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "SARY"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "C210C"),
+ DMI_MATCH(DMI_PRODUCT_SKU, "TAB3"),
+ },
+ },
+ {
/* Schneider SCT101CTM */
.driver_data = (void *)&schneider_sct101ctm_data,
.matches = {
diff --git a/drivers/pwm/pwm-stm32.c b/drivers/pwm/pwm-stm32.c
index b889e64522c3..17e591f61efb 100644
--- a/drivers/pwm/pwm-stm32.c
+++ b/drivers/pwm/pwm-stm32.c
@@ -84,7 +84,7 @@ static int stm32_pwm_round_waveform_tohw(struct pwm_chip *chip,
wfhw->ccer = TIM_CCER_CCxE(ch + 1);
if (priv->have_complementary_output)
- wfhw->ccer = TIM_CCER_CCxNE(ch + 1);
+ wfhw->ccer |= TIM_CCER_CCxNE(ch + 1);
rate = clk_get_rate(priv->clk);
diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c
index 3d85762beda6..e5b4b93c07e3 100644
--- a/drivers/regulator/of_regulator.c
+++ b/drivers/regulator/of_regulator.c
@@ -175,7 +175,7 @@ static int of_get_regulation_constraints(struct device *dev,
if (!ret)
constraints->enable_time = pval;
- ret = of_property_read_u32(np, "regulator-uv-survival-time-ms", &pval);
+ ret = of_property_read_u32(np, "regulator-uv-less-critical-window-ms", &pval);
if (!ret)
constraints->uv_less_critical_window_ms = pval;
else
diff --git a/drivers/spi/spi-rockchip-sfc.c b/drivers/spi/spi-rockchip-sfc.c
index 69d0f2175568..70bbb459caa4 100644
--- a/drivers/spi/spi-rockchip-sfc.c
+++ b/drivers/spi/spi-rockchip-sfc.c
@@ -182,6 +182,7 @@ struct rockchip_sfc {
bool use_dma;
u32 max_iosize;
u16 version;
+ struct spi_controller *host;
};
static int rockchip_sfc_reset(struct rockchip_sfc *sfc)
@@ -574,6 +575,7 @@ static int rockchip_sfc_probe(struct platform_device *pdev)
sfc = spi_controller_get_devdata(host);
sfc->dev = dev;
+ sfc->host = host;
sfc->regbase = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(sfc->regbase))
@@ -651,8 +653,8 @@ err_hclk:
static void rockchip_sfc_remove(struct platform_device *pdev)
{
- struct spi_controller *host = platform_get_drvdata(pdev);
struct rockchip_sfc *sfc = platform_get_drvdata(pdev);
+ struct spi_controller *host = sfc->host;
spi_unregister_controller(host);
diff --git a/drivers/staging/fbtft/Kconfig b/drivers/staging/fbtft/Kconfig
index 77ab44362f16..dcf6a70455cc 100644
--- a/drivers/staging/fbtft/Kconfig
+++ b/drivers/staging/fbtft/Kconfig
@@ -3,6 +3,7 @@ menuconfig FB_TFT
tristate "Support for small TFT LCD display modules"
depends on FB && SPI
depends on FB_DEVICE
+ depends on BACKLIGHT_CLASS_DEVICE
depends on GPIOLIB || COMPILE_TEST
select FB_BACKLIGHT
select FB_SYSMEM_HELPERS_DEFERRED
diff --git a/drivers/staging/gpib/common/Makefile b/drivers/staging/gpib/common/Makefile
index 0c4c77bea75b..460586edb574 100644
--- a/drivers/staging/gpib/common/Makefile
+++ b/drivers/staging/gpib/common/Makefile
@@ -1,5 +1,5 @@
-obj-m += gpib_common.o
+obj-$(CONFIG_GPIB_COMMON) += gpib_common.o
gpib_common-objs := gpib_os.o iblib.o
diff --git a/drivers/staging/gpib/nec7210/Makefile b/drivers/staging/gpib/nec7210/Makefile
index 8d4d90f21109..64330f2e89d1 100644
--- a/drivers/staging/gpib/nec7210/Makefile
+++ b/drivers/staging/gpib/nec7210/Makefile
@@ -1,4 +1,4 @@
-obj-m += nec7210.o
+obj-$(CONFIG_GPIB_NEC7210) += nec7210.o
diff --git a/drivers/thermal/thermal_thresholds.c b/drivers/thermal/thermal_thresholds.c
index d9b2a0bb44fc..38f5fd0e8930 100644
--- a/drivers/thermal/thermal_thresholds.c
+++ b/drivers/thermal/thermal_thresholds.c
@@ -69,58 +69,60 @@ static struct user_threshold *__thermal_thresholds_find(const struct list_head *
return NULL;
}
-static bool __thermal_threshold_is_crossed(struct user_threshold *threshold, int temperature,
- int last_temperature, int direction,
- int *low, int *high)
+static bool thermal_thresholds_handle_raising(struct list_head *thresholds, int temperature,
+ int last_temperature)
{
+ struct user_threshold *t;
- if (temperature >= threshold->temperature) {
- if (threshold->temperature > *low &&
- THERMAL_THRESHOLD_WAY_DOWN & threshold->direction)
- *low = threshold->temperature;
+ list_for_each_entry(t, thresholds, list_node) {
- if (last_temperature < threshold->temperature &&
- threshold->direction & direction)
- return true;
- } else {
- if (threshold->temperature < *high && THERMAL_THRESHOLD_WAY_UP
- & threshold->direction)
- *high = threshold->temperature;
+ if (!(t->direction & THERMAL_THRESHOLD_WAY_UP))
+ continue;
- if (last_temperature >= threshold->temperature &&
- threshold->direction & direction)
+ if (temperature >= t->temperature &&
+ last_temperature < t->temperature)
return true;
}
return false;
}
-static bool thermal_thresholds_handle_raising(struct list_head *thresholds, int temperature,
- int last_temperature, int *low, int *high)
+static bool thermal_thresholds_handle_dropping(struct list_head *thresholds, int temperature,
+ int last_temperature)
{
struct user_threshold *t;
- list_for_each_entry(t, thresholds, list_node) {
- if (__thermal_threshold_is_crossed(t, temperature, last_temperature,
- THERMAL_THRESHOLD_WAY_UP, low, high))
+ list_for_each_entry_reverse(t, thresholds, list_node) {
+
+ if (!(t->direction & THERMAL_THRESHOLD_WAY_DOWN))
+ continue;
+
+ if (temperature <= t->temperature &&
+ last_temperature > t->temperature)
return true;
}
return false;
}
-static bool thermal_thresholds_handle_dropping(struct list_head *thresholds, int temperature,
- int last_temperature, int *low, int *high)
+static void thermal_threshold_find_boundaries(struct list_head *thresholds, int temperature,
+ int *low, int *high)
{
struct user_threshold *t;
- list_for_each_entry_reverse(t, thresholds, list_node) {
- if (__thermal_threshold_is_crossed(t, temperature, last_temperature,
- THERMAL_THRESHOLD_WAY_DOWN, low, high))
- return true;
+ list_for_each_entry(t, thresholds, list_node) {
+ if (temperature < t->temperature &&
+ (t->direction & THERMAL_THRESHOLD_WAY_UP) &&
+ *high > t->temperature)
+ *high = t->temperature;
}
- return false;
+ list_for_each_entry_reverse(t, thresholds, list_node) {
+ if (temperature > t->temperature &&
+ (t->direction & THERMAL_THRESHOLD_WAY_DOWN) &&
+ *low < t->temperature)
+ *low = t->temperature;
+ }
}
void thermal_thresholds_handle(struct thermal_zone_device *tz, int *low, int *high)
@@ -132,6 +134,8 @@ void thermal_thresholds_handle(struct thermal_zone_device *tz, int *low, int *hi
lockdep_assert_held(&tz->lock);
+ thermal_threshold_find_boundaries(thresholds, temperature, low, high);
+
/*
* We need a second update in order to detect a threshold being crossed
*/
@@ -151,12 +155,12 @@ void thermal_thresholds_handle(struct thermal_zone_device *tz, int *low, int *hi
* - decreased : thresholds are crossed the way down
*/
if (temperature > last_temperature) {
- if (thermal_thresholds_handle_raising(thresholds, temperature,
- last_temperature, low, high))
+ if (thermal_thresholds_handle_raising(thresholds,
+ temperature, last_temperature))
thermal_notify_threshold_up(tz);
} else {
- if (thermal_thresholds_handle_dropping(thresholds, temperature,
- last_temperature, low, high))
+ if (thermal_thresholds_handle_dropping(thresholds,
+ temperature, last_temperature))
thermal_notify_threshold_down(tz);
}
}
diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c
index 1257dd3ce7e6..f3a2264e012b 100644
--- a/drivers/thunderbolt/nhi.c
+++ b/drivers/thunderbolt/nhi.c
@@ -1520,6 +1520,14 @@ static struct pci_device_id nhi_ids[] = {
.driver_data = (kernel_ulong_t)&icl_nhi_ops },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_LNL_NHI1),
.driver_data = (kernel_ulong_t)&icl_nhi_ops },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_PTL_M_NHI0),
+ .driver_data = (kernel_ulong_t)&icl_nhi_ops },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_PTL_M_NHI1),
+ .driver_data = (kernel_ulong_t)&icl_nhi_ops },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_PTL_P_NHI0),
+ .driver_data = (kernel_ulong_t)&icl_nhi_ops },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_PTL_P_NHI1),
+ .driver_data = (kernel_ulong_t)&icl_nhi_ops },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_BARLOW_RIDGE_HOST_80G_NHI) },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_BARLOW_RIDGE_HOST_40G_NHI) },
diff --git a/drivers/thunderbolt/nhi.h b/drivers/thunderbolt/nhi.h
index 7a07c7c1a9c2..16744f25a9a0 100644
--- a/drivers/thunderbolt/nhi.h
+++ b/drivers/thunderbolt/nhi.h
@@ -92,6 +92,10 @@ extern const struct tb_nhi_ops icl_nhi_ops;
#define PCI_DEVICE_ID_INTEL_RPL_NHI1 0xa76d
#define PCI_DEVICE_ID_INTEL_LNL_NHI0 0xa833
#define PCI_DEVICE_ID_INTEL_LNL_NHI1 0xa834
+#define PCI_DEVICE_ID_INTEL_PTL_M_NHI0 0xe333
+#define PCI_DEVICE_ID_INTEL_PTL_M_NHI1 0xe334
+#define PCI_DEVICE_ID_INTEL_PTL_P_NHI0 0xe433
+#define PCI_DEVICE_ID_INTEL_PTL_P_NHI1 0xe434
#define PCI_CLASS_SERIAL_USB_USB4 0x0c0340
diff --git a/drivers/thunderbolt/retimer.c b/drivers/thunderbolt/retimer.c
index 89d2919d0193..eeb64433ebbc 100644
--- a/drivers/thunderbolt/retimer.c
+++ b/drivers/thunderbolt/retimer.c
@@ -103,6 +103,7 @@ static int tb_retimer_nvm_add(struct tb_retimer *rt)
err_nvm:
dev_dbg(&rt->dev, "NVM upgrade disabled\n");
+ rt->no_nvm_upgrade = true;
if (!IS_ERR(nvm))
tb_nvm_free(nvm);
@@ -182,8 +183,6 @@ static ssize_t nvm_authenticate_show(struct device *dev,
if (!rt->nvm)
ret = -EAGAIN;
- else if (rt->no_nvm_upgrade)
- ret = -EOPNOTSUPP;
else
ret = sysfs_emit(buf, "%#x\n", rt->auth_status);
@@ -323,8 +322,6 @@ static ssize_t nvm_version_show(struct device *dev,
if (!rt->nvm)
ret = -EAGAIN;
- else if (rt->no_nvm_upgrade)
- ret = -EOPNOTSUPP;
else
ret = sysfs_emit(buf, "%x.%x\n", rt->nvm->major, rt->nvm->minor);
@@ -342,6 +339,19 @@ static ssize_t vendor_show(struct device *dev, struct device_attribute *attr,
}
static DEVICE_ATTR_RO(vendor);
+static umode_t retimer_is_visible(struct kobject *kobj, struct attribute *attr,
+ int n)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct tb_retimer *rt = tb_to_retimer(dev);
+
+ if (attr == &dev_attr_nvm_authenticate.attr ||
+ attr == &dev_attr_nvm_version.attr)
+ return rt->no_nvm_upgrade ? 0 : attr->mode;
+
+ return attr->mode;
+}
+
static struct attribute *retimer_attrs[] = {
&dev_attr_device.attr,
&dev_attr_nvm_authenticate.attr,
@@ -351,6 +361,7 @@ static struct attribute *retimer_attrs[] = {
};
static const struct attribute_group retimer_group = {
+ .is_visible = retimer_is_visible,
.attrs = retimer_attrs,
};
diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c
index 4f777788e917..a7c6919fbf97 100644
--- a/drivers/thunderbolt/tb.c
+++ b/drivers/thunderbolt/tb.c
@@ -2059,6 +2059,37 @@ static void tb_exit_redrive(struct tb_port *port)
}
}
+static void tb_switch_enter_redrive(struct tb_switch *sw)
+{
+ struct tb_port *port;
+
+ tb_switch_for_each_port(sw, port)
+ tb_enter_redrive(port);
+}
+
+/*
+ * Called during system and runtime suspend to forcefully exit redrive
+ * mode without querying whether the resource is available.
+ */
+static void tb_switch_exit_redrive(struct tb_switch *sw)
+{
+ struct tb_port *port;
+
+ if (!(sw->quirks & QUIRK_KEEP_POWER_IN_DP_REDRIVE))
+ return;
+
+ tb_switch_for_each_port(sw, port) {
+ if (!tb_port_is_dpin(port))
+ continue;
+
+ if (port->redrive) {
+ port->redrive = false;
+ pm_runtime_put(&sw->dev);
+ tb_port_dbg(port, "exit redrive mode\n");
+ }
+ }
+}
+
static void tb_dp_resource_unavailable(struct tb *tb, struct tb_port *port)
{
struct tb_port *in, *out;
@@ -2909,6 +2940,7 @@ static int tb_start(struct tb *tb, bool reset)
tb_create_usb3_tunnels(tb->root_switch);
/* Add DP IN resources for the root switch */
tb_add_dp_resources(tb->root_switch);
+ tb_switch_enter_redrive(tb->root_switch);
/* Make the discovered switches available to the userspace */
device_for_each_child(&tb->root_switch->dev, NULL,
tb_scan_finalize_switch);
@@ -2924,6 +2956,7 @@ static int tb_suspend_noirq(struct tb *tb)
tb_dbg(tb, "suspending...\n");
tb_disconnect_and_release_dp(tb);
+ tb_switch_exit_redrive(tb->root_switch);
tb_switch_suspend(tb->root_switch, false);
tcm->hotplug_active = false; /* signal tb_handle_hotplug to quit */
tb_dbg(tb, "suspend finished\n");
@@ -3016,6 +3049,7 @@ static int tb_resume_noirq(struct tb *tb)
tb_dbg(tb, "tunnels restarted, sleeping for 100ms\n");
msleep(100);
}
+ tb_switch_enter_redrive(tb->root_switch);
/* Allow tb_handle_hotplug to progress events */
tcm->hotplug_active = true;
tb_dbg(tb, "resume finished\n");
@@ -3079,6 +3113,12 @@ static int tb_runtime_suspend(struct tb *tb)
struct tb_cm *tcm = tb_priv(tb);
mutex_lock(&tb->lock);
+ /*
+ * The below call only releases DP resources to allow exiting and
+ * re-entering redrive mode.
+ */
+ tb_disconnect_and_release_dp(tb);
+ tb_switch_exit_redrive(tb->root_switch);
tb_switch_suspend(tb->root_switch, true);
tcm->hotplug_active = false;
mutex_unlock(&tb->lock);
@@ -3110,6 +3150,7 @@ static int tb_runtime_resume(struct tb *tb)
tb_restore_children(tb->root_switch);
list_for_each_entry_safe(tunnel, n, &tcm->tunnel_list, list)
tb_tunnel_restart(tunnel);
+ tb_switch_enter_redrive(tb->root_switch);
tcm->hotplug_active = true;
mutex_unlock(&tb->lock);
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index 15db90c54a45..92703efda1f7 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -436,7 +436,7 @@ int xhci_ring_expansion(struct xhci_hcd *xhci, struct xhci_ring *ring,
goto free_segments;
}
- xhci_link_rings(xhci, ring, &new_ring);
+ xhci_link_rings(xhci, &new_ring, ring);
trace_xhci_ring_expansion(ring);
xhci_dbg_trace(xhci, trace_xhci_dbg_ring_expansion,
"ring expansion succeed, now has %d segments",
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 4cf5363875c7..09b05a62375e 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1199,8 +1199,6 @@ static void xhci_handle_cmd_stop_ep(struct xhci_hcd *xhci, int slot_id,
* Keep retrying until the EP starts and stops again, on
* chips where this is known to help. Wait for 100ms.
*/
- if (!(xhci->quirks & XHCI_NEC_HOST))
- break;
if (time_is_before_jiffies(ep->stop_time + msecs_to_jiffies(100)))
break;
fallthrough;
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 9ba5584061c8..64317b390d22 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -625,6 +625,8 @@ static void option_instat_callback(struct urb *urb);
#define MEIGSMART_PRODUCT_SRM825L 0x4d22
/* MeiG Smart SLM320 based on UNISOC UIS8910 */
#define MEIGSMART_PRODUCT_SLM320 0x4d41
+/* MeiG Smart SLM770A based on ASR1803 */
+#define MEIGSMART_PRODUCT_SLM770A 0x4d57
/* Device flags */
@@ -1395,6 +1397,12 @@ static const struct usb_device_id option_ids[] = {
.driver_info = RSVD(0) | NCTRL(2) | RSVD(3) | RSVD(4) },
{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10aa, 0xff), /* Telit FN920C04 (MBIM) */
.driver_info = NCTRL(3) | RSVD(4) | RSVD(5) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10c0, 0xff), /* Telit FE910C04 (rmnet) */
+ .driver_info = RSVD(0) | NCTRL(3) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10c4, 0xff), /* Telit FE910C04 (rmnet) */
+ .driver_info = RSVD(0) | NCTRL(3) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10c8, 0xff), /* Telit FE910C04 (rmnet) */
+ .driver_info = RSVD(0) | NCTRL(2) | RSVD(3) | RSVD(4) },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910),
.driver_info = NCTRL(0) | RSVD(1) | RSVD(3) },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM),
@@ -2247,6 +2255,8 @@ static const struct usb_device_id option_ids[] = {
.driver_info = NCTRL(2) },
{ USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, 0x7127, 0xff, 0x00, 0x00),
.driver_info = NCTRL(2) | NCTRL(3) | NCTRL(4) },
+ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, 0x7129, 0xff, 0x00, 0x00), /* MediaTek T7XX */
+ .driver_info = NCTRL(2) | NCTRL(3) | NCTRL(4) },
{ USB_DEVICE(CELLIENT_VENDOR_ID, CELLIENT_PRODUCT_MEN200) },
{ USB_DEVICE(CELLIENT_VENDOR_ID, CELLIENT_PRODUCT_MPL200),
.driver_info = RSVD(1) | RSVD(4) },
@@ -2375,6 +2385,18 @@ static const struct usb_device_id option_ids[] = {
{ USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0116, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WWD for Golbal EDU */
{ USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0116, 0xff, 0x00, 0x40) },
{ USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0116, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x010a, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WRD for WWAN Ready */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x010a, 0xff, 0x00, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x010a, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x010b, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WWD for WWAN Ready */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x010b, 0xff, 0x00, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x010b, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x010c, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WRD for WWAN Ready */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x010c, 0xff, 0x00, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x010c, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x010d, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WWD for WWAN Ready */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x010d, 0xff, 0x00, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x010d, 0xff, 0xff, 0x40) },
{ USB_DEVICE_AND_INTERFACE_INFO(OPPO_VENDOR_ID, OPPO_PRODUCT_R11, 0xff, 0xff, 0x30) },
{ USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x30) },
{ USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x40) },
@@ -2382,9 +2404,14 @@ static const struct usb_device_id option_ids[] = {
{ USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, TOZED_PRODUCT_LT70C, 0xff, 0, 0) },
{ USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, LUAT_PRODUCT_AIR720U, 0xff, 0, 0) },
{ USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SLM320, 0xff, 0, 0) },
+ { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SLM770A, 0xff, 0, 0) },
{ USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SRM825L, 0xff, 0xff, 0x30) },
{ USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SRM825L, 0xff, 0xff, 0x40) },
{ USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SRM825L, 0xff, 0xff, 0x60) },
+ { USB_DEVICE_INTERFACE_CLASS(0x1bbb, 0x0530, 0xff), /* TCL IK512 MBIM */
+ .driver_info = NCTRL(1) },
+ { USB_DEVICE_INTERFACE_CLASS(0x1bbb, 0x0640, 0xff), /* TCL IK512 ECM */
+ .driver_info = NCTRL(3) },
{ } /* Terminating entry */
};
MODULE_DEVICE_TABLE(usb, option_ids);
diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index de035071fedb..55c6686f091e 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -649,6 +649,7 @@ config FB_S1D13XXX
config FB_ATMEL
tristate "AT91 LCD Controller support"
depends on FB && OF && HAVE_CLK && HAS_IOMEM
+ depends on BACKLIGHT_CLASS_DEVICE
depends on HAVE_FB_ATMEL || COMPILE_TEST
select FB_BACKLIGHT
select FB_IOMEM_HELPERS
@@ -660,7 +661,6 @@ config FB_ATMEL
config FB_NVIDIA
tristate "nVidia Framebuffer Support"
depends on FB && PCI
- select FB_BACKLIGHT if FB_NVIDIA_BACKLIGHT
select FB_CFB_FILLRECT
select FB_CFB_COPYAREA
select FB_CFB_IMAGEBLIT
@@ -700,6 +700,8 @@ config FB_NVIDIA_DEBUG
config FB_NVIDIA_BACKLIGHT
bool "Support for backlight control"
depends on FB_NVIDIA
+ depends on BACKLIGHT_CLASS_DEVICE=y || BACKLIGHT_CLASS_DEVICE=FB_NVIDIA
+ select FB_BACKLIGHT
default y
help
Say Y here if you want to control the backlight of your display.
@@ -707,7 +709,6 @@ config FB_NVIDIA_BACKLIGHT
config FB_RIVA
tristate "nVidia Riva support"
depends on FB && PCI
- select FB_BACKLIGHT if FB_RIVA_BACKLIGHT
select FB_CFB_FILLRECT
select FB_CFB_COPYAREA
select FB_CFB_IMAGEBLIT
@@ -747,6 +748,8 @@ config FB_RIVA_DEBUG
config FB_RIVA_BACKLIGHT
bool "Support for backlight control"
depends on FB_RIVA
+ depends on BACKLIGHT_CLASS_DEVICE=y || BACKLIGHT_CLASS_DEVICE=FB_RIVA
+ select FB_BACKLIGHT
default y
help
Say Y here if you want to control the backlight of your display.
@@ -934,7 +937,6 @@ config FB_MATROX_MAVEN
config FB_RADEON
tristate "ATI Radeon display support"
depends on FB && PCI
- select FB_BACKLIGHT if FB_RADEON_BACKLIGHT
select FB_CFB_FILLRECT
select FB_CFB_COPYAREA
select FB_CFB_IMAGEBLIT
@@ -960,6 +962,8 @@ config FB_RADEON_I2C
config FB_RADEON_BACKLIGHT
bool "Support for backlight control"
depends on FB_RADEON
+ depends on BACKLIGHT_CLASS_DEVICE=y || BACKLIGHT_CLASS_DEVICE=FB_RADEON
+ select FB_BACKLIGHT
default y
help
Say Y here if you want to control the backlight of your display.
@@ -975,7 +979,6 @@ config FB_RADEON_DEBUG
config FB_ATY128
tristate "ATI Rage128 display support"
depends on FB && PCI
- select FB_BACKLIGHT if FB_ATY128_BACKLIGHT
select FB_IOMEM_HELPERS
select FB_MACMODES if PPC_PMAC
help
@@ -989,6 +992,8 @@ config FB_ATY128
config FB_ATY128_BACKLIGHT
bool "Support for backlight control"
depends on FB_ATY128
+ depends on BACKLIGHT_CLASS_DEVICE=y || BACKLIGHT_CLASS_DEVICE=FB_ATY128
+ select FB_BACKLIGHT
default y
help
Say Y here if you want to control the backlight of your display.
@@ -999,7 +1004,6 @@ config FB_ATY
select FB_CFB_FILLRECT
select FB_CFB_COPYAREA
select FB_CFB_IMAGEBLIT
- select FB_BACKLIGHT if FB_ATY_BACKLIGHT
select FB_IOMEM_FOPS
select FB_MACMODES if PPC
select FB_ATY_CT if SPARC64 && PCI
@@ -1040,6 +1044,8 @@ config FB_ATY_GX
config FB_ATY_BACKLIGHT
bool "Support for backlight control"
depends on FB_ATY
+ depends on BACKLIGHT_CLASS_DEVICE=y || BACKLIGHT_CLASS_DEVICE=FB_ATY
+ select FB_BACKLIGHT
default y
help
Say Y here if you want to control the backlight of your display.
@@ -1528,6 +1534,7 @@ config FB_SH_MOBILE_LCDC
depends on FB && HAVE_CLK && HAS_IOMEM
depends on SUPERH || COMPILE_TEST
depends on FB_DEVICE
+ depends on BACKLIGHT_CLASS_DEVICE
select FB_BACKLIGHT
select FB_DEFERRED_IO
select FB_DMAMEM_HELPERS
@@ -1793,6 +1800,7 @@ config FB_SSD1307
tristate "Solomon SSD1307 framebuffer support"
depends on FB && I2C
depends on GPIOLIB || COMPILE_TEST
+ depends on BACKLIGHT_CLASS_DEVICE
select FB_BACKLIGHT
select FB_SYSMEM_HELPERS_DEFERRED
help
diff --git a/drivers/video/fbdev/core/Kconfig b/drivers/video/fbdev/core/Kconfig
index 0ab8848ba2f1..d554d8c543d4 100644
--- a/drivers/video/fbdev/core/Kconfig
+++ b/drivers/video/fbdev/core/Kconfig
@@ -183,9 +183,8 @@ config FB_SYSMEM_HELPERS_DEFERRED
select FB_SYSMEM_HELPERS
config FB_BACKLIGHT
- tristate
+ bool
depends on FB
- select BACKLIGHT_CLASS_DEVICE
config FB_MODE_HELPERS
bool "Enable Video Mode Handling Helpers"
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index af3db0a7ae4d..7ea6f0b43b95 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -358,7 +358,7 @@ static void btrfs_simple_end_io(struct bio *bio)
INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
queue_work(btrfs_end_io_wq(fs_info, bio), &bbio->end_io_work);
} else {
- if (bio_op(bio) == REQ_OP_ZONE_APPEND && !bio->bi_status)
+ if (bio_is_zone_append(bio) && !bio->bi_status)
btrfs_record_physical_zoned(bbio);
btrfs_bio_end_io(bbio, bbio->bio.bi_status);
}
@@ -401,7 +401,7 @@ static void btrfs_orig_write_end_io(struct bio *bio)
else
bio->bi_status = BLK_STS_OK;
- if (bio_op(bio) == REQ_OP_ZONE_APPEND && !bio->bi_status)
+ if (bio_is_zone_append(bio) && !bio->bi_status)
stripe->physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
btrfs_bio_end_io(bbio, bbio->bio.bi_status);
@@ -415,7 +415,7 @@ static void btrfs_clone_write_end_io(struct bio *bio)
if (bio->bi_status) {
atomic_inc(&stripe->bioc->error);
btrfs_log_dev_io_error(bio, stripe->dev);
- } else if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
+ } else if (bio_is_zone_append(bio)) {
stripe->physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
}
@@ -652,8 +652,14 @@ static u64 btrfs_append_map_length(struct btrfs_bio *bbio, u64 map_length)
map_length = min(map_length, bbio->fs_info->max_zone_append_size);
sector_offset = bio_split_rw_at(&bbio->bio, &bbio->fs_info->limits,
&nr_segs, map_length);
- if (sector_offset)
- return sector_offset << SECTOR_SHIFT;
+ if (sector_offset) {
+ /*
+ * bio_split_rw_at() could split at a size smaller than our
+ * sectorsize and thus cause unaligned I/Os. Fix that by
+ * always rounding down to the nearest boundary.
+ */
+ return ALIGN_DOWN(sector_offset << SECTOR_SHIFT, bbio->fs_info->sectorsize);
+ }
return map_length;
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 307dedf95c70..2c341956a01c 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -371,6 +371,25 @@ static inline void btrfs_set_root_last_trans(struct btrfs_root *root, u64 transi
}
/*
+ * Return the generation this root started with.
+ *
+ * Every normal root that is created with root->root_key.offset set to it's
+ * originating generation. If it is a snapshot it is the generation when the
+ * snapshot was created.
+ *
+ * However for TREE_RELOC roots root_key.offset is the objectid of the owning
+ * tree root. Thankfully we copy the root item of the owning tree root, which
+ * has it's last_snapshot set to what we would have root_key.offset set to, so
+ * return that if this is a TREE_RELOC root.
+ */
+static inline u64 btrfs_root_origin_generation(const struct btrfs_root *root)
+{
+ if (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID)
+ return btrfs_root_last_snapshot(&root->root_item);
+ return root->root_key.offset;
+}
+
+/*
* Structure that conveys information about an extent that is going to replace
* all the extents in a file range.
*/
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index bd09dd3ad1a0..3c6f7fecbb9a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5285,7 +5285,7 @@ static bool visit_node_for_delete(struct btrfs_root *root, struct walk_control *
* reference to it.
*/
generation = btrfs_node_ptr_generation(eb, slot);
- if (!wc->update_ref || generation <= root->root_key.offset)
+ if (!wc->update_ref || generation <= btrfs_root_origin_generation(root))
return false;
/*
@@ -5340,7 +5340,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
goto reada;
if (wc->stage == UPDATE_BACKREF &&
- generation <= root->root_key.offset)
+ generation <= btrfs_root_origin_generation(root))
continue;
/* We don't lock the tree block, it's OK to be racy here */
@@ -5683,7 +5683,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
* for the subtree
*/
if (wc->stage == UPDATE_BACKREF &&
- generation <= root->root_key.offset) {
+ generation <= btrfs_root_origin_generation(root)) {
wc->lookup_info = 1;
return 1;
}
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 148d8cefa40e..dfeee033f31f 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -1527,6 +1527,11 @@ static int check_extent_item(struct extent_buffer *leaf,
dref_offset, fs_info->sectorsize);
return -EUCLEAN;
}
+ if (unlikely(btrfs_extent_data_ref_count(leaf, dref) == 0)) {
+ extent_err(leaf, slot,
+ "invalid data ref count, should have non-zero value");
+ return -EUCLEAN;
+ }
inline_refs += btrfs_extent_data_ref_count(leaf, dref);
break;
/* Contains parent bytenr and ref count */
@@ -1539,6 +1544,11 @@ static int check_extent_item(struct extent_buffer *leaf,
inline_offset, fs_info->sectorsize);
return -EUCLEAN;
}
+ if (unlikely(btrfs_shared_data_ref_count(leaf, sref) == 0)) {
+ extent_err(leaf, slot,
+ "invalid shared data ref count, should have non-zero value");
+ return -EUCLEAN;
+ }
inline_refs += btrfs_shared_data_ref_count(leaf, sref);
break;
case BTRFS_EXTENT_OWNER_REF_KEY:
@@ -1611,8 +1621,18 @@ static int check_simple_keyed_refs(struct extent_buffer *leaf,
{
u32 expect_item_size = 0;
- if (key->type == BTRFS_SHARED_DATA_REF_KEY)
+ if (key->type == BTRFS_SHARED_DATA_REF_KEY) {
+ struct btrfs_shared_data_ref *sref;
+
+ sref = btrfs_item_ptr(leaf, slot, struct btrfs_shared_data_ref);
+ if (unlikely(btrfs_shared_data_ref_count(leaf, sref) == 0)) {
+ extent_err(leaf, slot,
+ "invalid shared data backref count, should have non-zero value");
+ return -EUCLEAN;
+ }
+
expect_item_size = sizeof(struct btrfs_shared_data_ref);
+ }
if (unlikely(btrfs_item_size(leaf, slot) != expect_item_size)) {
generic_err(leaf, slot,
@@ -1689,6 +1709,11 @@ static int check_extent_data_ref(struct extent_buffer *leaf,
offset, leaf->fs_info->sectorsize);
return -EUCLEAN;
}
+ if (unlikely(btrfs_extent_data_ref_count(leaf, dref) == 0)) {
+ extent_err(leaf, slot,
+ "invalid extent data backref count, should have non-zero value");
+ return -EUCLEAN;
+ }
}
return 0;
}
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 4b8d59ebda00..851d70200c6b 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1066,7 +1066,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
if (ceph_inode_is_shutdown(inode))
return -EIO;
- if (!len)
+ if (!len || !i_size)
return 0;
/*
* flush any page cache pages in this range. this
@@ -1086,7 +1086,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
int num_pages;
size_t page_off;
bool more;
- int idx;
+ int idx = 0;
size_t left;
struct ceph_osd_req_op *op;
u64 read_off = off;
@@ -1116,6 +1116,16 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
len = read_off + read_len - off;
more = len < iov_iter_count(to);
+ op = &req->r_ops[0];
+ if (sparse) {
+ extent_cnt = __ceph_sparse_read_ext_count(inode, read_len);
+ ret = ceph_alloc_sparse_ext_map(op, extent_cnt);
+ if (ret) {
+ ceph_osdc_put_request(req);
+ break;
+ }
+ }
+
num_pages = calc_pages_for(read_off, read_len);
page_off = offset_in_page(off);
pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
@@ -1127,17 +1137,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
osd_req_op_extent_osd_data_pages(req, 0, pages, read_len,
offset_in_page(read_off),
- false, false);
-
- op = &req->r_ops[0];
- if (sparse) {
- extent_cnt = __ceph_sparse_read_ext_count(inode, read_len);
- ret = ceph_alloc_sparse_ext_map(op, extent_cnt);
- if (ret) {
- ceph_osdc_put_request(req);
- break;
- }
- }
+ false, true);
ceph_osdc_start_request(osdc, req);
ret = ceph_osdc_wait_request(osdc, req);
@@ -1160,7 +1160,14 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
else if (ret == -ENOENT)
ret = 0;
- if (ret > 0 && IS_ENCRYPTED(inode)) {
+ if (ret < 0) {
+ ceph_osdc_put_request(req);
+ if (ret == -EBLOCKLISTED)
+ fsc->blocklisted = true;
+ break;
+ }
+
+ if (IS_ENCRYPTED(inode)) {
int fret;
fret = ceph_fscrypt_decrypt_extents(inode, pages,
@@ -1186,10 +1193,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
ret = min_t(ssize_t, fret, len);
}
- ceph_osdc_put_request(req);
-
/* Short read but not EOF? Zero out the remainder. */
- if (ret >= 0 && ret < len && (off + ret < i_size)) {
+ if (ret < len && (off + ret < i_size)) {
int zlen = min(len - ret, i_size - off - ret);
int zoff = page_off + ret;
@@ -1199,13 +1204,11 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
ret += zlen;
}
- idx = 0;
- if (ret <= 0)
- left = 0;
- else if (off + ret > i_size)
- left = i_size - off;
+ if (off + ret > i_size)
+ left = (i_size > off) ? i_size - off : 0;
else
left = ret;
+
while (left > 0) {
size_t plen, copied;
@@ -1221,13 +1224,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
break;
}
}
- ceph_release_page_vector(pages, num_pages);
- if (ret < 0) {
- if (ret == -EBLOCKLISTED)
- fsc->blocklisted = true;
- break;
- }
+ ceph_osdc_put_request(req);
if (off >= i_size || !more)
break;
@@ -1553,6 +1551,16 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
break;
}
+ op = &req->r_ops[0];
+ if (!write && sparse) {
+ extent_cnt = __ceph_sparse_read_ext_count(inode, size);
+ ret = ceph_alloc_sparse_ext_map(op, extent_cnt);
+ if (ret) {
+ ceph_osdc_put_request(req);
+ break;
+ }
+ }
+
len = iter_get_bvecs_alloc(iter, size, &bvecs, &num_pages);
if (len < 0) {
ceph_osdc_put_request(req);
@@ -1562,6 +1570,8 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
if (len != size)
osd_req_op_extent_update(req, 0, len);
+ osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len);
+
/*
* To simplify error handling, allow AIO when IO within i_size
* or IO can be satisfied by single OSD request.
@@ -1593,17 +1603,6 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
req->r_mtime = mtime;
}
- osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len);
- op = &req->r_ops[0];
- if (sparse) {
- extent_cnt = __ceph_sparse_read_ext_count(inode, size);
- ret = ceph_alloc_sparse_ext_map(op, extent_cnt);
- if (ret) {
- ceph_osdc_put_request(req);
- break;
- }
- }
-
if (aio_req) {
aio_req->total_len += len;
aio_req->num_reqs++;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 219a2cc2bf3c..785fe489ef4b 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2800,12 +2800,11 @@ retry:
if (pos < 0) {
/*
- * A rename didn't occur, but somehow we didn't end up where
- * we thought we would. Throw a warning and try again.
+ * The path is longer than PATH_MAX and this function
+ * cannot ever succeed. Creating paths that long is
+ * possible with Ceph, but Linux cannot use them.
*/
- pr_warn_client(cl, "did not end path lookup where expected (pos = %d)\n",
- pos);
- goto retry;
+ return ERR_PTR(-ENAMETOOLONG);
}
*pbase = base;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index de03cd6eb86e..4344e1f11806 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -431,6 +431,8 @@ static int ceph_parse_mount_param(struct fs_context *fc,
switch (token) {
case Opt_snapdirname:
+ if (strlen(param->string) > NAME_MAX)
+ return invalfc(fc, "snapdirname too long");
kfree(fsopt->snapdir_name);
fsopt->snapdir_name = param->string;
param->string = NULL;
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 1c49f8962021..0cd6b5c4df98 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -56,10 +56,10 @@ void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb)
buf->file = NULL;
if (erofs_is_fileio_mode(sbi)) {
- buf->file = sbi->fdev; /* some fs like FUSE needs it */
+ buf->file = sbi->dif0.file; /* some fs like FUSE needs it */
buf->mapping = buf->file->f_mapping;
} else if (erofs_is_fscache_mode(sb))
- buf->mapping = sbi->s_fscache->inode->i_mapping;
+ buf->mapping = sbi->dif0.fscache->inode->i_mapping;
else
buf->mapping = sb->s_bdev->bd_mapping;
}
@@ -179,19 +179,13 @@ out:
}
static void erofs_fill_from_devinfo(struct erofs_map_dev *map,
- struct erofs_device_info *dif)
+ struct super_block *sb, struct erofs_device_info *dif)
{
+ map->m_sb = sb;
+ map->m_dif = dif;
map->m_bdev = NULL;
- map->m_fp = NULL;
- if (dif->file) {
- if (S_ISBLK(file_inode(dif->file)->i_mode))
- map->m_bdev = file_bdev(dif->file);
- else
- map->m_fp = dif->file;
- }
- map->m_daxdev = dif->dax_dev;
- map->m_dax_part_off = dif->dax_part_off;
- map->m_fscache = dif->fscache;
+ if (dif->file && S_ISBLK(file_inode(dif->file)->i_mode))
+ map->m_bdev = file_bdev(dif->file);
}
int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
@@ -201,12 +195,8 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
erofs_off_t startoff, length;
int id;
- map->m_bdev = sb->s_bdev;
- map->m_daxdev = EROFS_SB(sb)->dax_dev;
- map->m_dax_part_off = EROFS_SB(sb)->dax_part_off;
- map->m_fscache = EROFS_SB(sb)->s_fscache;
- map->m_fp = EROFS_SB(sb)->fdev;
-
+ erofs_fill_from_devinfo(map, sb, &EROFS_SB(sb)->dif0);
+ map->m_bdev = sb->s_bdev; /* use s_bdev for the primary device */
if (map->m_deviceid) {
down_read(&devs->rwsem);
dif = idr_find(&devs->tree, map->m_deviceid - 1);
@@ -219,7 +209,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
up_read(&devs->rwsem);
return 0;
}
- erofs_fill_from_devinfo(map, dif);
+ erofs_fill_from_devinfo(map, sb, dif);
up_read(&devs->rwsem);
} else if (devs->extra_devices && !devs->flatdev) {
down_read(&devs->rwsem);
@@ -232,7 +222,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
if (map->m_pa >= startoff &&
map->m_pa < startoff + length) {
map->m_pa -= startoff;
- erofs_fill_from_devinfo(map, dif);
+ erofs_fill_from_devinfo(map, sb, dif);
break;
}
}
@@ -302,7 +292,7 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
iomap->offset = map.m_la;
if (flags & IOMAP_DAX)
- iomap->dax_dev = mdev.m_daxdev;
+ iomap->dax_dev = mdev.m_dif->dax_dev;
else
iomap->bdev = mdev.m_bdev;
iomap->length = map.m_llen;
@@ -331,7 +321,7 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
iomap->type = IOMAP_MAPPED;
iomap->addr = mdev.m_pa;
if (flags & IOMAP_DAX)
- iomap->addr += mdev.m_dax_part_off;
+ iomap->addr += mdev.m_dif->dax_part_off;
}
return 0;
}
diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c
index 3af96b1e2c2a..33f8539dda4a 100644
--- a/fs/erofs/fileio.c
+++ b/fs/erofs/fileio.c
@@ -9,6 +9,7 @@ struct erofs_fileio_rq {
struct bio_vec bvecs[BIO_MAX_VECS];
struct bio bio;
struct kiocb iocb;
+ struct super_block *sb;
};
struct erofs_fileio {
@@ -52,8 +53,9 @@ static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq)
rq->iocb.ki_pos = rq->bio.bi_iter.bi_sector << SECTOR_SHIFT;
rq->iocb.ki_ioprio = get_current_ioprio();
rq->iocb.ki_complete = erofs_fileio_ki_complete;
- rq->iocb.ki_flags = (rq->iocb.ki_filp->f_mode & FMODE_CAN_ODIRECT) ?
- IOCB_DIRECT : 0;
+ if (test_opt(&EROFS_SB(rq->sb)->opt, DIRECT_IO) &&
+ rq->iocb.ki_filp->f_mode & FMODE_CAN_ODIRECT)
+ rq->iocb.ki_flags = IOCB_DIRECT;
iov_iter_bvec(&iter, ITER_DEST, rq->bvecs, rq->bio.bi_vcnt,
rq->bio.bi_iter.bi_size);
ret = vfs_iocb_iter_read(rq->iocb.ki_filp, &rq->iocb, &iter);
@@ -67,7 +69,8 @@ static struct erofs_fileio_rq *erofs_fileio_rq_alloc(struct erofs_map_dev *mdev)
GFP_KERNEL | __GFP_NOFAIL);
bio_init(&rq->bio, NULL, rq->bvecs, BIO_MAX_VECS, REQ_OP_READ);
- rq->iocb.ki_filp = mdev->m_fp;
+ rq->iocb.ki_filp = mdev->m_dif->file;
+ rq->sb = mdev->m_sb;
return rq;
}
diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
index fda16eedafb5..ce3d8737df85 100644
--- a/fs/erofs/fscache.c
+++ b/fs/erofs/fscache.c
@@ -198,7 +198,7 @@ struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev)
io = kmalloc(sizeof(*io), GFP_KERNEL | __GFP_NOFAIL);
bio_init(&io->bio, NULL, io->bvecs, BIO_MAX_VECS, REQ_OP_READ);
- io->io.private = mdev->m_fscache->cookie;
+ io->io.private = mdev->m_dif->fscache->cookie;
io->io.end_io = erofs_fscache_bio_endio;
refcount_set(&io->io.ref, 1);
return &io->bio;
@@ -316,7 +316,7 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
if (!io)
return -ENOMEM;
iov_iter_xarray(&io->iter, ITER_DEST, &mapping->i_pages, pos, count);
- ret = erofs_fscache_read_io_async(mdev.m_fscache->cookie,
+ ret = erofs_fscache_read_io_async(mdev.m_dif->fscache->cookie,
mdev.m_pa + (pos - map.m_la), io);
erofs_fscache_req_io_put(io);
@@ -657,7 +657,7 @@ int erofs_fscache_register_fs(struct super_block *sb)
if (IS_ERR(fscache))
return PTR_ERR(fscache);
- sbi->s_fscache = fscache;
+ sbi->dif0.fscache = fscache;
return 0;
}
@@ -665,14 +665,14 @@ void erofs_fscache_unregister_fs(struct super_block *sb)
{
struct erofs_sb_info *sbi = EROFS_SB(sb);
- erofs_fscache_unregister_cookie(sbi->s_fscache);
+ erofs_fscache_unregister_cookie(sbi->dif0.fscache);
if (sbi->domain)
erofs_fscache_domain_put(sbi->domain);
else
fscache_relinquish_volume(sbi->volume, NULL, false);
- sbi->s_fscache = NULL;
+ sbi->dif0.fscache = NULL;
sbi->volume = NULL;
sbi->domain = NULL;
}
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 1c847c30a918..686d835eb533 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -107,6 +107,7 @@ struct erofs_xattr_prefix_item {
};
struct erofs_sb_info {
+ struct erofs_device_info dif0;
struct erofs_mount_opts opt; /* options */
#ifdef CONFIG_EROFS_FS_ZIP
/* list for all registered superblocks, mainly for shrinker */
@@ -124,13 +125,9 @@ struct erofs_sb_info {
struct erofs_sb_lz4_info lz4;
#endif /* CONFIG_EROFS_FS_ZIP */
- struct file *fdev;
struct inode *packed_inode;
struct erofs_dev_context *devs;
- struct dax_device *dax_dev;
- u64 dax_part_off;
u64 total_blocks;
- u32 primarydevice_blocks;
u32 meta_blkaddr;
#ifdef CONFIG_EROFS_FS_XATTR
@@ -166,7 +163,6 @@ struct erofs_sb_info {
/* fscache support */
struct fscache_volume *volume;
- struct erofs_fscache *s_fscache;
struct erofs_domain *domain;
char *fsid;
char *domain_id;
@@ -180,6 +176,7 @@ struct erofs_sb_info {
#define EROFS_MOUNT_POSIX_ACL 0x00000020
#define EROFS_MOUNT_DAX_ALWAYS 0x00000040
#define EROFS_MOUNT_DAX_NEVER 0x00000080
+#define EROFS_MOUNT_DIRECT_IO 0x00000100
#define clear_opt(opt, option) ((opt)->mount_opt &= ~EROFS_MOUNT_##option)
#define set_opt(opt, option) ((opt)->mount_opt |= EROFS_MOUNT_##option)
@@ -187,7 +184,7 @@ struct erofs_sb_info {
static inline bool erofs_is_fileio_mode(struct erofs_sb_info *sbi)
{
- return IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) && sbi->fdev;
+ return IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) && sbi->dif0.file;
}
static inline bool erofs_is_fscache_mode(struct super_block *sb)
@@ -357,11 +354,9 @@ enum {
};
struct erofs_map_dev {
- struct erofs_fscache *m_fscache;
+ struct super_block *m_sb;
+ struct erofs_device_info *m_dif;
struct block_device *m_bdev;
- struct dax_device *m_daxdev;
- struct file *m_fp;
- u64 m_dax_part_off;
erofs_off_t m_pa;
unsigned int m_deviceid;
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index c235a8e4315e..f5956474bfde 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -203,7 +203,7 @@ static int erofs_scan_devices(struct super_block *sb,
struct erofs_device_info *dif;
int id, err = 0;
- sbi->total_blocks = sbi->primarydevice_blocks;
+ sbi->total_blocks = sbi->dif0.blocks;
if (!erofs_sb_has_device_table(sbi))
ondisk_extradevs = 0;
else
@@ -307,7 +307,7 @@ static int erofs_read_superblock(struct super_block *sb)
sbi->sb_size);
goto out;
}
- sbi->primarydevice_blocks = le32_to_cpu(dsb->blocks);
+ sbi->dif0.blocks = le32_to_cpu(dsb->blocks);
sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr);
#ifdef CONFIG_EROFS_FS_XATTR
sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr);
@@ -364,14 +364,8 @@ static void erofs_default_options(struct erofs_sb_info *sbi)
}
enum {
- Opt_user_xattr,
- Opt_acl,
- Opt_cache_strategy,
- Opt_dax,
- Opt_dax_enum,
- Opt_device,
- Opt_fsid,
- Opt_domain_id,
+ Opt_user_xattr, Opt_acl, Opt_cache_strategy, Opt_dax, Opt_dax_enum,
+ Opt_device, Opt_fsid, Opt_domain_id, Opt_directio,
Opt_err
};
@@ -398,6 +392,7 @@ static const struct fs_parameter_spec erofs_fs_parameters[] = {
fsparam_string("device", Opt_device),
fsparam_string("fsid", Opt_fsid),
fsparam_string("domain_id", Opt_domain_id),
+ fsparam_flag_no("directio", Opt_directio),
{}
};
@@ -511,6 +506,16 @@ static int erofs_fc_parse_param(struct fs_context *fc,
errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name);
break;
#endif
+ case Opt_directio:
+#ifdef CONFIG_EROFS_FS_BACKED_BY_FILE
+ if (result.boolean)
+ set_opt(&sbi->opt, DIRECT_IO);
+ else
+ clear_opt(&sbi->opt, DIRECT_IO);
+#else
+ errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name);
+#endif
+ break;
default:
return -ENOPARAM;
}
@@ -602,9 +607,8 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
return -EINVAL;
}
- sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev,
- &sbi->dax_part_off,
- NULL, NULL);
+ sbi->dif0.dax_dev = fs_dax_get_by_bdev(sb->s_bdev,
+ &sbi->dif0.dax_part_off, NULL, NULL);
}
err = erofs_read_superblock(sb);
@@ -627,7 +631,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
}
if (test_opt(&sbi->opt, DAX_ALWAYS)) {
- if (!sbi->dax_dev) {
+ if (!sbi->dif0.dax_dev) {
errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
clear_opt(&sbi->opt, DAX_ALWAYS);
} else if (sbi->blkszbits != PAGE_SHIFT) {
@@ -703,16 +707,18 @@ static int erofs_fc_get_tree(struct fs_context *fc)
GET_TREE_BDEV_QUIET_LOOKUP : 0);
#ifdef CONFIG_EROFS_FS_BACKED_BY_FILE
if (ret == -ENOTBLK) {
+ struct file *file;
+
if (!fc->source)
return invalf(fc, "No source specified");
- sbi->fdev = filp_open(fc->source, O_RDONLY | O_LARGEFILE, 0);
- if (IS_ERR(sbi->fdev))
- return PTR_ERR(sbi->fdev);
+ file = filp_open(fc->source, O_RDONLY | O_LARGEFILE, 0);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+ sbi->dif0.file = file;
- if (S_ISREG(file_inode(sbi->fdev)->i_mode) &&
- sbi->fdev->f_mapping->a_ops->read_folio)
+ if (S_ISREG(file_inode(sbi->dif0.file)->i_mode) &&
+ sbi->dif0.file->f_mapping->a_ops->read_folio)
return get_tree_nodev(fc, erofs_fc_fill_super);
- fput(sbi->fdev);
}
#endif
return ret;
@@ -763,19 +769,24 @@ static void erofs_free_dev_context(struct erofs_dev_context *devs)
kfree(devs);
}
-static void erofs_fc_free(struct fs_context *fc)
+static void erofs_sb_free(struct erofs_sb_info *sbi)
{
- struct erofs_sb_info *sbi = fc->s_fs_info;
-
- if (!sbi)
- return;
-
erofs_free_dev_context(sbi->devs);
kfree(sbi->fsid);
kfree(sbi->domain_id);
+ if (sbi->dif0.file)
+ fput(sbi->dif0.file);
kfree(sbi);
}
+static void erofs_fc_free(struct fs_context *fc)
+{
+ struct erofs_sb_info *sbi = fc->s_fs_info;
+
+ if (sbi) /* free here if an error occurs before transferring to sb */
+ erofs_sb_free(sbi);
+}
+
static const struct fs_context_operations erofs_context_ops = {
.parse_param = erofs_fc_parse_param,
.get_tree = erofs_fc_get_tree,
@@ -809,19 +820,14 @@ static void erofs_kill_sb(struct super_block *sb)
{
struct erofs_sb_info *sbi = EROFS_SB(sb);
- if ((IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid) || sbi->fdev)
+ if ((IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid) ||
+ sbi->dif0.file)
kill_anon_super(sb);
else
kill_block_super(sb);
-
- erofs_free_dev_context(sbi->devs);
- fs_put_dax(sbi->dax_dev, NULL);
+ fs_put_dax(sbi->dif0.dax_dev, NULL);
erofs_fscache_unregister_fs(sb);
- kfree(sbi->fsid);
- kfree(sbi->domain_id);
- if (sbi->fdev)
- fput(sbi->fdev);
- kfree(sbi);
+ erofs_sb_free(sbi);
sb->s_fs_info = NULL;
}
@@ -947,6 +953,8 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",dax=always");
if (test_opt(opt, DAX_NEVER))
seq_puts(seq, ",dax=never");
+ if (erofs_is_fileio_mode(sbi) && test_opt(opt, DIRECT_IO))
+ seq_puts(seq, ",directio");
#ifdef CONFIG_EROFS_FS_ONDEMAND
if (sbi->fsid)
seq_printf(seq, ",fsid=%s", sbi->fsid);
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 01f147505487..19ef4ff2a134 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1792,9 +1792,9 @@ drain_io:
erofs_fscache_submit_bio(bio);
else
submit_bio(bio);
- if (memstall)
- psi_memstall_leave(&pflags);
}
+ if (memstall)
+ psi_memstall_leave(&pflags);
/*
* although background is preferred, no one is pending for submission.
diff --git a/fs/erofs/zutil.c b/fs/erofs/zutil.c
index 75704f58ecfa..0dd65cefce33 100644
--- a/fs/erofs/zutil.c
+++ b/fs/erofs/zutil.c
@@ -230,9 +230,10 @@ void erofs_shrinker_unregister(struct super_block *sb)
struct erofs_sb_info *const sbi = EROFS_SB(sb);
mutex_lock(&sbi->umount_mutex);
- /* clean up all remaining pclusters in memory */
- z_erofs_shrink_scan(sbi, ~0UL);
-
+ while (!xa_empty(&sbi->managed_pslots)) {
+ z_erofs_shrink_scan(sbi, ~0UL);
+ cond_resched();
+ }
spin_lock(&erofs_sb_list_lock);
list_del(&sbi->list);
spin_unlock(&erofs_sb_list_lock);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 90f883d6b8fd..fc1ae5132127 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -825,7 +825,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
error = PTR_ERR(folio);
goto out;
}
- folio_zero_user(folio, ALIGN_DOWN(addr, hpage_size));
+ folio_zero_user(folio, addr);
__folio_mark_uptodate(folio);
error = hugetlb_add_to_page_cache(folio, mapping, index);
if (unlikely(error)) {
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 0d16b383a452..5f582713bf05 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1308,7 +1308,7 @@ pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo,
enum pnfs_iomode *iomode)
{
/* Serialise LAYOUTGET/LAYOUTRETURN */
- if (atomic_read(&lo->plh_outstanding) != 0)
+ if (atomic_read(&lo->plh_outstanding) != 0 && lo->plh_return_seq == 0)
return false;
if (test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
return false;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ae5c5e39afa0..aeb715b4a690 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -73,6 +73,7 @@
#include "nfs.h"
#include "netns.h"
#include "sysfs.h"
+#include "nfs4idmap.h"
#define NFSDBG_FACILITY NFSDBG_VFS
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index eacafe46e3b6..aa4712362b3b 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -40,24 +40,15 @@
#define EXPKEY_HASHMAX (1 << EXPKEY_HASHBITS)
#define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1)
-static void expkey_put_work(struct work_struct *work)
+static void expkey_put(struct kref *ref)
{
- struct svc_expkey *key =
- container_of(to_rcu_work(work), struct svc_expkey, ek_rcu_work);
+ struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref);
if (test_bit(CACHE_VALID, &key->h.flags) &&
!test_bit(CACHE_NEGATIVE, &key->h.flags))
path_put(&key->ek_path);
auth_domain_put(key->ek_client);
- kfree(key);
-}
-
-static void expkey_put(struct kref *ref)
-{
- struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref);
-
- INIT_RCU_WORK(&key->ek_rcu_work, expkey_put_work);
- queue_rcu_work(system_wq, &key->ek_rcu_work);
+ kfree_rcu(key, ek_rcu);
}
static int expkey_upcall(struct cache_detail *cd, struct cache_head *h)
@@ -364,26 +355,16 @@ static void export_stats_destroy(struct export_stats *stats)
EXP_STATS_COUNTERS_NUM);
}
-static void svc_export_put_work(struct work_struct *work)
+static void svc_export_put(struct kref *ref)
{
- struct svc_export *exp =
- container_of(to_rcu_work(work), struct svc_export, ex_rcu_work);
-
+ struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
path_put(&exp->ex_path);
auth_domain_put(exp->ex_client);
nfsd4_fslocs_free(&exp->ex_fslocs);
export_stats_destroy(exp->ex_stats);
kfree(exp->ex_stats);
kfree(exp->ex_uuid);
- kfree(exp);
-}
-
-static void svc_export_put(struct kref *ref)
-{
- struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
-
- INIT_RCU_WORK(&exp->ex_rcu_work, svc_export_put_work);
- queue_rcu_work(system_wq, &exp->ex_rcu_work);
+ kfree_rcu(exp, ex_rcu);
}
static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h)
diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
index 6f2fbaae01fa..4d92b99c1ffd 100644
--- a/fs/nfsd/export.h
+++ b/fs/nfsd/export.h
@@ -75,7 +75,7 @@ struct svc_export {
u32 ex_layout_types;
struct nfsd4_deviceid_map *ex_devid_map;
struct cache_detail *cd;
- struct rcu_work ex_rcu_work;
+ struct rcu_head ex_rcu;
unsigned long ex_xprtsec_modes;
struct export_stats *ex_stats;
};
@@ -92,7 +92,7 @@ struct svc_expkey {
u32 ek_fsid[6];
struct path ek_path;
- struct rcu_work ek_rcu_work;
+ struct rcu_head ek_rcu;
};
#define EX_ISSYNC(exp) (!((exp)->ex_flags & NFSEXP_ASYNC))
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 3877b53e429f..c083e539e898 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1100,7 +1100,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
args.authflavor = clp->cl_cred.cr_flavor;
clp->cl_cb_ident = conn->cb_ident;
} else {
- if (!conn->cb_xprt)
+ if (!conn->cb_xprt || !ses)
return -EINVAL;
clp->cl_cb_session = ses;
args.bc_xprt = conn->cb_xprt;
@@ -1522,8 +1522,6 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
ses = c->cn_session;
}
spin_unlock(&clp->cl_lock);
- if (!c)
- return;
err = setup_callback_client(clp, &conn, ses);
if (err) {
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index f8a10f90bc7a..ad44ad49274f 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1347,7 +1347,6 @@ static void nfs4_put_copy(struct nfsd4_copy *copy)
{
if (!refcount_dec_and_test(&copy->refcount))
return;
- atomic_dec(&copy->cp_nn->pending_async_copies);
kfree(copy->cp_src);
kfree(copy);
}
@@ -1870,6 +1869,7 @@ do_callback:
set_bit(NFSD4_COPY_F_COMPLETED, &copy->cp_flags);
trace_nfsd_copy_async_done(copy);
nfsd4_send_cb_offload(copy);
+ atomic_dec(&copy->cp_nn->pending_async_copies);
return 0;
}
@@ -1927,19 +1927,19 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
/* Arbitrary cap on number of pending async copy operations */
if (atomic_inc_return(&nn->pending_async_copies) >
(int)rqstp->rq_pool->sp_nrthreads)
- goto out_err;
+ goto out_dec_async_copy_err;
async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL);
if (!async_copy->cp_src)
- goto out_err;
+ goto out_dec_async_copy_err;
if (!nfs4_init_copy_state(nn, copy))
- goto out_err;
+ goto out_dec_async_copy_err;
memcpy(&result->cb_stateid, &copy->cp_stateid.cs_stid,
sizeof(result->cb_stateid));
dup_copy_fields(copy, async_copy);
async_copy->copy_task = kthread_create(nfsd4_do_async_copy,
async_copy, "%s", "copy thread");
if (IS_ERR(async_copy->copy_task))
- goto out_err;
+ goto out_dec_async_copy_err;
spin_lock(&async_copy->cp_clp->async_lock);
list_add(&async_copy->copies,
&async_copy->cp_clp->async_copies);
@@ -1954,6 +1954,9 @@ out:
trace_nfsd_copy_done(copy, status);
release_copy_files(copy);
return status;
+out_dec_async_copy_err:
+ if (async_copy)
+ atomic_dec(&nn->pending_async_copies);
out_err:
if (nfsd4_ssc_is_inter(copy)) {
/*
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 501ad7be5174..54a3fa0cf67e 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -35,6 +35,7 @@ void nilfs_init_btnc_inode(struct inode *btnc_inode)
ii->i_flags = 0;
memset(&ii->i_bmap_data, 0, sizeof(struct nilfs_bmap));
mapping_set_gfp_mask(btnc_inode->i_mapping, GFP_NOFS);
+ btnc_inode->i_mapping->a_ops = &nilfs_buffer_cache_aops;
}
void nilfs_btnode_cache_clear(struct address_space *btnc)
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index ace22253fed0..2dbb15767df1 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -163,7 +163,7 @@ int nilfs_init_gcinode(struct inode *inode)
inode->i_mode = S_IFREG;
mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
- inode->i_mapping->a_ops = &empty_aops;
+ inode->i_mapping->a_ops = &nilfs_buffer_cache_aops;
ii->i_flags = 0;
nilfs_bmap_init_gc(ii->i_bmap);
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index cf9ba481ae37..23f3a75edd50 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -276,6 +276,10 @@ const struct address_space_operations nilfs_aops = {
.is_partially_uptodate = block_is_partially_uptodate,
};
+const struct address_space_operations nilfs_buffer_cache_aops = {
+ .invalidate_folio = block_invalidate_folio,
+};
+
static int nilfs_insert_inode_locked(struct inode *inode,
struct nilfs_root *root,
unsigned long ino)
@@ -544,8 +548,14 @@ struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root,
inode = nilfs_iget_locked(sb, root, ino);
if (unlikely(!inode))
return ERR_PTR(-ENOMEM);
- if (!(inode->i_state & I_NEW))
+
+ if (!(inode->i_state & I_NEW)) {
+ if (!inode->i_nlink) {
+ iput(inode);
+ return ERR_PTR(-ESTALE);
+ }
return inode;
+ }
err = __nilfs_read_inode(sb, root, ino, inode);
if (unlikely(err)) {
@@ -675,6 +685,7 @@ struct inode *nilfs_iget_for_shadow(struct inode *inode)
NILFS_I(s_inode)->i_flags = 0;
memset(NILFS_I(s_inode)->i_bmap, 0, sizeof(struct nilfs_bmap));
mapping_set_gfp_mask(s_inode->i_mapping, GFP_NOFS);
+ s_inode->i_mapping->a_ops = &nilfs_buffer_cache_aops;
err = nilfs_attach_btree_node_cache(s_inode);
if (unlikely(err)) {
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 9b108052d9f7..1d836a5540f3 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -67,6 +67,11 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
inode = NULL;
} else {
inode = nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino);
+ if (inode == ERR_PTR(-ESTALE)) {
+ nilfs_error(dir->i_sb,
+ "deleted inode referenced: %lu", ino);
+ return ERR_PTR(-EIO);
+ }
}
return d_splice_alias(inode, dentry);
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 45d03826eaf1..dff241c53fc5 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -401,6 +401,7 @@ extern const struct file_operations nilfs_dir_operations;
extern const struct inode_operations nilfs_file_inode_operations;
extern const struct file_operations nilfs_file_operations;
extern const struct address_space_operations nilfs_aops;
+extern const struct address_space_operations nilfs_buffer_cache_aops;
extern const struct inode_operations nilfs_dir_inode_operations;
extern const struct inode_operations nilfs_special_inode_operations;
extern const struct inode_operations nilfs_symlink_inode_operations;
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 8ac42ea81a17..d1aa04a5af1b 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -971,9 +971,9 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
start = count = 0;
left = le32_to_cpu(alloc->id1.bitmap1.i_total);
- while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start)) <
- left) {
- if (bit_off == start) {
+ while (1) {
+ bit_off = ocfs2_find_next_zero_bit(bitmap, left, start);
+ if ((bit_off < left) && (bit_off == start)) {
count++;
start++;
continue;
@@ -998,29 +998,12 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
}
}
+ if (bit_off >= left)
+ break;
count = 1;
start = bit_off + 1;
}
- /* clear the contiguous bits until the end boundary */
- if (count) {
- blkno = la_start_blk +
- ocfs2_clusters_to_blocks(osb->sb,
- start - count);
-
- trace_ocfs2_sync_local_to_main_free(
- count, start - count,
- (unsigned long long)la_start_blk,
- (unsigned long long)blkno);
-
- status = ocfs2_release_clusters(handle,
- main_bm_inode,
- main_bm_bh, blkno,
- count);
- if (status < 0)
- mlog_errno(status);
- }
-
bail:
if (status)
mlog_errno(status);
diff --git a/fs/smb/client/Kconfig b/fs/smb/client/Kconfig
index 2aff6d1395ce..9f05f94e265a 100644
--- a/fs/smb/client/Kconfig
+++ b/fs/smb/client/Kconfig
@@ -2,7 +2,6 @@
config CIFS
tristate "SMB3 and CIFS support (advanced network filesystem)"
depends on INET
- select NETFS_SUPPORT
select NLS
select NLS_UCS2_UTILS
select CRYPTO
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index 9d96b833015c..b800c9f585d8 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -398,7 +398,7 @@ cifs_alloc_inode(struct super_block *sb)
cifs_inode = alloc_inode_sb(sb, cifs_inode_cachep, GFP_KERNEL);
if (!cifs_inode)
return NULL;
- cifs_inode->cifsAttrs = 0x20; /* default */
+ cifs_inode->cifsAttrs = ATTR_ARCHIVE; /* default */
cifs_inode->time = 0;
/*
* Until the file is open and we have gotten oplock info back from the
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index 2372538a1211..ddcc9e514a0e 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -987,9 +987,13 @@ clean_demultiplex_info(struct TCP_Server_Info *server)
msleep(125);
if (cifs_rdma_enabled(server))
smbd_destroy(server);
+
if (server->ssocket) {
sock_release(server->ssocket);
server->ssocket = NULL;
+
+ /* Release netns reference for the socket. */
+ put_net(cifs_net_ns(server));
}
if (!list_empty(&server->pending_mid_q)) {
@@ -1037,6 +1041,7 @@ clean_demultiplex_info(struct TCP_Server_Info *server)
*/
}
+ /* Release netns reference for this server. */
put_net(cifs_net_ns(server));
kfree(server->leaf_fullpath);
kfree(server);
@@ -1713,6 +1718,8 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx,
tcp_ses->ops = ctx->ops;
tcp_ses->vals = ctx->vals;
+
+ /* Grab netns reference for this server. */
cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns));
tcp_ses->conn_id = atomic_inc_return(&tcpSesNextId);
@@ -1844,6 +1851,7 @@ smbd_connected:
out_err_crypto_release:
cifs_crypto_secmech_release(tcp_ses);
+ /* Release netns reference for this server. */
put_net(cifs_net_ns(tcp_ses));
out_err:
@@ -1852,8 +1860,10 @@ out_err:
cifs_put_tcp_session(tcp_ses->primary_server, false);
kfree(tcp_ses->hostname);
kfree(tcp_ses->leaf_fullpath);
- if (tcp_ses->ssocket)
+ if (tcp_ses->ssocket) {
sock_release(tcp_ses->ssocket);
+ put_net(cifs_net_ns(tcp_ses));
+ }
kfree(tcp_ses);
}
return ERR_PTR(rc);
@@ -3131,20 +3141,20 @@ generic_ip_connect(struct TCP_Server_Info *server)
socket = server->ssocket;
} else {
struct net *net = cifs_net_ns(server);
- struct sock *sk;
- rc = __sock_create(net, sfamily, SOCK_STREAM,
- IPPROTO_TCP, &server->ssocket, 1);
+ rc = sock_create_kern(net, sfamily, SOCK_STREAM, IPPROTO_TCP, &server->ssocket);
if (rc < 0) {
cifs_server_dbg(VFS, "Error %d creating socket\n", rc);
return rc;
}
- sk = server->ssocket->sk;
- __netns_tracker_free(net, &sk->ns_tracker, false);
- sk->sk_net_refcnt = 1;
- get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
- sock_inuse_add(net, 1);
+ /*
+ * Grab netns reference for the socket.
+ *
+ * It'll be released here, on error, or in clean_demultiplex_info() upon server
+ * teardown.
+ */
+ get_net(net);
/* BB other socket options to set KEEPALIVE, NODELAY? */
cifs_dbg(FYI, "Socket created\n");
@@ -3158,8 +3168,10 @@ generic_ip_connect(struct TCP_Server_Info *server)
}
rc = bind_socket(server);
- if (rc < 0)
+ if (rc < 0) {
+ put_net(cifs_net_ns(server));
return rc;
+ }
/*
* Eventually check for other socket options to change from
@@ -3196,6 +3208,7 @@ generic_ip_connect(struct TCP_Server_Info *server)
if (rc < 0) {
cifs_dbg(FYI, "Error %d connecting to server\n", rc);
trace_smb3_connect_err(server->hostname, server->conn_id, &server->dstaddr, rc);
+ put_net(cifs_net_ns(server));
sock_release(socket);
server->ssocket = NULL;
return rc;
@@ -3204,6 +3217,9 @@ generic_ip_connect(struct TCP_Server_Info *server)
if (sport == htons(RFC1001_PORT))
rc = ip_rfc1001_connect(server);
+ if (rc < 0)
+ put_net(cifs_net_ns(server));
+
return rc;
}
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 010eae9d6c47..959359301250 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -4840,6 +4840,8 @@ smb2_writev_callback(struct mid_q_entry *mid)
if (written > wdata->subreq.len)
written &= 0xFFFF;
+ cifs_stats_bytes_written(tcon, written);
+
if (written < wdata->subreq.len)
wdata->result = -ENOSPC;
else
@@ -5156,6 +5158,7 @@ replay_again:
cifs_dbg(VFS, "Send error in write = %d\n", rc);
} else {
*nbytes = le32_to_cpu(rsp->DataLength);
+ cifs_stats_bytes_written(io_parms->tcon, *nbytes);
trace_smb3_write_done(0, 0, xid,
req->PersistentFileId,
io_parms->tcon->tid,
@@ -6204,7 +6207,7 @@ SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon,
req->StructureSize = cpu_to_le16(36);
total_len += 12;
- memcpy(req->LeaseKey, lease_key, 16);
+ memcpy(req->LeaseKey, lease_key, SMB2_LEASE_KEY_SIZE);
req->LeaseState = lease_state;
flags |= CIFS_NO_RSP_BUF;
diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c
index c14dd72e1b30..f8a40f65db6a 100644
--- a/fs/smb/server/connection.c
+++ b/fs/smb/server/connection.c
@@ -70,7 +70,6 @@ struct ksmbd_conn *ksmbd_conn_alloc(void)
atomic_set(&conn->req_running, 0);
atomic_set(&conn->r_count, 0);
atomic_set(&conn->refcnt, 1);
- atomic_set(&conn->mux_smb_requests, 0);
conn->total_credits = 1;
conn->outstanding_credits = 0;
@@ -120,8 +119,8 @@ void ksmbd_conn_enqueue_request(struct ksmbd_work *work)
if (conn->ops->get_cmd_val(work) != SMB2_CANCEL_HE)
requests_queue = &conn->requests;
+ atomic_inc(&conn->req_running);
if (requests_queue) {
- atomic_inc(&conn->req_running);
spin_lock(&conn->request_lock);
list_add_tail(&work->request_entry, requests_queue);
spin_unlock(&conn->request_lock);
@@ -132,11 +131,14 @@ void ksmbd_conn_try_dequeue_request(struct ksmbd_work *work)
{
struct ksmbd_conn *conn = work->conn;
+ atomic_dec(&conn->req_running);
+ if (waitqueue_active(&conn->req_running_q))
+ wake_up(&conn->req_running_q);
+
if (list_empty(&work->request_entry) &&
list_empty(&work->async_request_entry))
return;
- atomic_dec(&conn->req_running);
spin_lock(&conn->request_lock);
list_del_init(&work->request_entry);
spin_unlock(&conn->request_lock);
@@ -308,7 +310,7 @@ int ksmbd_conn_handler_loop(void *p)
{
struct ksmbd_conn *conn = (struct ksmbd_conn *)p;
struct ksmbd_transport *t = conn->transport;
- unsigned int pdu_size, max_allowed_pdu_size;
+ unsigned int pdu_size, max_allowed_pdu_size, max_req;
char hdr_buf[4] = {0,};
int size;
@@ -318,6 +320,7 @@ int ksmbd_conn_handler_loop(void *p)
if (t->ops->prepare && t->ops->prepare(t))
goto out;
+ max_req = server_conf.max_inflight_req;
conn->last_active = jiffies;
set_freezable();
while (ksmbd_conn_alive(conn)) {
@@ -327,6 +330,13 @@ int ksmbd_conn_handler_loop(void *p)
kvfree(conn->request_buf);
conn->request_buf = NULL;
+recheck:
+ if (atomic_read(&conn->req_running) + 1 > max_req) {
+ wait_event_interruptible(conn->req_running_q,
+ atomic_read(&conn->req_running) < max_req);
+ goto recheck;
+ }
+
size = t->ops->read(t, hdr_buf, sizeof(hdr_buf), -1);
if (size != sizeof(hdr_buf))
break;
diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h
index 8ddd5a3c7baf..b379ae4fdcdf 100644
--- a/fs/smb/server/connection.h
+++ b/fs/smb/server/connection.h
@@ -107,7 +107,6 @@ struct ksmbd_conn {
__le16 signing_algorithm;
bool binding;
atomic_t refcnt;
- atomic_t mux_smb_requests;
};
struct ksmbd_conn_ops {
diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c
index 3ba95bd8edeb..601e7fcbcf1e 100644
--- a/fs/smb/server/server.c
+++ b/fs/smb/server/server.c
@@ -270,7 +270,6 @@ static void handle_ksmbd_work(struct work_struct *wk)
ksmbd_conn_try_dequeue_request(work);
ksmbd_free_work_struct(work);
- atomic_dec(&conn->mux_smb_requests);
/*
* Checking waitqueue to dropping pending requests on
* disconnection. waitqueue_active is safe because it
@@ -300,11 +299,6 @@ static int queue_ksmbd_work(struct ksmbd_conn *conn)
if (err)
return 0;
- if (atomic_inc_return(&conn->mux_smb_requests) >= conn->vals->max_credits) {
- atomic_dec_return(&conn->mux_smb_requests);
- return -ENOSPC;
- }
-
work = ksmbd_alloc_work_struct();
if (!work) {
pr_err("allocation for work failed\n");
@@ -367,6 +361,7 @@ static int server_conf_init(void)
server_conf.auth_mechs |= KSMBD_AUTH_KRB5 |
KSMBD_AUTH_MSKRB5;
#endif
+ server_conf.max_inflight_req = SMB2_MAX_CREDITS;
return 0;
}
diff --git a/fs/smb/server/server.h b/fs/smb/server/server.h
index 4fc529335271..94187628ff08 100644
--- a/fs/smb/server/server.h
+++ b/fs/smb/server/server.h
@@ -42,6 +42,7 @@ struct ksmbd_server_config {
struct smb_sid domain_sid;
unsigned int auth_mechs;
unsigned int max_connections;
+ unsigned int max_inflight_req;
char *conf[SERVER_CONF_WORK_GROUP + 1];
struct task_struct *dh_task;
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index 803b35b89513..23e21845f928 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -1097,6 +1097,7 @@ int smb2_handle_negotiate(struct ksmbd_work *work)
return rc;
}
+ ksmbd_conn_lock(conn);
smb2_buf_len = get_rfc1002_len(work->request_buf);
smb2_neg_size = offsetof(struct smb2_negotiate_req, Dialects);
if (smb2_neg_size > smb2_buf_len) {
@@ -1247,6 +1248,7 @@ int smb2_handle_negotiate(struct ksmbd_work *work)
ksmbd_conn_set_need_negotiate(conn);
err_out:
+ ksmbd_conn_unlock(conn);
if (rc)
rsp->hdr.Status = STATUS_INSUFFICIENT_RESOURCES;
diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c
index 48cda3350e5a..befaf42b84cc 100644
--- a/fs/smb/server/transport_ipc.c
+++ b/fs/smb/server/transport_ipc.c
@@ -319,8 +319,11 @@ static int ipc_server_config_on_startup(struct ksmbd_startup_request *req)
init_smb2_max_write_size(req->smb2_max_write);
if (req->smb2_max_trans)
init_smb2_max_trans_size(req->smb2_max_trans);
- if (req->smb2_max_credits)
+ if (req->smb2_max_credits) {
init_smb2_max_credits(req->smb2_max_credits);
+ server_conf.max_inflight_req =
+ req->smb2_max_credits;
+ }
if (req->smbd_max_io_size)
init_smbd_max_io_size(req->smbd_max_io_size);
diff --git a/include/clocksource/hyperv_timer.h b/include/clocksource/hyperv_timer.h
index 6cdc873ac907..aa5233b1eba9 100644
--- a/include/clocksource/hyperv_timer.h
+++ b/include/clocksource/hyperv_timer.h
@@ -38,6 +38,8 @@ extern void hv_remap_tsc_clocksource(void);
extern unsigned long hv_get_tsc_pfn(void);
extern struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
+extern void hv_adj_sched_clock_offset(u64 offset);
+
static __always_inline bool
hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
u64 *cur_tsc, u64 *time)
diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h
index 7c0786bdf9af..0bbbe537c5f9 100644
--- a/include/linux/alloc_tag.h
+++ b/include/linux/alloc_tag.h
@@ -63,7 +63,12 @@ static inline void set_codetag_empty(union codetag_ref *ref)
#else /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */
static inline bool is_codetag_empty(union codetag_ref *ref) { return false; }
-static inline void set_codetag_empty(union codetag_ref *ref) {}
+
+static inline void set_codetag_empty(union codetag_ref *ref)
+{
+ if (ref)
+ ref->ct = NULL;
+}
#endif /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */
@@ -135,7 +140,7 @@ static inline struct alloc_tag_counters alloc_tag_read(struct alloc_tag *tag)
#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag *tag)
{
- WARN_ONCE(ref && ref->ct,
+ WARN_ONCE(ref && ref->ct && !is_codetag_empty(ref),
"alloc_tag was not cleared (got tag for %s:%u)\n",
ref->ct->filename, ref->ct->lineno);
diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h
index a28e2a6a13d0..74169dd0f659 100644
--- a/include/linux/arm_ffa.h
+++ b/include/linux/arm_ffa.h
@@ -166,9 +166,12 @@ static inline void *ffa_dev_get_drvdata(struct ffa_device *fdev)
return dev_get_drvdata(&fdev->dev);
}
+struct ffa_partition_info;
+
#if IS_REACHABLE(CONFIG_ARM_FFA_TRANSPORT)
-struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id,
- const struct ffa_ops *ops);
+struct ffa_device *
+ffa_device_register(const struct ffa_partition_info *part_info,
+ const struct ffa_ops *ops);
void ffa_device_unregister(struct ffa_device *ffa_dev);
int ffa_driver_register(struct ffa_driver *driver, struct module *owner,
const char *mod_name);
@@ -176,9 +179,9 @@ void ffa_driver_unregister(struct ffa_driver *driver);
bool ffa_device_is_valid(struct ffa_device *ffa_dev);
#else
-static inline
-struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id,
- const struct ffa_ops *ops)
+static inline struct ffa_device *
+ffa_device_register(const struct ffa_partition_info *part_info,
+ const struct ffa_ops *ops)
{
return NULL;
}
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index 108060612bb8..7ad736538649 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -155,8 +155,14 @@ static inline int get_cpu_cacheinfo_id(int cpu, int level)
#ifndef CONFIG_ARCH_HAS_CPU_CACHE_ALIASING
#define cpu_dcache_is_aliasing() false
+#define cpu_icache_is_aliasing() cpu_dcache_is_aliasing()
#else
#include <asm/cachetype.h>
+
+#ifndef cpu_icache_is_aliasing
+#define cpu_icache_is_aliasing() cpu_dcache_is_aliasing()
+#endif
+
#endif
#endif /* _LINUX_CACHEINFO_H */
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 469a64dd6495..240c632c5b95 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -216,28 +216,43 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
#endif /* __KERNEL__ */
+/**
+ * offset_to_ptr - convert a relative memory offset to an absolute pointer
+ * @off: the address of the 32-bit offset value
+ */
+static inline void *offset_to_ptr(const int *off)
+{
+ return (void *)((unsigned long)off + *off);
+}
+
+#endif /* __ASSEMBLY__ */
+
+#ifdef CONFIG_64BIT
+#define ARCH_SEL(a,b) a
+#else
+#define ARCH_SEL(a,b) b
+#endif
+
/*
* Force the compiler to emit 'sym' as a symbol, so that we can reference
* it from inline assembler. Necessary in case 'sym' could be inlined
* otherwise, or eliminated entirely due to lack of references that are
* visible to the compiler.
*/
-#define ___ADDRESSABLE(sym, __attrs) \
- static void * __used __attrs \
+#define ___ADDRESSABLE(sym, __attrs) \
+ static void * __used __attrs \
__UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)(uintptr_t)&sym;
+
#define __ADDRESSABLE(sym) \
___ADDRESSABLE(sym, __section(".discard.addressable"))
-/**
- * offset_to_ptr - convert a relative memory offset to an absolute pointer
- * @off: the address of the 32-bit offset value
- */
-static inline void *offset_to_ptr(const int *off)
-{
- return (void *)((unsigned long)off + *off);
-}
+#define __ADDRESSABLE_ASM(sym) \
+ .pushsection .discard.addressable,"aw"; \
+ .align ARCH_SEL(8,4); \
+ ARCH_SEL(.quad, .long) __stringify(sym); \
+ .popsection;
-#endif /* __ASSEMBLY__ */
+#define __ADDRESSABLE_ASM_STR(sym) __stringify(__ADDRESSABLE_ASM(sym))
#ifdef __CHECKER__
#define __BUILD_BUG_ON_ZERO_MSG(e, msg) (0)
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index b137fdb56093..346251bf1026 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -84,7 +84,7 @@ enum dma_transfer_direction {
DMA_TRANS_NONE,
};
-/**
+/*
* Interleaved Transfer Request
* ----------------------------
* A chunk is collection of contiguous bytes to be transferred.
@@ -223,7 +223,7 @@ enum sum_check_bits {
};
/**
- * enum pq_check_flags - result of async_{xor,pq}_zero_sum operations
+ * enum sum_check_flags - result of async_{xor,pq}_zero_sum operations
* @SUM_CHECK_P_RESULT - 1 if xor zero sum error, 0 otherwise
* @SUM_CHECK_Q_RESULT - 1 if reed-solomon zero sum error, 0 otherwise
*/
@@ -286,7 +286,7 @@ typedef struct { DECLARE_BITMAP(bits, DMA_TX_TYPE_END); } dma_cap_mask_t;
* pointer to the engine's metadata area
* 4. Read out the metadata from the pointer
*
- * Note: the two mode is not compatible and clients must use one mode for a
+ * Warning: the two modes are not compatible and clients must use one mode for a
* descriptor.
*/
enum dma_desc_metadata_mode {
@@ -594,9 +594,13 @@ struct dma_descriptor_metadata_ops {
* @phys: physical address of the descriptor
* @chan: target channel for this operation
* @tx_submit: accept the descriptor, assign ordered cookie and mark the
+ * @desc_free: driver's callback function to free a resusable descriptor
+ * after completion
* descriptor pending. To be pushed on .issue_pending() call
* @callback: routine to call after this operation is complete
+ * @callback_result: error result from a DMA transaction
* @callback_param: general parameter to pass to the callback routine
+ * @unmap: hook for generic DMA unmap data
* @desc_metadata_mode: core managed metadata mode to protect mixed use of
* DESC_METADATA_CLIENT or DESC_METADATA_ENGINE. Otherwise
* DESC_METADATA_NONE
@@ -827,6 +831,9 @@ struct dma_filter {
* @device_prep_dma_memset: prepares a memset operation
* @device_prep_dma_memset_sg: prepares a memset operation over a scatter list
* @device_prep_dma_interrupt: prepares an end of chain interrupt operation
+ * @device_prep_peripheral_dma_vec: prepares a scatter-gather DMA transfer,
+ * where the address and size of each segment is located in one entry of
+ * the dma_vec array.
* @device_prep_slave_sg: prepares a slave dma operation
* @device_prep_dma_cyclic: prepare a cyclic dma operation suitable for audio.
* The function takes a buffer of size buf_len. The callback function will
diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
index 0d99bf11d260..e4ce1cae03bf 100644
--- a/include/linux/fortify-string.h
+++ b/include/linux/fortify-string.h
@@ -616,6 +616,12 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size,
return false;
}
+/*
+ * To work around what seems to be an optimizer bug, the macro arguments
+ * need to have const copies or the values end up changed by the time they
+ * reach fortify_warn_once(). See commit 6f7630b1b5bc ("fortify: Capture
+ * __bos() results in const temp vars") for more details.
+ */
#define __fortify_memcpy_chk(p, q, size, p_size, q_size, \
p_size_field, q_size_field, op) ({ \
const size_t __fortify_size = (size_t)(size); \
@@ -623,6 +629,8 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size,
const size_t __q_size = (q_size); \
const size_t __p_size_field = (p_size_field); \
const size_t __q_size_field = (q_size_field); \
+ /* Keep a mutable version of the size for the final copy. */ \
+ size_t __copy_size = __fortify_size; \
fortify_warn_once(fortify_memcpy_chk(__fortify_size, __p_size, \
__q_size, __p_size_field, \
__q_size_field, FORTIFY_FUNC_ ##op), \
@@ -630,7 +638,11 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size,
__fortify_size, \
"field \"" #p "\" at " FILE_LINE, \
__p_size_field); \
- __underlying_##op(p, q, __fortify_size); \
+ /* Hide only the run-time size from value range tracking to */ \
+ /* silence compile-time false positive bounds warnings. */ \
+ if (!__builtin_constant_p(__copy_size)) \
+ OPTIMIZER_HIDE_VAR(__copy_size); \
+ __underlying_##op(p, q, __copy_size); \
})
/*
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 6e452bd8e7e3..5c6bea81a90e 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -224,7 +224,13 @@ static inline
struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma,
unsigned long vaddr)
{
- return vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr);
+ struct folio *folio;
+
+ folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vaddr);
+ if (folio && user_alloc_needs_zeroing())
+ clear_user_highpage(&folio->page, vaddr);
+
+ return folio;
}
#endif
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 22c22fb91042..02a226bcf0ed 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1559,6 +1559,7 @@ struct hv_util_service {
void *channel;
void (*util_cb)(void *);
int (*util_init)(struct hv_util_service *);
+ int (*util_init_transport)(void);
void (*util_deinit)(void);
int (*util_pre_suspend)(void);
int (*util_pre_resume)(void);
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index e123d5e17b52..85fe4e6b275c 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -15,10 +15,8 @@ bool io_is_uring_fops(struct file *file);
static inline void io_uring_files_cancel(void)
{
- if (current->io_uring) {
- io_uring_unreg_ringfd();
+ if (current->io_uring)
__io_uring_cancel(false);
- }
}
static inline void io_uring_task_cancel(void)
{
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 011860ade268..fd4cdb0860a2 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -345,7 +345,7 @@ struct io_ring_ctx {
/* timeouts */
struct {
- spinlock_t timeout_lock;
+ raw_spinlock_t timeout_lock;
struct list_head timeout_list;
struct list_head ltimeout_list;
unsigned cq_last_tm_flush;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c39c4945946c..338a76ce9083 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -31,6 +31,7 @@
#include <linux/kasan.h>
#include <linux/memremap.h>
#include <linux/slab.h>
+#include <linux/cacheinfo.h>
struct mempolicy;
struct anon_vma;
@@ -3010,7 +3011,15 @@ static inline void pagetable_pte_dtor(struct ptdesc *ptdesc)
lruvec_stat_sub_folio(folio, NR_PAGETABLE);
}
-pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp);
+pte_t *___pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp);
+static inline pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr,
+ pmd_t *pmdvalp)
+{
+ pte_t *pte;
+
+ __cond_lock(RCU, pte = ___pte_offset_map(pmd, addr, pmdvalp));
+ return pte;
+}
static inline pte_t *pte_offset_map(pmd_t *pmd, unsigned long addr)
{
return __pte_offset_map(pmd, addr, NULL);
@@ -3023,7 +3032,8 @@ static inline pte_t *pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd,
{
pte_t *pte;
- __cond_lock(*ptlp, pte = __pte_offset_map_lock(mm, pmd, addr, ptlp));
+ __cond_lock(RCU, __cond_lock(*ptlp,
+ pte = __pte_offset_map_lock(mm, pmd, addr, ptlp)));
return pte;
}
@@ -4175,6 +4185,23 @@ static inline int do_mseal(unsigned long start, size_t len_in, unsigned long fla
}
#endif
+/*
+ * user_alloc_needs_zeroing checks if a user folio from page allocator needs to
+ * be zeroed or not.
+ */
+static inline bool user_alloc_needs_zeroing(void)
+{
+ /*
+ * for user folios, arch with cache aliasing requires cache flush and
+ * arc changes folio->flags to make icache coherent with dcache, so
+ * always return false to make caller use
+ * clear_user_page()/clear_user_highpage().
+ */
+ return cpu_dcache_is_aliasing() || cpu_icache_is_aliasing() ||
+ !static_branch_maybe(CONFIG_INIT_ON_ALLOC_DEFAULT_ON,
+ &init_on_alloc);
+}
+
int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status);
int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status);
int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status);
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index cf46ac720802..691506bdf2c5 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -862,18 +862,10 @@ static inline void ClearPageCompound(struct page *page)
ClearPageHead(page);
}
FOLIO_FLAG(large_rmappable, FOLIO_SECOND_PAGE)
-FOLIO_TEST_FLAG(partially_mapped, FOLIO_SECOND_PAGE)
-/*
- * PG_partially_mapped is protected by deferred_split split_queue_lock,
- * so its safe to use non-atomic set/clear.
- */
-__FOLIO_SET_FLAG(partially_mapped, FOLIO_SECOND_PAGE)
-__FOLIO_CLEAR_FLAG(partially_mapped, FOLIO_SECOND_PAGE)
+FOLIO_FLAG(partially_mapped, FOLIO_SECOND_PAGE)
#else
FOLIO_FLAG_FALSE(large_rmappable)
-FOLIO_TEST_FLAG_FALSE(partially_mapped)
-__FOLIO_SET_FLAG_NOOP(partially_mapped)
-__FOLIO_CLEAR_FLAG_NOOP(partially_mapped)
+FOLIO_FLAG_FALSE(partially_mapped)
#endif
#define PG_head_mask ((1UL << PG_head))
diff --git a/include/linux/platform_data/amd_qdma.h b/include/linux/platform_data/amd_qdma.h
index 576d952f97ed..967a6ef31cf9 100644
--- a/include/linux/platform_data/amd_qdma.h
+++ b/include/linux/platform_data/amd_qdma.h
@@ -26,11 +26,13 @@ struct dma_slave_map;
* @max_mm_channels: Maximum number of MM DMA channels in each direction
* @device_map: DMA slave map
* @irq_index: The index of first IRQ
+ * @dma_dev: The device pointer for dma operations
*/
struct qdma_platdata {
u32 max_mm_channels;
u32 irq_index;
struct dma_slave_map *device_map;
+ struct device *dma_dev;
};
#endif /* _PLATDATA_AMD_QDMA_H */
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index d9b03e0746e7..2cbe0c22a32f 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -317,17 +317,22 @@ static inline void sock_drop(struct sock *sk, struct sk_buff *skb)
kfree_skb(skb);
}
-static inline void sk_psock_queue_msg(struct sk_psock *psock,
+static inline bool sk_psock_queue_msg(struct sk_psock *psock,
struct sk_msg *msg)
{
+ bool ret;
+
spin_lock_bh(&psock->ingress_lock);
- if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
+ if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
list_add_tail(&msg->list, &psock->ingress_msg);
- else {
+ ret = true;
+ } else {
sk_msg_free(psock->sk, msg);
kfree(msg);
+ ret = false;
}
spin_unlock_bh(&psock->ingress_lock);
+ return ret;
}
static inline struct sk_msg *sk_psock_dequeue_msg(struct sk_psock *psock)
diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index 141e6b176a1b..78a77a4ae0ea 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -160,6 +160,8 @@ extern void arch_static_call_transform(void *site, void *tramp, void *func, bool
#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
+extern int static_call_initialized;
+
extern int __init static_call_init(void);
extern void static_call_force_reinit(void);
@@ -225,6 +227,8 @@ extern long __static_call_return0(void);
#elif defined(CONFIG_HAVE_STATIC_CALL)
+#define static_call_initialized 0
+
static inline int static_call_init(void) { return 0; }
#define DEFINE_STATIC_CALL(name, _func) \
@@ -281,6 +285,8 @@ extern long __static_call_return0(void);
#else /* Generic implementation */
+#define static_call_initialized 0
+
static inline int static_call_init(void) { return 0; }
static inline long __static_call_return0(void)
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 2a5df5b62cfc..91b8ffbdfa8c 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -273,7 +273,8 @@ struct trace_event_fields {
const char *name;
const int size;
const int align;
- const int is_signed;
+ const unsigned int is_signed:1;
+ unsigned int needs_test:1;
const int filter_type;
const int len;
};
@@ -324,6 +325,7 @@ enum {
TRACE_EVENT_FL_EPROBE_BIT,
TRACE_EVENT_FL_FPROBE_BIT,
TRACE_EVENT_FL_CUSTOM_BIT,
+ TRACE_EVENT_FL_TEST_STR_BIT,
};
/*
@@ -340,6 +342,7 @@ enum {
* CUSTOM - Event is a custom event (to be attached to an exsiting tracepoint)
* This is set when the custom event has not been attached
* to a tracepoint yet, then it is cleared when it is.
+ * TEST_STR - The event has a "%s" that points to a string outside the event
*/
enum {
TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
@@ -352,6 +355,7 @@ enum {
TRACE_EVENT_FL_EPROBE = (1 << TRACE_EVENT_FL_EPROBE_BIT),
TRACE_EVENT_FL_FPROBE = (1 << TRACE_EVENT_FL_FPROBE_BIT),
TRACE_EVENT_FL_CUSTOM = (1 << TRACE_EVENT_FL_CUSTOM_BIT),
+ TRACE_EVENT_FL_TEST_STR = (1 << TRACE_EVENT_FL_TEST_STR_BIT),
};
#define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE)
diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h
index a54046bf37e5..939ceabcaf06 100644
--- a/include/linux/vermagic.h
+++ b/include/linux/vermagic.h
@@ -15,10 +15,10 @@
#else
#define MODULE_VERMAGIC_SMP ""
#endif
-#ifdef CONFIG_PREEMPT_BUILD
-#define MODULE_VERMAGIC_PREEMPT "preempt "
-#elif defined(CONFIG_PREEMPT_RT)
+#ifdef CONFIG_PREEMPT_RT
#define MODULE_VERMAGIC_PREEMPT "preempt_rt "
+#elif defined(CONFIG_PREEMPT_BUILD)
+#define MODULE_VERMAGIC_PREEMPT "preempt "
#else
#define MODULE_VERMAGIC_PREEMPT ""
#endif
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index d2761bf8ff32..9f3a04345b86 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -515,7 +515,7 @@ static inline const char *node_stat_name(enum node_stat_item item)
static inline const char *lru_list_name(enum lru_list lru)
{
- return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
+ return node_stat_name(NR_LRU_BASE + (enum node_stat_item)lru) + 3; // skip "nr_"
}
#if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)
diff --git a/include/net/sock.h b/include/net/sock.h
index 7464e9f9f47c..c383126f691d 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1527,7 +1527,7 @@ static inline bool sk_wmem_schedule(struct sock *sk, int size)
}
static inline bool
-sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
+__sk_rmem_schedule(struct sock *sk, int size, bool pfmemalloc)
{
int delta;
@@ -1535,7 +1535,13 @@ sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
return true;
delta = size - sk->sk_forward_alloc;
return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_RECV) ||
- skb_pfmemalloc(skb);
+ pfmemalloc;
+}
+
+static inline bool
+sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
+{
+ return __sk_rmem_schedule(sk, size, skb_pfmemalloc(skb));
}
static inline int sk_unused_reserved_mem(const struct sock *sk)
diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h
index ba8604bdf206..349718c271eb 100644
--- a/include/uapi/linux/thermal.h
+++ b/include/uapi/linux/thermal.h
@@ -3,8 +3,8 @@
#define _UAPI_LINUX_THERMAL_H
#define THERMAL_NAME_LENGTH 20
-#define THERMAL_THRESHOLD_WAY_UP BIT(0)
-#define THERMAL_THRESHOLD_WAY_DOWN BIT(1)
+#define THERMAL_THRESHOLD_WAY_UP 0x1
+#define THERMAL_THRESHOLD_WAY_DOWN 0x2
enum thermal_device_mode {
THERMAL_DEVICE_DISABLED = 0,
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 06ff41484e29..d3403c8216db 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -215,9 +215,9 @@ bool io_match_task_safe(struct io_kiocb *head, struct io_uring_task *tctx,
struct io_ring_ctx *ctx = head->ctx;
/* protect against races with linked timeouts */
- spin_lock_irq(&ctx->timeout_lock);
+ raw_spin_lock_irq(&ctx->timeout_lock);
matched = io_match_linked(head);
- spin_unlock_irq(&ctx->timeout_lock);
+ raw_spin_unlock_irq(&ctx->timeout_lock);
} else {
matched = io_match_linked(head);
}
@@ -333,7 +333,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
init_waitqueue_head(&ctx->cq_wait);
init_waitqueue_head(&ctx->poll_wq);
spin_lock_init(&ctx->completion_lock);
- spin_lock_init(&ctx->timeout_lock);
+ raw_spin_lock_init(&ctx->timeout_lock);
INIT_WQ_LIST(&ctx->iopoll_list);
INIT_LIST_HEAD(&ctx->io_buffers_comp);
INIT_LIST_HEAD(&ctx->defer_list);
@@ -498,10 +498,10 @@ static void io_prep_async_link(struct io_kiocb *req)
if (req->flags & REQ_F_LINK_TIMEOUT) {
struct io_ring_ctx *ctx = req->ctx;
- spin_lock_irq(&ctx->timeout_lock);
+ raw_spin_lock_irq(&ctx->timeout_lock);
io_for_each_link(cur, req)
io_prep_async_work(cur);
- spin_unlock_irq(&ctx->timeout_lock);
+ raw_spin_unlock_irq(&ctx->timeout_lock);
} else {
io_for_each_link(cur, req)
io_prep_async_work(cur);
@@ -514,7 +514,11 @@ static void io_queue_iowq(struct io_kiocb *req)
struct io_uring_task *tctx = req->tctx;
BUG_ON(!tctx);
- BUG_ON(!tctx->io_wq);
+
+ if ((current->flags & PF_KTHREAD) || !tctx->io_wq) {
+ io_req_task_queue_fail(req, -ECANCELED);
+ return;
+ }
/* init ->work of the whole link before punting */
io_prep_async_link(req);
@@ -3214,6 +3218,7 @@ end_wait:
void __io_uring_cancel(bool cancel_all)
{
+ io_uring_unreg_ringfd();
io_uring_cancel_generic(cancel_all, NULL);
}
diff --git a/io_uring/register.c b/io_uring/register.c
index 1e99c783abdf..fdd44914c39c 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -414,6 +414,9 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
if (ctx->flags & IORING_SETUP_SINGLE_ISSUER &&
current != ctx->submitter_task)
return -EEXIST;
+ /* limited to DEFER_TASKRUN for now */
+ if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN))
+ return -EINVAL;
if (copy_from_user(&p, arg, sizeof(p)))
return -EFAULT;
if (p.flags & ~RESIZE_FLAGS)
diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index f3d502717aeb..bbe58638eca7 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -74,10 +74,10 @@ static void io_timeout_complete(struct io_kiocb *req, struct io_tw_state *ts)
if (!io_timeout_finish(timeout, data)) {
if (io_req_post_cqe(req, -ETIME, IORING_CQE_F_MORE)) {
/* re-arm timer */
- spin_lock_irq(&ctx->timeout_lock);
+ raw_spin_lock_irq(&ctx->timeout_lock);
list_add(&timeout->list, ctx->timeout_list.prev);
hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
- spin_unlock_irq(&ctx->timeout_lock);
+ raw_spin_unlock_irq(&ctx->timeout_lock);
return;
}
}
@@ -109,7 +109,7 @@ __cold void io_flush_timeouts(struct io_ring_ctx *ctx)
u32 seq;
struct io_timeout *timeout, *tmp;
- spin_lock_irq(&ctx->timeout_lock);
+ raw_spin_lock_irq(&ctx->timeout_lock);
seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
@@ -134,7 +134,7 @@ __cold void io_flush_timeouts(struct io_ring_ctx *ctx)
io_kill_timeout(req, 0);
}
ctx->cq_last_tm_flush = seq;
- spin_unlock_irq(&ctx->timeout_lock);
+ raw_spin_unlock_irq(&ctx->timeout_lock);
}
static void io_req_tw_fail_links(struct io_kiocb *link, struct io_tw_state *ts)
@@ -200,9 +200,9 @@ void io_disarm_next(struct io_kiocb *req)
} else if (req->flags & REQ_F_LINK_TIMEOUT) {
struct io_ring_ctx *ctx = req->ctx;
- spin_lock_irq(&ctx->timeout_lock);
+ raw_spin_lock_irq(&ctx->timeout_lock);
link = io_disarm_linked_timeout(req);
- spin_unlock_irq(&ctx->timeout_lock);
+ raw_spin_unlock_irq(&ctx->timeout_lock);
if (link)
io_req_queue_tw_complete(link, -ECANCELED);
}
@@ -238,11 +238,11 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
struct io_ring_ctx *ctx = req->ctx;
unsigned long flags;
- spin_lock_irqsave(&ctx->timeout_lock, flags);
+ raw_spin_lock_irqsave(&ctx->timeout_lock, flags);
list_del_init(&timeout->list);
atomic_set(&req->ctx->cq_timeouts,
atomic_read(&req->ctx->cq_timeouts) + 1);
- spin_unlock_irqrestore(&ctx->timeout_lock, flags);
+ raw_spin_unlock_irqrestore(&ctx->timeout_lock, flags);
if (!(data->flags & IORING_TIMEOUT_ETIME_SUCCESS))
req_set_fail(req);
@@ -285,9 +285,9 @@ int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
{
struct io_kiocb *req;
- spin_lock_irq(&ctx->timeout_lock);
+ raw_spin_lock_irq(&ctx->timeout_lock);
req = io_timeout_extract(ctx, cd);
- spin_unlock_irq(&ctx->timeout_lock);
+ raw_spin_unlock_irq(&ctx->timeout_lock);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -330,7 +330,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
struct io_ring_ctx *ctx = req->ctx;
unsigned long flags;
- spin_lock_irqsave(&ctx->timeout_lock, flags);
+ raw_spin_lock_irqsave(&ctx->timeout_lock, flags);
prev = timeout->head;
timeout->head = NULL;
@@ -345,7 +345,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
}
list_del(&timeout->list);
timeout->prev = prev;
- spin_unlock_irqrestore(&ctx->timeout_lock, flags);
+ raw_spin_unlock_irqrestore(&ctx->timeout_lock, flags);
req->io_task_work.func = io_req_task_link_timeout;
io_req_task_work_add(req);
@@ -472,12 +472,12 @@ int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
} else {
enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags);
- spin_lock_irq(&ctx->timeout_lock);
+ raw_spin_lock_irq(&ctx->timeout_lock);
if (tr->ltimeout)
ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode);
else
ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode);
- spin_unlock_irq(&ctx->timeout_lock);
+ raw_spin_unlock_irq(&ctx->timeout_lock);
}
if (ret < 0)
@@ -572,7 +572,7 @@ int io_timeout(struct io_kiocb *req, unsigned int issue_flags)
struct list_head *entry;
u32 tail, off = timeout->off;
- spin_lock_irq(&ctx->timeout_lock);
+ raw_spin_lock_irq(&ctx->timeout_lock);
/*
* sqe->off holds how many events that need to occur for this
@@ -611,7 +611,7 @@ add:
list_add(&timeout->list, entry);
data->timer.function = io_timeout_fn;
hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
- spin_unlock_irq(&ctx->timeout_lock);
+ raw_spin_unlock_irq(&ctx->timeout_lock);
return IOU_ISSUE_SKIP_COMPLETE;
}
@@ -620,7 +620,7 @@ void io_queue_linked_timeout(struct io_kiocb *req)
struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
struct io_ring_ctx *ctx = req->ctx;
- spin_lock_irq(&ctx->timeout_lock);
+ raw_spin_lock_irq(&ctx->timeout_lock);
/*
* If the back reference is NULL, then our linked request finished
* before we got a chance to setup the timer
@@ -633,7 +633,7 @@ void io_queue_linked_timeout(struct io_kiocb *req)
data->mode);
list_add_tail(&timeout->list, &ctx->ltimeout_list);
}
- spin_unlock_irq(&ctx->timeout_lock);
+ raw_spin_unlock_irq(&ctx->timeout_lock);
/* drop submission reference */
io_put_req(req);
}
@@ -668,7 +668,7 @@ __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct io_uring_task *tctx
* timeout_lockfirst to keep locking ordering.
*/
spin_lock(&ctx->completion_lock);
- spin_lock_irq(&ctx->timeout_lock);
+ raw_spin_lock_irq(&ctx->timeout_lock);
list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
struct io_kiocb *req = cmd_to_io_kiocb(timeout);
@@ -676,7 +676,7 @@ __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct io_uring_task *tctx
io_kill_timeout(req, -ECANCELED))
canceled++;
}
- spin_unlock_irq(&ctx->timeout_lock);
+ raw_spin_unlock_irq(&ctx->timeout_lock);
spin_unlock(&ctx->completion_lock);
return canceled != 0;
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f7f892a52a37..77f56674aaa9 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -21281,11 +21281,15 @@ patch_map_ops_generic:
* changed in some incompatible and hard to support
* way, it's fine to back out this inlining logic
*/
+#ifdef CONFIG_SMP
insn_buf[0] = BPF_MOV32_IMM(BPF_REG_0, (u32)(unsigned long)&pcpu_hot.cpu_number);
insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
insn_buf[2] = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0);
cnt = 3;
-
+#else
+ insn_buf[0] = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0);
+ cnt = 1;
+#endif
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
if (!new_prog)
return -ENOMEM;
diff --git a/kernel/fork.c b/kernel/fork.c
index 1450b461d196..9b301180fd41 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -639,11 +639,8 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
LIST_HEAD(uf);
VMA_ITERATOR(vmi, mm, 0);
- uprobe_start_dup_mmap();
- if (mmap_write_lock_killable(oldmm)) {
- retval = -EINTR;
- goto fail_uprobe_end;
- }
+ if (mmap_write_lock_killable(oldmm))
+ return -EINTR;
flush_cache_dup_mm(oldmm);
uprobe_dup_mmap(oldmm, mm);
/*
@@ -782,8 +779,6 @@ out:
dup_userfaultfd_complete(&uf);
else
dup_userfaultfd_fail(&uf);
-fail_uprobe_end:
- uprobe_end_dup_mmap();
return retval;
fail_nomem_anon_vma_fork:
@@ -1692,9 +1687,11 @@ static struct mm_struct *dup_mm(struct task_struct *tsk,
if (!mm_init(mm, tsk, mm->user_ns))
goto fail_nomem;
+ uprobe_start_dup_mmap();
err = dup_mmap(mm, oldmm);
if (err)
goto free_pt;
+ uprobe_end_dup_mmap();
mm->hiwater_rss = get_mm_rss(mm);
mm->hiwater_vm = mm->total_vm;
@@ -1709,6 +1706,8 @@ free_pt:
mm->binfmt = NULL;
mm_init_owner(mm, NULL);
mmput(mm);
+ if (err)
+ uprobe_end_dup_mmap();
fail_nomem:
return NULL;
diff --git a/kernel/static_call_inline.c b/kernel/static_call_inline.c
index 5259cda486d0..bb7d066a7c39 100644
--- a/kernel/static_call_inline.c
+++ b/kernel/static_call_inline.c
@@ -15,7 +15,7 @@ extern struct static_call_site __start_static_call_sites[],
extern struct static_call_tramp_key __start_static_call_tramp_key[],
__stop_static_call_tramp_key[];
-static int static_call_initialized;
+int static_call_initialized;
/*
* Must be called before early_initcall() to be effective.
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 0bf78517b5d4..ddedcb50917f 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -1215,7 +1215,7 @@ void fgraph_update_pid_func(void)
static int start_graph_tracing(void)
{
unsigned long **ret_stack_list;
- int ret;
+ int ret, cpu;
ret_stack_list = kcalloc(FTRACE_RETSTACK_ALLOC_SIZE,
sizeof(*ret_stack_list), GFP_KERNEL);
@@ -1223,6 +1223,12 @@ static int start_graph_tracing(void)
if (!ret_stack_list)
return -ENOMEM;
+ /* The cpu_boot init_task->ret_stack will never be freed */
+ for_each_online_cpu(cpu) {
+ if (!idle_task(cpu)->ret_stack)
+ ftrace_graph_init_idle_task(idle_task(cpu), cpu);
+ }
+
do {
ret = alloc_retstack_tasklist(ret_stack_list);
} while (ret == -EAGAIN);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 7e257e855dd1..60210fb5b211 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -7019,7 +7019,11 @@ static int __rb_map_vma(struct ring_buffer_per_cpu *cpu_buffer,
lockdep_assert_held(&cpu_buffer->mapping_lock);
nr_subbufs = cpu_buffer->nr_pages + 1; /* + reader-subbuf */
- nr_pages = ((nr_subbufs + 1) << subbuf_order) - pgoff; /* + meta-page */
+ nr_pages = ((nr_subbufs + 1) << subbuf_order); /* + meta-page */
+ if (nr_pages <= pgoff)
+ return -EINVAL;
+
+ nr_pages -= pgoff;
nr_vma_pages = vma_pages(vma);
if (!nr_vma_pages || nr_vma_pages > nr_pages)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index be62f0ea1814..957f941a08e7 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3611,17 +3611,12 @@ char *trace_iter_expand_format(struct trace_iterator *iter)
}
/* Returns true if the string is safe to dereference from an event */
-static bool trace_safe_str(struct trace_iterator *iter, const char *str,
- bool star, int len)
+static bool trace_safe_str(struct trace_iterator *iter, const char *str)
{
unsigned long addr = (unsigned long)str;
struct trace_event *trace_event;
struct trace_event_call *event;
- /* Ignore strings with no length */
- if (star && !len)
- return true;
-
/* OK if part of the event data */
if ((addr >= (unsigned long)iter->ent) &&
(addr < (unsigned long)iter->ent + iter->ent_size))
@@ -3661,181 +3656,69 @@ static bool trace_safe_str(struct trace_iterator *iter, const char *str,
return false;
}
-static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
-
-static int test_can_verify_check(const char *fmt, ...)
-{
- char buf[16];
- va_list ap;
- int ret;
-
- /*
- * The verifier is dependent on vsnprintf() modifies the va_list
- * passed to it, where it is sent as a reference. Some architectures
- * (like x86_32) passes it by value, which means that vsnprintf()
- * does not modify the va_list passed to it, and the verifier
- * would then need to be able to understand all the values that
- * vsnprintf can use. If it is passed by value, then the verifier
- * is disabled.
- */
- va_start(ap, fmt);
- vsnprintf(buf, 16, "%d", ap);
- ret = va_arg(ap, int);
- va_end(ap);
-
- return ret;
-}
-
-static void test_can_verify(void)
-{
- if (!test_can_verify_check("%d %d", 0, 1)) {
- pr_info("trace event string verifier disabled\n");
- static_branch_inc(&trace_no_verify);
- }
-}
-
/**
- * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
+ * ignore_event - Check dereferenced fields while writing to the seq buffer
* @iter: The iterator that holds the seq buffer and the event being printed
- * @fmt: The format used to print the event
- * @ap: The va_list holding the data to print from @fmt.
*
- * This writes the data into the @iter->seq buffer using the data from
- * @fmt and @ap. If the format has a %s, then the source of the string
- * is examined to make sure it is safe to print, otherwise it will
- * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
- * pointer.
+ * At boot up, test_event_printk() will flag any event that dereferences
+ * a string with "%s" that does exist in the ring buffer. It may still
+ * be valid, as the string may point to a static string in the kernel
+ * rodata that never gets freed. But if the string pointer is pointing
+ * to something that was allocated, there's a chance that it can be freed
+ * by the time the user reads the trace. This would cause a bad memory
+ * access by the kernel and possibly crash the system.
+ *
+ * This function will check if the event has any fields flagged as needing
+ * to be checked at runtime and perform those checks.
+ *
+ * If it is found that a field is unsafe, it will write into the @iter->seq
+ * a message stating what was found to be unsafe.
+ *
+ * @return: true if the event is unsafe and should be ignored,
+ * false otherwise.
*/
-void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
- va_list ap)
+bool ignore_event(struct trace_iterator *iter)
{
- long text_delta = 0;
- long data_delta = 0;
- const char *p = fmt;
- const char *str;
- bool good;
- int i, j;
+ struct ftrace_event_field *field;
+ struct trace_event *trace_event;
+ struct trace_event_call *event;
+ struct list_head *head;
+ struct trace_seq *seq;
+ const void *ptr;
- if (WARN_ON_ONCE(!fmt))
- return;
+ trace_event = ftrace_find_event(iter->ent->type);
- if (static_branch_unlikely(&trace_no_verify))
- goto print;
+ seq = &iter->seq;
- /*
- * When the kernel is booted with the tp_printk command line
- * parameter, trace events go directly through to printk().
- * It also is checked by this function, but it does not
- * have an associated trace_array (tr) for it.
- */
- if (iter->tr) {
- text_delta = iter->tr->text_delta;
- data_delta = iter->tr->data_delta;
+ if (!trace_event) {
+ trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
+ return true;
}
- /* Don't bother checking when doing a ftrace_dump() */
- if (iter->fmt == static_fmt_buf)
- goto print;
-
- while (*p) {
- bool star = false;
- int len = 0;
-
- j = 0;
-
- /*
- * We only care about %s and variants
- * as well as %p[sS] if delta is non-zero
- */
- for (i = 0; p[i]; i++) {
- if (i + 1 >= iter->fmt_size) {
- /*
- * If we can't expand the copy buffer,
- * just print it.
- */
- if (!trace_iter_expand_format(iter))
- goto print;
- }
-
- if (p[i] == '\\' && p[i+1]) {
- i++;
- continue;
- }
- if (p[i] == '%') {
- /* Need to test cases like %08.*s */
- for (j = 1; p[i+j]; j++) {
- if (isdigit(p[i+j]) ||
- p[i+j] == '.')
- continue;
- if (p[i+j] == '*') {
- star = true;
- continue;
- }
- break;
- }
- if (p[i+j] == 's')
- break;
-
- if (text_delta && p[i+1] == 'p' &&
- ((p[i+2] == 's' || p[i+2] == 'S')))
- break;
-
- star = false;
- }
- j = 0;
- }
- /* If no %s found then just print normally */
- if (!p[i])
- break;
-
- /* Copy up to the %s, and print that */
- strncpy(iter->fmt, p, i);
- iter->fmt[i] = '\0';
- trace_seq_vprintf(&iter->seq, iter->fmt, ap);
+ event = container_of(trace_event, struct trace_event_call, event);
+ if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
+ return false;
- /* Add delta to %pS pointers */
- if (p[i+1] == 'p') {
- unsigned long addr;
- char fmt[4];
+ head = trace_get_fields(event);
+ if (!head) {
+ trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
+ trace_event_name(event));
+ return true;
+ }
- fmt[0] = '%';
- fmt[1] = 'p';
- fmt[2] = p[i+2]; /* Either %ps or %pS */
- fmt[3] = '\0';
+ /* Offsets are from the iter->ent that points to the raw event */
+ ptr = iter->ent;
- addr = va_arg(ap, unsigned long);
- addr += text_delta;
- trace_seq_printf(&iter->seq, fmt, (void *)addr);
+ list_for_each_entry(field, head, link) {
+ const char *str;
+ bool good;
- p += i + 3;
+ if (!field->needs_test)
continue;
- }
- /*
- * If iter->seq is full, the above call no longer guarantees
- * that ap is in sync with fmt processing, and further calls
- * to va_arg() can return wrong positional arguments.
- *
- * Ensure that ap is no longer used in this case.
- */
- if (iter->seq.full) {
- p = "";
- break;
- }
+ str = *(const char **)(ptr + field->offset);
- if (star)
- len = va_arg(ap, int);
-
- /* The ap now points to the string data of the %s */
- str = va_arg(ap, const char *);
-
- good = trace_safe_str(iter, str, star, len);
-
- /* Could be from the last boot */
- if (data_delta && !good) {
- str += data_delta;
- good = trace_safe_str(iter, str, star, len);
- }
+ good = trace_safe_str(iter, str);
/*
* If you hit this warning, it is likely that the
@@ -3846,44 +3729,14 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
* instead. See samples/trace_events/trace-events-sample.h
* for reference.
*/
- if (WARN_ONCE(!good, "fmt: '%s' current_buffer: '%s'",
- fmt, seq_buf_str(&iter->seq.seq))) {
- int ret;
-
- /* Try to safely read the string */
- if (star) {
- if (len + 1 > iter->fmt_size)
- len = iter->fmt_size - 1;
- if (len < 0)
- len = 0;
- ret = copy_from_kernel_nofault(iter->fmt, str, len);
- iter->fmt[len] = 0;
- star = false;
- } else {
- ret = strncpy_from_kernel_nofault(iter->fmt, str,
- iter->fmt_size);
- }
- if (ret < 0)
- trace_seq_printf(&iter->seq, "(0x%px)", str);
- else
- trace_seq_printf(&iter->seq, "(0x%px:%s)",
- str, iter->fmt);
- str = "[UNSAFE-MEMORY]";
- strcpy(iter->fmt, "%s");
- } else {
- strncpy(iter->fmt, p + i, j + 1);
- iter->fmt[j+1] = '\0';
+ if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
+ trace_event_name(event), field->name)) {
+ trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
+ trace_event_name(event), field->name);
+ return true;
}
- if (star)
- trace_seq_printf(&iter->seq, iter->fmt, len, str);
- else
- trace_seq_printf(&iter->seq, iter->fmt, str);
-
- p += i + j + 1;
}
- print:
- if (*p)
- trace_seq_vprintf(&iter->seq, p, ap);
+ return false;
}
const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
@@ -4353,6 +4206,15 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
if (event) {
if (tr->trace_flags & TRACE_ITER_FIELDS)
return print_event_fields(iter, event);
+ /*
+ * For TRACE_EVENT() events, the print_fmt is not
+ * safe to use if the array has delta offsets
+ * Force printing via the fields.
+ */
+ if ((tr->text_delta || tr->data_delta) &&
+ event->type > __TRACE_LAST_TYPE)
+ return print_event_fields(iter, event);
+
return event->funcs->trace(iter, sym_flags, event);
}
@@ -10777,8 +10639,6 @@ __init static int tracer_alloc_buffers(void)
register_snapshot_cmd();
- test_can_verify();
-
return 0;
out_free_pipe_cpumask:
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 266740b4e121..9691b47b5f3d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -667,9 +667,8 @@ void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
bool trace_is_tracepoint_string(const char *str);
const char *trace_event_format(struct trace_iterator *iter, const char *fmt);
-void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
- va_list ap) __printf(2, 0);
char *trace_iter_expand_format(struct trace_iterator *iter);
+bool ignore_event(struct trace_iterator *iter);
int trace_empty(struct trace_iterator *iter);
@@ -1413,7 +1412,8 @@ struct ftrace_event_field {
int filter_type;
int offset;
int size;
- int is_signed;
+ unsigned int is_signed:1;
+ unsigned int needs_test:1;
int len;
};
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 77e68efbd43e..1545cc8b49d0 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -82,7 +82,7 @@ static int system_refcount_dec(struct event_subsystem *system)
}
static struct ftrace_event_field *
-__find_event_field(struct list_head *head, char *name)
+__find_event_field(struct list_head *head, const char *name)
{
struct ftrace_event_field *field;
@@ -114,7 +114,8 @@ trace_find_event_field(struct trace_event_call *call, char *name)
static int __trace_define_field(struct list_head *head, const char *type,
const char *name, int offset, int size,
- int is_signed, int filter_type, int len)
+ int is_signed, int filter_type, int len,
+ int need_test)
{
struct ftrace_event_field *field;
@@ -133,6 +134,7 @@ static int __trace_define_field(struct list_head *head, const char *type,
field->offset = offset;
field->size = size;
field->is_signed = is_signed;
+ field->needs_test = need_test;
field->len = len;
list_add(&field->link, head);
@@ -151,13 +153,13 @@ int trace_define_field(struct trace_event_call *call, const char *type,
head = trace_get_fields(call);
return __trace_define_field(head, type, name, offset, size,
- is_signed, filter_type, 0);
+ is_signed, filter_type, 0, 0);
}
EXPORT_SYMBOL_GPL(trace_define_field);
static int trace_define_field_ext(struct trace_event_call *call, const char *type,
const char *name, int offset, int size, int is_signed,
- int filter_type, int len)
+ int filter_type, int len, int need_test)
{
struct list_head *head;
@@ -166,13 +168,13 @@ static int trace_define_field_ext(struct trace_event_call *call, const char *typ
head = trace_get_fields(call);
return __trace_define_field(head, type, name, offset, size,
- is_signed, filter_type, len);
+ is_signed, filter_type, len, need_test);
}
#define __generic_field(type, item, filter_type) \
ret = __trace_define_field(&ftrace_generic_fields, #type, \
#item, 0, 0, is_signed_type(type), \
- filter_type, 0); \
+ filter_type, 0, 0); \
if (ret) \
return ret;
@@ -181,7 +183,8 @@ static int trace_define_field_ext(struct trace_event_call *call, const char *typ
"common_" #item, \
offsetof(typeof(ent), item), \
sizeof(ent.item), \
- is_signed_type(type), FILTER_OTHER, 0); \
+ is_signed_type(type), FILTER_OTHER, \
+ 0, 0); \
if (ret) \
return ret;
@@ -244,19 +247,16 @@ int trace_event_get_offsets(struct trace_event_call *call)
return tail->offset + tail->size;
}
-/*
- * Check if the referenced field is an array and return true,
- * as arrays are OK to dereference.
- */
-static bool test_field(const char *fmt, struct trace_event_call *call)
+
+static struct trace_event_fields *find_event_field(const char *fmt,
+ struct trace_event_call *call)
{
struct trace_event_fields *field = call->class->fields_array;
- const char *array_descriptor;
const char *p = fmt;
int len;
if (!(len = str_has_prefix(fmt, "REC->")))
- return false;
+ return NULL;
fmt += len;
for (p = fmt; *p; p++) {
if (!isalnum(*p) && *p != '_')
@@ -265,16 +265,129 @@ static bool test_field(const char *fmt, struct trace_event_call *call)
len = p - fmt;
for (; field->type; field++) {
- if (strncmp(field->name, fmt, len) ||
- field->name[len])
+ if (strncmp(field->name, fmt, len) || field->name[len])
continue;
- array_descriptor = strchr(field->type, '[');
- /* This is an array and is OK to dereference. */
- return array_descriptor != NULL;
+
+ return field;
+ }
+ return NULL;
+}
+
+/*
+ * Check if the referenced field is an array and return true,
+ * as arrays are OK to dereference.
+ */
+static bool test_field(const char *fmt, struct trace_event_call *call)
+{
+ struct trace_event_fields *field;
+
+ field = find_event_field(fmt, call);
+ if (!field)
+ return false;
+
+ /* This is an array and is OK to dereference. */
+ return strchr(field->type, '[') != NULL;
+}
+
+/* Look for a string within an argument */
+static bool find_print_string(const char *arg, const char *str, const char *end)
+{
+ const char *r;
+
+ r = strstr(arg, str);
+ return r && r < end;
+}
+
+/* Return true if the argument pointer is safe */
+static bool process_pointer(const char *fmt, int len, struct trace_event_call *call)
+{
+ const char *r, *e, *a;
+
+ e = fmt + len;
+
+ /* Find the REC-> in the argument */
+ r = strstr(fmt, "REC->");
+ if (r && r < e) {
+ /*
+ * Addresses of events on the buffer, or an array on the buffer is
+ * OK to dereference. There's ways to fool this, but
+ * this is to catch common mistakes, not malicious code.
+ */
+ a = strchr(fmt, '&');
+ if ((a && (a < r)) || test_field(r, call))
+ return true;
+ } else if (find_print_string(fmt, "__get_dynamic_array(", e)) {
+ return true;
+ } else if (find_print_string(fmt, "__get_rel_dynamic_array(", e)) {
+ return true;
+ } else if (find_print_string(fmt, "__get_dynamic_array_len(", e)) {
+ return true;
+ } else if (find_print_string(fmt, "__get_rel_dynamic_array_len(", e)) {
+ return true;
+ } else if (find_print_string(fmt, "__get_sockaddr(", e)) {
+ return true;
+ } else if (find_print_string(fmt, "__get_rel_sockaddr(", e)) {
+ return true;
}
return false;
}
+/* Return true if the string is safe */
+static bool process_string(const char *fmt, int len, struct trace_event_call *call)
+{
+ struct trace_event_fields *field;
+ const char *r, *e, *s;
+
+ e = fmt + len;
+
+ /*
+ * There are several helper functions that return strings.
+ * If the argument contains a function, then assume its field is valid.
+ * It is considered that the argument has a function if it has:
+ * alphanumeric or '_' before a parenthesis.
+ */
+ s = fmt;
+ do {
+ r = strstr(s, "(");
+ if (!r || r >= e)
+ break;
+ for (int i = 1; r - i >= s; i++) {
+ char ch = *(r - i);
+ if (isspace(ch))
+ continue;
+ if (isalnum(ch) || ch == '_')
+ return true;
+ /* Anything else, this isn't a function */
+ break;
+ }
+ /* A function could be wrapped in parethesis, try the next one */
+ s = r + 1;
+ } while (s < e);
+
+ /*
+ * If there's any strings in the argument consider this arg OK as it
+ * could be: REC->field ? "foo" : "bar" and we don't want to get into
+ * verifying that logic here.
+ */
+ if (find_print_string(fmt, "\"", e))
+ return true;
+
+ /* Dereferenced strings are also valid like any other pointer */
+ if (process_pointer(fmt, len, call))
+ return true;
+
+ /* Make sure the field is found */
+ field = find_event_field(fmt, call);
+ if (!field)
+ return false;
+
+ /* Test this field's string before printing the event */
+ call->flags |= TRACE_EVENT_FL_TEST_STR;
+ field->needs_test = 1;
+
+ return true;
+}
+
/*
* Examine the print fmt of the event looking for unsafe dereference
* pointers using %p* that could be recorded in the trace event and
@@ -284,13 +397,14 @@ static bool test_field(const char *fmt, struct trace_event_call *call)
static void test_event_printk(struct trace_event_call *call)
{
u64 dereference_flags = 0;
+ u64 string_flags = 0;
bool first = true;
- const char *fmt, *c, *r, *a;
+ const char *fmt;
int parens = 0;
char in_quote = 0;
int start_arg = 0;
int arg = 0;
- int i;
+ int i, e;
fmt = call->print_fmt;
@@ -374,8 +488,16 @@ static void test_event_printk(struct trace_event_call *call)
star = true;
continue;
}
- if ((fmt[i + j] == 's') && star)
- arg++;
+ if ((fmt[i + j] == 's')) {
+ if (star)
+ arg++;
+ if (WARN_ONCE(arg == 63,
+ "Too many args for event: %s",
+ trace_event_name(call)))
+ return;
+ dereference_flags |= 1ULL << arg;
+ string_flags |= 1ULL << arg;
+ }
break;
}
break;
@@ -403,42 +525,47 @@ static void test_event_printk(struct trace_event_call *call)
case ',':
if (in_quote || parens)
continue;
+ e = i;
i++;
while (isspace(fmt[i]))
i++;
- start_arg = i;
- if (!(dereference_flags & (1ULL << arg)))
- goto next_arg;
- /* Find the REC-> in the argument */
- c = strchr(fmt + i, ',');
- r = strstr(fmt + i, "REC->");
- if (r && (!c || r < c)) {
- /*
- * Addresses of events on the buffer,
- * or an array on the buffer is
- * OK to dereference.
- * There's ways to fool this, but
- * this is to catch common mistakes,
- * not malicious code.
- */
- a = strchr(fmt + i, '&');
- if ((a && (a < r)) || test_field(r, call))
+ /*
+ * If start_arg is zero, then this is the start of the
+ * first argument. The processing of the argument happens
+ * when the end of the argument is found, as it needs to
+ * handle paranthesis and such.
+ */
+ if (!start_arg) {
+ start_arg = i;
+ /* Balance out the i++ in the for loop */
+ i--;
+ continue;
+ }
+
+ if (dereference_flags & (1ULL << arg)) {
+ if (string_flags & (1ULL << arg)) {
+ if (process_string(fmt + start_arg, e - start_arg, call))
+ dereference_flags &= ~(1ULL << arg);
+ } else if (process_pointer(fmt + start_arg, e - start_arg, call))
dereference_flags &= ~(1ULL << arg);
- } else if ((r = strstr(fmt + i, "__get_dynamic_array(")) &&
- (!c || r < c)) {
- dereference_flags &= ~(1ULL << arg);
- } else if ((r = strstr(fmt + i, "__get_sockaddr(")) &&
- (!c || r < c)) {
- dereference_flags &= ~(1ULL << arg);
}
- next_arg:
- i--;
+ start_arg = i;
arg++;
+ /* Balance out the i++ in the for loop */
+ i--;
}
}
+ if (dereference_flags & (1ULL << arg)) {
+ if (string_flags & (1ULL << arg)) {
+ if (process_string(fmt + start_arg, i - start_arg, call))
+ dereference_flags &= ~(1ULL << arg);
+ } else if (process_pointer(fmt + start_arg, i - start_arg, call))
+ dereference_flags &= ~(1ULL << arg);
+ }
+
/*
* If you triggered the below warning, the trace event reported
* uses an unsafe dereference pointer %p*. As the data stored
@@ -2471,7 +2598,7 @@ event_define_fields(struct trace_event_call *call)
ret = trace_define_field_ext(call, field->type, field->name,
offset, field->size,
field->is_signed, field->filter_type,
- field->len);
+ field->len, field->needs_test);
if (WARN_ON_ONCE(ret)) {
pr_err("error code is %d\n", ret);
break;
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 74c353164ca1..d358c9935164 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -176,7 +176,8 @@ static void function_trace_start(struct trace_array *tr)
tracing_reset_online_cpus(&tr->array_buffer);
}
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/* fregs are guaranteed not to be NULL if HAVE_DYNAMIC_FTRACE_WITH_ARGS is set */
+#if defined(CONFIG_FUNCTION_GRAPH_TRACER) && defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS)
static __always_inline unsigned long
function_get_true_parent_ip(unsigned long parent_ip, struct ftrace_regs *fregs)
{
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index da748b7cbc4d..03d56f711ad1 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -317,10 +317,14 @@ EXPORT_SYMBOL(trace_raw_output_prep);
void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...)
{
+ struct trace_seq *s = &iter->seq;
va_list ap;
+ if (ignore_event(iter))
+ return;
+
va_start(ap, fmt);
- trace_check_vprintf(iter, trace_event_format(iter, fmt), ap);
+ trace_seq_vprintf(s, trace_event_format(iter, fmt), ap);
va_end(ap);
}
EXPORT_SYMBOL(trace_event_printf);
diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
index 35f7560a309a..7dcebf118a3e 100644
--- a/lib/alloc_tag.c
+++ b/lib/alloc_tag.c
@@ -209,6 +209,13 @@ void pgalloc_tag_swap(struct folio *new, struct folio *old)
return;
}
+ /*
+ * Clear tag references to avoid debug warning when using
+ * __alloc_tag_ref_set() with non-empty reference.
+ */
+ set_codetag_empty(&ref_old);
+ set_codetag_empty(&ref_new);
+
/* swap tags */
__alloc_tag_ref_set(&ref_old, tag_new);
update_page_tag_ref(handle_old, &ref_old);
@@ -401,28 +408,52 @@ repeat:
static int vm_module_tags_populate(void)
{
- unsigned long phys_size = vm_module_tags->nr_pages << PAGE_SHIFT;
+ unsigned long phys_end = ALIGN_DOWN(module_tags.start_addr, PAGE_SIZE) +
+ (vm_module_tags->nr_pages << PAGE_SHIFT);
+ unsigned long new_end = module_tags.start_addr + module_tags.size;
- if (phys_size < module_tags.size) {
+ if (phys_end < new_end) {
struct page **next_page = vm_module_tags->pages + vm_module_tags->nr_pages;
- unsigned long addr = module_tags.start_addr + phys_size;
+ unsigned long old_shadow_end = ALIGN(phys_end, MODULE_ALIGN);
+ unsigned long new_shadow_end = ALIGN(new_end, MODULE_ALIGN);
unsigned long more_pages;
unsigned long nr;
- more_pages = ALIGN(module_tags.size - phys_size, PAGE_SIZE) >> PAGE_SHIFT;
+ more_pages = ALIGN(new_end - phys_end, PAGE_SIZE) >> PAGE_SHIFT;
nr = alloc_pages_bulk_array_node(GFP_KERNEL | __GFP_NOWARN,
NUMA_NO_NODE, more_pages, next_page);
if (nr < more_pages ||
- vmap_pages_range(addr, addr + (nr << PAGE_SHIFT), PAGE_KERNEL,
+ vmap_pages_range(phys_end, phys_end + (nr << PAGE_SHIFT), PAGE_KERNEL,
next_page, PAGE_SHIFT) < 0) {
/* Clean up and error out */
for (int i = 0; i < nr; i++)
__free_page(next_page[i]);
return -ENOMEM;
}
+
vm_module_tags->nr_pages += nr;
+
+ /*
+ * Kasan allocates 1 byte of shadow for every 8 bytes of data.
+ * When kasan_alloc_module_shadow allocates shadow memory,
+ * its unit of allocation is a page.
+ * Therefore, here we need to align to MODULE_ALIGN.
+ */
+ if (old_shadow_end < new_shadow_end)
+ kasan_alloc_module_shadow((void *)old_shadow_end,
+ new_shadow_end - old_shadow_end,
+ GFP_KERNEL);
}
+ /*
+ * Mark the pages as accessible, now that they are mapped.
+ * With hardware tag-based KASAN, marking is skipped for
+ * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc().
+ */
+ kasan_unpoison_vmalloc((void *)module_tags.start_addr,
+ new_end - module_tags.start_addr,
+ KASAN_VMALLOC_PROT_NORMAL);
+
return 0;
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index ee335d96fc39..e53d83b3e5cf 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1176,11 +1176,12 @@ static struct folio *vma_alloc_anon_folio_pmd(struct vm_area_struct *vma,
folio_throttle_swaprate(folio, gfp);
/*
- * When a folio is not zeroed during allocation (__GFP_ZERO not used),
- * folio_zero_user() is used to make sure that the page corresponding
- * to the faulting address will be hot in the cache after zeroing.
+ * When a folio is not zeroed during allocation (__GFP_ZERO not used)
+ * or user folios require special handling, folio_zero_user() is used to
+ * make sure that the page corresponding to the faulting address will be
+ * hot in the cache after zeroing.
*/
- if (!alloc_zeroed())
+ if (user_alloc_needs_zeroing())
folio_zero_user(folio, addr);
/*
* The memory barrier inside __folio_mark_uptodate makes sure that
@@ -3576,7 +3577,7 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
!list_empty(&folio->_deferred_list)) {
ds_queue->split_queue_len--;
if (folio_test_partially_mapped(folio)) {
- __folio_clear_partially_mapped(folio);
+ folio_clear_partially_mapped(folio);
mod_mthp_stat(folio_order(folio),
MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, -1);
}
@@ -3688,7 +3689,7 @@ bool __folio_unqueue_deferred_split(struct folio *folio)
if (!list_empty(&folio->_deferred_list)) {
ds_queue->split_queue_len--;
if (folio_test_partially_mapped(folio)) {
- __folio_clear_partially_mapped(folio);
+ folio_clear_partially_mapped(folio);
mod_mthp_stat(folio_order(folio),
MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, -1);
}
@@ -3732,7 +3733,7 @@ void deferred_split_folio(struct folio *folio, bool partially_mapped)
spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
if (partially_mapped) {
if (!folio_test_partially_mapped(folio)) {
- __folio_set_partially_mapped(folio);
+ folio_set_partially_mapped(folio);
if (folio_test_pmd_mappable(folio))
count_vm_event(THP_DEFERRED_SPLIT_PAGE);
count_mthp_stat(folio_order(folio), MTHP_STAT_SPLIT_DEFERRED);
@@ -3825,7 +3826,7 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
} else {
/* We lost race with folio_put() */
if (folio_test_partially_mapped(folio)) {
- __folio_clear_partially_mapped(folio);
+ folio_clear_partially_mapped(folio);
mod_mthp_stat(folio_order(folio),
MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, -1);
}
@@ -4168,7 +4169,7 @@ static ssize_t split_huge_pages_write(struct file *file, const char __user *buf,
size_t input_len = strlen(input_buf);
tok = strsep(&buf, ",");
- if (tok) {
+ if (tok && buf) {
strscpy(file_path, tok);
} else {
ret = -EINVAL;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ea2ed8e301ef..cec4b121193f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5340,7 +5340,7 @@ again:
break;
}
ret = copy_user_large_folio(new_folio, pte_folio,
- ALIGN_DOWN(addr, sz), dst_vma);
+ addr, dst_vma);
folio_put(pte_folio);
if (ret) {
folio_put(new_folio);
@@ -6643,8 +6643,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
*foliop = NULL;
goto out;
}
- ret = copy_user_large_folio(folio, *foliop,
- ALIGN_DOWN(dst_addr, size), dst_vma);
+ ret = copy_user_large_folio(folio, *foliop, dst_addr, dst_vma);
folio_put(*foliop);
*foliop = NULL;
if (ret) {
diff --git a/mm/internal.h b/mm/internal.h
index cb8d8e8e3ffa..3bd08bafad04 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1285,12 +1285,6 @@ void touch_pud(struct vm_area_struct *vma, unsigned long addr,
void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd, bool write);
-static inline bool alloc_zeroed(void)
-{
- return static_branch_maybe(CONFIG_INIT_ON_ALLOC_DEFAULT_ON,
- &init_on_alloc);
-}
-
/*
* Parses a string with mem suffixes into its order. Useful to parse kernel
* parameters.
diff --git a/mm/memory.c b/mm/memory.c
index 75c2dfd04f72..398c031be9ba 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4733,12 +4733,12 @@ static struct folio *alloc_anon_folio(struct vm_fault *vmf)
folio_throttle_swaprate(folio, gfp);
/*
* When a folio is not zeroed during allocation
- * (__GFP_ZERO not used), folio_zero_user() is used
- * to make sure that the page corresponding to the
- * faulting address will be hot in the cache after
- * zeroing.
+ * (__GFP_ZERO not used) or user folios require special
+ * handling, folio_zero_user() is used to make sure
+ * that the page corresponding to the faulting address
+ * will be hot in the cache after zeroing.
*/
- if (!alloc_zeroed())
+ if (user_alloc_needs_zeroing())
folio_zero_user(folio, vmf->address);
return folio;
}
@@ -6815,9 +6815,10 @@ static inline int process_huge_page(
return 0;
}
-static void clear_gigantic_page(struct folio *folio, unsigned long addr,
+static void clear_gigantic_page(struct folio *folio, unsigned long addr_hint,
unsigned int nr_pages)
{
+ unsigned long addr = ALIGN_DOWN(addr_hint, folio_size(folio));
int i;
might_sleep();
@@ -6851,13 +6852,14 @@ void folio_zero_user(struct folio *folio, unsigned long addr_hint)
}
static int copy_user_gigantic_page(struct folio *dst, struct folio *src,
- unsigned long addr,
+ unsigned long addr_hint,
struct vm_area_struct *vma,
unsigned int nr_pages)
{
- int i;
+ unsigned long addr = ALIGN_DOWN(addr_hint, folio_size(dst));
struct page *dst_page;
struct page *src_page;
+ int i;
for (i = 0; i < nr_pages; i++) {
dst_page = folio_page(dst, i);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1cb4b8c8886d..cae7b93864c2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1238,13 +1238,15 @@ static void split_large_buddy(struct zone *zone, struct page *page,
if (order > pageblock_order)
order = pageblock_order;
- while (pfn != end) {
+ do {
int mt = get_pfnblock_migratetype(page, pfn);
__free_one_page(page, pfn, zone, order, mt, fpi);
pfn += 1 << order;
+ if (pfn == end)
+ break;
page = pfn_to_page(pfn);
- }
+ } while (1);
}
static void free_one_page(struct zone *zone, struct page *page,
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index 5297dcc38c37..5a882f2b10f9 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -279,7 +279,7 @@ static unsigned long pmdp_get_lockless_start(void) { return 0; }
static void pmdp_get_lockless_end(unsigned long irqflags) { }
#endif
-pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp)
+pte_t *___pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp)
{
unsigned long irqflags;
pmd_t pmdval;
diff --git a/mm/shmem.c b/mm/shmem.c
index ccb9629a0f70..f6fb053ac50d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -787,6 +787,14 @@ static bool shmem_huge_global_enabled(struct inode *inode, pgoff_t index,
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+static void shmem_update_stats(struct folio *folio, int nr_pages)
+{
+ if (folio_test_pmd_mappable(folio))
+ __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, nr_pages);
+ __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr_pages);
+ __lruvec_stat_mod_folio(folio, NR_SHMEM, nr_pages);
+}
+
/*
* Somewhat like filemap_add_folio, but error if expected item has gone.
*/
@@ -821,10 +829,7 @@ static int shmem_add_to_page_cache(struct folio *folio,
xas_store(&xas, folio);
if (xas_error(&xas))
goto unlock;
- if (folio_test_pmd_mappable(folio))
- __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, nr);
- __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr);
- __lruvec_stat_mod_folio(folio, NR_SHMEM, nr);
+ shmem_update_stats(folio, nr);
mapping->nrpages += nr;
unlock:
xas_unlock_irq(&xas);
@@ -852,8 +857,7 @@ static void shmem_delete_from_page_cache(struct folio *folio, void *radswap)
error = shmem_replace_entry(mapping, folio->index, folio, radswap);
folio->mapping = NULL;
mapping->nrpages -= nr;
- __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, -nr);
- __lruvec_stat_mod_folio(folio, NR_SHMEM, -nr);
+ shmem_update_stats(folio, -nr);
xa_unlock_irq(&mapping->i_pages);
folio_put_refs(folio, nr);
BUG_ON(error);
@@ -1969,10 +1973,8 @@ static int shmem_replace_folio(struct folio **foliop, gfp_t gfp,
}
if (!error) {
mem_cgroup_replace_folio(old, new);
- __lruvec_stat_mod_folio(new, NR_FILE_PAGES, nr_pages);
- __lruvec_stat_mod_folio(new, NR_SHMEM, nr_pages);
- __lruvec_stat_mod_folio(old, NR_FILE_PAGES, -nr_pages);
- __lruvec_stat_mod_folio(old, NR_SHMEM, -nr_pages);
+ shmem_update_stats(new, nr_pages);
+ shmem_update_stats(old, -nr_pages);
}
xa_unlock_irq(&swap_mapping->i_pages);
diff --git a/mm/vma.c b/mm/vma.c
index 8e31b7e25aeb..bb2119e5a0d0 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -2460,10 +2460,13 @@ unsigned long __mmap_region(struct file *file, unsigned long addr,
/* If flags changed, we might be able to merge, so try again. */
if (map.retry_merge) {
+ struct vm_area_struct *merged;
VMG_MMAP_STATE(vmg, &map, vma);
vma_iter_config(map.vmi, map.addr, map.end);
- vma_merge_existing_range(&vmg);
+ merged = vma_merge_existing_range(&vmg);
+ if (merged)
+ vma = merged;
}
__mmap_complete(&map, vma);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f009b21705c1..5c88d0e90c20 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3374,7 +3374,8 @@ void vfree(const void *addr)
struct page *page = vm->pages[i];
BUG_ON(!page);
- mod_memcg_page_state(page, MEMCG_VMALLOC, -1);
+ if (!(vm->flags & VM_MAP_PUT_PAGES))
+ mod_memcg_page_state(page, MEMCG_VMALLOC, -1);
/*
* High-order allocs for huge vmallocs are split, so
* can be freed as an array of order-0 allocations
@@ -3382,7 +3383,8 @@ void vfree(const void *addr)
__free_page(page);
cond_resched();
}
- atomic_long_sub(vm->nr_pages, &nr_vmalloc_pages);
+ if (!(vm->flags & VM_MAP_PUT_PAGES))
+ atomic_long_sub(vm->nr_pages, &nr_vmalloc_pages);
kvfree(vm->pages);
kfree(vm);
}
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 9b1168eb77ab..b24afec24138 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1173,6 +1173,8 @@ EXPORT_SYMBOL(ceph_osdc_new_request);
int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt)
{
+ WARN_ON(op->op != CEPH_OSD_OP_SPARSE_READ);
+
op->extent.sparse_ext_cnt = cnt;
op->extent.sparse_ext = kmalloc_array(cnt,
sizeof(*op->extent.sparse_ext),
diff --git a/net/core/filter.c b/net/core/filter.c
index 21131ec25f24..834614071727 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3734,13 +3734,22 @@ static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
static u32 __bpf_skb_min_len(const struct sk_buff *skb)
{
- u32 min_len = skb_network_offset(skb);
+ int offset = skb_network_offset(skb);
+ u32 min_len = 0;
- if (skb_transport_header_was_set(skb))
- min_len = skb_transport_offset(skb);
- if (skb->ip_summed == CHECKSUM_PARTIAL)
- min_len = skb_checksum_start_offset(skb) +
- skb->csum_offset + sizeof(__sum16);
+ if (offset > 0)
+ min_len = offset;
+ if (skb_transport_header_was_set(skb)) {
+ offset = skb_transport_offset(skb);
+ if (offset > 0)
+ min_len = offset;
+ }
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ offset = skb_checksum_start_offset(skb) +
+ skb->csum_offset + sizeof(__sum16);
+ if (offset > 0)
+ min_len = offset;
+ }
return min_len;
}
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index 9527dd46e4dc..2d3ae0cd3ad2 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -430,10 +430,10 @@ static int
netdev_nl_queue_fill(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx,
u32 q_type, const struct genl_info *info)
{
- int err = 0;
+ int err;
if (!(netdev->flags & IFF_UP))
- return err;
+ return -ENOENT;
err = netdev_nl_queue_validate(netdev, q_idx, q_type);
if (err)
@@ -488,24 +488,21 @@ netdev_nl_queue_dump_one(struct net_device *netdev, struct sk_buff *rsp,
struct netdev_nl_dump_ctx *ctx)
{
int err = 0;
- int i;
if (!(netdev->flags & IFF_UP))
return err;
- for (i = ctx->rxq_idx; i < netdev->real_num_rx_queues;) {
- err = netdev_nl_queue_fill_one(rsp, netdev, i,
+ for (; ctx->rxq_idx < netdev->real_num_rx_queues; ctx->rxq_idx++) {
+ err = netdev_nl_queue_fill_one(rsp, netdev, ctx->rxq_idx,
NETDEV_QUEUE_TYPE_RX, info);
if (err)
return err;
- ctx->rxq_idx = i++;
}
- for (i = ctx->txq_idx; i < netdev->real_num_tx_queues;) {
- err = netdev_nl_queue_fill_one(rsp, netdev, i,
+ for (; ctx->txq_idx < netdev->real_num_tx_queues; ctx->txq_idx++) {
+ err = netdev_nl_queue_fill_one(rsp, netdev, ctx->txq_idx,
NETDEV_QUEUE_TYPE_TX, info);
if (err)
return err;
- ctx->txq_idx = i++;
}
return err;
@@ -671,7 +668,7 @@ netdev_nl_stats_by_queue(struct net_device *netdev, struct sk_buff *rsp,
i, info);
if (err)
return err;
- ctx->rxq_idx = i++;
+ ctx->rxq_idx = ++i;
}
i = ctx->txq_idx;
while (ops->get_queue_stats_tx && i < netdev->real_num_tx_queues) {
@@ -679,7 +676,7 @@ netdev_nl_stats_by_queue(struct net_device *netdev, struct sk_buff *rsp,
i, info);
if (err)
return err;
- ctx->txq_idx = i++;
+ ctx->txq_idx = ++i;
}
ctx->rxq_idx = 0;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index ebcfc2debf1a..d9f959c619d9 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3819,6 +3819,7 @@ out_unregister:
}
static struct net *rtnl_get_peer_net(const struct rtnl_link_ops *ops,
+ struct nlattr *tbp[],
struct nlattr *data[],
struct netlink_ext_ack *extack)
{
@@ -3826,7 +3827,7 @@ static struct net *rtnl_get_peer_net(const struct rtnl_link_ops *ops,
int err;
if (!data || !data[ops->peer_type])
- return NULL;
+ return rtnl_link_get_net_ifla(tbp);
err = rtnl_nla_parse_ifinfomsg(tb, data[ops->peer_type], extack);
if (err < 0)
@@ -3971,7 +3972,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
}
if (ops->peer_type) {
- peer_net = rtnl_get_peer_net(ops, data, extack);
+ peer_net = rtnl_get_peer_net(ops, tb, data, extack);
if (IS_ERR(peer_net)) {
ret = PTR_ERR(peer_net);
goto put_ops;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index e90fbab703b2..61f3f3d4e528 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -369,8 +369,8 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
struct sk_msg *msg, u32 bytes)
{
int ret = -ENOSPC, i = msg->sg.curr;
+ u32 copy, buf_size, copied = 0;
struct scatterlist *sge;
- u32 copy, buf_size;
void *to;
do {
@@ -397,6 +397,7 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
goto out;
}
bytes -= copy;
+ copied += copy;
if (!bytes)
break;
msg->sg.copybreak = 0;
@@ -404,7 +405,7 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
} while (i != msg->sg.end);
out:
msg->sg.curr = i;
- return ret;
+ return (ret < 0) ? ret : copied;
}
EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);
@@ -445,8 +446,10 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
if (likely(!peek)) {
sge->offset += copy;
sge->length -= copy;
- if (!msg_rx->skb)
+ if (!msg_rx->skb) {
sk_mem_uncharge(sk, copy);
+ atomic_sub(copy, &sk->sk_rmem_alloc);
+ }
msg_rx->sg.size -= copy;
if (!sge->length) {
@@ -772,6 +775,8 @@ static void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
list_for_each_entry_safe(msg, tmp, &psock->ingress_msg, list) {
list_del(&msg->list);
+ if (!msg->skb)
+ atomic_sub(msg->sg.size, &psock->sk->sk_rmem_alloc);
sk_msg_free(psock->sk, msg);
kfree(msg);
}
diff --git a/net/dsa/tag.h b/net/dsa/tag.h
index d5707870906b..5d80ddad4ff6 100644
--- a/net/dsa/tag.h
+++ b/net/dsa/tag.h
@@ -138,9 +138,10 @@ static inline void dsa_software_untag_vlan_unaware_bridge(struct sk_buff *skb,
* dsa_software_vlan_untag: Software VLAN untagging in DSA receive path
* @skb: Pointer to socket buffer (packet)
*
- * Receive path method for switches which cannot avoid tagging all packets
- * towards the CPU port. Called when ds->untag_bridge_pvid (legacy) or
- * ds->untag_vlan_aware_bridge_pvid is set to true.
+ * Receive path method for switches which send some packets as VLAN-tagged
+ * towards the CPU port (generally from VLAN-aware bridge ports) even when the
+ * packet was not tagged on the wire. Called when ds->untag_bridge_pvid
+ * (legacy) or ds->untag_vlan_aware_bridge_pvid is set to true.
*
* As a side effect of this method, any VLAN tag from the skb head is moved
* to hwaccel.
@@ -149,14 +150,19 @@ static inline struct sk_buff *dsa_software_vlan_untag(struct sk_buff *skb)
{
struct dsa_port *dp = dsa_user_to_port(skb->dev);
struct net_device *br = dsa_port_bridge_dev_get(dp);
- u16 vid;
+ u16 vid, proto;
+ int err;
/* software untagging for standalone ports not yet necessary */
if (!br)
return skb;
+ err = br_vlan_get_proto(br, &proto);
+ if (err)
+ return skb;
+
/* Move VLAN tag from data to hwaccel */
- if (!skb_vlan_tag_present(skb)) {
+ if (!skb_vlan_tag_present(skb) && skb->protocol == htons(proto)) {
skb = skb_vlan_untag(skb);
if (!skb)
return NULL;
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 99cef92e6290..47f65b1b70ca 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -49,13 +49,14 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
sge = sk_msg_elem(msg, i);
size = (apply && apply_bytes < sge->length) ?
apply_bytes : sge->length;
- if (!sk_wmem_schedule(sk, size)) {
+ if (!__sk_rmem_schedule(sk, size, false)) {
if (!copied)
ret = -ENOMEM;
break;
}
sk_mem_charge(sk, size);
+ atomic_add(size, &sk->sk_rmem_alloc);
sk_msg_xfer(tmp, msg, i, size);
copied += size;
if (sge->length)
@@ -74,7 +75,8 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
if (!ret) {
msg->sg.start = i;
- sk_psock_queue_msg(psock, tmp);
+ if (!sk_psock_queue_msg(psock, tmp))
+ atomic_sub(copied, &sk->sk_rmem_alloc);
sk_psock_data_ready(sk, psock);
} else {
sk_msg_free(sk, tmp);
@@ -493,7 +495,7 @@ more_data:
static int tcp_bpf_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
struct sk_msg tmp, *msg_tx = NULL;
- int copied = 0, err = 0;
+ int copied = 0, err = 0, ret = 0;
struct sk_psock *psock;
long timeo;
int flags;
@@ -536,14 +538,14 @@ static int tcp_bpf_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
copy = msg_tx->sg.size - osize;
}
- err = sk_msg_memcopy_from_iter(sk, &msg->msg_iter, msg_tx,
+ ret = sk_msg_memcopy_from_iter(sk, &msg->msg_iter, msg_tx,
copy);
- if (err < 0) {
+ if (ret < 0) {
sk_msg_trim(sk, msg_tx, osize);
goto out_err;
}
- copied += copy;
+ copied += ret;
if (psock->cork_bytes) {
if (size > psock->cork_bytes)
psock->cork_bytes = 0;
diff --git a/net/mctp/route.c b/net/mctp/route.c
index 597e9cf5aa64..3f2bd65ff5e3 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -374,8 +374,13 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
msk = NULL;
rc = -EINVAL;
- /* we may be receiving a locally-routed packet; drop source sk
- * accounting
+ /* We may be receiving a locally-routed packet; drop source sk
+ * accounting.
+ *
+ * From here, we will either queue the skb - either to a frag_queue, or
+ * to a receiving socket. When that succeeds, we clear the skb pointer;
+ * a non-NULL skb on exit will be otherwise unowned, and hence
+ * kfree_skb()-ed.
*/
skb_orphan(skb);
@@ -434,7 +439,9 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
* pending key.
*/
if (flags & MCTP_HDR_FLAG_EOM) {
- sock_queue_rcv_skb(&msk->sk, skb);
+ rc = sock_queue_rcv_skb(&msk->sk, skb);
+ if (!rc)
+ skb = NULL;
if (key) {
/* we've hit a pending reassembly; not much we
* can do but drop it
@@ -443,7 +450,6 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
MCTP_TRACE_KEY_REPLIED);
key = NULL;
}
- rc = 0;
goto out_unlock;
}
@@ -470,8 +476,10 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
* this function.
*/
rc = mctp_key_add(key, msk);
- if (!rc)
+ if (!rc) {
trace_mctp_key_acquire(key);
+ skb = NULL;
+ }
/* we don't need to release key->lock on exit, so
* clean up here and suppress the unlock via
@@ -489,6 +497,8 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
key = NULL;
} else {
rc = mctp_frag_queue(key, skb);
+ if (!rc)
+ skb = NULL;
}
}
@@ -503,12 +513,19 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
else
rc = mctp_frag_queue(key, skb);
+ if (rc)
+ goto out_unlock;
+
+ /* we've queued; the queue owns the skb now */
+ skb = NULL;
+
/* end of message? deliver to socket, and we're done with
* the reassembly/response key
*/
- if (!rc && flags & MCTP_HDR_FLAG_EOM) {
- sock_queue_rcv_skb(key->sk, key->reasm_head);
- key->reasm_head = NULL;
+ if (flags & MCTP_HDR_FLAG_EOM) {
+ rc = sock_queue_rcv_skb(key->sk, key->reasm_head);
+ if (!rc)
+ key->reasm_head = NULL;
__mctp_key_done_in(key, net, f, MCTP_TRACE_KEY_REPLIED);
key = NULL;
}
@@ -527,8 +544,7 @@ out_unlock:
if (any_key)
mctp_key_unref(any_key);
out:
- if (rc)
- kfree_skb(skb);
+ kfree_skb(skb);
return rc;
}
diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c
index 8551dab1d1e6..17165b86ce22 100644
--- a/net/mctp/test/route-test.c
+++ b/net/mctp/test/route-test.c
@@ -837,6 +837,90 @@ static void mctp_test_route_input_multiple_nets_key(struct kunit *test)
mctp_test_route_input_multiple_nets_key_fini(test, &t2);
}
+/* Input route to socket, using a single-packet message, where sock delivery
+ * fails. Ensure we're handling the failure appropriately.
+ */
+static void mctp_test_route_input_sk_fail_single(struct kunit *test)
+{
+ const struct mctp_hdr hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_TO);
+ struct mctp_test_route *rt;
+ struct mctp_test_dev *dev;
+ struct socket *sock;
+ struct sk_buff *skb;
+ int rc;
+
+ __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY);
+
+ /* No rcvbuf space, so delivery should fail. __sock_set_rcvbuf will
+ * clamp the minimum to SOCK_MIN_RCVBUF, so we open-code this.
+ */
+ lock_sock(sock->sk);
+ WRITE_ONCE(sock->sk->sk_rcvbuf, 0);
+ release_sock(sock->sk);
+
+ skb = mctp_test_create_skb(&hdr, 10);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb);
+ skb_get(skb);
+
+ mctp_test_skb_set_dev(skb, dev);
+
+ /* do route input, which should fail */
+ rc = mctp_route_input(&rt->rt, skb);
+ KUNIT_EXPECT_NE(test, rc, 0);
+
+ /* we should hold the only reference to skb */
+ KUNIT_EXPECT_EQ(test, refcount_read(&skb->users), 1);
+ kfree_skb(skb);
+
+ __mctp_route_test_fini(test, dev, rt, sock);
+}
+
+/* Input route to socket, using a fragmented message, where sock delivery fails.
+ */
+static void mctp_test_route_input_sk_fail_frag(struct kunit *test)
+{
+ const struct mctp_hdr hdrs[2] = { RX_FRAG(FL_S, 0), RX_FRAG(FL_E, 1) };
+ struct mctp_test_route *rt;
+ struct mctp_test_dev *dev;
+ struct sk_buff *skbs[2];
+ struct socket *sock;
+ unsigned int i;
+ int rc;
+
+ __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY);
+
+ lock_sock(sock->sk);
+ WRITE_ONCE(sock->sk->sk_rcvbuf, 0);
+ release_sock(sock->sk);
+
+ for (i = 0; i < ARRAY_SIZE(skbs); i++) {
+ skbs[i] = mctp_test_create_skb(&hdrs[i], 10);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skbs[i]);
+ skb_get(skbs[i]);
+
+ mctp_test_skb_set_dev(skbs[i], dev);
+ }
+
+ /* first route input should succeed, we're only queueing to the
+ * frag list
+ */
+ rc = mctp_route_input(&rt->rt, skbs[0]);
+ KUNIT_EXPECT_EQ(test, rc, 0);
+
+ /* final route input should fail to deliver to the socket */
+ rc = mctp_route_input(&rt->rt, skbs[1]);
+ KUNIT_EXPECT_NE(test, rc, 0);
+
+ /* we should hold the only reference to both skbs */
+ KUNIT_EXPECT_EQ(test, refcount_read(&skbs[0]->users), 1);
+ kfree_skb(skbs[0]);
+
+ KUNIT_EXPECT_EQ(test, refcount_read(&skbs[1]->users), 1);
+ kfree_skb(skbs[1]);
+
+ __mctp_route_test_fini(test, dev, rt, sock);
+}
+
#if IS_ENABLED(CONFIG_MCTP_FLOWS)
static void mctp_test_flow_init(struct kunit *test,
@@ -1053,6 +1137,8 @@ static struct kunit_case mctp_test_cases[] = {
mctp_route_input_sk_reasm_gen_params),
KUNIT_CASE_PARAM(mctp_test_route_input_sk_keys,
mctp_route_input_sk_keys_gen_params),
+ KUNIT_CASE(mctp_test_route_input_sk_fail_single),
+ KUNIT_CASE(mctp_test_route_input_sk_fail_frag),
KUNIT_CASE(mctp_test_route_input_multiple_nets_bind),
KUNIT_CASE(mctp_test_route_input_multiple_nets_key),
KUNIT_CASE(mctp_test_packet_flow),
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index bfae7066936b..db794fe1300e 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -611,6 +611,8 @@ init_list_set(struct net *net, struct ip_set *set, u32 size)
return true;
}
+static struct lock_class_key list_set_lockdep_key;
+
static int
list_set_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
u32 flags)
@@ -627,6 +629,7 @@ list_set_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (size < IP_SET_LIST_MIN_SIZE)
size = IP_SET_LIST_MIN_SIZE;
+ lockdep_set_class(&set->lock, &list_set_lockdep_key);
set->variant = &set_variant;
set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem),
__alignof__(struct set_elem));
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 98d7dbe3d787..c0289f83f96d 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1495,8 +1495,8 @@ int __init ip_vs_conn_init(void)
max_avail -= 2; /* ~4 in hash row */
max_avail -= 1; /* IPVS up to 1/2 of mem */
max_avail -= order_base_2(sizeof(struct ip_vs_conn));
- max = clamp(max, min, max_avail);
- ip_vs_conn_tab_bits = clamp_val(ip_vs_conn_tab_bits, min, max);
+ max = clamp(max_avail, min, max);
+ ip_vs_conn_tab_bits = clamp(ip_vs_conn_tab_bits, min, max);
ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
ip_vs_conn_tab_mask = ip_vs_conn_tab_size - 1;
diff --git a/net/psample/psample.c b/net/psample/psample.c
index a0ddae8a65f9..25f92ba0840c 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -393,7 +393,9 @@ void psample_sample_packet(struct psample_group *group,
nla_total_size_64bit(sizeof(u64)) + /* timestamp */
nla_total_size(sizeof(u16)) + /* protocol */
(md->user_cookie_len ?
- nla_total_size(md->user_cookie_len) : 0); /* user cookie */
+ nla_total_size(md->user_cookie_len) : 0) + /* user cookie */
+ (md->rate_as_probability ?
+ nla_total_size(0) : 0); /* rate as probability */
#ifdef CONFIG_INET
tun_info = skb_tunnel_info(skb);
@@ -498,8 +500,9 @@ void psample_sample_packet(struct psample_group *group,
md->user_cookie))
goto error;
- if (md->rate_as_probability)
- nla_put_flag(nl_skb, PSAMPLE_ATTR_SAMPLE_PROBABILITY);
+ if (md->rate_as_probability &&
+ nla_put_flag(nl_skb, PSAMPLE_ATTR_SAMPLE_PROBABILITY))
+ goto error;
genlmsg_end(nl_skb, data);
genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0,
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 9e6c69d18581..6cc7b846cff1 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -2032,6 +2032,8 @@ static int smc_listen_prfx_check(struct smc_sock *new_smc,
if (pclc->hdr.typev1 == SMC_TYPE_N)
return 0;
pclc_prfx = smc_clc_proposal_get_prefix(pclc);
+ if (!pclc_prfx)
+ return -EPROTO;
if (smc_clc_prfx_match(newclcsock, pclc_prfx))
return SMC_CLC_DECL_DIFFPREFIX;
@@ -2145,6 +2147,8 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
pclc_smcd = smc_get_clc_msg_smcd(pclc);
smc_v2_ext = smc_get_clc_v2_ext(pclc);
smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext);
+ if (!pclc_smcd || !smc_v2_ext || !smcd_v2_ext)
+ goto not_found;
mutex_lock(&smcd_dev_list.mutex);
if (pclc_smcd->ism.chid) {
@@ -2221,7 +2225,9 @@ static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc,
int rc = 0;
/* check if ISM V1 is available */
- if (!(ini->smcd_version & SMC_V1) || !smcd_indicated(ini->smc_type_v1))
+ if (!(ini->smcd_version & SMC_V1) ||
+ !smcd_indicated(ini->smc_type_v1) ||
+ !pclc_smcd)
goto not_found;
ini->is_smcd = true; /* prepare ISM check */
ini->ism_peer_gid[0].gid = ntohll(pclc_smcd->ism.gid);
@@ -2272,7 +2278,8 @@ static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc,
goto not_found;
smc_v2_ext = smc_get_clc_v2_ext(pclc);
- if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext, NULL, NULL))
+ if (!smc_v2_ext ||
+ !smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext, NULL, NULL))
goto not_found;
/* prepare RDMA check */
@@ -2881,6 +2888,13 @@ __poll_t smc_poll(struct file *file, struct socket *sock,
} else {
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+
+ if (sk->sk_state != SMC_INIT) {
+ /* Race breaker the same way as tcp_poll(). */
+ smp_mb__after_atomic();
+ if (atomic_read(&smc->conn.sndbuf_space))
+ mask |= EPOLLOUT | EPOLLWRNORM;
+ }
}
if (atomic_read(&smc->conn.bytes_to_rcv))
mask |= EPOLLIN | EPOLLRDNORM;
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 33fa787c28eb..521f5df80e10 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -352,8 +352,11 @@ static bool smc_clc_msg_prop_valid(struct smc_clc_msg_proposal *pclc)
struct smc_clc_msg_hdr *hdr = &pclc->hdr;
struct smc_clc_v2_extension *v2_ext;
- v2_ext = smc_get_clc_v2_ext(pclc);
pclc_prfx = smc_clc_proposal_get_prefix(pclc);
+ if (!pclc_prfx ||
+ pclc_prfx->ipv6_prefixes_cnt > SMC_CLC_MAX_V6_PREFIX)
+ return false;
+
if (hdr->version == SMC_V1) {
if (hdr->typev1 == SMC_TYPE_N)
return false;
@@ -365,6 +368,13 @@ static bool smc_clc_msg_prop_valid(struct smc_clc_msg_proposal *pclc)
sizeof(struct smc_clc_msg_trail))
return false;
} else {
+ v2_ext = smc_get_clc_v2_ext(pclc);
+ if ((hdr->typev2 != SMC_TYPE_N &&
+ (!v2_ext || v2_ext->hdr.eid_cnt > SMC_CLC_MAX_UEID)) ||
+ (smcd_indicated(hdr->typev2) &&
+ v2_ext->hdr.ism_gid_cnt > SMCD_CLC_MAX_V2_GID_ENTRIES))
+ return false;
+
if (ntohs(hdr->length) !=
sizeof(*pclc) +
sizeof(struct smc_clc_msg_smcd) +
@@ -764,6 +774,11 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
SMC_CLC_RECV_BUF_LEN : datlen;
iov_iter_kvec(&msg.msg_iter, ITER_DEST, &vec, 1, recvlen);
len = sock_recvmsg(smc->clcsock, &msg, krflags);
+ if (len < recvlen) {
+ smc->sk.sk_err = EPROTO;
+ reason_code = -EPROTO;
+ goto out;
+ }
datlen -= len;
}
if (clcm->type == SMC_CLC_DECLINE) {
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 5fd6f5b8ef03..767289925410 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -336,8 +336,12 @@ struct smc_clc_msg_decline_v2 { /* clc decline message */
static inline struct smc_clc_msg_proposal_prefix *
smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
{
+ u16 offset = ntohs(pclc->iparea_offset);
+
+ if (offset > sizeof(struct smc_clc_msg_smcd))
+ return NULL;
return (struct smc_clc_msg_proposal_prefix *)
- ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));
+ ((u8 *)pclc + sizeof(*pclc) + offset);
}
static inline bool smcr_indicated(int smc_type)
@@ -376,8 +380,14 @@ static inline struct smc_clc_v2_extension *
smc_get_clc_v2_ext(struct smc_clc_msg_proposal *prop)
{
struct smc_clc_msg_smcd *prop_smcd = smc_get_clc_msg_smcd(prop);
+ u16 max_offset;
- if (!prop_smcd || !ntohs(prop_smcd->v2_ext_offset))
+ max_offset = offsetof(struct smc_clc_msg_proposal_area, pclc_v2_ext) -
+ offsetof(struct smc_clc_msg_proposal_area, pclc_smcd) -
+ offsetofend(struct smc_clc_msg_smcd, v2_ext_offset);
+
+ if (!prop_smcd || !ntohs(prop_smcd->v2_ext_offset) ||
+ ntohs(prop_smcd->v2_ext_offset) > max_offset)
return NULL;
return (struct smc_clc_v2_extension *)
@@ -390,9 +400,15 @@ smc_get_clc_v2_ext(struct smc_clc_msg_proposal *prop)
static inline struct smc_clc_smcd_v2_extension *
smc_get_clc_smcd_v2_ext(struct smc_clc_v2_extension *prop_v2ext)
{
+ u16 max_offset = offsetof(struct smc_clc_msg_proposal_area, pclc_smcd_v2_ext) -
+ offsetof(struct smc_clc_msg_proposal_area, pclc_v2_ext) -
+ offsetof(struct smc_clc_v2_extension, hdr) -
+ offsetofend(struct smc_clnt_opts_area_hdr, smcd_v2_ext_offset);
+
if (!prop_v2ext)
return NULL;
- if (!ntohs(prop_v2ext->hdr.smcd_v2_ext_offset))
+ if (!ntohs(prop_v2ext->hdr.smcd_v2_ext_offset) ||
+ ntohs(prop_v2ext->hdr.smcd_v2_ext_offset) > max_offset)
return NULL;
return (struct smc_clc_smcd_v2_extension *)
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 500952c2e67b..3b125d348b4a 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1818,7 +1818,9 @@ void smcr_link_down_cond_sched(struct smc_link *lnk)
{
if (smc_link_downing(&lnk->state)) {
trace_smcr_link_down(lnk, __builtin_return_address(0));
- schedule_work(&lnk->link_down_wrk);
+ smcr_link_hold(lnk); /* smcr_link_put in link_down_wrk */
+ if (!schedule_work(&lnk->link_down_wrk))
+ smcr_link_put(lnk);
}
}
@@ -1850,11 +1852,14 @@ static void smc_link_down_work(struct work_struct *work)
struct smc_link_group *lgr = link->lgr;
if (list_empty(&lgr->list))
- return;
+ goto out;
wake_up_all(&lgr->llc_msg_waiter);
down_write(&lgr->llc_conf_mutex);
smcr_link_down(link);
up_write(&lgr->llc_conf_mutex);
+
+out:
+ smcr_link_put(link); /* smcr_link_hold by schedulers of link_down_work */
}
static int smc_vlan_by_tcpsk_walk(struct net_device *lower_dev,
diff --git a/rust/kernel/net/phy.rs b/rust/kernel/net/phy.rs
index b89c681d97c0..2fbfb6a94c11 100644
--- a/rust/kernel/net/phy.rs
+++ b/rust/kernel/net/phy.rs
@@ -860,7 +860,7 @@ impl DeviceMask {
/// ];
/// #[cfg(MODULE)]
/// #[no_mangle]
-/// static __mod_mdio__phydev_device_table: [::kernel::bindings::mdio_device_id; 2] = _DEVICE_TABLE;
+/// static __mod_device_table__mdio__phydev: [::kernel::bindings::mdio_device_id; 2] = _DEVICE_TABLE;
/// ```
#[macro_export]
macro_rules! module_phy_driver {
@@ -883,7 +883,7 @@ macro_rules! module_phy_driver {
#[cfg(MODULE)]
#[no_mangle]
- static __mod_mdio__phydev_device_table: [$crate::bindings::mdio_device_id;
+ static __mod_device_table__mdio__phydev: [$crate::bindings::mdio_device_id;
$crate::module_phy_driver!(@count_devices $($dev),+) + 1] = _DEVICE_TABLE;
};
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index fb787a5715f5..94ee49207a45 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -155,12 +155,13 @@ char *get_line(char **stringp)
/* A list of all modules we processed */
LIST_HEAD(modules);
-static struct module *find_module(const char *modname)
+static struct module *find_module(const char *filename, const char *modname)
{
struct module *mod;
list_for_each_entry(mod, &modules, list) {
- if (strcmp(mod->name, modname) == 0)
+ if (!strcmp(mod->dump_file, filename) &&
+ !strcmp(mod->name, modname))
return mod;
}
return NULL;
@@ -2030,10 +2031,10 @@ static void read_dump(const char *fname)
continue;
}
- mod = find_module(modname);
+ mod = find_module(fname, modname);
if (!mod) {
mod = new_module(modname, strlen(modname));
- mod->from_dump = true;
+ mod->dump_file = fname;
}
s = sym_add_exported(symname, mod, gpl_only, namespace);
sym_set_crc(s, crc);
@@ -2052,7 +2053,7 @@ static void write_dump(const char *fname)
struct symbol *sym;
list_for_each_entry(mod, &modules, list) {
- if (mod->from_dump)
+ if (mod->dump_file)
continue;
list_for_each_entry(sym, &mod->exported_symbols, list) {
if (trim_unused_exports && !sym->used)
@@ -2076,7 +2077,7 @@ static void write_namespace_deps_files(const char *fname)
list_for_each_entry(mod, &modules, list) {
- if (mod->from_dump || list_empty(&mod->missing_namespaces))
+ if (mod->dump_file || list_empty(&mod->missing_namespaces))
continue;
buf_printf(&ns_deps_buf, "%s.ko:", mod->name);
@@ -2194,7 +2195,7 @@ int main(int argc, char **argv)
read_symbols_from_files(files_source);
list_for_each_entry(mod, &modules, list) {
- if (mod->from_dump || mod->is_vmlinux)
+ if (mod->dump_file || mod->is_vmlinux)
continue;
check_modname_len(mod);
@@ -2205,7 +2206,7 @@ int main(int argc, char **argv)
handle_white_list_exports(unused_exports_white_list);
list_for_each_entry(mod, &modules, list) {
- if (mod->from_dump)
+ if (mod->dump_file)
continue;
if (mod->is_vmlinux)
diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
index 49848fcbe2a1..8b72c227ebf4 100644
--- a/scripts/mod/modpost.h
+++ b/scripts/mod/modpost.h
@@ -95,14 +95,15 @@ struct module_alias {
/**
* struct module - represent a module (vmlinux or *.ko)
*
+ * @dump_file: path to the .symvers file if loaded from a file
* @aliases: list head for module_aliases
*/
struct module {
struct list_head list;
struct list_head exported_symbols;
struct list_head unresolved_symbols;
+ const char *dump_file;
bool is_gpl_compatible;
- bool from_dump; /* true if module was loaded from *.symvers */
bool is_vmlinux;
bool seen;
bool has_init;
diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index fb686fd3266f..ad7aba0f268e 100755
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -63,6 +63,12 @@ install_linux_image () {
esac
cp "$(${MAKE} -s -f ${srctree}/Makefile image_name)" "${pdir}/${installed_image_path}"
+ if [ "${ARCH}" != um ]; then
+ install_maint_scripts "${pdir}"
+ fi
+}
+
+install_maint_scripts () {
# Install the maintainer scripts
# Note: hook scripts under /etc/kernel are also executed by official Debian
# kernel packages, as well as kernel packages built using make-kpkg.
diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian
index 4ffcc70f8e31..b038a1380b8a 100755
--- a/scripts/package/mkdebian
+++ b/scripts/package/mkdebian
@@ -70,6 +70,13 @@ set_debarch() {
debarch=sh4$(if_enabled_echo CONFIG_CPU_BIG_ENDIAN eb)
fi
;;
+ um)
+ if is_enabled CONFIG_64BIT; then
+ debarch=amd64
+ else
+ debarch=i386
+ fi
+ ;;
esac
if [ -z "$debarch" ]; then
debarch=$(dpkg-architecture -qDEB_HOST_ARCH)
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 971c45d576ba..3d5c563cfc4c 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -979,7 +979,10 @@ void services_compute_xperms_decision(struct extended_perms_decision *xpermd,
return;
break;
default:
- BUG();
+ pr_warn_once(
+ "SELinux: unknown extended permission (%u) will be ignored\n",
+ node->datum.u.xperms->specified);
+ return;
}
if (node->key.specified == AVTAB_XPERMS_ALLOWED) {
@@ -998,7 +1001,8 @@ void services_compute_xperms_decision(struct extended_perms_decision *xpermd,
&node->datum.u.xperms->perms,
xpermd->dontaudit);
} else {
- BUG();
+ pr_warn_once("SELinux: unknown specified key (%u)\n",
+ node->key.specified);
}
}
diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig
index 8e88830e8e57..678540b78280 100644
--- a/sound/soc/fsl/Kconfig
+++ b/sound/soc/fsl/Kconfig
@@ -29,8 +29,8 @@ config SND_SOC_FSL_SAI
config SND_SOC_FSL_MQS
tristate "Medium Quality Sound (MQS) module support"
depends on SND_SOC_FSL_SAI
+ depends on IMX_SCMI_MISC_DRV || !IMX_SCMI_MISC_DRV
select REGMAP_MMIO
- select IMX_SCMI_MISC_DRV if IMX_SCMI_MISC_EXT !=n
help
Say Y if you want to add Medium Quality Sound (MQS)
support for the Freescale CPUs.
diff --git a/tools/hv/.gitignore b/tools/hv/.gitignore
new file mode 100644
index 000000000000..0c5bc15d602f
--- /dev/null
+++ b/tools/hv/.gitignore
@@ -0,0 +1,3 @@
+hv_fcopy_uio_daemon
+hv_kvp_daemon
+hv_vss_daemon
diff --git a/tools/hv/hv_fcopy_uio_daemon.c b/tools/hv/hv_fcopy_uio_daemon.c
index 7a00f3066a98..0198321d14a2 100644
--- a/tools/hv/hv_fcopy_uio_daemon.c
+++ b/tools/hv/hv_fcopy_uio_daemon.c
@@ -35,8 +35,6 @@
#define WIN8_SRV_MINOR 1
#define WIN8_SRV_VERSION (WIN8_SRV_MAJOR << 16 | WIN8_SRV_MINOR)
-#define MAX_FOLDER_NAME 15
-#define MAX_PATH_LEN 15
#define FCOPY_UIO "/sys/bus/vmbus/devices/eb765408-105f-49b6-b4aa-c123b64d17d4/uio"
#define FCOPY_VER_COUNT 1
@@ -51,7 +49,7 @@ static const int fw_versions[] = {
#define HV_RING_SIZE 0x4000 /* 16KB ring buffer size */
-unsigned char desc[HV_RING_SIZE];
+static unsigned char desc[HV_RING_SIZE];
static int target_fd;
static char target_fname[PATH_MAX];
@@ -409,8 +407,8 @@ int main(int argc, char *argv[])
struct vmbus_br txbr, rxbr;
void *ring;
uint32_t len = HV_RING_SIZE;
- char uio_name[MAX_FOLDER_NAME] = {0};
- char uio_dev_path[MAX_PATH_LEN] = {0};
+ char uio_name[NAME_MAX] = {0};
+ char uio_dev_path[PATH_MAX] = {0};
static struct option long_options[] = {
{"help", no_argument, 0, 'h' },
@@ -468,8 +466,10 @@ int main(int argc, char *argv[])
*/
ret = pread(fcopy_fd, &tmp, sizeof(int), 0);
if (ret < 0) {
+ if (errno == EINTR || errno == EAGAIN)
+ continue;
syslog(LOG_ERR, "pread failed: %s", strerror(errno));
- continue;
+ goto close;
}
len = HV_RING_SIZE;
diff --git a/tools/hv/hv_get_dns_info.sh b/tools/hv/hv_get_dns_info.sh
index 058c17b46ffc..268521234d4b 100755
--- a/tools/hv/hv_get_dns_info.sh
+++ b/tools/hv/hv_get_dns_info.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/sh
# This example script parses /etc/resolv.conf to retrive DNS information.
# In the interest of keeping the KVP daemon code free of distro specific
@@ -10,4 +10,4 @@
# this script can be based on the Network Manager APIs for retrieving DNS
# entries.
-cat /etc/resolv.conf 2>/dev/null | awk '/^nameserver/ { print $2 }'
+exec awk '/^nameserver/ { print $2 }' /etc/resolv.conf 2>/dev/null
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index ae57bf69ad4a..04ba035d67e9 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -725,7 +725,7 @@ static void kvp_get_ipconfig_info(char *if_name,
* .
*/
- sprintf(cmd, KVP_SCRIPTS_PATH "%s", "hv_get_dns_info");
+ sprintf(cmd, "exec %s %s", KVP_SCRIPTS_PATH "hv_get_dns_info", if_name);
/*
* Execute the command to gather DNS info.
@@ -742,7 +742,7 @@ static void kvp_get_ipconfig_info(char *if_name,
* Enabled: DHCP enabled.
*/
- sprintf(cmd, KVP_SCRIPTS_PATH "%s %s", "hv_get_dhcp_info", if_name);
+ sprintf(cmd, "exec %s %s", KVP_SCRIPTS_PATH "hv_get_dhcp_info", if_name);
file = popen(cmd, "r");
if (file == NULL)
@@ -1606,8 +1606,9 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val)
* invoke the external script to do its magic.
*/
- str_len = snprintf(cmd, sizeof(cmd), KVP_SCRIPTS_PATH "%s %s %s",
- "hv_set_ifconfig", if_filename, nm_filename);
+ str_len = snprintf(cmd, sizeof(cmd), "exec %s %s %s",
+ KVP_SCRIPTS_PATH "hv_set_ifconfig",
+ if_filename, nm_filename);
/*
* This is a little overcautious, but it's necessary to suppress some
* false warnings from gcc 8.0.1.
diff --git a/tools/hv/hv_set_ifconfig.sh b/tools/hv/hv_set_ifconfig.sh
index 440a91b35823..2f8baed2b8f7 100755
--- a/tools/hv/hv_set_ifconfig.sh
+++ b/tools/hv/hv_set_ifconfig.sh
@@ -81,7 +81,7 @@ echo "ONBOOT=yes" >> $1
cp $1 /etc/sysconfig/network-scripts/
-chmod 600 $2
+umask 0177
interface=$(echo $2 | awk -F - '{ print $2 }')
filename="${2##*/}"
diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py
index 01ec01a90e76..eea29359a899 100644
--- a/tools/net/ynl/lib/ynl.py
+++ b/tools/net/ynl/lib/ynl.py
@@ -556,10 +556,10 @@ class YnlFamily(SpecFamily):
if attr["type"] == 'nest':
nl_type |= Netlink.NLA_F_NESTED
attr_payload = b''
- sub_attrs = SpaceAttrs(self.attr_sets[space], value, search_attrs)
+ sub_space = attr['nested-attributes']
+ sub_attrs = SpaceAttrs(self.attr_sets[sub_space], value, search_attrs)
for subname, subvalue in value.items():
- attr_payload += self._add_attr(attr['nested-attributes'],
- subname, subvalue, sub_attrs)
+ attr_payload += self._add_attr(sub_space, subname, subvalue, sub_attrs)
elif attr["type"] == 'flag':
if not value:
# If value is absent or false then skip attribute creation.
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 4ce176ad411f..76060da755b5 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -3820,9 +3820,12 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
break;
case INSN_CONTEXT_SWITCH:
- if (func && (!next_insn || !next_insn->hint)) {
- WARN_INSN(insn, "unsupported instruction in callable function");
- return 1;
+ if (func) {
+ if (!next_insn || !next_insn->hint) {
+ WARN_INSN(insn, "unsupported instruction in callable function");
+ return 1;
+ }
+ break;
}
return 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/socket_helpers.h b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h
new file mode 100644
index 000000000000..1bdfb79ef009
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h
@@ -0,0 +1,394 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __SOCKET_HELPERS__
+#define __SOCKET_HELPERS__
+
+#include <linux/vm_sockets.h>
+
+/* include/linux/net.h */
+#define SOCK_TYPE_MASK 0xf
+
+#define IO_TIMEOUT_SEC 30
+#define MAX_STRERR_LEN 256
+
+/* workaround for older vm_sockets.h */
+#ifndef VMADDR_CID_LOCAL
+#define VMADDR_CID_LOCAL 1
+#endif
+
+/* include/linux/cleanup.h */
+#define __get_and_null(p, nullvalue) \
+ ({ \
+ __auto_type __ptr = &(p); \
+ __auto_type __val = *__ptr; \
+ *__ptr = nullvalue; \
+ __val; \
+ })
+
+#define take_fd(fd) __get_and_null(fd, -EBADF)
+
+/* Wrappers that fail the test on error and report it. */
+
+#define _FAIL(errnum, fmt...) \
+ ({ \
+ error_at_line(0, (errnum), __func__, __LINE__, fmt); \
+ CHECK_FAIL(true); \
+ })
+#define FAIL(fmt...) _FAIL(0, fmt)
+#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt)
+#define FAIL_LIBBPF(err, msg) \
+ ({ \
+ char __buf[MAX_STRERR_LEN]; \
+ libbpf_strerror((err), __buf, sizeof(__buf)); \
+ FAIL("%s: %s", (msg), __buf); \
+ })
+
+
+#define xaccept_nonblock(fd, addr, len) \
+ ({ \
+ int __ret = \
+ accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \
+ if (__ret == -1) \
+ FAIL_ERRNO("accept"); \
+ __ret; \
+ })
+
+#define xbind(fd, addr, len) \
+ ({ \
+ int __ret = bind((fd), (addr), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("bind"); \
+ __ret; \
+ })
+
+#define xclose(fd) \
+ ({ \
+ int __ret = close((fd)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("close"); \
+ __ret; \
+ })
+
+#define xconnect(fd, addr, len) \
+ ({ \
+ int __ret = connect((fd), (addr), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("connect"); \
+ __ret; \
+ })
+
+#define xgetsockname(fd, addr, len) \
+ ({ \
+ int __ret = getsockname((fd), (addr), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("getsockname"); \
+ __ret; \
+ })
+
+#define xgetsockopt(fd, level, name, val, len) \
+ ({ \
+ int __ret = getsockopt((fd), (level), (name), (val), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("getsockopt(" #name ")"); \
+ __ret; \
+ })
+
+#define xlisten(fd, backlog) \
+ ({ \
+ int __ret = listen((fd), (backlog)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("listen"); \
+ __ret; \
+ })
+
+#define xsetsockopt(fd, level, name, val, len) \
+ ({ \
+ int __ret = setsockopt((fd), (level), (name), (val), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("setsockopt(" #name ")"); \
+ __ret; \
+ })
+
+#define xsend(fd, buf, len, flags) \
+ ({ \
+ ssize_t __ret = send((fd), (buf), (len), (flags)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("send"); \
+ __ret; \
+ })
+
+#define xrecv_nonblock(fd, buf, len, flags) \
+ ({ \
+ ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \
+ IO_TIMEOUT_SEC); \
+ if (__ret == -1) \
+ FAIL_ERRNO("recv"); \
+ __ret; \
+ })
+
+#define xsocket(family, sotype, flags) \
+ ({ \
+ int __ret = socket(family, sotype, flags); \
+ if (__ret == -1) \
+ FAIL_ERRNO("socket"); \
+ __ret; \
+ })
+
+static inline void close_fd(int *fd)
+{
+ if (*fd >= 0)
+ xclose(*fd);
+}
+
+#define __close_fd __attribute__((cleanup(close_fd)))
+
+static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss)
+{
+ return (struct sockaddr *)ss;
+}
+
+static inline void init_addr_loopback4(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
+
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = 0;
+ addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ *len = sizeof(*addr4);
+}
+
+static inline void init_addr_loopback6(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss));
+
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = 0;
+ addr6->sin6_addr = in6addr_loopback;
+ *len = sizeof(*addr6);
+}
+
+static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss));
+
+ addr->svm_family = AF_VSOCK;
+ addr->svm_port = VMADDR_PORT_ANY;
+ addr->svm_cid = VMADDR_CID_LOCAL;
+ *len = sizeof(*addr);
+}
+
+static inline void init_addr_loopback(int family, struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ switch (family) {
+ case AF_INET:
+ init_addr_loopback4(ss, len);
+ return;
+ case AF_INET6:
+ init_addr_loopback6(ss, len);
+ return;
+ case AF_VSOCK:
+ init_addr_loopback_vsock(ss, len);
+ return;
+ default:
+ FAIL("unsupported address family %d", family);
+ }
+}
+
+static inline int enable_reuseport(int s, int progfd)
+{
+ int err, one = 1;
+
+ err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+ if (err)
+ return -1;
+ err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd,
+ sizeof(progfd));
+ if (err)
+ return -1;
+
+ return 0;
+}
+
+static inline int socket_loopback_reuseport(int family, int sotype, int progfd)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = 0;
+ int err, s;
+
+ init_addr_loopback(family, &addr, &len);
+
+ s = xsocket(family, sotype, 0);
+ if (s == -1)
+ return -1;
+
+ if (progfd >= 0)
+ enable_reuseport(s, progfd);
+
+ err = xbind(s, sockaddr(&addr), len);
+ if (err)
+ goto close;
+
+ if (sotype & SOCK_DGRAM)
+ return s;
+
+ err = xlisten(s, SOMAXCONN);
+ if (err)
+ goto close;
+
+ return s;
+close:
+ xclose(s);
+ return -1;
+}
+
+static inline int socket_loopback(int family, int sotype)
+{
+ return socket_loopback_reuseport(family, sotype, -1);
+}
+
+static inline int poll_connect(int fd, unsigned int timeout_sec)
+{
+ struct timeval timeout = { .tv_sec = timeout_sec };
+ fd_set wfds;
+ int r, eval;
+ socklen_t esize = sizeof(eval);
+
+ FD_ZERO(&wfds);
+ FD_SET(fd, &wfds);
+
+ r = select(fd + 1, NULL, &wfds, NULL, &timeout);
+ if (r == 0)
+ errno = ETIME;
+ if (r != 1)
+ return -1;
+
+ if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &eval, &esize) < 0)
+ return -1;
+ if (eval != 0) {
+ errno = eval;
+ return -1;
+ }
+
+ return 0;
+}
+
+static inline int poll_read(int fd, unsigned int timeout_sec)
+{
+ struct timeval timeout = { .tv_sec = timeout_sec };
+ fd_set rfds;
+ int r;
+
+ FD_ZERO(&rfds);
+ FD_SET(fd, &rfds);
+
+ r = select(fd + 1, &rfds, NULL, NULL, &timeout);
+ if (r == 0)
+ errno = ETIME;
+
+ return r == 1 ? 0 : -1;
+}
+
+static inline int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
+ unsigned int timeout_sec)
+{
+ if (poll_read(fd, timeout_sec))
+ return -1;
+
+ return accept(fd, addr, len);
+}
+
+static inline int recv_timeout(int fd, void *buf, size_t len, int flags,
+ unsigned int timeout_sec)
+{
+ if (poll_read(fd, timeout_sec))
+ return -1;
+
+ return recv(fd, buf, len, flags);
+}
+
+
+static inline int create_pair(int family, int sotype, int *p0, int *p1)
+{
+ __close_fd int s, c = -1, p = -1;
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
+ int err;
+
+ s = socket_loopback(family, sotype);
+ if (s < 0)
+ return s;
+
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ return err;
+
+ c = xsocket(family, sotype, 0);
+ if (c < 0)
+ return c;
+
+ err = connect(c, sockaddr(&addr), len);
+ if (err) {
+ if (errno != EINPROGRESS) {
+ FAIL_ERRNO("connect");
+ return err;
+ }
+
+ err = poll_connect(c, IO_TIMEOUT_SEC);
+ if (err) {
+ FAIL_ERRNO("poll_connect");
+ return err;
+ }
+ }
+
+ switch (sotype & SOCK_TYPE_MASK) {
+ case SOCK_DGRAM:
+ err = xgetsockname(c, sockaddr(&addr), &len);
+ if (err)
+ return err;
+
+ err = xconnect(s, sockaddr(&addr), len);
+ if (err)
+ return err;
+
+ *p0 = take_fd(s);
+ break;
+ case SOCK_STREAM:
+ case SOCK_SEQPACKET:
+ p = xaccept_nonblock(s, NULL, NULL);
+ if (p < 0)
+ return p;
+
+ *p0 = take_fd(p);
+ break;
+ default:
+ FAIL("Unsupported socket type %#x", sotype);
+ return -EOPNOTSUPP;
+ }
+
+ *p1 = take_fd(c);
+ return 0;
+}
+
+static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1,
+ int *p0, int *p1)
+{
+ int err;
+
+ err = create_pair(family, sotype, c0, p0);
+ if (err)
+ return err;
+
+ err = create_pair(family, sotype, c1, p1);
+ if (err) {
+ close(*c0);
+ close(*p0);
+ }
+
+ return err;
+}
+
+#endif // __SOCKET_HELPERS__
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 248754296d97..884ad87783d5 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -12,6 +12,7 @@
#include "test_sockmap_progs_query.skel.h"
#include "test_sockmap_pass_prog.skel.h"
#include "test_sockmap_drop_prog.skel.h"
+#include "test_sockmap_change_tail.skel.h"
#include "bpf_iter_sockmap.skel.h"
#include "sockmap_helpers.h"
@@ -643,6 +644,54 @@ out:
test_sockmap_drop_prog__destroy(drop);
}
+static void test_sockmap_skb_verdict_change_tail(void)
+{
+ struct test_sockmap_change_tail *skel;
+ int err, map, verdict;
+ int c1, p1, sent, recvd;
+ int zero = 0;
+ char buf[2];
+
+ skel = test_sockmap_change_tail__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+ verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ map = bpf_map__fd(skel->maps.sock_map_rx);
+
+ err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
+ goto out;
+ err = create_pair(AF_INET, SOCK_STREAM, &c1, &p1);
+ if (!ASSERT_OK(err, "create_pair()"))
+ goto out;
+ err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(c1)"))
+ goto out_close;
+ sent = xsend(p1, "Tr", 2, 0);
+ ASSERT_EQ(sent, 2, "xsend(p1)");
+ recvd = recv(c1, buf, 2, 0);
+ ASSERT_EQ(recvd, 1, "recv(c1)");
+ ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret");
+
+ sent = xsend(p1, "G", 1, 0);
+ ASSERT_EQ(sent, 1, "xsend(p1)");
+ recvd = recv(c1, buf, 2, 0);
+ ASSERT_EQ(recvd, 2, "recv(c1)");
+ ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret");
+
+ sent = xsend(p1, "E", 1, 0);
+ ASSERT_EQ(sent, 1, "xsend(p1)");
+ recvd = recv(c1, buf, 1, 0);
+ ASSERT_EQ(recvd, 1, "recv(c1)");
+ ASSERT_EQ(skel->data->change_tail_ret, -EINVAL, "change_tail_ret");
+
+out_close:
+ close(c1);
+ close(p1);
+out:
+ test_sockmap_change_tail__destroy(skel);
+}
+
static void test_sockmap_skb_verdict_peek_helper(int map)
{
int err, c1, p1, zero = 0, sent, recvd, avail;
@@ -1058,6 +1107,8 @@ void test_sockmap_basic(void)
test_sockmap_skb_verdict_fionread(true);
if (test__start_subtest("sockmap skb_verdict fionread on drop"))
test_sockmap_skb_verdict_fionread(false);
+ if (test__start_subtest("sockmap skb_verdict change tail"))
+ test_sockmap_skb_verdict_change_tail();
if (test__start_subtest("sockmap skb_verdict msg_f_peek"))
test_sockmap_skb_verdict_peek();
if (test__start_subtest("sockmap skb_verdict msg_f_peek with link"))
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
index 38e35c72bdaa..3e5571dd578d 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
@@ -1,139 +1,12 @@
#ifndef __SOCKMAP_HELPERS__
#define __SOCKMAP_HELPERS__
-#include <linux/vm_sockets.h>
+#include "socket_helpers.h"
-/* include/linux/net.h */
-#define SOCK_TYPE_MASK 0xf
-
-#define IO_TIMEOUT_SEC 30
-#define MAX_STRERR_LEN 256
#define MAX_TEST_NAME 80
-/* workaround for older vm_sockets.h */
-#ifndef VMADDR_CID_LOCAL
-#define VMADDR_CID_LOCAL 1
-#endif
-
#define __always_unused __attribute__((__unused__))
-/* include/linux/cleanup.h */
-#define __get_and_null(p, nullvalue) \
- ({ \
- __auto_type __ptr = &(p); \
- __auto_type __val = *__ptr; \
- *__ptr = nullvalue; \
- __val; \
- })
-
-#define take_fd(fd) __get_and_null(fd, -EBADF)
-
-#define _FAIL(errnum, fmt...) \
- ({ \
- error_at_line(0, (errnum), __func__, __LINE__, fmt); \
- CHECK_FAIL(true); \
- })
-#define FAIL(fmt...) _FAIL(0, fmt)
-#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt)
-#define FAIL_LIBBPF(err, msg) \
- ({ \
- char __buf[MAX_STRERR_LEN]; \
- libbpf_strerror((err), __buf, sizeof(__buf)); \
- FAIL("%s: %s", (msg), __buf); \
- })
-
-/* Wrappers that fail the test on error and report it. */
-
-#define xaccept_nonblock(fd, addr, len) \
- ({ \
- int __ret = \
- accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \
- if (__ret == -1) \
- FAIL_ERRNO("accept"); \
- __ret; \
- })
-
-#define xbind(fd, addr, len) \
- ({ \
- int __ret = bind((fd), (addr), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("bind"); \
- __ret; \
- })
-
-#define xclose(fd) \
- ({ \
- int __ret = close((fd)); \
- if (__ret == -1) \
- FAIL_ERRNO("close"); \
- __ret; \
- })
-
-#define xconnect(fd, addr, len) \
- ({ \
- int __ret = connect((fd), (addr), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("connect"); \
- __ret; \
- })
-
-#define xgetsockname(fd, addr, len) \
- ({ \
- int __ret = getsockname((fd), (addr), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("getsockname"); \
- __ret; \
- })
-
-#define xgetsockopt(fd, level, name, val, len) \
- ({ \
- int __ret = getsockopt((fd), (level), (name), (val), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("getsockopt(" #name ")"); \
- __ret; \
- })
-
-#define xlisten(fd, backlog) \
- ({ \
- int __ret = listen((fd), (backlog)); \
- if (__ret == -1) \
- FAIL_ERRNO("listen"); \
- __ret; \
- })
-
-#define xsetsockopt(fd, level, name, val, len) \
- ({ \
- int __ret = setsockopt((fd), (level), (name), (val), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("setsockopt(" #name ")"); \
- __ret; \
- })
-
-#define xsend(fd, buf, len, flags) \
- ({ \
- ssize_t __ret = send((fd), (buf), (len), (flags)); \
- if (__ret == -1) \
- FAIL_ERRNO("send"); \
- __ret; \
- })
-
-#define xrecv_nonblock(fd, buf, len, flags) \
- ({ \
- ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \
- IO_TIMEOUT_SEC); \
- if (__ret == -1) \
- FAIL_ERRNO("recv"); \
- __ret; \
- })
-
-#define xsocket(family, sotype, flags) \
- ({ \
- int __ret = socket(family, sotype, flags); \
- if (__ret == -1) \
- FAIL_ERRNO("socket"); \
- __ret; \
- })
-
#define xbpf_map_delete_elem(fd, key) \
({ \
int __ret = bpf_map_delete_elem((fd), (key)); \
@@ -193,130 +66,6 @@
__ret; \
})
-static inline void close_fd(int *fd)
-{
- if (*fd >= 0)
- xclose(*fd);
-}
-
-#define __close_fd __attribute__((cleanup(close_fd)))
-
-static inline int poll_connect(int fd, unsigned int timeout_sec)
-{
- struct timeval timeout = { .tv_sec = timeout_sec };
- fd_set wfds;
- int r, eval;
- socklen_t esize = sizeof(eval);
-
- FD_ZERO(&wfds);
- FD_SET(fd, &wfds);
-
- r = select(fd + 1, NULL, &wfds, NULL, &timeout);
- if (r == 0)
- errno = ETIME;
- if (r != 1)
- return -1;
-
- if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &eval, &esize) < 0)
- return -1;
- if (eval != 0) {
- errno = eval;
- return -1;
- }
-
- return 0;
-}
-
-static inline int poll_read(int fd, unsigned int timeout_sec)
-{
- struct timeval timeout = { .tv_sec = timeout_sec };
- fd_set rfds;
- int r;
-
- FD_ZERO(&rfds);
- FD_SET(fd, &rfds);
-
- r = select(fd + 1, &rfds, NULL, NULL, &timeout);
- if (r == 0)
- errno = ETIME;
-
- return r == 1 ? 0 : -1;
-}
-
-static inline int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
- unsigned int timeout_sec)
-{
- if (poll_read(fd, timeout_sec))
- return -1;
-
- return accept(fd, addr, len);
-}
-
-static inline int recv_timeout(int fd, void *buf, size_t len, int flags,
- unsigned int timeout_sec)
-{
- if (poll_read(fd, timeout_sec))
- return -1;
-
- return recv(fd, buf, len, flags);
-}
-
-static inline void init_addr_loopback4(struct sockaddr_storage *ss,
- socklen_t *len)
-{
- struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
-
- addr4->sin_family = AF_INET;
- addr4->sin_port = 0;
- addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
- *len = sizeof(*addr4);
-}
-
-static inline void init_addr_loopback6(struct sockaddr_storage *ss,
- socklen_t *len)
-{
- struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss));
-
- addr6->sin6_family = AF_INET6;
- addr6->sin6_port = 0;
- addr6->sin6_addr = in6addr_loopback;
- *len = sizeof(*addr6);
-}
-
-static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss,
- socklen_t *len)
-{
- struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss));
-
- addr->svm_family = AF_VSOCK;
- addr->svm_port = VMADDR_PORT_ANY;
- addr->svm_cid = VMADDR_CID_LOCAL;
- *len = sizeof(*addr);
-}
-
-static inline void init_addr_loopback(int family, struct sockaddr_storage *ss,
- socklen_t *len)
-{
- switch (family) {
- case AF_INET:
- init_addr_loopback4(ss, len);
- return;
- case AF_INET6:
- init_addr_loopback6(ss, len);
- return;
- case AF_VSOCK:
- init_addr_loopback_vsock(ss, len);
- return;
- default:
- FAIL("unsupported address family %d", family);
- }
-}
-
-static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss)
-{
- return (struct sockaddr *)ss;
-}
-
static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
{
u64 value;
@@ -334,136 +83,4 @@ static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
}
-static inline int enable_reuseport(int s, int progfd)
-{
- int err, one = 1;
-
- err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
- if (err)
- return -1;
- err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd,
- sizeof(progfd));
- if (err)
- return -1;
-
- return 0;
-}
-
-static inline int socket_loopback_reuseport(int family, int sotype, int progfd)
-{
- struct sockaddr_storage addr;
- socklen_t len = 0;
- int err, s;
-
- init_addr_loopback(family, &addr, &len);
-
- s = xsocket(family, sotype, 0);
- if (s == -1)
- return -1;
-
- if (progfd >= 0)
- enable_reuseport(s, progfd);
-
- err = xbind(s, sockaddr(&addr), len);
- if (err)
- goto close;
-
- if (sotype & SOCK_DGRAM)
- return s;
-
- err = xlisten(s, SOMAXCONN);
- if (err)
- goto close;
-
- return s;
-close:
- xclose(s);
- return -1;
-}
-
-static inline int socket_loopback(int family, int sotype)
-{
- return socket_loopback_reuseport(family, sotype, -1);
-}
-
-static inline int create_pair(int family, int sotype, int *p0, int *p1)
-{
- __close_fd int s, c = -1, p = -1;
- struct sockaddr_storage addr;
- socklen_t len = sizeof(addr);
- int err;
-
- s = socket_loopback(family, sotype);
- if (s < 0)
- return s;
-
- err = xgetsockname(s, sockaddr(&addr), &len);
- if (err)
- return err;
-
- c = xsocket(family, sotype, 0);
- if (c < 0)
- return c;
-
- err = connect(c, sockaddr(&addr), len);
- if (err) {
- if (errno != EINPROGRESS) {
- FAIL_ERRNO("connect");
- return err;
- }
-
- err = poll_connect(c, IO_TIMEOUT_SEC);
- if (err) {
- FAIL_ERRNO("poll_connect");
- return err;
- }
- }
-
- switch (sotype & SOCK_TYPE_MASK) {
- case SOCK_DGRAM:
- err = xgetsockname(c, sockaddr(&addr), &len);
- if (err)
- return err;
-
- err = xconnect(s, sockaddr(&addr), len);
- if (err)
- return err;
-
- *p0 = take_fd(s);
- break;
- case SOCK_STREAM:
- case SOCK_SEQPACKET:
- p = xaccept_nonblock(s, NULL, NULL);
- if (p < 0)
- return p;
-
- *p0 = take_fd(p);
- break;
- default:
- FAIL("Unsupported socket type %#x", sotype);
- return -EOPNOTSUPP;
- }
-
- *p1 = take_fd(c);
- return 0;
-}
-
-static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1,
- int *p0, int *p1)
-{
- int err;
-
- err = create_pair(family, sotype, c0, p0);
- if (err)
- return err;
-
- err = create_pair(family, sotype, c1, p1);
- if (err) {
- close(*c0);
- close(*p0);
- }
-
- return err;
-}
-
#endif // __SOCKMAP_HELPERS__
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_change_tail.c b/tools/testing/selftests/bpf/prog_tests/tc_change_tail.c
new file mode 100644
index 000000000000..74752233e779
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_change_tail.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <error.h>
+#include <test_progs.h>
+#include <linux/pkt_cls.h>
+
+#include "test_tc_change_tail.skel.h"
+#include "socket_helpers.h"
+
+#define LO_IFINDEX 1
+
+void test_tc_change_tail(void)
+{
+ LIBBPF_OPTS(bpf_tcx_opts, tcx_opts);
+ struct test_tc_change_tail *skel = NULL;
+ struct bpf_link *link;
+ int c1, p1;
+ char buf[2];
+ int ret;
+
+ skel = test_tc_change_tail__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tc_change_tail__open_and_load"))
+ return;
+
+ link = bpf_program__attach_tcx(skel->progs.change_tail, LO_IFINDEX,
+ &tcx_opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_tcx"))
+ goto destroy;
+
+ skel->links.change_tail = link;
+ ret = create_pair(AF_INET, SOCK_DGRAM, &c1, &p1);
+ if (!ASSERT_OK(ret, "create_pair"))
+ goto destroy;
+
+ ret = xsend(p1, "Tr", 2, 0);
+ ASSERT_EQ(ret, 2, "xsend(p1)");
+ ret = recv(c1, buf, 2, 0);
+ ASSERT_EQ(ret, 2, "recv(c1)");
+ ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret");
+
+ ret = xsend(p1, "G", 1, 0);
+ ASSERT_EQ(ret, 1, "xsend(p1)");
+ ret = recv(c1, buf, 2, 0);
+ ASSERT_EQ(ret, 1, "recv(c1)");
+ ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret");
+
+ ret = xsend(p1, "E", 1, 0);
+ ASSERT_EQ(ret, 1, "xsend(p1)");
+ ret = recv(c1, buf, 1, 0);
+ ASSERT_EQ(ret, 1, "recv(c1)");
+ ASSERT_EQ(skel->data->change_tail_ret, -EINVAL, "change_tail_ret");
+
+ ret = xsend(p1, "Z", 1, 0);
+ ASSERT_EQ(ret, 1, "xsend(p1)");
+ ret = recv(c1, buf, 1, 0);
+ ASSERT_EQ(ret, 1, "recv(c1)");
+ ASSERT_EQ(skel->data->change_tail_ret, -EINVAL, "change_tail_ret");
+
+ close(c1);
+ close(p1);
+destroy:
+ test_tc_change_tail__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c
new file mode 100644
index 000000000000..2796dd8545eb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 ByteDance */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} sock_map_rx SEC(".maps");
+
+long change_tail_ret = 1;
+
+SEC("sk_skb")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ char *data, *data_end;
+
+ bpf_skb_pull_data(skb, 1);
+ data = (char *)(unsigned long)skb->data;
+ data_end = (char *)(unsigned long)skb->data_end;
+
+ if (data + 1 > data_end)
+ return SK_PASS;
+
+ if (data[0] == 'T') { /* Trim the packet */
+ change_tail_ret = bpf_skb_change_tail(skb, skb->len - 1, 0);
+ return SK_PASS;
+ } else if (data[0] == 'G') { /* Grow the packet */
+ change_tail_ret = bpf_skb_change_tail(skb, skb->len + 1, 0);
+ return SK_PASS;
+ } else if (data[0] == 'E') { /* Error */
+ change_tail_ret = bpf_skb_change_tail(skb, 65535, 0);
+ return SK_PASS;
+ }
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_change_tail.c b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c
new file mode 100644
index 000000000000..28edafe803f0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/pkt_cls.h>
+
+long change_tail_ret = 1;
+
+static __always_inline struct iphdr *parse_ip_header(struct __sk_buff *skb, int *ip_proto)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *data = (void *)(long)skb->data;
+ struct ethhdr *eth = data;
+ struct iphdr *iph;
+
+ /* Verify Ethernet header */
+ if ((void *)(data + sizeof(*eth)) > data_end)
+ return NULL;
+
+ /* Skip Ethernet header to get to IP header */
+ iph = (void *)(data + sizeof(struct ethhdr));
+
+ /* Verify IP header */
+ if ((void *)(data + sizeof(struct ethhdr) + sizeof(*iph)) > data_end)
+ return NULL;
+
+ /* Basic IP header validation */
+ if (iph->version != 4) /* Only support IPv4 */
+ return NULL;
+
+ if (iph->ihl < 5) /* Minimum IP header length */
+ return NULL;
+
+ *ip_proto = iph->protocol;
+ return iph;
+}
+
+static __always_inline struct udphdr *parse_udp_header(struct __sk_buff *skb, struct iphdr *iph)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *hdr = (void *)iph;
+ struct udphdr *udp;
+
+ /* Calculate UDP header position */
+ udp = hdr + (iph->ihl * 4);
+ hdr = (void *)udp;
+
+ /* Verify UDP header bounds */
+ if ((void *)(hdr + sizeof(*udp)) > data_end)
+ return NULL;
+
+ return udp;
+}
+
+SEC("tc/ingress")
+int change_tail(struct __sk_buff *skb)
+{
+ int len = skb->len;
+ struct udphdr *udp;
+ struct iphdr *iph;
+ void *data_end;
+ char *payload;
+ int ip_proto;
+
+ bpf_skb_pull_data(skb, len);
+
+ data_end = (void *)(long)skb->data_end;
+ iph = parse_ip_header(skb, &ip_proto);
+ if (!iph)
+ return TCX_PASS;
+
+ if (ip_proto != IPPROTO_UDP)
+ return TCX_PASS;
+
+ udp = parse_udp_header(skb, iph);
+ if (!udp)
+ return TCX_PASS;
+
+ payload = (char *)udp + (sizeof(struct udphdr));
+ if (payload + 1 > (char *)data_end)
+ return TCX_PASS;
+
+ if (payload[0] == 'T') { /* Trim the packet */
+ change_tail_ret = bpf_skb_change_tail(skb, len - 1, 0);
+ if (!change_tail_ret)
+ bpf_skb_change_tail(skb, len, 0);
+ return TCX_PASS;
+ } else if (payload[0] == 'G') { /* Grow the packet */
+ change_tail_ret = bpf_skb_change_tail(skb, len + 1, 0);
+ if (!change_tail_ret)
+ bpf_skb_change_tail(skb, len, 0);
+ return TCX_PASS;
+ } else if (payload[0] == 'E') { /* Error */
+ change_tail_ret = bpf_skb_change_tail(skb, 65535, 0);
+ return TCX_PASS;
+ } else if (payload[0] == 'Z') { /* Zero */
+ change_tail_ret = bpf_skb_change_tail(skb, 0, 0);
+ return TCX_PASS;
+ }
+ return TCX_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/sdt.h b/tools/testing/selftests/bpf/sdt.h
index ca0162b4dc57..1fcfa5160231 100644
--- a/tools/testing/selftests/bpf/sdt.h
+++ b/tools/testing/selftests/bpf/sdt.h
@@ -102,6 +102,8 @@
# define STAP_SDT_ARG_CONSTRAINT nZr
# elif defined __arm__
# define STAP_SDT_ARG_CONSTRAINT g
+# elif defined __loongarch__
+# define STAP_SDT_ARG_CONSTRAINT nmr
# else
# define STAP_SDT_ARG_CONSTRAINT nor
# endif
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 2d742fdac6b9..81943c6254e6 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -293,6 +293,10 @@ static int procmap_query(int fd, const void *addr, __u32 query_flags, size_t *st
return 0;
}
#else
+# ifndef PROCMAP_QUERY_VMA_EXECUTABLE
+# define PROCMAP_QUERY_VMA_EXECUTABLE 0x04
+# endif
+
static int procmap_query(int fd, const void *addr, __u32 query_flags, size_t *start, size_t *offset, int *flags)
{
return -EOPNOTSUPP;
diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py
index 30f29096e27c..9c5473abbd78 100755
--- a/tools/testing/selftests/drivers/net/queues.py
+++ b/tools/testing/selftests/drivers/net/queues.py
@@ -8,25 +8,28 @@ from lib.py import cmd
import glob
-def sys_get_queues(ifname) -> int:
- folders = glob.glob(f'/sys/class/net/{ifname}/queues/rx-*')
+def sys_get_queues(ifname, qtype='rx') -> int:
+ folders = glob.glob(f'/sys/class/net/{ifname}/queues/{qtype}-*')
return len(folders)
-def nl_get_queues(cfg, nl):
+def nl_get_queues(cfg, nl, qtype='rx'):
queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True)
if queues:
- return len([q for q in queues if q['type'] == 'rx'])
+ return len([q for q in queues if q['type'] == qtype])
return None
def get_queues(cfg, nl) -> None:
- queues = nl_get_queues(cfg, nl)
- if not queues:
- raise KsftSkipEx('queue-get not supported by device')
+ snl = NetdevFamily(recv_size=4096)
- expected = sys_get_queues(cfg.dev['ifname'])
- ksft_eq(queues, expected)
+ for qtype in ['rx', 'tx']:
+ queues = nl_get_queues(cfg, snl, qtype)
+ if not queues:
+ raise KsftSkipEx('queue-get not supported by device')
+
+ expected = sys_get_queues(cfg.dev['ifname'], qtype)
+ ksft_eq(queues, expected)
def addremove_queues(cfg, nl) -> None:
@@ -57,7 +60,7 @@ def addremove_queues(cfg, nl) -> None:
def main() -> None:
- with NetDrvEnv(__file__, queue_count=3) as cfg:
+ with NetDrvEnv(__file__, queue_count=100) as cfg:
ksft_run([get_queues, addremove_queues], args=(cfg, NetdevFamily()))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py
index 63e3c045a3b2..031ac9def6c0 100755
--- a/tools/testing/selftests/drivers/net/stats.py
+++ b/tools/testing/selftests/drivers/net/stats.py
@@ -110,6 +110,23 @@ def qstat_by_ifindex(cfg) -> None:
ksft_ge(triple[1][key], triple[0][key], comment="bad key: " + key)
ksft_ge(triple[2][key], triple[1][key], comment="bad key: " + key)
+ # Sanity check the dumps
+ queues = NetdevFamily(recv_size=4096).qstats_get({"scope": "queue"}, dump=True)
+ # Reformat the output into {ifindex: {rx: [id, id, ...], tx: [id, id, ...]}}
+ parsed = {}
+ for entry in queues:
+ ifindex = entry["ifindex"]
+ if ifindex not in parsed:
+ parsed[ifindex] = {"rx":[], "tx": []}
+ parsed[ifindex][entry["queue-type"]].append(entry['queue-id'])
+ # Now, validate
+ for ifindex, queues in parsed.items():
+ for qtype in ['rx', 'tx']:
+ ksft_eq(len(queues[qtype]), len(set(queues[qtype])),
+ comment="repeated queue keys")
+ ksft_eq(len(queues[qtype]), max(queues[qtype]) + 1,
+ comment="missing queue keys")
+
# Test invalid dumps
# 0 is invalid
with ksft_raises(NlError) as cm:
@@ -158,7 +175,7 @@ def check_down(cfg) -> None:
def main() -> None:
- with NetDrvEnv(__file__) as cfg:
+ with NetDrvEnv(__file__, queue_count=100) as cfg:
ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex,
check_down],
args=(cfg, ))
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index 95af2d78fd31..0a0b55516028 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -9,6 +9,7 @@
#include <fcntl.h>
#include <linux/memfd.h>
#include <sched.h>
+#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
@@ -1557,6 +1558,11 @@ static void test_share_fork(char *banner, char *b_suffix)
close(fd);
}
+static bool pid_ns_supported(void)
+{
+ return access("/proc/self/ns/pid", F_OK) == 0;
+}
+
int main(int argc, char **argv)
{
pid_t pid;
@@ -1591,8 +1597,12 @@ int main(int argc, char **argv)
test_seal_grow();
test_seal_resize();
- test_sysctl_simple();
- test_sysctl_nested();
+ if (pid_ns_supported()) {
+ test_sysctl_simple();
+ test_sysctl_nested();
+ } else {
+ printf("PID namespaces are not supported; skipping sysctl tests\n");
+ }
test_share_dup("SHARE-DUP", "");
test_share_mmap("SHARE-MMAP", "");
diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py
index a0d689d58c57..076a7e8dc3eb 100644
--- a/tools/testing/selftests/net/lib/py/ynl.py
+++ b/tools/testing/selftests/net/lib/py/ynl.py
@@ -32,23 +32,23 @@ except ModuleNotFoundError as e:
# Set schema='' to avoid jsonschema validation, it's slow
#
class EthtoolFamily(YnlFamily):
- def __init__(self):
+ def __init__(self, recv_size=0):
super().__init__((SPEC_PATH / Path('ethtool.yaml')).as_posix(),
- schema='')
+ schema='', recv_size=recv_size)
class RtnlFamily(YnlFamily):
- def __init__(self):
+ def __init__(self, recv_size=0):
super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(),
- schema='')
+ schema='', recv_size=recv_size)
class NetdevFamily(YnlFamily):
- def __init__(self):
+ def __init__(self, recv_size=0):
super().__init__((SPEC_PATH / Path('netdev.yaml')).as_posix(),
- schema='')
+ schema='', recv_size=recv_size)
class NetshaperFamily(YnlFamily):
- def __init__(self):
+ def __init__(self, recv_size=0):
super().__init__((SPEC_PATH / Path('net_shaper.yaml')).as_posix(),
- schema='')
+ schema='', recv_size=recv_size)
diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh
index cc0bfae2bafa..960e1ab4dd04 100755
--- a/tools/testing/selftests/net/openvswitch/openvswitch.sh
+++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh
@@ -171,8 +171,10 @@ ovs_add_netns_and_veths () {
ovs_add_if "$1" "$2" "$4" -u || return 1
fi
- [ $TRACING -eq 1 ] && ovs_netns_spawn_daemon "$1" "$ns" \
- tcpdump -i any -s 65535
+ if [ $TRACING -eq 1 ]; then
+ ovs_netns_spawn_daemon "$1" "$3" tcpdump -l -i any -s 6553
+ ovs_wait grep -q "listening on any" ${ovs_dir}/stderr
+ fi
return 0
}
diff --git a/usr/include/Makefile b/usr/include/Makefile
index 771e32872b2a..6c6de1b1622b 100644
--- a/usr/include/Makefile
+++ b/usr/include/Makefile
@@ -78,7 +78,7 @@ quiet_cmd_hdrtest = HDRTEST $<
cmd_hdrtest = \
$(CC) $(c_flags) -fsyntax-only -x c /dev/null \
$(if $(filter-out $(no-header-test), $*.h), -include $< -include $<); \
- $(PERL) $(src)/headers_check.pl $(obj) $(SRCARCH) $<; \
+ $(PERL) $(src)/headers_check.pl $(obj) $<; \
touch $@
$(obj)/%.hdrtest: $(obj)/%.h FORCE
diff --git a/usr/include/headers_check.pl b/usr/include/headers_check.pl
index b6aec5e4365f..2b70bfa5558e 100755
--- a/usr/include/headers_check.pl
+++ b/usr/include/headers_check.pl
@@ -3,9 +3,8 @@
#
# headers_check.pl execute a number of trivial consistency checks
#
-# Usage: headers_check.pl dir arch [files...]
+# Usage: headers_check.pl dir [files...]
# dir: dir to look for included files
-# arch: architecture
# files: list of files to check
#
# The script reads the supplied files line by line and:
@@ -23,7 +22,7 @@ use warnings;
use strict;
use File::Basename;
-my ($dir, $arch, @files) = @ARGV;
+my ($dir, @files) = @ARGV;
my $ret = 0;
my $line;
@@ -55,10 +54,6 @@ sub check_include
my $found;
$found = stat($dir . "/" . $inc);
if (!$found) {
- $inc =~ s#asm/#asm-$arch/#;
- $found = stat($dir . "/" . $inc);
- }
- if (!$found) {
printf STDERR "$filename:$lineno: included file '$inc' is not exported\n";
$ret = 1;
}