Merge tag 'amd-drm-next-6.7-2023-10-13' of https://gitlab.freedesktop.org/agd5f/linux into drm-next

amd-drm-next-6.7-2023-10-13: amdgpu: - DC replay fixes - Misc code cleanups and spelling fixes - Documentation updates - RAS EEPROM Updates - FRU EEPROM Updates - IP discovery updates - SR-IOV fixes - RAS updates - DC PQ fixes - SMU 13.0.6 updates - GC 11.5 Support - NBIO 7.11 Support - GMC 11 Updates - Reset fixes - SMU 11.5 Updates - SMU 13.0 OD support - Use flexible arrays for bo list handling - W=1 Fixes - SubVP fixes - DPIA fixes - DCN 3.5 Support - Devcoredump fixes - VPE 6.1 support - VCN 4.0 Updates - S/G display fixes - DML fixes - DML2 Support - MST fixes - VRR fixes - Enable seamless boot in more cases - Enable content type property for HDMI - OLED fixes - Rework and clean up GPUVM TLB flushing - DC ODM fixes - DP 2.x fixes - AGP aperture fixes - SDMA firmware loading cleanups - Cyan Skillfish GPU clock counter fix - GC 11 GART fix - Cache GPU fault info for userspace queries - DC cursor check fixes - eDP fixes - DC FP handling fixes - Variable sized array fixes - SMU 13.0.x fixes - IB start and size alignment fixes for VCN - SMU 14 Support - Suspend and resume sequence rework - vkms fix amdkfd: - GC 11 fixes - GC 10 fixes - Doorbell fixes - CWSR fixes - SVM fixes - Clean up GC info enumeration - Rework memory limit handling - Coherent memory handling fixes - Use partial migrations in GPU faults - TLB flush fixes - DMA unmap fixes - GC 9.4.3 fixes - SQ interrupt fix - GTT mapping fix - GC 11.5 Support radeon: - Misc code cleanups - W=1 Fixes - Fix possible buffer overflow - Fix possible NULL pointer dereference UAPI: - Add EXT_COHERENT memory allocation flags. These allow for system scope atomics. Proposed userspace: https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/pull/88 - Add support for new VPE engine. This is a memory to memory copy engine with advanced scaling, CSC, and color management features Proposed mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25713 - Add INFO IOCTL interface to query GPU faults Proposed Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23238 Proposed libdrm MR: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/298 Signed-off-by: Dave Airlie <airlied@redhat.com> From: Alex Deucher <alexander.deucher@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/20231013175758.1735031-1-alexander.deucher@amd.com
author: Dave Airlie <airlied@redhat.com> 2023-10-18 16:08:07 +1000
committer: Dave Airlie <airlied@redhat.com> 2023-10-18 16:08:07 +1000
commit: 27442758e9b4e083bef3f164a1739475c01f3202 (patch)
tree: 58f4a82f1b9e41c11b6247d8f7b9c3c9b8b52711 /drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
parent: 08057253366d916a73e62bafb913d9b659228cc1 (diff)
parent: cd90511557fdfb394bb4ac4c3b539b007383914c (diff)
download: linux-27442758e9b4e083bef3f164a1739475c01f3202.tar.gz
linux-27442758e9b4e083bef3f164a1739475c01f3202.tar.bz2
linux-27442758e9b4e083bef3f164a1739475c01f3202.zip
1 files changed, 54 insertions, 4 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index f5daadcec865..afc19341334f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -885,12 +885,12 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	 * heavy-weight flush TLB unconditionally.
 	 */
 	flush_tlb |= adev->gmc.xgmi.num_physical_nodes &&
-		     adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0);
+		     amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0);
 
 	/*
 	 * On GFX8 and older any 8 PTE block with a valid bit set enters the TLB
 	 */
-	flush_tlb |= adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 0, 0);
+	flush_tlb |= amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 0, 0);
 
 	memset(&params, 0, sizeof(params));
 	params.adev = adev;
@@ -1403,7 +1403,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 		spin_unlock(&vm->status_lock);
 
 		/* Try to reserve the BO to avoid clearing its ptes */
-		if (!amdgpu_vm_debug && dma_resv_trylock(resv))
+		if (!adev->debug_vm && dma_resv_trylock(resv))
 			clear = false;
 		/* Somebody else is using the BO right now */
 		else
@@ -2059,7 +2059,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
 	if (amdgpu_vm_block_size != -1)
 		tmp >>= amdgpu_vm_block_size - 9;
 	tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1;
-	adev->vm_manager.num_level = min(max_level, (unsigned)tmp);
+	adev->vm_manager.num_level = min_t(unsigned int, max_level, tmp);
 	switch (adev->vm_manager.num_level) {
 	case 3:
 		adev->vm_manager.root_level = AMDGPU_VM_PDB2;
@@ -2730,3 +2730,53 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
 		   total_done_objs);
 }
 #endif
+
+/**
+ * amdgpu_vm_update_fault_cache - update cached fault into.
+ * @adev: amdgpu device pointer
+ * @pasid: PASID of the VM
+ * @addr: Address of the fault
+ * @status: GPUVM fault status register
+ * @vmhub: which vmhub got the fault
+ *
+ * Cache the fault info for later use by userspace in debugging.
+ */
+void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
+				  unsigned int pasid,
+				  uint64_t addr,
+				  uint32_t status,
+				  unsigned int vmhub)
+{
+	struct amdgpu_vm *vm;
+	unsigned long flags;
+
+	xa_lock_irqsave(&adev->vm_manager.pasids, flags);
+
+	vm = xa_load(&adev->vm_manager.pasids, pasid);
+	/* Don't update the fault cache if status is 0.  In the multiple
+	 * fault case, subsequent faults will return a 0 status which is
+	 * useless for userspace and replaces the useful fault status, so
+	 * only update if status is non-0.
+	 */
+	if (vm && status) {
+		vm->fault_info.addr = addr;
+		vm->fault_info.status = status;
+		if (AMDGPU_IS_GFXHUB(vmhub)) {
+			vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_GFX;
+			vm->fault_info.vmhub |=
+				(vmhub - AMDGPU_GFXHUB_START) << AMDGPU_VMHUB_IDX_SHIFT;
+		} else if (AMDGPU_IS_MMHUB0(vmhub)) {
+			vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM0;
+			vm->fault_info.vmhub |=
+				(vmhub - AMDGPU_MMHUB0_START) << AMDGPU_VMHUB_IDX_SHIFT;
+		} else if (AMDGPU_IS_MMHUB1(vmhub)) {
+			vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM1;
+			vm->fault_info.vmhub |=
+				(vmhub - AMDGPU_MMHUB1_START) << AMDGPU_VMHUB_IDX_SHIFT;
+		} else {
+			WARN_ONCE(1, "Invalid vmhub %u\n", vmhub);
+		}
+	}
+	xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
+}
+
author	Dave Airlie <airlied@redhat.com>	2023-10-18 16:08:07 +1000
committer	Dave Airlie <airlied@redhat.com>	2023-10-18 16:08:07 +1000
commit	27442758e9b4e083bef3f164a1739475c01f3202 (patch)
tree	58f4a82f1b9e41c11b6247d8f7b9c3c9b8b52711 /drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
parent	08057253366d916a73e62bafb913d9b659228cc1 (diff)
parent	cd90511557fdfb394bb4ac4c3b539b007383914c (diff)
download	linux-27442758e9b4e083bef3f164a1739475c01f3202.tar.gz linux-27442758e9b4e083bef3f164a1739475c01f3202.tar.bz2 linux-27442758e9b4e083bef3f164a1739475c01f3202.zip