From 277bd3371f11400d5b02df54f057569be4b10cea Mon Sep 17 00:00:00 2001 From: Le Ma Date: Tue, 24 May 2022 10:51:43 +0800 Subject: drm/amdgpu: convert gfx.kiq to array type (v3) v1: more kiq instances are available in SOC (Le) v2: squash commits to avoid breaking the build (Le) v3: make the conversion for gfx/mec v11_0 (Hawking) Signed-off-by: Le Ma Reviewed-by: Hawking Zhang Signed-off-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 64ab1a306dfe..290804a06e05 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -824,7 +824,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* This is necessary for a HW workaround under SRIOV as well * as GFXOFF under bare metal */ - if (adev->gfx.kiq.ring.sched.ready && + if (adev->gfx.kiq[0].ring.sched.ready && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && down_read_trylock(&adev->reset_domain->sem)) { uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng; @@ -934,8 +934,8 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t queried_pasid; bool ret; u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout; - struct amdgpu_ring *ring = &adev->gfx.kiq.ring; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; if (amdgpu_in_reset(adev)) return -EIO; @@ -955,7 +955,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, if (vega20_xgmi_wa) ndw += kiq->pmf->invalidate_tlbs_size; - spin_lock(&adev->gfx.kiq.ring_lock); + spin_lock(&adev->gfx.kiq[0].ring_lock); /* 2 dwords flush + 8 dwords fence */ amdgpu_ring_alloc(ring, ndw); if (vega20_xgmi_wa) @@ -966,13 +966,13 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); if (r) { amdgpu_ring_undo(ring); - spin_unlock(&adev->gfx.kiq.ring_lock); + spin_unlock(&adev->gfx.kiq[0].ring_lock); up_read(&adev->reset_domain->sem); return -ETIME; } amdgpu_ring_commit(ring); - spin_unlock(&adev->gfx.kiq.ring_lock); + spin_unlock(&adev->gfx.kiq[0].ring_lock); r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); if (r < 1) { dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); -- cgit From c0c27428903700d86920394aa2302506b5d95b17 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Tue, 2 May 2023 11:59:08 -0400 Subject: drm/amdgpu: fix an amdgpu_irq_put() issue in gmc_v9_0_hw_fini() As mentioned in commit c56edea58c31 ("drm/amdgpu: fix amdgpu_irq_put call trace in gmc_v10_0_hw_fini") and commit aa6ac247ed7d ("drm/amdgpu: fix amdgpu_irq_put call trace in gmc_v11_0_hw_fini"), it is meaningless to call amdgpu_irq_put() for gmc.ecc_irq. So, remove it from gmc_v9_0_hw_fini().
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2522 Fixes: c8b5a95b5709 ("drm/amdgpu: Fix desktop freezed after gpu-reset") Reviewed-by: Mario Limonciello Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 290804a06e05..6ae5cee9b64b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1999,7 +1999,6 @@ static int gmc_v9_0_hw_fini(void *handle) if (adev->mmhub.funcs->update_power_gating) adev->mmhub.funcs->update_power_gating(adev, false); - amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); return 0; -- cgit From f4caf5842652f08e024741ef6d423cb0c101d863 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Wed, 14 Sep 2022 16:35:50 +0800 Subject: drm/amdgpu: introduce vmhub definition for multi-partition cases (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v1: Each partition has its own gfxhub or mmhub. adjust the num of MAX_VMHUBS and the GFXHUB/MMHUB layout (Le) v2: re-design the AMDGPU_GFXHUB/AMDGPU_MMHUB layout (Le) v3: apply the gfxhub/mmhub layout to new IPs (Hawking) v4: fix up gmc11 (Alex) v5: rebase (Alex) Signed-off-by: Le Ma Acked-by: Christian König Reviewed-by: Hawking Zhang Signed-off-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 78 +++++++++++++++++------------------ 1 file changed, 38 insertions(+), 40 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 6ae5cee9b64b..193ba4d912a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -491,20 +491,20 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, * fini/suspend, so the overall state doesn't * change over the course of suspend/resume. */ - if (adev->in_s0ix && (j == AMDGPU_GFXHUB_0)) + if (adev->in_s0ix && (j == AMDGPU_GFXHUB(0))) continue; - if (j == AMDGPU_GFXHUB_0) - tmp = RREG32_SOC15_IP(GC, reg); - else + if (j >= AMDGPU_MMHUB0(0)) tmp = RREG32_SOC15_IP(MMHUB, reg); + else + tmp = RREG32_SOC15_IP(GC, reg); tmp &= ~bits; - if (j == AMDGPU_GFXHUB_0) - WREG32_SOC15_IP(GC, reg, tmp); - else + if (j >= AMDGPU_MMHUB0(0)) WREG32_SOC15_IP(MMHUB, reg, tmp); + else + WREG32_SOC15_IP(GC, reg, tmp); } } break; @@ -519,20 +519,20 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, * fini/suspend, so the overall state doesn't * change over the course of suspend/resume. 
*/ - if (adev->in_s0ix && (j == AMDGPU_GFXHUB_0)) + if (adev->in_s0ix && (j == AMDGPU_GFXHUB(0))) continue; - if (j == AMDGPU_GFXHUB_0) - tmp = RREG32_SOC15_IP(GC, reg); - else + if (j >= AMDGPU_MMHUB0(0)) tmp = RREG32_SOC15_IP(MMHUB, reg); + else + tmp = RREG32_SOC15_IP(GC, reg); tmp |= bits; - if (j == AMDGPU_GFXHUB_0) - WREG32_SOC15_IP(GC, reg, tmp); - else + if (j >= AMDGPU_MMHUB0(0)) WREG32_SOC15_IP(MMHUB, reg, tmp); + else + WREG32_SOC15_IP(GC, reg, tmp); } } break; @@ -605,13 +605,13 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, if (entry->client_id == SOC15_IH_CLIENTID_VMC) { hub_name = "mmhub0"; - hub = &adev->vmhub[AMDGPU_MMHUB_0]; + hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) { hub_name = "mmhub1"; - hub = &adev->vmhub[AMDGPU_MMHUB_1]; + hub = &adev->vmhub[AMDGPU_MMHUB1(0)]; } else { hub_name = "gfxhub0"; - hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; } memset(&task_info, 0, sizeof(struct amdgpu_task_info)); @@ -636,7 +636,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, * be updated to avoid reading an incorrect value due to * the new fast GRBM interface. */ - if ((entry->vmid_src == AMDGPU_GFXHUB_0) && + if ((entry->vmid_src == AMDGPU_GFXHUB(0)) && (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2))) RREG32(hub->vm_l2_pro_fault_status); @@ -649,7 +649,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, dev_err(adev->dev, "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", status); - if (hub == &adev->vmhub[AMDGPU_GFXHUB_0]) { + if (hub == &adev->vmhub[AMDGPU_GFXHUB(0)]) { dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" : gfxhub_client_ids[cid], @@ -759,8 +759,8 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev, adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) return false; - return ((vmhub == AMDGPU_MMHUB_0 || - vmhub == AMDGPU_MMHUB_1) && + return ((vmhub == AMDGPU_MMHUB0(0) || + vmhub == AMDGPU_MMHUB1(0)) && (!amdgpu_sriov_vf(adev)) && (!(!(adev->apu_flags & AMD_APU_IS_RAVEN2) && (adev->apu_flags & AMD_APU_IS_PICASSO)))); @@ -849,11 +849,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, if (use_semaphore) { for (j = 0; j < adev->usec_timeout; j++) { /* a read return value of 1 means semaphore acquire */ - if (vmhub == AMDGPU_GFXHUB_0) - tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng); - else + if (vmhub >= AMDGPU_MMHUB0(0)) tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng); - + else + tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng); if (tmp & 0x1) break; udelay(1); @@ -864,27 +863,26 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, } do { - if (vmhub == AMDGPU_GFXHUB_0) - WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); - else + if (vmhub >= AMDGPU_MMHUB0(0)) WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); + else + WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); /* * Issue a dummy read to wait for the ACK register to * be cleared to avoid a false ACK due to the new fast * GRBM interface. 
*/ - if ((vmhub == AMDGPU_GFXHUB_0) && + if ((vmhub == AMDGPU_GFXHUB(0)) && (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2))) RREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng); for (j = 0; j < adev->usec_timeout; j++) { - if (vmhub == AMDGPU_GFXHUB_0) - tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_ack + hub->eng_distance * eng); - else + if (vmhub >= AMDGPU_MMHUB0(0)) tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_ack + hub->eng_distance * eng); - + else + tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_ack + hub->eng_distance * eng); if (tmp & (1 << vmid)) break; udelay(1); @@ -900,10 +898,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * add semaphore release after invalidation, * write with 0 means semaphore release */ - if (vmhub == AMDGPU_GFXHUB_0) - WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0); + if (vmhub >= AMDGPU_MMHUB0(0)) + WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); else - WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0); + WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); } spin_unlock(&adev->gmc.invalidate_lock); @@ -994,7 +992,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, i, flush_type); } else { gmc_v9_0_flush_gpu_tlb(adev, vmid, - AMDGPU_GFXHUB_0, flush_type); + AMDGPU_GFXHUB(0), flush_type); } break; } @@ -1060,10 +1058,10 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, uint32_t reg; /* Do nothing because there's no lut register for mmhub1. */ - if (ring->vm_hub == AMDGPU_MMHUB_1) + if (ring->vm_hub == AMDGPU_MMHUB1(0)) return; - if (ring->vm_hub == AMDGPU_GFXHUB_0) + if (ring->vm_hub == AMDGPU_GFXHUB(0)) reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; else reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid; @@ -1947,7 +1945,7 @@ static int gmc_v9_0_hw_init(void *handle) adev->mmhub.funcs->set_fault_enable_default(adev, value); } for (i = 0; i < adev->num_vmhubs; ++i) { - if (adev->in_s0ix && (i == AMDGPU_GFXHUB_0)) + if (adev->in_s0ix && (i == AMDGPU_GFXHUB(0))) continue; gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0); } -- cgit From d9426c3d9b4e91dda4f1f1684f9296762fafe0de Mon Sep 17 00:00:00 2001 From: Le Ma Date: Mon, 20 Dec 2021 16:06:25 +0800 Subject: drm/amdgpu: add bitmask to iterate vmhubs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As the layout of VMHUB definition has been changed to cover multiple XCD/AID case, the original num_vmhubs is not appropriate to do vmhub iteration any more. Drop num_vmhubs and introduce vmhubs_mask instead. 
v2: switch to the new VMHUB layout v3: use DECLARE_BITMAP to define vmhubs_mask Signed-off-by: Le Ma Reviewed-by: Christian König Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 193ba4d912a6..d4bfb5f8308a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -481,7 +481,7 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - for (j = 0; j < adev->num_vmhubs; j++) { + for_each_set_bit(j, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) { hub = &adev->vmhub[j]; for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; @@ -509,7 +509,7 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, } break; case AMDGPU_IRQ_STATE_ENABLE: - for (j = 0; j < adev->num_vmhubs; j++) { + for_each_set_bit(j, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) { hub = &adev->vmhub[j]; for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; @@ -803,7 +803,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, u32 j, inv_req, inv_req2, tmp; struct amdgpu_vmhub *hub; - BUG_ON(vmhub >= adev->num_vmhubs); + BUG_ON(vmhub >= AMDGPU_MAX_VMHUBS); hub = &adev->vmhub[vmhub]; if (adev->gmc.xgmi.num_physical_nodes && @@ -987,7 +987,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, &queried_pasid); if (ret && queried_pasid == pasid) { if (all_hub) { - for (i = 0; i < adev->num_vmhubs; i++) + for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) gmc_v9_0_flush_gpu_tlb(adev, vmid, i, flush_type); } else { @@ -1684,7 +1684,8 @@ static int gmc_v9_0_sw_init(void *handle) switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(9, 1, 0): case IP_VERSION(9, 2, 2): - adev->num_vmhubs = 2; + set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); + set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask); if (adev->rev_id == 0x0 || adev->rev_id == 0x1) { amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); @@ -1701,8 +1702,8 @@ static int gmc_v9_0_sw_init(void *handle) case IP_VERSION(9, 3, 0): case IP_VERSION(9, 4, 2): case IP_VERSION(9, 4, 3): - adev->num_vmhubs = 2; - + set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); + set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask); /* * To fulfill 4-level page support, @@ -1718,7 +1719,9 @@ static int gmc_v9_0_sw_init(void *handle) adev->gmc.translate_further = adev->vm_manager.num_level > 1; break; case IP_VERSION(9, 4, 1): - adev->num_vmhubs = 3; + set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); + set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask); + set_bit(AMDGPU_MMHUB1(0), adev->vmhubs_mask); /* Keep the vm size same with Vega20 */ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); @@ -1944,7 +1947,7 @@ static int gmc_v9_0_hw_init(void *handle) adev->gfxhub.funcs->set_fault_enable_default(adev, value); adev->mmhub.funcs->set_fault_enable_default(adev, value); } - for (i = 0; i < adev->num_vmhubs; ++i) { + for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) { if (adev->in_s0ix && (i == AMDGPU_GFXHUB(0))) continue; gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0); -- cgit From ce8a12a532ed62d7037be91c5714243fdfa9f672 Mon Sep 17 00:00:00 2001 From: Le Ma Date: Mon, 20 Dec 2021 16:42:20 +0800 Subject: drm/amdgpu: init vmhubs bitmask for GC 9.4.3 MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit Each XCD owns one GFXHUB. v2: switch to the new VMHUB layout Signed-off-by: Le Ma Acked-by: Christian König Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index d4bfb5f8308a..6da85365e5aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1701,7 +1701,6 @@ static int gmc_v9_0_sw_init(void *handle) case IP_VERSION(9, 4, 0): case IP_VERSION(9, 3, 0): case IP_VERSION(9, 4, 2): - case IP_VERSION(9, 4, 3): set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask); @@ -1727,6 +1726,12 @@ static int gmc_v9_0_sw_init(void *handle) amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); adev->gmc.translate_further = adev->vm_manager.num_level > 1; break; + case IP_VERSION(9, 4, 3): + bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0), adev->gfx.num_xcd); + bitmap_set(adev->vmhubs_mask, AMDGPU_MMHUB0(0), 1); + + amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); + break; default: break; } -- cgit From f87f686482c6d2d4465245356854710b01f312c1 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Mon, 9 May 2022 22:22:20 -0400 Subject: drm/amdgpu: Add XCC inst to PASID TLB flushing Add XCC instance to select the correct KIQ ring when flushing TLBs on a multi-XCC setup. Signed-off-by: Mukul Joshi Tested-by: Amber Lin Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 6da85365e5aa..0163a761ccf0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -924,7 +924,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, */ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, uint32_t flush_type, - bool all_hub) + bool all_hub, uint32_t inst) { int vmid, i; signed long r; @@ -932,8 +932,8 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t queried_pasid; bool ret; u32 usec_timeout = amdgpu_sriov_vf(adev) ? 
SRIOV_USEC_TIMEOUT : adev->usec_timeout; - struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring; - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst]; if (amdgpu_in_reset(adev)) return -EIO; @@ -953,7 +953,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, if (vega20_xgmi_wa) ndw += kiq->pmf->invalidate_tlbs_size; - spin_lock(&adev->gfx.kiq[0].ring_lock); + spin_lock(&adev->gfx.kiq[inst].ring_lock); /* 2 dwords flush + 8 dwords fence */ amdgpu_ring_alloc(ring, ndw); if (vega20_xgmi_wa) @@ -964,13 +964,13 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); if (r) { amdgpu_ring_undo(ring); - spin_unlock(&adev->gfx.kiq[0].ring_lock); + spin_unlock(&adev->gfx.kiq[inst].ring_lock); up_read(&adev->reset_domain->sem); return -ETIME; } amdgpu_ring_commit(ring); - spin_unlock(&adev->gfx.kiq[0].ring_lock); + spin_unlock(&adev->gfx.kiq[inst].ring_lock); r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); if (r < 1) { dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); -- cgit From 21e1217b4c0e0234704d50ea303c7603266604ac Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Mon, 9 May 2022 22:30:57 -0400 Subject: drm/amdgpu: Fix VM fault reporting on XCC1 Fix VM fault reporting and clear VM fault register for XCC1. Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 0163a761ccf0..681bc9d354fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -557,6 +557,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, u64 addr; uint32_t cam_index = 0; int ret; + uint32_t node_id; addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; @@ -611,7 +612,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, hub = &adev->vmhub[AMDGPU_MMHUB1(0)]; } else { hub_name = "gfxhub0"; - hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; + node_id = (adev->ip_versions[GC_HWIP][0] == + IP_VERSION(9, 4, 3)) ? entry->node_id : 0; + hub = &adev->vmhub[node_id/2]; } memset(&task_info, 0, sizeof(struct amdgpu_task_info)); @@ -645,11 +648,10 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW); WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); - dev_err(adev->dev, "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", status); - if (hub == &adev->vmhub[AMDGPU_GFXHUB(0)]) { + if (entry->vmid_src == AMDGPU_GFXHUB(0)) { dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" : gfxhub_client_ids[cid], -- cgit From 5de6bd6a13f1c717279c870eb8290e466c8f6a80 Mon Sep 17 00:00:00 2001 From: Le Ma Date: Fri, 25 Feb 2022 15:47:20 +0800 Subject: drm/amdgpu: set mmhub bitmask for multiple AIDs Like GFXHUB, set MMHUB0 bitmask for each AID. 
Signed-off-by: Le Ma Acked-by: Felix Kuehling Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 681bc9d354fe..59be0c0293c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1730,7 +1730,7 @@ static int gmc_v9_0_sw_init(void *handle) break; case IP_VERSION(9, 4, 3): bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0), adev->gfx.num_xcd); - bitmap_set(adev->vmhubs_mask, AMDGPU_MMHUB0(0), 1); + bitmap_set(adev->vmhubs_mask, AMDGPU_MMHUB0(0), adev->num_aid); amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); break; -- cgit From 9eb7681f760c77adece36bc62953245c9f44a3be Mon Sep 17 00:00:00 2001 From: Shiwu Zhang Date: Mon, 21 Feb 2022 15:38:39 +0800 Subject: drm/amdgpu: add the support of XGMI link for GC 9.4.3 Add the xgmi LFB_CNTL/LBF_SIZE reg addresses to fetch the xgmi info from. v2: move get_xgmi_info() to GC_V9_4_3 specific source files to utilize the register definitions specific for GC_V9_4_3 v3: remove the duplicated register definitions v4: enable xgmi based on asic_type as XGMI_IP ver is not available yet for IP discovery Signed-off-by: Shiwu Zhang Reviewed-by: Le Ma Ack-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 59be0c0293c4..4b2c4ecd7253 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1419,9 +1419,13 @@ static int gmc_v9_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - /* ARCT and VEGA20 don't have XGMI defined in their IP discovery tables */ - if (adev->asic_type == CHIP_VEGA20 || - adev->asic_type == CHIP_ARCTURUS) + /* + * 9.4.0, 9.4.1 and 9.4.3 don't have XGMI defined + * in their IP discovery tables + */ + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0) || + adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) || + adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) adev->gmc.xgmi.supported = true; if (adev->ip_versions[XGMI_HWIP][0] == IP_VERSION(6, 1, 0)) { -- cgit From 8078f1c610fdcdd8003e2c538fb04af41fa5c269 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 29 Jun 2022 11:41:53 +0530 Subject: drm/amdgpu: Change num_xcd to xcc_mask Instead of number of XCCs, keep a mask of XCCs for the exact XCCs available on the ASIC. XCC configuration could differ based on different ASIC configs.
v2: Rename num_xcd to num_xcc (Hawking) Use smaller xcc_mask size, changed to u16 (Le) Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 4b2c4ecd7253..2c322a25bf1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1733,7 +1733,8 @@ static int gmc_v9_0_sw_init(void *handle) adev->gmc.translate_further = adev->vm_manager.num_level > 1; break; case IP_VERSION(9, 4, 3): - bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0), adev->gfx.num_xcd); + bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0), + NUM_XCC(adev->gfx.xcc_mask)); bitmap_set(adev->vmhubs_mask, AMDGPU_MMHUB0(0), adev->num_aid); amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); -- cgit From 5fb34bd9cf9e248d7e84e431a4a6b731334ab564 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Tue, 24 May 2022 10:22:12 -0500 Subject: drm/amdkfd: pass kfd_node ref to svm migration api This work is required for GC 9.4.3, previous to support memory partitions per node at SVM. When multiple partition is configured, every BO should be allocated inside one specific partition which corresponds to the current amdgpu_device and kfd_node. v2: squash in compilation fix (Alex) v3: squash in fix for pre-gfx 9.4.3 (Alex) v4: squash in best_loc fix (Alex) Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 2c322a25bf1c..c5752a349f3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -557,11 +557,24 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, u64 addr; uint32_t cam_index = 0; int ret; - uint32_t node_id; + uint32_t node_id = 0; addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; + if (entry->client_id == SOC15_IH_CLIENTID_VMC) { + hub_name = "mmhub0"; + hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; + } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) { + hub_name = "mmhub1"; + hub = &adev->vmhub[AMDGPU_MMHUB1(0)]; + } else { + hub_name = "gfxhub0"; + node_id = (adev->ip_versions[GC_HWIP][0] == + IP_VERSION(9, 4, 3)) ? 
entry->node_id : 0; + hub = &adev->vmhub[node_id/2]; + } + if (retry_fault) { if (adev->irq.retry_cam_enabled) { /* Delegate it to a different ring if the hardware hasn't @@ -574,7 +587,8 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, cam_index = entry->src_data[2] & 0x3ff; - ret = amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault); + ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->client_id, node_id, + addr, write_fault); WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index); if (ret) return 1; @@ -596,7 +610,8 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, /* Try to handle the recoverable page faults by filling page * tables */ - if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault)) + if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->client_id, node_id, + addr, write_fault)) return 1; } } @@ -604,18 +619,6 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, if (!printk_ratelimit()) return 0; - if (entry->client_id == SOC15_IH_CLIENTID_VMC) { - hub_name = "mmhub0"; - hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; - } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) { - hub_name = "mmhub1"; - hub = &adev->vmhub[AMDGPU_MMHUB1(0)]; - } else { - hub_name = "gfxhub0"; - node_id = (adev->ip_versions[GC_HWIP][0] == - IP_VERSION(9, 4, 3)) ? entry->node_id : 0; - hub = &adev->vmhub[node_id/2]; - } memset(&task_info, 0, sizeof(struct amdgpu_task_info)); amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); -- cgit From f5fe7edfd6ce62cd23fbd707e7f9fe0f56a45e94 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Fri, 30 Sep 2022 09:16:21 -0400 Subject: drm/amdkfd: Update interrupt handling for GFX9.4.3 Update interrupt handling in CPX mode for GFX9.4.3 by using the VMID space instead of SDMA client id to determine if an interrupt should be processed by a KFD node. This is especially needed for handling retry faults from MMHUB. Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index c5752a349f3d..f2814270da40 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -587,7 +587,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, cam_index = entry->src_data[2] & 0x3ff; - ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->client_id, node_id, + ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id, addr, write_fault); WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index); if (ret) @@ -610,7 +610,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, /* Try to handle the recoverable page faults by filling page * tables */ - if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->client_id, node_id, + if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id, addr, write_fault)) return 1; } -- cgit From eaae4beee8a94b30f37341c9d14837c82e7e2647 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Mon, 14 Nov 2022 17:35:43 -0500 Subject: drm/amdgpu: more GPU page fault info for GC v9.4.3 Output IH cookie node_id and translate it to the corresponding AID id and XCC id, to help debug the GPU page fault. 
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index f2814270da40..2966aca9545d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -557,7 +557,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, u64 addr; uint32_t cam_index = 0; int ret; - uint32_t node_id = 0; + uint32_t node_id; + + node_id = (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) ? entry->node_id : 0; addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; @@ -570,8 +572,6 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, hub = &adev->vmhub[AMDGPU_MMHUB1(0)]; } else { hub_name = "gfxhub0"; - node_id = (adev->ip_versions[GC_HWIP][0] == - IP_VERSION(9, 4, 3)) ? entry->node_id : 0; hub = &adev->vmhub[node_id/2]; } @@ -634,6 +634,11 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, addr, entry->client_id, soc15_ih_clientid_name[entry->client_id]); + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) + dev_err(adev->dev, " cookie node_id %d fault from die %s%d%s\n", + node_id, node_id % 4 == 3 ? "RSV" : "AID", node_id / 4, + node_id % 4 == 1 ? ".XCD0" : node_id % 4 == 2 ? ".XCD1" : ""); + if (amdgpu_sriov_vf(adev)) return 0; -- cgit From 497db7ea33f7cec2a0019894e844789f003dbd22 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Tue, 8 Nov 2022 23:04:30 -0500 Subject: drm/amdgpu: Check APU supports true APP mode On GFXIP 9.4.3 APU, in no carveout mode there is no real vram heap and it could be emulated by the driver over the interleaved NUMA system memory and the APU could also be in the carveout mode during early development stage or otherwise for debugging purposes, so introduce a new member in amdgpu_gmc to figure out whether the APU is in the native mode as per the production configuration. AMD_IS_APU cannot be used for Accelerated Processing Platform APUs as it might be used in a different context on previous generations or on small APUs. Reviewed-by: Hawking Zhang Reviewed-by: Felix Kuehling Tested-by: Graham Sider Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 2966aca9545d..0792c48fe347 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1442,6 +1442,20 @@ static int gmc_v9_0_early_init(void *handle) adev->smuio.funcs->is_host_gpu_xgmi_supported(adev); } + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) { + enum amdgpu_pkg_type pkg_type = + adev->smuio.funcs->get_pkg_type(adev); + /* On GFXIP 9.4.3. APU, there is no physical VRAM domain present + * and the APU, can be in used two possible modes: + * - carveout mode + * - native APU mode + * "is_app_apu" can be used to identify the APU in the native + * mode.
+ */ + adev->gmc.is_app_apu = (pkg_type == AMDGPU_PKG_TYPE_APU && + !pci_resource_len(adev->pdev, 0)); + } + gmc_v9_0_set_gmc_funcs(adev); gmc_v9_0_set_irq_funcs(adev); gmc_v9_0_set_umc_funcs(adev); -- cgit From 7a1efad04c210594069c4ab9f9c25039cd6915e4 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 29 Nov 2022 14:00:37 +0530 Subject: drm/amdgpu: Use mask for active clusters Use a mask of available active clusters instead of using only the number of active clusters. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 0792c48fe347..b3f64f2f306d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1674,6 +1674,7 @@ static int gmc_v9_0_sw_init(void *handle) { int r, vram_width = 0, vram_type = 0, vram_vendor = 0, dma_addr_bits; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + unsigned long inst_mask = adev->aid_mask; adev->gfxhub.funcs->init(adev); @@ -1757,7 +1758,9 @@ static int gmc_v9_0_sw_init(void *handle) case IP_VERSION(9, 4, 3): bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0), NUM_XCC(adev->gfx.xcc_mask)); - bitmap_set(adev->vmhubs_mask, AMDGPU_MMHUB0(0), adev->num_aid); + + inst_mask <<= AMDGPU_MMHUB0(0); + bitmap_or(adev->vmhubs_mask, adev->vmhubs_mask, &inst_mask, 32); amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); break; -- cgit From 753b999afe47900531282f86bf430aec250b4232 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Wed, 7 Dec 2022 00:29:40 -0500 Subject: drm/amdgpu: set MTYPE in PTE for GFXIP 9.4.3 Apply the GFXIP 9.4.3 specific snoop and mtype settings for various scenarios such as APU, APU in Carveout mode and dGPU mode. Note: This is expected to change due to: 1 - NPS > 1 support in future 2 - Hardware bugs found during initial asic bringup. Cc: Graham Sider Cc: Hawking Zhang Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 40 ++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index b3f64f2f306d..3765178e6fc5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1173,7 +1173,6 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(9, 4, 1): case IP_VERSION(9, 4, 2): - case IP_VERSION(9, 4, 3): if (is_vram) { if (bo_adev == adev) { if (uncached) @@ -1207,6 +1206,45 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, */ snoop = true; } + break; + case IP_VERSION(9, 4, 3): + /* FIXME: Needs more work for handling multiple memory + * partitions (> NPS1 mode) e.g. NPS4 for both APU and dGPU + * modes. + */ + snoop = true; + if (uncached) { + mtype = MTYPE_UC; + } else if (adev->gmc.is_app_apu) { + /* FIXME: APU in native mode, NPS1 single socket only + * + * For suporting NUMA partitioned APU e.g. in NPS4 mode, + * this need to look at the NUMA node on which the + * system memory allocation was done. + * + * Memory access by a different partition within same + * socket should be treated as remote access so MTYPE_RW + * cannot be used always. 
+ */ + mtype = MTYPE_RW; + } else if (adev->flags & AMD_IS_APU) { + /* APU on carve out mode */ + mtype = MTYPE_RW; + } else { + /* dGPU */ + /* + if ((mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) && + bo_adev == adev) + mapping_flags |= AMDGPU_VM_MTYPE_RW; + else + */ + /* Temporarily comment out above lines and use MTYPE_NC + * on both VRAM and system memory access until + * MTYPE_RW can properly work on VRAM access + */ + mtype = MTYPE_NC; + } + break; default: if (uncached || coherent) -- cgit From 98b2e9cad2279132e3aa4b9caf9164b2e35c1a52 Mon Sep 17 00:00:00 2001 From: Le Ma Date: Fri, 9 Dec 2022 19:44:05 +0800 Subject: drm/amdgpu: correct the vmhub index when page fault occurs The AMDGPU_GFXHUB was bind to each xcc in the logical order. Thus convert the node_id to logical xcc_id to index the correct AMDGPU_GFXHUB. And "node_id / 4" can get the correct AMDGPU_MMHUB0 index. Signed-off-by: Le Ma Tested-by: Asad kamal Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 3765178e6fc5..841333148610 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -557,22 +557,28 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, u64 addr; uint32_t cam_index = 0; int ret; - uint32_t node_id; + uint32_t node_id, xcc_id = 0; - node_id = (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) ? entry->node_id : 0; + node_id = entry->node_id; addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; if (entry->client_id == SOC15_IH_CLIENTID_VMC) { hub_name = "mmhub0"; - hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; + hub = &adev->vmhub[AMDGPU_MMHUB0(node_id / 4)]; } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) { hub_name = "mmhub1"; hub = &adev->vmhub[AMDGPU_MMHUB1(0)]; } else { hub_name = "gfxhub0"; - hub = &adev->vmhub[node_id/2]; + if (adev->gfx.funcs->ih_node_to_logical_xcc) { + xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev, + node_id); + if (xcc_id < 0) + xcc_id = 0; + } + hub = &adev->vmhub[xcc_id]; } if (retry_fault) { -- cgit From a0a0c69c05bff025abf49ec66b2bfb94aeabcc6e Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 14 Dec 2022 10:28:50 +0530 Subject: drm/amdgpu: Fix semaphore release Use the right register for semaphore release during invalidation. 
Signed-off-by: Lijo Lazar Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 841333148610..1e4364120845 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -915,9 +915,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * write with 0 means semaphore release */ if (vmhub >= AMDGPU_MMHUB0(0)) - WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); + WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0); else - WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); + WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0); } spin_unlock(&adev->gmc.invalidate_lock); -- cgit From 12c4d7edfb7238ded6c7a2584995d888b4d877ec Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 15 Dec 2022 13:13:29 +0530 Subject: drm/amdgpu: Fix GFX 9.4.3 dma address capability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ASICs with GFX 9.4.3 support 48-bit addressing. Signed-off-by: Lijo Lazar Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 1e4364120845..444441c6b7e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1846,7 +1846,7 @@ static int gmc_v9_0_sw_init(void *handle) */ adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ - dma_addr_bits = adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ? 48:44; + dma_addr_bits = adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2) ? 48:44; r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(dma_addr_bits)); if (r) { printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); -- cgit From c9a502e981a961053f3f873b14677d95e804251e Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 29 Nov 2022 12:45:26 -0500 Subject: drm/amdgpu: Allocate GART table in RAM for AMD APU Some AMD APUs may not have a dedicated VRAM. On such platforms the GART table should be allocated on the system memory. When real vram size is zero, place the GART table in system memory and create an SG BO to make it GPU accessible. 
v2: fix includes Reviewed-by: Felix Kuehling (rajneesh: removed set_memory_wc workaround) Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Harish Kasiviswanathan Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 444441c6b7e3..aca8489635b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1688,12 +1688,18 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev) adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) | AMDGPU_PTE_EXECUTABLE; - r = amdgpu_gart_table_vram_alloc(adev); - if (r) - return r; + if (!adev->gmc.real_vram_size) { + dev_info(adev->dev, "Put GART in system memory for APU\n"); + r = amdgpu_gart_table_ram_alloc(adev); + if (r) + dev_err(adev->dev, "Failed to allocate GART in system memory\n"); + } else { + r = amdgpu_gart_table_vram_alloc(adev); + if (r) + return r; - if (adev->gmc.xgmi.connected_to_cpu) { - r = amdgpu_gmc_pdb0_alloc(adev); + if (adev->gmc.xgmi.connected_to_cpu) + r = amdgpu_gmc_pdb0_alloc(adev); } return r; @@ -1902,7 +1908,12 @@ static int gmc_v9_0_sw_fini(void *handle) amdgpu_gmc_ras_fini(adev); amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); - amdgpu_gart_table_vram_free(adev); + if (!adev->gmc.real_vram_size) { + dev_info(adev->dev, "Put GART in system memory for APU free\n"); + amdgpu_gart_table_ram_free(adev); + } else { + amdgpu_gart_table_vram_free(adev); + } amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0); amdgpu_bo_fini(adev); -- cgit From d839a158b2480814bc438f9f46f440a7b9f63cb6 Mon Sep 17 00:00:00 2001 From: Graham Sider Date: Thu, 5 Jan 2023 10:58:07 -0500 Subject: drm/amdgpu: Correct dGPU MTYPE settings for gfx943 Revert temporary dGPU VRAM MTYPE setting and align with expected coherency protocol. Signed-off-by: Graham Sider Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index aca8489635b8..b6c500be6f70 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1238,17 +1238,12 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, mtype = MTYPE_RW; } else { /* dGPU */ - /* - if ((mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) && - bo_adev == adev) - mapping_flags |= AMDGPU_VM_MTYPE_RW; + if (is_vram && bo_adev == adev) + mtype = MTYPE_RW; + else if (is_vram) + mtype = MTYPE_NC; else - */ - /* Temporarily comment out above lines and use MTYPE_NC - * on both VRAM and system memory access until - * MTYPE_RW can properly work on VRAM access - */ - mtype = MTYPE_NC; + mtype = MTYPE_UC; } break; -- cgit From 73c2b3fd2c515bcb819d801c5c4bf053fdb1e5cb Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Sun, 22 Jan 2023 23:26:40 +0800 Subject: drm/amdgpu: Initialize mmhub v1_8 ras function Initialize mmhub v1_8 ras function. 
Signed-off-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index b6c500be6f70..16634a791e10 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1419,6 +1419,9 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev) case IP_VERSION(9, 4, 2): adev->mmhub.ras = &mmhub_v1_7_ras; break; + case IP_VERSION(1, 8, 0): + adev->mmhub.ras = &mmhub_v1_8_ras; + break; default: /* mmhub ras is not available */ break; -- cgit From 228ce176434b0f61451019065393040d58e1668d Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Fri, 27 Jan 2023 21:57:00 -0500 Subject: drm/amdgpu: Handle VRAM dependencies on GFXIP9.4.3 [For 1P NPS1 mode driver bringup] Changes required to initialize the amdgpu driver with frontdoor firmware loading and discovery=2 with the native mode SBIOS that enables CPU GPU unified interleaved memory. sudo modprobe amdgpu discovery=2 Once PSP TMR region is reported via the ACPI interface, the dependency on the ip_discovery.bin will be removed. Choice of where to allocate driver table is given to each IP version. In general, both GTT and VRAM domains will be considered. If one of the tables has a strict restriction for VRAM domain, then only VRAM domain is considered. Reviewed-by: Felix Kuehling (lijo: Modified the handling for SMU Tables) Signed-off-by: Lijo Lazar Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 16634a791e10..245de27c7540 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1593,8 +1593,13 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) int r; /* size in MB on si */ - adev->gmc.mc_vram_size = - adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL; + if (!adev->gmc.is_app_apu) { + adev->gmc.mc_vram_size = + adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL; + } else { + DRM_DEBUG("Set mc_vram_size = 0 for APP APU\n"); + adev->gmc.mc_vram_size = 0; + } adev->gmc.real_vram_size = adev->gmc.mc_vram_size; if (!(adev->flags & AMD_IS_APU) && -- cgit From b6f90baafe267a0705c5d9b1429c875d3c39fbc7 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 31 Jan 2023 12:39:49 +0530 Subject: drm/amdgpu: Move memory partition query to gmc GMC block handles memory related information, it makes more sense to keep memory partition functions in gmc block. 
Signed-off-by: Lijo Lazar Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 245de27c7540..db157a31a780 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1330,6 +1330,17 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) return size; } +static enum amdgpu_memory_partition +gmc_v9_0_query_memory_partition(struct amdgpu_device *adev) +{ + enum amdgpu_memory_partition mode = UNKNOWN_MEMORY_PARTITION_MODE; + + if (adev->nbio.funcs->get_memory_partition_mode) + mode = adev->nbio.funcs->get_memory_partition_mode(adev); + + return mode; +} + static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb, .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid, @@ -1339,6 +1350,7 @@ static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { .get_vm_pde = gmc_v9_0_get_vm_pde, .get_vm_pte = gmc_v9_0_get_vm_pte, .get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size, + .query_mem_partition_mode = &gmc_v9_0_query_memory_partition, }; static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev) @@ -1901,6 +1913,9 @@ static int gmc_v9_0_sw_init(void *handle) if (r) return r; + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) + amdgpu_gmc_sysfs_init(adev); + return 0; } @@ -1908,6 +1923,9 @@ static int gmc_v9_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) + amdgpu_gmc_sysfs_fini(adev); + amdgpu_gmc_ras_fini(adev); amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); -- cgit From 0f2e1d620eca56c4ceebc041aabb1eda26b2cfd0 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 17 Feb 2023 09:32:44 +0530 Subject: drm/amdgpu: Get supported memory partition modes Expand the interface to get supported memory partition modes also along with the current memory partition mode. 
Signed-off-by: Lijo Lazar Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index db157a31a780..d6a1dac01952 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1331,16 +1331,23 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) } static enum amdgpu_memory_partition -gmc_v9_0_query_memory_partition(struct amdgpu_device *adev) +gmc_v9_0_get_memory_partition(struct amdgpu_device *adev, u32 *supp_modes) { enum amdgpu_memory_partition mode = UNKNOWN_MEMORY_PARTITION_MODE; if (adev->nbio.funcs->get_memory_partition_mode) - mode = adev->nbio.funcs->get_memory_partition_mode(adev); + mode = adev->nbio.funcs->get_memory_partition_mode(adev, + supp_modes); return mode; } +static enum amdgpu_memory_partition +gmc_v9_0_query_memory_partition(struct amdgpu_device *adev) +{ + return gmc_v9_0_get_memory_partition(adev, NULL); +} + static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb, .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid, -- cgit From a433f1f59484fba7a7743a3c5a5f320d9e828b3a Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 14 Feb 2023 14:45:45 +0530 Subject: drm/amdgpu: Initialize memory ranges for GC 9.4.3 GC 9.4.3 ASICS may have memory split into multiple partitions.Initialize the memory partition information for each range. The information may be in the form of a numa node id or a range of pages. Signed-off-by: Lijo Lazar Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 172 ++++++++++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index d6a1dac01952..1653d77df3ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -79,6 +79,7 @@ #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2 0x05ea #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2_BASE_IDX 2 +#define MAX_MEM_RANGES 8 static const char *gfxhub_client_ids[] = { "CB", @@ -1742,6 +1743,169 @@ static void gmc_v9_0_save_registers(struct amdgpu_device *adev) adev->gmc.sdpif_register = RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0); } +static bool gmc_v9_0_validate_partition_info(struct amdgpu_device *adev) +{ + enum amdgpu_memory_partition mode; + u32 supp_modes; + bool valid; + + mode = gmc_v9_0_get_memory_partition(adev, &supp_modes); + + /* Mode detected by hardware not present in supported modes */ + if ((mode != UNKNOWN_MEMORY_PARTITION_MODE) && + !(BIT(mode - 1) & supp_modes)) + return false; + + switch (mode) { + case UNKNOWN_MEMORY_PARTITION_MODE: + case AMDGPU_NPS1_PARTITION_MODE: + valid = (adev->gmc.num_mem_partitions == 1); + break; + case AMDGPU_NPS2_PARTITION_MODE: + valid = (adev->gmc.num_mem_partitions == 2); + break; + case AMDGPU_NPS4_PARTITION_MODE: + valid = (adev->gmc.num_mem_partitions == 3 || + adev->gmc.num_mem_partitions == 4); + break; + default: + valid = false; + } + + return valid; +} + +static bool gmc_v9_0_is_node_present(int *node_ids, int num_ids, int nid) +{ + int i; + + /* Check if node with id 'nid' is present in 'node_ids' array */ + for (i = 0; i < num_ids; ++i) + if (node_ids[i] == nid) + return 
true; + + return false; +} + +static void +gmc_v9_0_init_acpi_mem_ranges(struct amdgpu_device *adev, + struct amdgpu_mem_partition_info *mem_ranges) +{ + int num_ranges = 0, ret, mem_groups; + struct amdgpu_numa_info numa_info; + int node_ids[MAX_MEM_RANGES]; + int num_xcc, xcc_id; + uint32_t xcc_mask; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + xcc_mask = (1U << num_xcc) - 1; + mem_groups = hweight32(adev->aid_mask); + + for_each_inst(xcc_id, xcc_mask) { + ret = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info); + if (ret) + continue; + + if (numa_info.nid == NUMA_NO_NODE) { + mem_ranges[0].size = numa_info.size; + mem_ranges[0].numa.node = numa_info.nid; + num_ranges = 1; + break; + } + + if (gmc_v9_0_is_node_present(node_ids, num_ranges, + numa_info.nid)) + continue; + + node_ids[num_ranges] = numa_info.nid; + mem_ranges[num_ranges].numa.node = numa_info.nid; + mem_ranges[num_ranges].size = numa_info.size; + ++num_ranges; + } + + adev->gmc.num_mem_partitions = num_ranges; + + /* If there is only partition, don't use entire size */ + if (adev->gmc.num_mem_partitions == 1) + mem_ranges[0].size = + (mem_ranges[0].size * (mem_groups - 1) / mem_groups); +} + +static void +gmc_v9_0_init_sw_mem_ranges(struct amdgpu_device *adev, + struct amdgpu_mem_partition_info *mem_ranges) +{ + enum amdgpu_memory_partition mode; + u32 start_addr = 0, size; + int i; + + mode = gmc_v9_0_query_memory_partition(adev); + + switch (mode) { + case UNKNOWN_MEMORY_PARTITION_MODE: + case AMDGPU_NPS1_PARTITION_MODE: + adev->gmc.num_mem_partitions = 1; + break; + case AMDGPU_NPS2_PARTITION_MODE: + adev->gmc.num_mem_partitions = 2; + break; + case AMDGPU_NPS4_PARTITION_MODE: + if (adev->flags & AMD_IS_APU) + adev->gmc.num_mem_partitions = 3; + else + adev->gmc.num_mem_partitions = 4; + break; + default: + adev->gmc.num_mem_partitions = 1; + break; + } + + size = (adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT) / + adev->gmc.num_mem_partitions; + + for (i = 0; i < adev->gmc.num_mem_partitions; ++i) { + mem_ranges[i].range.fpfn = start_addr; + mem_ranges[i].size = ((u64)size << AMDGPU_GPU_PAGE_SHIFT); + mem_ranges[i].range.lpfn = start_addr + size - 1; + start_addr += size; + } + + /* Adjust the last one */ + mem_ranges[adev->gmc.num_mem_partitions - 1].range.lpfn = + (adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT) - 1; + mem_ranges[adev->gmc.num_mem_partitions - 1].size = + adev->gmc.real_vram_size - + ((u64)mem_ranges[adev->gmc.num_mem_partitions - 1].range.fpfn + << AMDGPU_GPU_PAGE_SHIFT); +} + +static int gmc_v9_0_init_mem_ranges(struct amdgpu_device *adev) +{ + bool valid; + + adev->gmc.mem_partitions = kzalloc( + MAX_MEM_RANGES * sizeof(struct amdgpu_mem_partition_info), + GFP_KERNEL); + + if (!adev->gmc.mem_partitions) + return -ENOMEM; + + /* TODO : Get the range from PSP/Discovery for dGPU */ + if (adev->gmc.is_app_apu) + gmc_v9_0_init_acpi_mem_ranges(adev, adev->gmc.mem_partitions); + else + gmc_v9_0_init_sw_mem_ranges(adev, adev->gmc.mem_partitions); + + valid = gmc_v9_0_validate_partition_info(adev); + if (!valid) { + /* TODO: handle invalid case */ + dev_WARN(adev->dev, + "Mem ranges not matching with hardware config"); + } + + return 0; +} + static int gmc_v9_0_sw_init(void *handle) { int r, vram_width = 0, vram_type = 0, vram_vendor = 0, dma_addr_bits; @@ -1888,6 +2052,12 @@ static int gmc_v9_0_sw_init(void *handle) amdgpu_gmc_get_vbios_allocations(adev); + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) { + r = gmc_v9_0_init_mem_ranges(adev); + if (r) + return r; + } + /* Memory manager 
*/ r = amdgpu_bo_init(adev); if (r) @@ -1932,6 +2102,8 @@ static int gmc_v9_0_sw_fini(void *handle) if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) amdgpu_gmc_sysfs_fini(adev); + adev->gmc.num_mem_partitions = 0; + kfree(adev->gmc.mem_partitions); amdgpu_gmc_ras_fini(adev); amdgpu_gem_force_release(adev); -- cgit From b0a3bbf947f6ed690336cec1f6cde2a30d082dbb Mon Sep 17 00:00:00 2001 From: Gavin Wan Date: Mon, 3 Apr 2023 17:49:41 -0400 Subject: drm/amdgpu: Skip using MC FB Offset when APU flag is set for SRIOV. The MC_VM_FB_OFFSET is a PF-only register. It cannot be read on a VF. So, the driver should not use the MC_VM_FB_OFFSET address to set adev->gmc.aper_base. Signed-off-by: Gavin Wan Reviewed-by: Zhigang Luo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 1653d77df3ba..58bcd1e1f1b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1644,7 +1644,8 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) */ /* check whether both host-gpu and gpu-gpu xgmi links exist */ - if (((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) || + if ((!amdgpu_sriov_vf(adev) && + (adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) || (adev->gmc.xgmi.supported && adev->gmc.xgmi.connected_to_cpu)) { adev->gmc.aper_base = -- cgit From 46f7b4deb30558593c1d2e62d561a3cee21f558a Mon Sep 17 00:00:00 2001 From: Gavin Wan Date: Mon, 10 Apr 2023 15:04:26 -0400 Subject: drm/amdgpu: Set memory partitions to 1 for SRIOV. For SRIOV, the memory partitions are set on the host driver. Each VF only has one memory partition. We need to set the memory partitions to 1 on the guest driver for SRIOV. V2: squash in fix ("drm/amdgpu: Fix memory range info of GC 9.4.3 VFs") Signed-off-by: Gavin Wan Acked-by: Zhigang Luo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 58bcd1e1f1b6..95c3253e240a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1346,6 +1346,9 @@ gmc_v9_0_get_memory_partition(struct amdgpu_device *adev, u32 *supp_modes) static enum amdgpu_memory_partition gmc_v9_0_query_memory_partition(struct amdgpu_device *adev) { + if (amdgpu_sriov_vf(adev)) + return AMDGPU_NPS1_PARTITION_MODE; + return gmc_v9_0_get_memory_partition(adev, NULL); } @@ -1897,7 +1900,10 @@ static int gmc_v9_0_init_mem_ranges(struct amdgpu_device *adev) else gmc_v9_0_init_sw_mem_ranges(adev, adev->gmc.mem_partitions); - valid = gmc_v9_0_validate_partition_info(adev); + if (amdgpu_sriov_vf(adev)) + valid = true; + else + valid = gmc_v9_0_validate_partition_info(adev); if (!valid) { /* TODO: handle invalid case */ dev_WARN(adev->dev, -- cgit From 2e8cc5d317d12f7fb4f66361a3ce5427f0abe2cd Mon Sep 17 00:00:00 2001 From: Graham Sider Date: Wed, 8 Feb 2023 11:10:57 -0500 Subject: drm/amdgpu: Use legacy TLB flush for gfx943 Invalidate TLBs via a legacy flush request (flush_type=0) prior to the heavyweight flush request (flush_type=2) in gmc_v9_0.c. This is temporarily required to mitigate a bug causing CPC UTCL1 to return stale translations after invalidation requests in address range mode.
v2: squash in long term fix "drm/amdgpu: disable extra gfx943 legacy flush on rev1+" Signed-off-by: Graham Sider Reviewed-by: Philip Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 95c3253e240a..2eb67b53e497 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -833,6 +833,11 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, */ inv_req = gmc_v9_0_get_invalidate_req(vmid, 2); inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type); + } else if (flush_type == 2 && + adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) && + adev->rev_id == 0) { + inv_req = gmc_v9_0_get_invalidate_req(vmid, 0); + inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type); } else { inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type); inv_req2 = 0; @@ -976,6 +981,13 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, if (vega20_xgmi_wa) kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub); + + if (flush_type == 2 && + adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) && + adev->rev_id == 0) + kiq->pmf->kiq_invalidate_tlbs(ring, + pasid, 0, all_hub); + kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub); r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); -- cgit From 895797d9193b38e759bc01268a8e3887e521f682 Mon Sep 17 00:00:00 2001 From: Graham Sider Date: Mon, 6 Feb 2023 14:04:42 -0500 Subject: drm/amdgpu/bu: Add use_mtype_cc_wa module param Set use_mtype_cc_wa to 1 by default to set the PTE coherence flag to MTYPE_CC instead of MTYPE_RW. This is required for the time being to mitigate a bug causing XCCs to hit stale data due to TCC marking fully dirty lines as exclusive. Signed-off-by: Graham Sider Reviewed-by: Joseph Greathouse Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 2eb67b53e497..8623b93c05ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1187,6 +1187,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT; bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED; unsigned int mtype; + unsigned int mtype_default; bool snoop = false; switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(9, 4, 1): @@ -1230,7 +1231,10 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, /* FIXME: Needs more work for handling multiple memory * partitions (> NPS1 mode) e.g. NPS4 for both APU and dGPU * modes. + * FIXME: Temporarily using MTYPE_CC instead of MTYPE_RW where applicable. + * To force use of MTYPE_RW, set use_mtype_cc_wa=0 */ + mtype_default = amdgpu_use_mtype_cc_wa ? MTYPE_CC : MTYPE_RW; snoop = true; if (uncached) { mtype = MTYPE_UC; @@ -1245,14 +1249,14 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, * socket should be treated as remote access so MTYPE_RW * cannot be used always.
*/ - mtype = MTYPE_RW; + mtype = mtype_default; } else if (adev->flags & AMD_IS_APU) { /* APU on carve out mode */ - mtype = MTYPE_RW; + mtype = mtype_default; } else { /* dGPU */ if (is_vram && bo_adev == adev) - mtype = MTYPE_RW; + mtype = mtype_default; else if (is_vram) mtype = MTYPE_NC; else -- cgit From 1e4a00334add40f609162914af7a24bc92951008 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 21 Feb 2023 17:31:32 -0500 Subject: drm/amdgpu: Fix per-BO MTYPE selection for GFXv9.4.3 Treat system memory on NUMA systems as remote by default. Overriding with a more efficient MTYPE per page will be implemented in the next patch. No need for a special case for APP APUs. System memory is handled the same for carve-out and native mode. And VRAM doesn't exist in native mode. Signed-off-by: Felix Kuehling Reviewed-by: Philip Yang Reviewed-and-tested-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 40 ++++++++++++++--------------------- 1 file changed, 16 insertions(+), 24 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 8623b93c05ee..cf976b5b7b63 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1186,9 +1186,10 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM; bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT; bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED; - unsigned int mtype; - unsigned int mtype_default; + /* TODO: memory partitions struct amdgpu_vm *vm = mapping->bo_va->base.vm;*/ + unsigned int mtype_local, mtype; bool snoop = false; + bool is_local; switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(9, 4, 1): @@ -1228,35 +1229,26 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, } break; case IP_VERSION(9, 4, 3): - /* FIXME: Needs more work for handling multiple memory - * partitions (> NPS1 mode) e.g. NPS4 for both APU and dGPU - * modes. - * FIXME: Temporarily using MTYPE_CC instead of MTYPE_RW where applicable. - * To force use of MTYPE_RW, set use_mtype_cc_wa=0 + /* Only local VRAM BOs or system memory on non-NUMA APUs + * can be assumed to be local in their entirety. Choose + * MTYPE_NC as safe fallback for all system memory BOs on + * NUMA systems. Their MTYPE can be overridden per-page in + * gmc_v9_0_override_vm_pte_flags. */ - mtype_default = amdgpu_use_mtype_cc_wa ? MTYPE_CC : MTYPE_RW; + mtype_local = amdgpu_use_mtype_cc_wa ? MTYPE_CC : MTYPE_RW; + is_local = (!is_vram && (adev->flags & AMD_IS_APU) && + num_possible_nodes() <= 1) || + (is_vram && adev == bo_adev /* TODO: memory partitions && + bo->mem_id == vm->mem_id*/); snoop = true; if (uncached) { mtype = MTYPE_UC; - } else if (adev->gmc.is_app_apu) { - /* FIXME: APU in native mode, NPS1 single socket only - * - * For suporting NUMA partitioned APU e.g. in NPS4 mode, - * this need to look at the NUMA node on which the - * system memory allocation was done. - * - * Memory access by a different partition within same - * socket should be treated as remote access so MTYPE_RW - * cannot be used always. - */ - mtype = mtype_default; } else if (adev->flags & AMD_IS_APU) { - /* APU on carve out mode */ - mtype = mtype_default; + mtype = is_local ? 
mtype_local : MTYPE_NC; } else { /* dGPU */ - if (is_vram && bo_adev == adev) - mtype = mtype_default; + if (is_local) + mtype = mtype_local; else if (is_vram) mtype = MTYPE_NC; else -- cgit From 352b919c1e5ff50c71d665395b27acbd1bf23a05 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 21 Feb 2023 17:44:18 -0500 Subject: drm/amdgpu: Override MTYPE per page on GFXv9.4.3 APUs On GFXv9.4.3 NUMA APUs, system memory locality must be determined per page to choose the correct MTYPE. This patch adds a GMC callback that can provide this per-page override and implements it for native mode. Carve-out mode is not yet supported and will use the safe default (remote) MTYPE for system memory. Signed-off-by: Felix Kuehling Reviewed-by: Philip Yang Reviewed-and-tested-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 64 +++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index cf976b5b7b63..c64a69f75da2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1297,6 +1297,69 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev, mapping, flags); } +static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + uint64_t addr, uint64_t *flags) +{ + int local_node, nid; + + /* Only GFX 9.4.3 APUs associate GPUs with NUMA nodes. Local system + * memory can use more efficient MTYPEs. + */ + if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3)) + return; + + /* Only direct-mapped memory allows us to determine the NUMA node from + * the DMA address. + */ + if (!adev->ram_is_direct_mapped) { + dev_dbg(adev->dev, "RAM is not direct mapped\n"); + return; + } + + /* Only override mappings with MTYPE_NC, which is the safe default for + * cacheable memory. + */ + if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) != + AMDGPU_PTE_MTYPE_VG10(MTYPE_NC)) { + dev_dbg(adev->dev, "MTYPE is not NC\n"); + return; + } + + /* TODO: memory partitions. mem_id is hard-coded to 0 for now. + * FIXME: Only supported on native mode for now. For carve-out, the + * NUMA affinity of the GPU/VM needs to come from the PCI info because + * memory partitions are not associated with different NUMA nodes. + */ + if (adev->gmc.is_app_apu) { + local_node = adev->gmc.mem_partitions[/*vm->mem_id*/0].numa.node; + } else { + dev_dbg(adev->dev, "Only native mode APU is supported.\n"); + return; + } + + /* Only handle real RAM. Mappings of PCIe resources don't have struct + * page or NUMA nodes. + */ + if (!page_is_ram(addr >> PAGE_SHIFT)) { + dev_dbg(adev->dev, "Page is not RAM.\n"); + return; + } + nid = pfn_to_nid(addr >> PAGE_SHIFT); + dev_dbg(adev->dev, "vm->mem_id=%d, local_node=%d, nid=%d\n", + /*vm->mem_id*/0, local_node, nid); + if (nid == local_node) { + unsigned int mtype_local = + amdgpu_use_mtype_cc_wa ? 
MTYPE_CC : MTYPE_RW; + uint64_t old_flags = *flags; + + *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | + AMDGPU_PTE_MTYPE_VG10(mtype_local); + dev_dbg(adev->dev, "flags updated from %llx to %llx\n", + old_flags, *flags); + } +} + static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) { u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL); @@ -1368,6 +1431,7 @@ static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { .map_mtype = gmc_v9_0_map_mtype, .get_vm_pde = gmc_v9_0_get_vm_pde, .get_vm_pte = gmc_v9_0_get_vm_pte, + .override_vm_pte_flags = gmc_v9_0_override_vm_pte_flags, .get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size, .query_mem_partition_mode = &gmc_v9_0_query_memory_partition, }; -- cgit From 76eb9c95a409ea820b2e7c968c220e7a38f27d76 Mon Sep 17 00:00:00 2001 From: David Francis Date: Mon, 27 Feb 2023 10:33:11 -0500 Subject: drm/amdgpu/bu: add mtype_local as a module parameter Selects the MTYPE to be used for local memory, (0 = MTYPE_CC (default), 1 = MTYPE_NC, 2 = MTYPE_RW) v2: squash in build fix (Alex) Reviewed-by: Graham Sider Signed-off-by: David Francis Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index c64a69f75da2..5a1414300271 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1235,7 +1235,16 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, * NUMA systems. Their MTYPE can be overridden per-page in * gmc_v9_0_override_vm_pte_flags. */ - mtype_local = amdgpu_use_mtype_cc_wa ? MTYPE_CC : MTYPE_RW; + mtype_local = MTYPE_CC; + if (amdgpu_mtype_local == 1) { + DRM_INFO_ONCE("Using MTYPE_NC for local memory\n"); + mtype_local = MTYPE_NC; + } else if (amdgpu_mtype_local == 2) { + DRM_INFO_ONCE("Using MTYPE_RW for local memory\n"); + mtype_local = MTYPE_RW; + } else { + DRM_INFO_ONCE("Using MTYPE_CC for local memory\n"); + } is_local = (!is_vram && (adev->flags & AMD_IS_APU) && num_possible_nodes() <= 1) || (is_vram && adev == bo_adev /* TODO: memory partitions && @@ -1349,9 +1358,13 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, dev_dbg(adev->dev, "vm->mem_id=%d, local_node=%d, nid=%d\n", /*vm->mem_id*/0, local_node, nid); if (nid == local_node) { - unsigned int mtype_local = - amdgpu_use_mtype_cc_wa ? MTYPE_CC : MTYPE_RW; uint64_t old_flags = *flags; + unsigned int mtype_local = MTYPE_CC; + + if (amdgpu_mtype_local == 1) + mtype_local = MTYPE_NC; + else if (amdgpu_mtype_local == 2) + mtype_local = MTYPE_RW; *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | AMDGPU_PTE_MTYPE_VG10(mtype_local); -- cgit From b9cbd51000ad3541351ca832b00600870ac08e5c Mon Sep 17 00:00:00 2001 From: Graham Sider Date: Mon, 6 Mar 2023 17:56:44 -0500 Subject: drm/amdgpu/bu: update mtype_local parameter settings Update mtype_local module parameter to use MTYPE_RW by default. 
0: MTYPE_RW (default) 1: MTYPE_NC 2: MTYPE_CC Signed-off-by: Graham Sider Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 5a1414300271..32eb4f4f5492 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1235,15 +1235,15 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, * NUMA systems. Their MTYPE can be overridden per-page in * gmc_v9_0_override_vm_pte_flags. */ - mtype_local = MTYPE_CC; + mtype_local = MTYPE_RW; if (amdgpu_mtype_local == 1) { DRM_INFO_ONCE("Using MTYPE_NC for local memory\n"); mtype_local = MTYPE_NC; } else if (amdgpu_mtype_local == 2) { - DRM_INFO_ONCE("Using MTYPE_RW for local memory\n"); - mtype_local = MTYPE_RW; - } else { DRM_INFO_ONCE("Using MTYPE_CC for local memory\n"); + mtype_local = MTYPE_CC; + } else { + DRM_INFO_ONCE("Using MTYPE_RW for local memory\n"); } is_local = (!is_vram && (adev->flags & AMD_IS_APU) && num_possible_nodes() <= 1) || @@ -1359,12 +1359,12 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, /*vm->mem_id*/0, local_node, nid); if (nid == local_node) { uint64_t old_flags = *flags; - unsigned int mtype_local = MTYPE_CC; + unsigned int mtype_local = MTYPE_RW; if (amdgpu_mtype_local == 1) mtype_local = MTYPE_NC; else if (amdgpu_mtype_local == 2) - mtype_local = MTYPE_RW; + mtype_local = MTYPE_CC; *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | AMDGPU_PTE_MTYPE_VG10(mtype_local); -- cgit From dc12f9eddedb8b41f4dc948e5e636e5221fb4d43 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 2 Feb 2023 11:07:53 -0500 Subject: drm/amdkfd: Update MTYPE for far memory partition Use MTYPE_RW/MTYPE_CC for mapping system memory or VRAM to a KFD node within the same memory partition; use MTYPE_NC for mapping to a KFD node from the far memory partition of the same socket or from another socket on the same XGMI hive. On an NPS4 or 4P system, the MTYPE will be overridden per page depending on the memory NUMA node id and vm->mem_id.
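The policy above reduces to a per-mapping locality check. A rough sketch in C follows (the helper name and its argument are hypothetical; the actual selection happens in gmc_v9_0_get_coherence_flags() and gmc_v9_0_override_vm_pte_flags(), as the hunk below shows):

/* Illustrative only: summarizes the MTYPE choice described above. */
static unsigned int kfd_pick_mtype_sketch(bool same_mem_partition)
{
	/* Same memory partition: use the cacheable local type, MTYPE_RW
	 * (or MTYPE_CC, depending on the amdgpu_mtype_local parameter).
	 */
	if (same_mem_partition)
		return MTYPE_RW;

	/* Far partition on the same socket, or another socket on the same
	 * XGMI hive: treat the access as remote.
	 */
	return MTYPE_NC;
}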
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 32eb4f4f5492..263d17a8b433 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1186,7 +1186,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM; bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT; bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED; - /* TODO: memory partitions struct amdgpu_vm *vm = mapping->bo_va->base.vm;*/ + struct amdgpu_vm *vm = mapping->bo_va->base.vm; unsigned int mtype_local, mtype; bool snoop = false; bool is_local; @@ -1247,8 +1247,8 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, } is_local = (!is_vram && (adev->flags & AMD_IS_APU) && num_possible_nodes() <= 1) || - (is_vram && adev == bo_adev /* TODO: memory partitions && - bo->mem_id == vm->mem_id*/); + (is_vram && adev == bo_adev && + bo->mem_id == vm->mem_id); snoop = true; if (uncached) { mtype = MTYPE_UC; @@ -1335,13 +1335,12 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, return; } - /* TODO: memory partitions. mem_id is hard-coded to 0 for now. - * FIXME: Only supported on native mode for now. For carve-out, the + /* FIXME: Only supported on native mode for now. For carve-out, the * NUMA affinity of the GPU/VM needs to come from the PCI info because * memory partitions are not associated with different NUMA nodes. */ - if (adev->gmc.is_app_apu) { - local_node = adev->gmc.mem_partitions[/*vm->mem_id*/0].numa.node; + if (adev->gmc.is_app_apu && vm->mem_id >= 0) { + local_node = adev->gmc.mem_partitions[vm->mem_id].numa.node; } else { dev_dbg(adev->dev, "Only native mode APU is supported.\n"); return; } @@ -1356,7 +1355,7 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, } nid = pfn_to_nid(addr >> PAGE_SHIFT); dev_dbg(adev->dev, "vm->mem_id=%d, local_node=%d, nid=%d\n", - /*vm->mem_id*/0, local_node, nid); + vm->mem_id, local_node, nid); if (nid == local_node) { uint64_t old_flags = *flags; unsigned int mtype_local = MTYPE_RW; -- cgit From 3ebfd221c1a83e5f0edadb87d173d8fd93d1d125 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Wed, 8 Mar 2023 11:57:00 -0500 Subject: drm/amdkfd: Store xcp partition id to amdgpu bo For memory accounting per compute partition, and for exporting a drm amdgpu bo and then importing it to KFD, we need the xcp id to account for the memory usage or to find the KFD node of the original amdgpu bo so that the KFD bo can be created on the correct adev KFD node. Set xcp_id_plus1 of amdgpu_bo_param when creating a bo and store the xcp_id in the amdgpu bo. Add a helper macro to get the mem_id from adev and xcp_id.
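A minimal sketch of the flow this describes, assuming the xcp_id_plus1 field is offset by one so that a zero-initialized amdgpu_bo_param means "no partition assigned"; apart from xcp_id_plus1 and KFD_XCP_MEM_ID, which this series names, the helper and its details are illustrative:

/* Hypothetical helper: tag a BO with its compute partition at creation
 * time so KFD can later recover the memory partition (mem_id).
 */
static int example_create_bo_on_partition(struct amdgpu_device *adev,
					  unsigned long size, int xcp_id,
					  struct amdgpu_bo **bo)
{
	struct amdgpu_bo_param bp = {};

	bp.size = size;
	bp.byte_align = PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
	bp.type = ttm_bo_type_device;
	/* +1 so that 0 can mean "no partition" (assumption from the name) */
	bp.xcp_id_plus1 = xcp_id + 1;

	/* Later, on the KFD side, KFD_XCP_MEM_ID(adev, bo->xcp_id) maps the
	 * stored partition id back to a mem_id for accounting and lookup.
	 */
	return amdgpu_bo_create(adev, &bp, bo);
}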
v2: squash in fix ("drm/amdgpu: Fix BO creation failure on GFX 9.4.3 dGPU") Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 263d17a8b433..7ea80bdf8e1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1248,7 +1248,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, is_local = (!is_vram && (adev->flags & AMD_IS_APU) && num_possible_nodes() <= 1) || (is_vram && adev == bo_adev && - bo->mem_id == vm->mem_id); + KFD_XCP_MEM_ID(adev, bo->xcp_id) == vm->mem_id); snoop = true; if (uncached) { mtype = MTYPE_UC; -- cgit From 45b3a914d40e63d2c9e3a3e02fb2014be975b9b0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 16 May 2023 17:16:30 -0400 Subject: drm/amdgpu/gmc9: fix 64 bit division in partition code Rework logic or use do_div() to avoid problems on 32 bit. v2: add a missing case for XCP macro v3: fix out of bounds array access v4: fix xcp handling harder Acked-by: Guchun Chen (v1) Reviewed-by: Mukul Joshi (v3) Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 7ea80bdf8e1e..f70e666cecf2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1914,9 +1914,10 @@ gmc_v9_0_init_acpi_mem_ranges(struct amdgpu_device *adev, adev->gmc.num_mem_partitions = num_ranges; /* If there is only partition, don't use entire size */ - if (adev->gmc.num_mem_partitions == 1) - mem_ranges[0].size = - (mem_ranges[0].size * (mem_groups - 1) / mem_groups); + if (adev->gmc.num_mem_partitions == 1) { + mem_ranges[0].size = mem_ranges[0].size * (mem_groups - 1); + do_div(mem_ranges[0].size, mem_groups); + } } static void @@ -1948,8 +1949,8 @@ gmc_v9_0_init_sw_mem_ranges(struct amdgpu_device *adev, break; } - size = (adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT) / - adev->gmc.num_mem_partitions; + size = adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT; + size /= adev->gmc.num_mem_partitions; for (i = 0; i < adev->gmc.num_mem_partitions; ++i) { mem_ranges[i].range.fpfn = start_addr; -- cgit From 6dabce860d40703d7c27b71a120317f09293cf9c Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Mon, 22 May 2023 00:30:15 -0700 Subject: drm/amdgpu: Fix unsigned comparison with zero in gmc_v9_0_process_interrupt() Smatch warns: drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c:579: unsigned 'xcc_id' is never less than zero. gfx_v9_4_3_ih_to_xcc_inst() returns negative numbers as well. Fix this by changing type of xcc_id to int. 
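The underlying pitfall is plain C: storing a negative error return in an unsigned variable makes the subsequent "< 0" check unreachable, which is exactly what smatch reports. A stand-alone illustration (the lookup function is a made-up stand-in for gfx_v9_4_3_ih_to_xcc_inst()):

#include <stdint.h>
#include <stdio.h>

/* Stand-in for a lookup that returns an index on success or a negative
 * error code on failure, as gfx_v9_4_3_ih_to_xcc_inst() can.
 */
static int lookup_xcc_inst(int node_id)
{
	return node_id >= 0 ? node_id / 4 : -1;
}

int main(void)
{
	uint32_t xcc_id = lookup_xcc_inst(-1);	/* -1 wraps to 4294967295 */
	int fixed_xcc_id = lookup_xcc_inst(-1);	/* stays -1 */

	if (xcc_id < 0)			/* always false: the smatch warning */
		printf("never reached\n");
	if (fixed_xcc_id < 0)		/* works once the type is int */
		printf("error handled: %d\n", fixed_xcc_id);
	return 0;
}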
Fixes: 98b2e9cad227 ("drm/amdgpu: correct the vmhub index when page fault occurs") Signed-off-by: Harshit Mogalapalli Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index f70e666cecf2..1e8b2aaa48c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -557,8 +557,8 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, const char *hub_name; u64 addr; uint32_t cam_index = 0; - int ret; - uint32_t node_id, xcc_id = 0; + int ret, xcc_id = 0; + uint32_t node_id; node_id = entry->node_id; -- cgit From 9535a86a4072babc37dc6bdadae52bdbb88166f5 Mon Sep 17 00:00:00 2001 From: Shiwu Zhang Date: Wed, 17 May 2023 14:15:05 +0800 Subject: drm/amdgpu: bypass bios dependent operations Since bios reading does not work currently, just bypass all operations related to the bios. v2: hardcode the vram info for APP_APU case (hawking) v3: correct the vram_width with channel number * channel size (lijo) Signed-off-by: Shiwu Zhang Reviewed-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 63 ++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 24 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 1e8b2aaa48c1..be7823d82150 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -2010,34 +2010,49 @@ static int gmc_v9_0_sw_init(void *handle) spin_lock_init(&adev->gmc.invalidate_lock); - r = amdgpu_atomfirmware_get_vram_info(adev, - &vram_width, &vram_type, &vram_vendor); - if (amdgpu_sriov_vf(adev)) - /* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN, - * and DF related registers is not readable, seems hardcord is the - * only way to set the correct vram_width - */ - adev->gmc.vram_width = 2048; - else if (amdgpu_emu_mode != 1) - adev->gmc.vram_width = vram_width; + if (!(adev->bios) || adev->gmc.is_app_apu) { + if (adev->flags & AMD_IS_APU) { + if (adev->gmc.is_app_apu) { + adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM; + adev->gmc.vram_width = 128 * 64; + } else { + adev->gmc.vram_type = AMDGPU_VRAM_TYPE_DDR4; + adev->gmc.vram_width = 64 * 64; + } + } else { + adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM; + adev->gmc.vram_width = 128 * 64; + } + } else { + r = amdgpu_atomfirmware_get_vram_info(adev, + &vram_width, &vram_type, &vram_vendor); + if (amdgpu_sriov_vf(adev)) + /* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN, + * and DF related registers is not readable, seems hardcord is the + * only way to set the correct vram_width + */ + adev->gmc.vram_width = 2048; + else if (amdgpu_emu_mode != 1) + adev->gmc.vram_width = vram_width; - if (!adev->gmc.vram_width) { - int chansize, numchan; + if (!adev->gmc.vram_width) { + int chansize, numchan; - /* hbm memory channel size */ - if (adev->flags & AMD_IS_APU) - chansize = 64; - else - chansize = 128; - if (adev->df.funcs && - adev->df.funcs->get_hbm_channel_number) { - numchan = adev->df.funcs->get_hbm_channel_number(adev); - adev->gmc.vram_width = numchan * chansize; + /* hbm memory channel size */ + if (adev->flags & AMD_IS_APU) + chansize = 64; + else + chansize = 128; + if (adev->df.funcs && + adev->df.funcs->get_hbm_channel_number) 
{ + numchan = adev->df.funcs->get_hbm_channel_number(adev); + adev->gmc.vram_width = numchan * chansize; + } } - } - adev->gmc.vram_type = vram_type; - adev->gmc.vram_vendor = vram_vendor; + adev->gmc.vram_type = vram_type; + adev->gmc.vram_vendor = vram_vendor; + } switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(9, 1, 0): case IP_VERSION(9, 2, 2): -- cgit From 1bae03aab2b41770b9198b3ef1ddc7dc7efb0678 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Tue, 30 May 2023 14:43:14 +0530 Subject: drm/amdgpu: Fix up missing parameter in kdoc for 'inst' in gmc_ v7, v8, v9, v10, v11.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix these warnings by adding 'inst' arguments to kdocs. gcc with W=1 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c:428: warning: Function parameter or member 'inst' not described in 'gmc_v7_0_flush_gpu_tlb_pasid' drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c:626: warning: Function parameter or member 'inst' not described in 'gmc_v8_0_flush_gpu_tlb_pasid' drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c:423: warning: Function parameter or member 'inst' not described in 'gmc_v10_0_flush_gpu_tlb_pasid' drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c:328: warning: Function parameter or member 'inst' not described in 'gmc_v11_0_flush_gpu_tlb_pasid' drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c:950: warning: Function parameter or member 'inst' not described in 'gmc_v9_0_flush_gpu_tlb_pasid' Cc: Christian König Cc: Alex Deucher Cc: Hawking Zhang Signed-off-by: Srinivasan Shanmugam Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index be7823d82150..3ed286b72cae 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -941,6 +941,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * @pasid: pasid to be flush * @flush_type: the flush type * @all_hub: flush all hubs + * @inst: is used to select which instance of KIQ to use for the invalidation * * Flush the TLB for the requested pasid. */ -- cgit