From 8dc1db3172ae2f17ae71e33b608a33411ce8a1aa Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Wed, 14 Sep 2022 16:39:48 +0800 Subject: drm/amdkfd: Introduce kfd_node struct (v5) Introduce a new structure, kfd_node, which will now represent a compute node. kfd_node is carved out of kfd_dev structure. kfd_dev struct now will become the parent of kfd_node, and will store common resources such as doorbells, GTT sub-alloctor etc. kfd_node struct will store all resources specific to a compute node, such as device queue manager, interrupt handling etc. This is the first step in adding compute partition support in KFD. v2: introduce kfd_node struct to gc v11 (Hawking) v3: make reference to kfd_dev struct through kfd_node (Morris) v4: use kfd_node instead for kfd isr/mqd functions (Morris) v5: rebase (Alex) Signed-off-by: Mukul Joshi Tested-by: Amber Lin Reviewed-by: Felix Kuehling Signed-off-by: Hawking Zhang Signed-off-by: Morris Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_crat.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 475e47027354..f5aebba31e88 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1405,7 +1405,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, return i; } -int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info) +int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info) { int num_of_cache_types = 0; @@ -1524,7 +1524,7 @@ int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pca case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): num_of_cache_types = - kfd_fill_gpu_cache_info_from_gfx_config(kdev, *pcache_info); + kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info); break; default: *pcache_info = dummy_cache_info; @@ -1858,7 +1858,7 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) } static int kfd_fill_gpu_memory_affinity(int *avail_size, - struct kfd_dev *kdev, uint8_t type, uint64_t size, + struct kfd_node *kdev, uint8_t type, uint64_t size, struct crat_subtype_memory *sub_type_hdr, uint32_t proximity_domain, const struct kfd_local_mem_info *local_mem_info) @@ -1887,7 +1887,7 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size, } #ifdef CONFIG_ACPI_NUMA -static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev) +static void kfd_find_numa_node_in_srat(struct kfd_node *kdev) { struct acpi_table_header *table_header = NULL; struct acpi_subtable_header *sub_header = NULL; @@ -1982,7 +1982,7 @@ static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev) * Return 0 if successful else return -ve value */ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, - struct kfd_dev *kdev, + struct kfd_node *kdev, struct crat_subtype_iolink *sub_type_hdr, uint32_t proximity_domain) { @@ -2044,8 +2044,8 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, } static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size, - struct kfd_dev *kdev, - struct kfd_dev *peer_kdev, + struct kfd_node *kdev, + struct kfd_node *peer_kdev, struct crat_subtype_iolink *sub_type_hdr, uint32_t proximity_domain_from, uint32_t proximity_domain_to) @@ -2081,7 +2081,7 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size, * [OUT] actual size of data filled in crat_image */ static int kfd_create_vcrat_image_gpu(void *pcrat_image, - size_t *size, struct kfd_dev *kdev, + size_t *size, struct kfd_node *kdev, uint32_t proximity_domain) { struct crat_header *crat_table = (struct crat_header *)pcrat_image; @@ -2153,7 +2153,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, /* Check if this node supports IOMMU. During parsing this flag will * translate to HSA_CAP_ATS_PRESENT */ - if (!kfd_iommu_check_device(kdev)) + if (!kfd_iommu_check_device(kdev->kfd)) cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT; crat_table->length += sub_type_hdr->length; @@ -2164,7 +2164,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, * report the total FB size (public+private) as a single * private heap. */ - local_mem_info = kdev->local_mem_info; + local_mem_info = kdev->kfd->local_mem_info; sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + sub_type_hdr->length); @@ -2216,12 +2216,12 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, * (from other GPU to this GPU) will be added * in kfd_parse_subtype_iolink. */ - if (kdev->hive_id) { + if (kdev->kfd->hive_id) { for (nid = 0; nid < proximity_domain; ++nid) { peer_dev = kfd_topology_device_by_proximity_domain_no_lock(nid); if (!peer_dev->gpu) continue; - if (peer_dev->gpu->hive_id != kdev->hive_id) + if (peer_dev->gpu->kfd->hive_id != kdev->kfd->hive_id) continue; sub_type_hdr = (typeof(sub_type_hdr))( (char *)sub_type_hdr + @@ -2255,12 +2255,12 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, * (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU * -- this option is not currently implemented. * The assumption is that all AMD APUs will have CRAT - * @kdev: Valid kfd_device required if flags contain COMPUTE_UNIT_GPU + * @kdev: Valid kfd_node required if flags contain COMPUTE_UNIT_GPU * * Return 0 if successful else return -ve value */ int kfd_create_crat_image_virtual(void **crat_image, size_t *size, - int flags, struct kfd_dev *kdev, + int flags, struct kfd_node *kdev, uint32_t proximity_domain) { void *pcrat_image = NULL; -- cgit From 92085240ef9c0ec60c27a60b3cc0d4f5266fa511 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Tue, 3 May 2022 10:16:46 -0400 Subject: drm/amdkfd: add gpu compute cores io links for gfx9.4.3 The PSP TA will only provide xGMI topology info for links between GPU sockets so links between partitions from different sockets will be hardcoded as 3 xGMI hops with 1 hops weighted as xGMI and 2 hops weighted with a new intra-socket weight to indicate the longest possible distance. If the link between a partition and the CPU is non-PCIe, then assume the CPU (CCDs) is located within the same socket as the partition and represent the link as an intra-socket weighted single hop XGMI link with memory bandwidth. Links between partitions within a single socket will be abstracted as single hop xGMI links weighted with the new intra-socket weight and will have memory bandwidth. Finally, use the unused function bits in the location ID to represent the coordinates of the compute partition within its socket. A follow on patch will resolve the requirement for GPU socket xGMI link representation sometime later. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 49 ++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_crat.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index f5aebba31e88..dc93a67257e1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1166,7 +1166,7 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) props->weight = 20; else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI) - props->weight = 15 * iolink->num_hops_xgmi; + props->weight = iolink->weight_xgmi; else props->weight = node_distance(id_from, id_to); @@ -1972,6 +1972,9 @@ static void kfd_find_numa_node_in_srat(struct kfd_node *kdev) } #endif +#define KFD_CRAT_INTRA_SOCKET_WEIGHT 13 +#define KFD_CRAT_XGMI_WEIGHT 15 + /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU * to its NUMA node * @avail_size: Available size in the memory @@ -2003,6 +2006,12 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, * TODO: Fill-in other fields of iolink subtype */ if (kdev->adev->gmc.xgmi.connected_to_cpu) { + bool ext_cpu = KFD_GC_VERSION(kdev) != IP_VERSION(9, 4, 3); + int mem_bw = 819200, weight = ext_cpu ? KFD_CRAT_XGMI_WEIGHT : + KFD_CRAT_INTRA_SOCKET_WEIGHT; + uint32_t bandwidth = ext_cpu ? amdgpu_amdkfd_get_xgmi_bandwidth_mbytes( + kdev->adev, NULL, true) : mem_bw; + /* * with host gpu xgmi link, host can access gpu memory whether * or not pcie bar type is large, so always create bidirectional @@ -2010,14 +2019,9 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, */ sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL; sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; - sub_type_hdr->num_hops_xgmi = 1; - if (KFD_GC_VERSION(kdev) == IP_VERSION(9, 4, 2)) { - sub_type_hdr->minimum_bandwidth_mbs = - amdgpu_amdkfd_get_xgmi_bandwidth_mbytes( - kdev->adev, NULL, true); - sub_type_hdr->maximum_bandwidth_mbs = - sub_type_hdr->minimum_bandwidth_mbs; - } + sub_type_hdr->weight_xgmi = weight; + sub_type_hdr->minimum_bandwidth_mbs = bandwidth; + sub_type_hdr->maximum_bandwidth_mbs = bandwidth; } else { sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS; sub_type_hdr->minimum_bandwidth_mbs = @@ -2050,6 +2054,8 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size, uint32_t proximity_domain_from, uint32_t proximity_domain_to) { + bool use_ta_info = kdev->kfd->num_nodes == 1; + *avail_size -= sizeof(struct crat_subtype_iolink); if (*avail_size < 0) return -ENOMEM; @@ -2064,12 +2070,25 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size, sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; sub_type_hdr->proximity_domain_from = proximity_domain_from; sub_type_hdr->proximity_domain_to = proximity_domain_to; - sub_type_hdr->num_hops_xgmi = - amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev); - sub_type_hdr->maximum_bandwidth_mbs = - amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, peer_kdev->adev, false); - sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ? - amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0; + + if (use_ta_info) { + sub_type_hdr->weight_xgmi = KFD_CRAT_XGMI_WEIGHT * + amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev); + sub_type_hdr->maximum_bandwidth_mbs = + amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, + peer_kdev->adev, false); + sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ? + amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0; + } else { + bool is_single_hop = kdev->kfd == peer_kdev->kfd; + int weight = is_single_hop ? KFD_CRAT_INTRA_SOCKET_WEIGHT : + (2 * KFD_CRAT_INTRA_SOCKET_WEIGHT) + KFD_CRAT_XGMI_WEIGHT; + int mem_bw = 819200; + + sub_type_hdr->weight_xgmi = weight; + sub_type_hdr->maximum_bandwidth_mbs = is_single_hop ? mem_bw : 0; + sub_type_hdr->minimum_bandwidth_mbs = is_single_hop ? mem_bw : 0; + } return 0; } -- cgit From b2ef2fdffed2a7fd5bf3f178a6a0427487dba5dd Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Thu, 5 Jan 2023 11:39:34 -0500 Subject: drm/amdkfd: Report XGMI IOLINKs for GFXIP9.4.3 GFXIP 9.4.3 could be in APU or carveout mode but we cannot use the xgmi.connected_to_cpu flag to identify the iolinks type. Use appropriate APU or Carveout mode based condition to report xgmi connection in kfd topology. Reviewed-by: Felix Kuehling Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_crat.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index dc93a67257e1..16475921587b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -2005,7 +2005,10 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, /* Fill in IOLINK subtype. * TODO: Fill-in other fields of iolink subtype */ - if (kdev->adev->gmc.xgmi.connected_to_cpu) { + if (kdev->adev->gmc.xgmi.connected_to_cpu || + (KFD_GC_VERSION(kdev) == IP_VERSION(9, 4, 3) && + kdev->adev->smuio.funcs->get_pkg_type(kdev->adev) == + AMDGPU_PKG_TYPE_APU)) { bool ext_cpu = KFD_GC_VERSION(kdev) != IP_VERSION(9, 4, 3); int mem_bw = 819200, weight = ext_cpu ? KFD_CRAT_XGMI_WEIGHT : KFD_CRAT_INTRA_SOCKET_WEIGHT; -- cgit From 228ce176434b0f61451019065393040d58e1668d Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Fri, 27 Jan 2023 21:57:00 -0500 Subject: drm/amdgpu: Handle VRAM dependencies on GFXIP9.4.3 [For 1P NPS1 mode driver bringup] Changes required to initialize the amdgpu driver with frontdoor firmware loading and discovery=2 with the native mode SBIOS that enables CPU GPU unified interleaved memory. sudo modprobe amdgpu discovery=2 Once PSP TMR region is reported via the ACPI interface, the dependency on the ip_discovery.bin will be removed. Choice of where to allocate driver table is given to each IP version. In general, both GTT and VRAM domains will be considered. If one of the tables has a strict restriction for VRAM domain, then only VRAM domain is considered. Reviewed-by: Felix Kuehling (lijo: Modified the handling for SMU Tables) Signed-off-by: Lijo Lazar Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_crat.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 16475921587b..1aaf933f9f48 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -30,6 +30,9 @@ #include "amdgpu.h" #include "amdgpu_amdkfd.h" +/* Fixme: Fake 32GB for 1PNPS1 mode bringup */ +#define DUMMY_VRAM_SIZE 31138512896 + /* GPU Processor ID base for dGPUs for which VCRAT needs to be created. * GPU processor ID are expressed with Bit[31]=1. * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs @@ -1053,6 +1056,8 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem, props->heap_type = heap_type; props->flags = flags; + if (size_in_bytes == 0) + size_in_bytes = DUMMY_VRAM_SIZE; /* Fixme: TBD */ props->size_in_bytes = size_in_bytes; props->width = width; -- cgit From 315e29eca57f85107cc6f687c2d510aa532fb3f0 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Mon, 20 Mar 2023 11:21:38 -0400 Subject: drm/amdkfd: Move local_mem_info to kfd_node We need to track memory usage on a per partition basis. To do that, store the local memory information in KFD node instead of kfd device. v2: squash in fix ("amdkfd: Use mem_id to access mem_partition info") Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_crat.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 1aaf933f9f48..950af6820153 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -2191,7 +2191,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, * report the total FB size (public+private) as a single * private heap. */ - local_mem_info = kdev->kfd->local_mem_info; + local_mem_info = kdev->local_mem_info; sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + sub_type_hdr->length); -- cgit From bbcc3514ab4f7ec3ae2273ad08b0a1b6b4aa9dd9 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 2 Jun 2023 07:18:06 -0500 Subject: drm/amd: Check that a system is a NUMA system before looking for SRAT It's pointless on laptops to look for the SRAT table as these are not NUMA. Check the number of possible nodes is > 1 to decide whether to look for SRAT. Suggested-by: Felix Kuehling Signed-off-by: Mario Limonciello Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_crat.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 950af6820153..3dcd8f8bc98e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -2041,7 +2041,8 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, sub_type_hdr->proximity_domain_from = proximity_domain; #ifdef CONFIG_ACPI_NUMA - if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE) + if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE && + num_possible_nodes() > 1) kfd_find_numa_node_in_srat(kdev); #endif #ifdef CONFIG_NUMA -- cgit