From 8dc1db3172ae2f17ae71e33b608a33411ce8a1aa Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Wed, 14 Sep 2022 16:39:48 +0800 Subject: drm/amdkfd: Introduce kfd_node struct (v5) Introduce a new structure, kfd_node, which will now represent a compute node. kfd_node is carved out of kfd_dev structure. kfd_dev struct now will become the parent of kfd_node, and will store common resources such as doorbells, GTT sub-alloctor etc. kfd_node struct will store all resources specific to a compute node, such as device queue manager, interrupt handling etc. This is the first step in adding compute partition support in KFD. v2: introduce kfd_node struct to gc v11 (Hawking) v3: make reference to kfd_dev struct through kfd_node (Morris) v4: use kfd_node instead for kfd isr/mqd functions (Morris) v5: rebase (Alex) Signed-off-by: Mukul Joshi Tested-by: Amber Lin Reviewed-by: Felix Kuehling Signed-off-by: Hawking Zhang Signed-off-by: Morris Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 56 +++++++++++++++---------------- 1 file changed, 28 insertions(+), 28 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_topology.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 8e4124dcb6e4..06a11186d947 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -96,7 +96,7 @@ struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id) return ret; } -struct kfd_dev *kfd_device_by_id(uint32_t gpu_id) +struct kfd_node *kfd_device_by_id(uint32_t gpu_id) { struct kfd_topology_device *top_dev; @@ -107,10 +107,10 @@ struct kfd_dev *kfd_device_by_id(uint32_t gpu_id) return top_dev->gpu; } -struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev) +struct kfd_node *kfd_device_by_pci_dev(const struct pci_dev *pdev) { struct kfd_topology_device *top_dev; - struct kfd_dev *device = NULL; + struct kfd_node *device = NULL; down_read(&topology_lock); @@ -125,10 +125,10 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev) return device; } -struct kfd_dev *kfd_device_by_adev(const struct amdgpu_device *adev) +struct kfd_node *kfd_device_by_adev(const struct amdgpu_device *adev) { struct kfd_topology_device *top_dev; - struct kfd_dev *device = NULL; + struct kfd_node *device = NULL; down_read(&topology_lock); @@ -526,7 +526,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, if (dev->gpu) { log_max_watch_addr = - __ilog2_u32(dev->gpu->device_info.num_of_watch_points); + __ilog2_u32(dev->gpu->kfd->device_info.num_of_watch_points); if (log_max_watch_addr) { dev->node_props.capability |= @@ -548,11 +548,11 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, sysfs_show_64bit_prop(buffer, offs, "local_mem_size", 0ULL); sysfs_show_32bit_prop(buffer, offs, "fw_version", - dev->gpu->mec_fw_version); + dev->gpu->kfd->mec_fw_version); sysfs_show_32bit_prop(buffer, offs, "capability", dev->node_props.capability); sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version", - dev->gpu->sdma_fw_version); + dev->gpu->kfd->sdma_fw_version); sysfs_show_64bit_prop(buffer, offs, "unique_id", dev->gpu->adev->unique_id); @@ -1157,7 +1157,7 @@ void kfd_topology_shutdown(void) up_write(&topology_lock); } -static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) +static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu) { uint32_t hashout; uint32_t buf[7]; @@ -1167,8 +1167,8 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) if 
(!gpu) return 0; - local_mem_size = gpu->local_mem_info.local_mem_size_private + - gpu->local_mem_info.local_mem_size_public; + local_mem_size = gpu->kfd->local_mem_info.local_mem_size_private + + gpu->kfd->local_mem_info.local_mem_size_public; buf[0] = gpu->adev->pdev->devfn; buf[1] = gpu->adev->pdev->subsystem_vendor | (gpu->adev->pdev->subsystem_device << 16); @@ -1188,7 +1188,7 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) * list then return NULL. This means a new topology device has to * be created for this GPU. */ -static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) +static struct kfd_topology_device *kfd_assign_gpu(struct kfd_node *gpu) { struct kfd_topology_device *dev; struct kfd_topology_device *out_dev = NULL; @@ -1201,7 +1201,7 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) /* Discrete GPUs need their own topology device list * entries. Don't assign them to CPU/APU nodes. */ - if (!gpu->use_iommu_v2 && + if (!gpu->kfd->use_iommu_v2 && dev->node_props.cpu_cores_count) continue; @@ -1275,7 +1275,7 @@ static void kfd_set_iolink_no_atomics(struct kfd_topology_device *dev, CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT; /* set gpu (dev) flags. */ } else { - if (!dev->gpu->pci_atomic_requested || + if (!dev->gpu->kfd->pci_atomic_requested || dev->gpu->adev->asic_type == CHIP_HAWAII) link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT; @@ -1569,8 +1569,8 @@ static int kfd_dev_create_p2p_links(void) if (dev == new_dev) break; if (!dev->gpu || !dev->gpu->adev || - (dev->gpu->hive_id && - dev->gpu->hive_id == new_dev->gpu->hive_id)) + (dev->gpu->kfd->hive_id && + dev->gpu->kfd->hive_id == new_dev->gpu->kfd->hive_id)) goto next; /* check if node(s) is/are peer accessible in one direction or bi-direction */ @@ -1590,7 +1590,6 @@ out: return ret; } - /* Helper function. 
See kfd_fill_gpu_cache_info for parameter description */ static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext, struct kfd_gpu_cache_info *pcache_info, @@ -1723,7 +1722,7 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, /* kfd_fill_cache_non_crat_info - Fill GPU cache info using kfd_gpu_cache_info * tables */ -static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_dev *kdev) +static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_node *kdev) { struct kfd_gpu_cache_info *pcache_info = NULL; int i, j, k; @@ -1805,7 +1804,7 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct pr_debug("Added [%d] GPU cache entries\n", num_of_entries); } -static int kfd_topology_add_device_locked(struct kfd_dev *gpu, uint32_t gpu_id, +static int kfd_topology_add_device_locked(struct kfd_node *gpu, uint32_t gpu_id, struct kfd_topology_device **dev) { int proximity_domain = ++topology_crat_proximity_domain; @@ -1865,7 +1864,7 @@ err: return res; } -int kfd_topology_add_device(struct kfd_dev *gpu) +int kfd_topology_add_device(struct kfd_node *gpu) { uint32_t gpu_id; struct kfd_topology_device *dev; @@ -1916,7 +1915,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu) dev->node_props.simd_arrays_per_engine = cu_info.num_shader_arrays_per_engine; - dev->node_props.gfx_target_version = gpu->device_info.gfx_target_version; + dev->node_props.gfx_target_version = + gpu->kfd->device_info.gfx_target_version; dev->node_props.vendor_id = gpu->adev->pdev->vendor; dev->node_props.device_id = gpu->adev->pdev->device; dev->node_props.capability |= @@ -1929,15 +1929,15 @@ int kfd_topology_add_device(struct kfd_dev *gpu) dev->node_props.max_engine_clk_ccompute = cpufreq_quick_get_max(0) / 1000; dev->node_props.drm_render_minor = - gpu->shared_resources.drm_render_minor; + gpu->kfd->shared_resources.drm_render_minor; - dev->node_props.hive_id = gpu->hive_id; + dev->node_props.hive_id = gpu->kfd->hive_id; dev->node_props.num_sdma_engines = kfd_get_num_sdma_engines(gpu); dev->node_props.num_sdma_xgmi_engines = kfd_get_num_xgmi_sdma_engines(gpu); dev->node_props.num_sdma_queues_per_engine = - gpu->device_info.num_sdma_queues_per_engine - - gpu->device_info.num_reserved_sdma_queues_per_engine; + gpu->kfd->device_info.num_sdma_queues_per_engine - + gpu->kfd->device_info.num_reserved_sdma_queues_per_engine; dev->node_props.num_gws = (dev->gpu->gws && dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ? dev->gpu->adev->gds.gws_size : 0; @@ -1979,7 +1979,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu) * Overwrite ATS capability according to needs_iommu_device to fix * potential missing corresponding bit in CRAT of BIOS. 
*/ - if (dev->gpu->use_iommu_v2) + if (dev->gpu->kfd->use_iommu_v2) dev->node_props.capability |= HSA_CAP_ATS_PRESENT; else dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT; @@ -2079,7 +2079,7 @@ static void kfd_topology_update_io_links(int proximity_domain) } } -int kfd_topology_remove_device(struct kfd_dev *gpu) +int kfd_topology_remove_device(struct kfd_node *gpu) { struct kfd_topology_device *dev, *tmp; uint32_t gpu_id; @@ -2119,7 +2119,7 @@ int kfd_topology_remove_device(struct kfd_dev *gpu) * Return - 0: On success (@kdev will be NULL for non GPU nodes) * -1: If end of list */ -int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev) +int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_node **kdev) { struct kfd_topology_device *top_dev; -- cgit From 74c5b85da75475c73a8f040397610fbfcc2c3e78 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Mon, 9 May 2022 16:33:38 -0400 Subject: drm/amdkfd: Add spatial partitioning support in KFD This patch introduces multi-partition support in KFD. This patch includes: - Support for maximum 8 spatial partitions in KFD. - Initialize one HIQ per partition. - Management of VMID range depending on partition mode. - Management of doorbell aperture space between all partitions. - Each partition does its own queue management, interrupt handling, SMI event reporting. - IOMMU, if enabled with multiple partitions, will only work on first partition. - SPM is only supported on the first partition. - Currently, there is no support for resetting individual partitions. All partitions will reset together. Signed-off-by: Mukul Joshi Tested-by: Amber Lin Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_topology.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 06a11186d947..94af37df3ed2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -555,7 +555,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->gpu->kfd->sdma_fw_version); sysfs_show_64bit_prop(buffer, offs, "unique_id", dev->gpu->adev->unique_id); - + sysfs_show_32bit_prop(buffer, offs, "num_xcc", + dev->gpu->num_xcc_per_node); } return sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_ccompute", @@ -1160,7 +1161,7 @@ void kfd_topology_shutdown(void) static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu) { uint32_t hashout; - uint32_t buf[7]; + uint32_t buf[8]; uint64_t local_mem_size; int i; @@ -1177,8 +1178,9 @@ static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu) buf[4] = gpu->adev->pdev->bus->number; buf[5] = lower_32_bits(local_mem_size); buf[6] = upper_32_bits(local_mem_size); + buf[7] = gpu->start_xcc_id | (gpu->num_xcc_per_node << 16); - for (i = 0, hashout = 0; i < 7; i++) + for (i = 0, hashout = 0; i < 8; i++) hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH); return hashout; -- cgit From f38f147ab3121adbd7510a82e6eb0b41a356c26e Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Mon, 9 May 2022 22:35:55 -0400 Subject: drm/amdkfd: Update sysfs node properties for multi XCC Update simd_count and array_count node properties to report values multiplied by number of XCCs in the partition. 
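As a rough illustration of what this reporting change means for consumers (not part of this patch; the sysfs path and node index are assumptions based on the usual KFD topology layout), a userspace check might read the aggregated value like this:

#include <stdio.h>
#include <string.h>

/* Illustrative only: read simd_count from a KFD topology node's properties
 * file; the path and node index below are assumptions, not from this patch.
 */
int main(void)
{
	FILE *f = fopen("/sys/class/kfd/kfd/topology/nodes/1/properties", "r");
	char name[64];
	unsigned long long val;

	if (!f)
		return 1;
	while (fscanf(f, "%63s %llu", name, &val) == 2)
		if (!strcmp(name, "simd_count"))
			printf("simd_count across all XCCs in the partition: %llu\n", val);
	fclose(f);
	return 0;
}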
Signed-off-by: Mukul Joshi Tested-by: Amber Lin Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_topology.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 94af37df3ed2..6d958bf0fe90 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -468,7 +468,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, sysfs_show_32bit_prop(buffer, offs, "cpu_cores_count", dev->node_props.cpu_cores_count); sysfs_show_32bit_prop(buffer, offs, "simd_count", - dev->gpu ? dev->node_props.simd_count : 0); + dev->gpu ? (dev->node_props.simd_count * + dev->gpu->num_xcc_per_node) : 0); sysfs_show_32bit_prop(buffer, offs, "mem_banks_count", dev->node_props.mem_banks_count); sysfs_show_32bit_prop(buffer, offs, "caches_count", @@ -492,7 +493,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, sysfs_show_32bit_prop(buffer, offs, "wave_front_size", dev->node_props.wave_front_size); sysfs_show_32bit_prop(buffer, offs, "array_count", - dev->node_props.array_count); + dev->gpu ? (dev->node_props.array_count * + dev->gpu->num_xcc_per_node) : 0); sysfs_show_32bit_prop(buffer, offs, "simd_arrays_per_engine", dev->node_props.simd_arrays_per_engine); sysfs_show_32bit_prop(buffer, offs, "cu_per_simd_array", -- cgit From 92085240ef9c0ec60c27a60b3cc0d4f5266fa511 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Tue, 3 May 2022 10:16:46 -0400 Subject: drm/amdkfd: add gpu compute cores io links for gfx9.4.3 The PSP TA will only provide xGMI topology info for links between GPU sockets so links between partitions from different sockets will be hardcoded as 3 xGMI hops with 1 hops weighted as xGMI and 2 hops weighted with a new intra-socket weight to indicate the longest possible distance. If the link between a partition and the CPU is non-PCIe, then assume the CPU (CCDs) is located within the same socket as the partition and represent the link as an intra-socket weighted single hop XGMI link with memory bandwidth. Links between partitions within a single socket will be abstracted as single hop xGMI links weighted with the new intra-socket weight and will have memory bandwidth. Finally, use the unused function bits in the location ID to represent the coordinates of the compute partition within its socket. A follow on patch will resolve the requirement for GPU socket xGMI link representation sometime later. 
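A rough sketch of the resulting encoding (illustrative only; it assumes the standard PCI bus/devfn packing and that the GPU itself is PCI function 0, neither of which is spelled out in this patch):

/* Hypothetical decode of a gfx9.4.3 node's location_id:
 *
 *   location_id = (bus << 8) | (slot << 3) | partition_index
 *
 * where the low three bits are the otherwise-unused PCI function bits.
 */
static inline unsigned int location_id_partition(unsigned int location_id)
{
	return location_id & 0x7;
}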
Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_topology.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 6d958bf0fe90..d3e70341dfad 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1926,7 +1926,11 @@ int kfd_topology_add_device(struct kfd_node *gpu) dev->node_props.capability |= ((dev->gpu->adev->rev_id << HSA_CAP_ASIC_REVISION_SHIFT) & HSA_CAP_ASIC_REVISION_MASK); + dev->node_props.location_id = pci_dev_id(gpu->adev->pdev); + if (KFD_GC_VERSION(dev->gpu->kfd) == IP_VERSION(9, 4, 3)) + dev->node_props.location_id |= dev->gpu->node_id; + dev->node_props.domain = pci_domain_nr(gpu->adev->pdev->bus); dev->node_props.max_engine_clk_fcompute = amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->adev); -- cgit From b2ef2fdffed2a7fd5bf3f178a6a0427487dba5dd Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Thu, 5 Jan 2023 11:39:34 -0500 Subject: drm/amdkfd: Report XGMI IOLINKs for GFXIP9.4.3 GFXIP 9.4.3 could be in APU or carveout mode but we cannot use the xgmi.connected_to_cpu flag to identify the iolinks type. Use appropriate APU or Carveout mode based condition to report xgmi connection in kfd topology. Reviewed-by: Felix Kuehling Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_topology.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index d3e70341dfad..5373a79ac6a1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1327,9 +1327,8 @@ static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev) continue; /* Include the CPU peer in GPU hive if connected over xGMI. */ - if (!peer_dev->gpu && !peer_dev->node_props.hive_id && - dev->node_props.hive_id && - dev->gpu->adev->gmc.xgmi.connected_to_cpu) + if (!peer_dev->gpu && + link->iolink_type == CRAT_IOLINK_TYPE_XGMI) peer_dev->node_props.hive_id = dev->node_props.hive_id; list_for_each_entry(inbound_link, &peer_dev->io_link_props, -- cgit From 1698e200e88db96aef7d16aa3d63df68a209ffbd Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Thu, 2 Feb 2023 11:10:08 -0500 Subject: drm/amdkfd: bind cpu and hiveless gpu to a hive if xgmi connected If a CPU and GPU are xGMI connected but the GPU is hiveless with respect to other GPUs, create a new CPU-GPU hive using the GPU's PCI device location ID as the new hive ID to maintain fine grain memory access usage. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_topology.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 5373a79ac6a1..c7072fff778e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1328,8 +1328,15 @@ static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev) /* Include the CPU peer in GPU hive if connected over xGMI. 
*/ if (!peer_dev->gpu && - link->iolink_type == CRAT_IOLINK_TYPE_XGMI) + link->iolink_type == CRAT_IOLINK_TYPE_XGMI) { + /* + * If the GPU is not part of a GPU hive, use its pci + * device location as the hive ID to bind with the CPU. + */ + if (!dev->node_props.hive_id) + dev->node_props.hive_id = pci_dev_id(dev->gpu->adev->pdev); peer_dev->node_props.hive_id = dev->node_props.hive_id; + } list_for_each_entry(inbound_link, &peer_dev->io_link_props, list) { -- cgit From c4050ff1a43eec08498b1ed876efc6213592dba0 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 9 Feb 2023 16:30:53 +0530 Subject: drm/amdkfd: Use xcc mask for identifying xcc Instead of the start xcc id and number of xcc per node, use the xcc mask, which is the mask of logical ids of xccs belonging to a partition. Signed-off-by: Lijo Lazar Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_topology.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index c7072fff778e..d2a42b6b1fa8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -469,7 +469,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->node_props.cpu_cores_count); sysfs_show_32bit_prop(buffer, offs, "simd_count", dev->gpu ? (dev->node_props.simd_count * - dev->gpu->num_xcc_per_node) : 0); + NUM_XCC(dev->gpu->xcc_mask)) : 0); sysfs_show_32bit_prop(buffer, offs, "mem_banks_count", dev->node_props.mem_banks_count); sysfs_show_32bit_prop(buffer, offs, "caches_count", @@ -494,7 +494,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->node_props.wave_front_size); sysfs_show_32bit_prop(buffer, offs, "array_count", dev->gpu ? (dev->node_props.array_count * - dev->gpu->num_xcc_per_node) : 0); + NUM_XCC(dev->gpu->xcc_mask)) : 0); sysfs_show_32bit_prop(buffer, offs, "simd_arrays_per_engine", dev->node_props.simd_arrays_per_engine); sysfs_show_32bit_prop(buffer, offs, "cu_per_simd_array", @@ -558,7 +558,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, sysfs_show_64bit_prop(buffer, offs, "unique_id", dev->gpu->adev->unique_id); sysfs_show_32bit_prop(buffer, offs, "num_xcc", - dev->gpu->num_xcc_per_node); + NUM_XCC(dev->gpu->xcc_mask)); } return sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_ccompute", @@ -1180,7 +1180,7 @@ static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu) buf[4] = gpu->adev->pdev->bus->number; buf[5] = lower_32_bits(local_mem_size); buf[6] = upper_32_bits(local_mem_size); - buf[7] = gpu->start_xcc_id | (gpu->num_xcc_per_node << 16); + buf[7] = (ffs(gpu->xcc_mask) - 1) | (NUM_XCC(gpu->xcc_mask) << 16); for (i = 0, hashout = 0; i < 8; i++) hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH); -- cgit From 610dab118ff5013d46069c828b58d576e0907b66 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Fri, 31 Mar 2023 11:13:40 -0400 Subject: drm/amdkfd: Move pgmap to amdgpu_kfd_dev structure The VRAM pgmap resource is allocated every time compute partitions are switched, because kfd_dev is re-initialized by post_partition_switch. As a result, the memory region resource is leaked and system memory usage accounting becomes unbalanced. The pgmap resource should be allocated and registered only once when the driver is loaded and freed when it is unloaded, so move it from kfd_dev to amdgpu_kfd_dev.
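Roughly, the intended ownership after this change looks as follows (a sketch, not the literal structure definition):

/* Sketch only: the pgmap moves next to the amdgpu device, which exists for
 * the whole driver lifetime, instead of living in kfd_dev, which is
 * re-created on every partition switch.
 */
struct amdgpu_kfd_dev {
	struct kfd_dev *dev;
	/* ... */
	struct dev_pagemap pgmap;	/* allocated once at driver load */
};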
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_topology.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index d2a42b6b1fa8..6d6243b978e1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -2021,7 +2021,7 @@ int kfd_topology_add_device(struct kfd_node *gpu) dev->node_props.capability |= (dev->gpu->adev->ras_enabled != 0) ? HSA_CAP_RASEVENTNOTIFY : 0; - if (KFD_IS_SVM_API_SUPPORTED(dev->gpu->adev->kfd.dev)) + if (KFD_IS_SVM_API_SUPPORTED(dev->gpu->adev)) dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED; kfd_debug_print_topology(); -- cgit From a476c0c645535cc0361938becb440b4239996079 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 23 Feb 2023 11:03:37 -0500 Subject: drm/amdkfd: Store drm node minor number for kfd nodes From the KFD topology, an application will find the kfd node with the corresponding drm device node minor number; for example, if the partition drm nodes start from /dev/dri/renderD129, then KFD node 0 will store drm node minor number 129. The application will open drm node /dev/dri/renderD129 to create an amdgpu vm for kfd node 0 with the correct vm->mem_id to indicate the memory partition. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_topology.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 6d6243b978e1..a8e25aecf839 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1942,8 +1942,12 @@ int kfd_topology_add_device(struct kfd_node *gpu) amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->adev); dev->node_props.max_engine_clk_ccompute = cpufreq_quick_get_max(0) / 1000; - dev->node_props.drm_render_minor = - gpu->kfd->shared_resources.drm_render_minor; + + if (gpu->xcp) + dev->node_props.drm_render_minor = gpu->xcp->ddev->render->index; + else + dev->node_props.drm_render_minor = + gpu->kfd->shared_resources.drm_render_minor; dev->node_props.hive_id = gpu->kfd->hive_id; dev->node_props.num_sdma_engines = kfd_get_num_sdma_engines(gpu); -- cgit From 2fa9ff25de08e598af051c76b216d2f073b2ee89 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 9 Mar 2023 19:30:02 -0500 Subject: drm/amdgpu: KFD graphics interop support compute partition kfd_ioctl_get_dmabuf uses the amdgpu bo xcp_id to get the gpu_id of the KFD node from the exported dmabuf_adev, and then creates the kfd bo on the correct adev and KFD node when importing the amdgpu bo to KFD. Remove the function kfd_device_by_adev; it is not needed, as it gives the same result as dmabuf_adev->kfd.dev->nodes[0]->id.
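For instance (an illustrative sketch, not a hunk from this patch), a former kfd_device_by_adev() call site reduces to a direct dereference:

/* Illustrative only */
struct kfd_node *node = dmabuf_adev->kfd.dev->nodes[0];
uint32_t gpu_id = node->id;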
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_topology.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index a8e25aecf839..dbb6159344b3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -125,24 +125,6 @@ struct kfd_node *kfd_device_by_pci_dev(const struct pci_dev *pdev) return device; } -struct kfd_node *kfd_device_by_adev(const struct amdgpu_device *adev) -{ - struct kfd_topology_device *top_dev; - struct kfd_node *device = NULL; - - down_read(&topology_lock); - - list_for_each_entry(top_dev, &topology_device_list, list) - if (top_dev->gpu && top_dev->gpu->adev == adev) { - device = top_dev->gpu; - break; - } - - up_read(&topology_lock); - - return device; -} - /* Called with write topology_lock acquired */ static void kfd_release_topology_device(struct kfd_topology_device *dev) { -- cgit From 315e29eca57f85107cc6f687c2d510aa532fb3f0 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Mon, 20 Mar 2023 11:21:38 -0400 Subject: drm/amdkfd: Move local_mem_info to kfd_node We need to track memory usage on a per-partition basis. To do that, store the local memory information in the KFD node instead of the kfd device. v2: squash in fix ("amdkfd: Use mem_id to access mem_partition info") Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_topology.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index dbb6159344b3..e0bacf017a40 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1152,8 +1152,8 @@ static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu) if (!gpu) return 0; - local_mem_size = gpu->kfd->local_mem_info.local_mem_size_private + - gpu->kfd->local_mem_info.local_mem_size_public; + local_mem_size = gpu->local_mem_info.local_mem_size_private + + gpu->local_mem_info.local_mem_size_public; buf[0] = gpu->adev->pdev->devfn; buf[1] = gpu->adev->pdev->subsystem_vendor | (gpu->adev->pdev->subsystem_device << 16); @@ -1234,7 +1234,8 @@ static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev) * for APUs - If CRAT from ACPI reports more than one bank, then * all the banks will report the same mem_clk_max information */ - amdgpu_amdkfd_get_local_mem_info(dev->gpu->adev, &local_mem_info); + amdgpu_amdkfd_get_local_mem_info(dev->gpu->adev, &local_mem_info, + dev->gpu->xcp->id); list_for_each_entry(mem, &dev->mem_props, list) mem->mem_clk_max = local_mem_info.mem_clk_max; -- cgit From 9a3ce1a7a9e5372d8c275bf3fbef4456c8407145 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Fri, 12 May 2023 13:22:57 +0800 Subject: drm/amdgpu: Do not access members of xcp w/o check (v2) Not all ASICs need an xcp. Ensure xcp availability is checked before accessing its members.
v2: add missing change in kfd_topology.c Signed-off-by: Hawking Zhang Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_topology.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index e0bacf017a40..8302d8967158 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1235,7 +1235,7 @@ static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev) * all the banks will report the same mem_clk_max information */ amdgpu_amdkfd_get_local_mem_info(dev->gpu->adev, &local_mem_info, - dev->gpu->xcp->id); + dev->gpu->xcp); list_for_each_entry(mem, &dev->mem_props, list) mem->mem_clk_max = local_mem_info.mem_clk_max; -- cgit From d230f1bfe7a1977565ce1e2804ddb7b7a3d911ff Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Fri, 25 Mar 2022 12:39:06 -0400 Subject: drm/amdkfd: display debug capabilities Expose debug capabilities in the KFD topology node's HSA capabilities and debug properties flags. Ensure correct capabilities are exposed based on firmware support. Flag definitions can be referenced in uapi/linux/kfd_sysfs.h. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 101 ++++++++++++++++++++++++++++-- 1 file changed, 96 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_topology.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 8302d8967158..3def25b2bdbb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -535,6 +535,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->gpu->kfd->mec_fw_version); sysfs_show_32bit_prop(buffer, offs, "capability", dev->node_props.capability); + sysfs_show_64bit_prop(buffer, offs, "debug_prop", + dev->node_props.debug_prop); sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version", dev->gpu->kfd->sdma_fw_version); sysfs_show_64bit_prop(buffer, offs, "unique_id", @@ -1857,6 +1859,97 @@ err: return res; } +static void kfd_topology_set_dbg_firmware_support(struct kfd_topology_device *dev) +{ + bool firmware_supported = true; + + if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0) && + KFD_GC_VERSION(dev->gpu) < IP_VERSION(12, 0, 0)) { + firmware_supported = + (dev->gpu->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 9; + goto out; + } + + /* + * Note: Any unlisted devices here are assumed to support exception handling. + * Add additional checks here as needed. 
+ */ + switch (KFD_GC_VERSION(dev->gpu)) { + case IP_VERSION(9, 0, 1): + firmware_supported = dev->gpu->kfd->mec_fw_version >= 459 + 32768; + break; + case IP_VERSION(9, 1, 0): + case IP_VERSION(9, 2, 1): + case IP_VERSION(9, 2, 2): + case IP_VERSION(9, 3, 0): + case IP_VERSION(9, 4, 0): + firmware_supported = dev->gpu->kfd->mec_fw_version >= 459; + break; + case IP_VERSION(9, 4, 1): + firmware_supported = dev->gpu->kfd->mec_fw_version >= 60; + break; + case IP_VERSION(9, 4, 2): + firmware_supported = dev->gpu->kfd->mec_fw_version >= 51; + break; + case IP_VERSION(10, 1, 10): + case IP_VERSION(10, 1, 2): + case IP_VERSION(10, 1, 1): + firmware_supported = dev->gpu->kfd->mec_fw_version >= 144; + break; + case IP_VERSION(10, 3, 0): + case IP_VERSION(10, 3, 2): + case IP_VERSION(10, 3, 1): + case IP_VERSION(10, 3, 4): + case IP_VERSION(10, 3, 5): + firmware_supported = dev->gpu->kfd->mec_fw_version >= 89; + break; + case IP_VERSION(10, 1, 3): + case IP_VERSION(10, 3, 3): + firmware_supported = false; + break; + default: + break; + } + +out: + if (firmware_supported) + dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED; +} + +static void kfd_topology_set_capabilities(struct kfd_topology_device *dev) +{ + dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << + HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & + HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); + + dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_SUPPORT | + HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_TRAP_OVERRIDE_SUPPORTED | + HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_MODE_SUPPORTED; + + if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) { + dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 | + HSA_DBG_WATCH_ADDR_MASK_HI_BIT; + + if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(9, 4, 2)) + dev->node_props.debug_prop |= + HSA_DBG_DISPATCH_INFO_ALWAYS_VALID; + else + dev->node_props.capability |= + HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED; + } else { + dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 | + HSA_DBG_WATCH_ADDR_MASK_HI_BIT; + + if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(11, 0, 0)) + dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID; + else + dev->node_props.capability |= + HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED; + } + + kfd_topology_set_dbg_firmware_support(dev); +} + int kfd_topology_add_device(struct kfd_node *gpu) { uint32_t gpu_id; @@ -1967,13 +2060,11 @@ int kfd_topology_add_device(struct kfd_node *gpu) HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); break; default: - if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 0, 1)) - dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << - HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & - HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); - else + if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(9, 0, 1)) WARN(1, "Unexpected ASIC family %u", dev->gpu->adev->asic_type); + else + kfd_topology_set_capabilities(dev); } /* -- cgit From 69a8c3ae2dea84a6d571e4c1aad306f630f3ccfd Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Thu, 1 Sep 2022 11:27:15 -0400 Subject: drm/amdkfd: apply trap workaround for gfx11 Due to a HW bug, waves in only half the shader arrays can enter trap. When starting a debug session, relocate all waves to the first shader array of each shader engine and mask off the 2nd shader array as unavailable. When ending a debug session, re-enable the 2nd shader array per shader engine. User CU masking per queue cannot be guaranteed to remain functional if requested during debugging (e.g. 
user cu mask requests only 2nd shader array as an available resource leading to zero HW resources available) nor can the runtime be alerted of any of these changes during execution. Make user CU masking and debugging mutually exclusive with respect to availability. If the debugger tries to attach to a process with a user cu masked queue, return the runtime status as enabled but busy. If the debugger tries to attach and fails to reallocate queue waves to the first shader array of each shader engine, return the runtime status as enabled but with an error. In addition, like other multi-process debug supported devices, disable trap temporary setup per-process to avoid performance impact from setup overhead. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_topology.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 3def25b2bdbb..faa7939f35bd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1863,10 +1863,13 @@ static void kfd_topology_set_dbg_firmware_support(struct kfd_topology_device *de { bool firmware_supported = true; + /* + * FIXME: GFX11 FW currently not sufficient to deal with CWSR WA. + * Updated FW with API changes coming soon. + */ if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0) && KFD_GC_VERSION(dev->gpu) < IP_VERSION(12, 0, 0)) { - firmware_supported = - (dev->gpu->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 9; + firmware_supported = false; goto out; } -- cgit From 09d49e14ea6fd125a21f89b80f888c09be32a174 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Tue, 23 May 2023 11:57:27 -0400 Subject: drm/amdkfd: fix and enable debugging for gfx11 There are a couple of fixes required to enable gfx11 debugging. First, ADD_QUEUE.trap_en is an inappropriate place to toggle a per-process register, so move it to SET_SHADER_DEBUGGER.trap_en. When ADD_QUEUE.skip_process_ctx_clear is set, MES will prioritize the SET_SHADER_DEBUGGER.trap_en setting. Second, to preserve correct save/restore of privileged wave states in coordination with the trap enablement setting, resume suspended waves early in the disable call. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_topology.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index faa7939f35bd..90b86a6ac7bd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1863,13 +1863,15 @@ static void kfd_topology_set_dbg_firmware_support(struct kfd_topology_device *de { bool firmware_supported = true; - /* - * FIXME: GFX11 FW currently not sufficient to deal with CWSR WA. - * Updated FW with API changes coming soon. - */ if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0) && KFD_GC_VERSION(dev->gpu) < IP_VERSION(12, 0, 0)) { - firmware_supported = false; + uint32_t mes_api_rev = (dev->gpu->adev->mes.sched_version & + AMDGPU_MES_API_VERSION_MASK) >> + AMDGPU_MES_API_VERSION_SHIFT; + uint32_t mes_rev = dev->gpu->adev->mes.sched_version & + AMDGPU_MES_VERSION_MASK; + + firmware_supported = (mes_api_rev >= 14) && (mes_rev >= 64); goto out; } -- cgit
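As a worked example (assuming the conventional AMDGPU_MES_* field layout, with the release number in the low 12 bits of sched_version and the API revision in the next 12 bits; the value below is made up for illustration):

/*   sched_version = 0x0000e040
 *   mes_api_rev   = (sched_version & AMDGPU_MES_API_VERSION_MASK) >> AMDGPU_MES_API_VERSION_SHIFT = 14
 *   mes_rev       =  sched_version & AMDGPU_MES_VERSION_MASK                                      = 64
 *
 * 14 >= 14 and 64 >= 64, so firmware_supported remains true for such a device.
 */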