diff options
Diffstat (limited to 'drivers/misc/habanalabs/common/command_submission.c')
-rw-r--r-- | drivers/misc/habanalabs/common/command_submission.c | 105 |
1 files changed, 72 insertions, 33 deletions
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 6dafff375f1c..4c8000fd246c 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -143,6 +143,7 @@ static void hl_fence_init(struct hl_fence *fence, u64 sequence) fence->cs_sequence = sequence; fence->error = 0; fence->timestamp = ktime_set(0, 0); + fence->mcs_handling_done = false; init_completion(&fence->completion); } @@ -431,11 +432,10 @@ static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs) /* Don't cancel TDR in case this CS was timedout because we might be * running from the TDR context */ - if (cs && (cs->timedout || - hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT)) + if (cs->timedout || hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT) return; - if (cs && cs->tdr_active) + if (cs->tdr_active) cancel_delayed_work_sync(&cs->work_tdr); spin_lock(&hdev->cs_mirror_lock); @@ -536,10 +536,21 @@ static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs) mcs_compl->timestamp = ktime_to_ns(fence->timestamp); complete_all(&mcs_compl->completion); + + /* + * Setting mcs_handling_done inside the lock ensures + * at least one fence have mcs_handling_done set to + * true before wait for mcs finish. This ensures at + * least one CS will be set as completed when polling + * mcs fences. + */ + fence->mcs_handling_done = true; } spin_unlock(&mcs_compl->lock); } + /* In case CS completed without mcs completion initialized */ + fence->mcs_handling_done = true; } static inline void cs_release_sob_reset_handler(struct hl_device *hdev, @@ -2371,32 +2382,48 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data) break; } - mcs_data->stream_master_qid_map |= fence->stream_master_qid_map; - - if (status == CS_WAIT_STATUS_BUSY) - continue; - - mcs_data->completion_bitmap |= BIT(i); - - /* - * best effort to extract timestamp. 
few notes: - * - if even single fence is gone we cannot extract timestamp - * (as fence not exist anymore) - * - for all completed CSs we take the earliest timestamp. - * for this we have to validate that: - * 1. given timestamp was indeed set - * 2. the timestamp is earliest of all timestamps so far - */ + switch (status) { + case CS_WAIT_STATUS_BUSY: + /* CS did not finished, keep waiting on its QID*/ + mcs_data->stream_master_qid_map |= + fence->stream_master_qid_map; + break; + case CS_WAIT_STATUS_COMPLETED: + /* + * Using mcs_handling_done to avoid possibility of mcs_data + * returns to user indicating CS completed before it finished + * all of its mcs handling, to avoid race the next time the + * user waits for mcs. + */ + if (!fence->mcs_handling_done) + break; - if (status == CS_WAIT_STATUS_GONE) { + mcs_data->completion_bitmap |= BIT(i); + /* + * For all completed CSs we take the earliest timestamp. + * For this we have to validate that the timestamp is + * earliest of all timestamps so far. + */ + if (mcs_data->update_ts && + (ktime_compare(fence->timestamp, first_cs_time) < 0)) + first_cs_time = fence->timestamp; + break; + case CS_WAIT_STATUS_GONE: mcs_data->update_ts = false; mcs_data->gone_cs = true; - } else if (mcs_data->update_ts && - (ktime_compare(fence->timestamp, - ktime_set(0, 0)) > 0) && - (ktime_compare(fence->timestamp, first_cs_time) < 0)) { - first_cs_time = fence->timestamp; + /* + * It is possible to get an old sequence numbers from user + * which related to already completed CSs and their fences + * already gone. In this case, CS set as completed but + * no need to consider its QID for mcs completion. 
+ */ + mcs_data->completion_bitmap |= BIT(i); + break; + default: + dev_err(hdev->dev, "Invalid fence status\n"); + return -EINVAL; } + } hl_fences_put(mcs_data->fence_arr, arr_len); @@ -2740,13 +2767,14 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u32 timeout_us, u64 user_address, - u32 target_value, u16 interrupt_offset, - enum hl_cs_wait_status *status) + u64 target_value, u16 interrupt_offset, + enum hl_cs_wait_status *status, + u64 *timestamp) { struct hl_user_pending_interrupt *pend; struct hl_user_interrupt *interrupt; unsigned long timeout, flags; - u32 completion_value; + u64 completion_value; long completion_rc; int rc = 0; @@ -2780,15 +2808,17 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, /* We check for completion value as interrupt could have been received * before we added the node to the wait list */ - if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) { + if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) { dev_err(hdev->dev, "Failed to copy completion value from user\n"); rc = -EFAULT; goto remove_pending_user_interrupt; } - if (completion_value >= target_value) + if (completion_value >= target_value) { *status = CS_WAIT_STATUS_COMPLETED; - else + /* There was no interrupt, we assume the completion is now. 
*/ + pend->fence.timestamp = ktime_get(); + } else *status = CS_WAIT_STATUS_BUSY; if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED)) @@ -2812,7 +2842,7 @@ wait_again: reinit_completion(&pend->fence.completion); spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); - if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) { + if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) { dev_err(hdev->dev, "Failed to copy completion value from user\n"); rc = -EFAULT; @@ -2839,6 +2869,8 @@ remove_pending_user_interrupt: list_del(&pend->wait_list_node); spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); + *timestamp = ktime_to_ns(pend->fence.timestamp); + kfree(pend); hl_ctx_put(ctx); @@ -2852,6 +2884,7 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) struct asic_fixed_properties *prop; union hl_wait_cs_args *args = data; enum hl_cs_wait_status status; + u64 timestamp; int rc; prop = &hdev->asic_prop; @@ -2881,7 +2914,8 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, args->in.interrupt_timeout_us, args->in.addr, - args->in.target, interrupt_offset, &status); + args->in.target, interrupt_offset, &status, + &timestamp); if (rc) { if (rc != -EINTR) @@ -2893,6 +2927,11 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) memset(args, 0, sizeof(*args)); + if (timestamp) { + args->out.timestamp_nsec = timestamp; + args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD; + } + switch (status) { case CS_WAIT_STATUS_COMPLETED: args->out.status = HL_WAIT_CS_STATUS_COMPLETED; |