aboutsummaryrefslogtreecommitdiff
path: root/drivers/misc/habanalabs/common/command_submission.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc/habanalabs/common/command_submission.c')
-rw-r--r--drivers/misc/habanalabs/common/command_submission.c105
1 files changed, 72 insertions, 33 deletions
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 6dafff375f1c..4c8000fd246c 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -143,6 +143,7 @@ static void hl_fence_init(struct hl_fence *fence, u64 sequence)
fence->cs_sequence = sequence;
fence->error = 0;
fence->timestamp = ktime_set(0, 0);
+ fence->mcs_handling_done = false;
init_completion(&fence->completion);
}
@@ -431,11 +432,10 @@ static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
/* Don't cancel TDR in case this CS was timedout because we might be
* running from the TDR context
*/
- if (cs && (cs->timedout ||
- hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT))
+ if (cs->timedout || hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT)
return;
- if (cs && cs->tdr_active)
+ if (cs->tdr_active)
cancel_delayed_work_sync(&cs->work_tdr);
spin_lock(&hdev->cs_mirror_lock);
@@ -536,10 +536,21 @@ static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
mcs_compl->timestamp =
ktime_to_ns(fence->timestamp);
complete_all(&mcs_compl->completion);
+
+ /*
+ * Setting mcs_handling_done inside the lock ensures
+ * at least one fence have mcs_handling_done set to
+ * true before wait for mcs finish. This ensures at
+ * least one CS will be set as completed when polling
+ * mcs fences.
+ */
+ fence->mcs_handling_done = true;
}
spin_unlock(&mcs_compl->lock);
}
+ /* In case CS completed without mcs completion initialized */
+ fence->mcs_handling_done = true;
}
static inline void cs_release_sob_reset_handler(struct hl_device *hdev,
@@ -2371,32 +2382,48 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data)
break;
}
- mcs_data->stream_master_qid_map |= fence->stream_master_qid_map;
-
- if (status == CS_WAIT_STATUS_BUSY)
- continue;
-
- mcs_data->completion_bitmap |= BIT(i);
-
- /*
- * best effort to extract timestamp. few notes:
- * - if even single fence is gone we cannot extract timestamp
- * (as fence not exist anymore)
- * - for all completed CSs we take the earliest timestamp.
- * for this we have to validate that:
- * 1. given timestamp was indeed set
- * 2. the timestamp is earliest of all timestamps so far
- */
+ switch (status) {
+ case CS_WAIT_STATUS_BUSY:
+ /* CS did not finished, keep waiting on its QID*/
+ mcs_data->stream_master_qid_map |=
+ fence->stream_master_qid_map;
+ break;
+ case CS_WAIT_STATUS_COMPLETED:
+ /*
+ * Using mcs_handling_done to avoid possibility of mcs_data
+ * returns to user indicating CS completed before it finished
+ * all of its mcs handling, to avoid race the next time the
+ * user waits for mcs.
+ */
+ if (!fence->mcs_handling_done)
+ break;
- if (status == CS_WAIT_STATUS_GONE) {
+ mcs_data->completion_bitmap |= BIT(i);
+ /*
+ * For all completed CSs we take the earliest timestamp.
+ * For this we have to validate that the timestamp is
+ * earliest of all timestamps so far.
+ */
+ if (mcs_data->update_ts &&
+ (ktime_compare(fence->timestamp, first_cs_time) < 0))
+ first_cs_time = fence->timestamp;
+ break;
+ case CS_WAIT_STATUS_GONE:
mcs_data->update_ts = false;
mcs_data->gone_cs = true;
- } else if (mcs_data->update_ts &&
- (ktime_compare(fence->timestamp,
- ktime_set(0, 0)) > 0) &&
- (ktime_compare(fence->timestamp, first_cs_time) < 0)) {
- first_cs_time = fence->timestamp;
+ /*
+ * It is possible to get an old sequence numbers from user
+ * which related to already completed CSs and their fences
+ * already gone. In this case, CS set as completed but
+ * no need to consider its QID for mcs completion.
+ */
+ mcs_data->completion_bitmap |= BIT(i);
+ break;
+ default:
+ dev_err(hdev->dev, "Invalid fence status\n");
+ return -EINVAL;
}
+
}
hl_fences_put(mcs_data->fence_arr, arr_len);
@@ -2740,13 +2767,14 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
u32 timeout_us, u64 user_address,
- u32 target_value, u16 interrupt_offset,
- enum hl_cs_wait_status *status)
+ u64 target_value, u16 interrupt_offset,
+ enum hl_cs_wait_status *status,
+ u64 *timestamp)
{
struct hl_user_pending_interrupt *pend;
struct hl_user_interrupt *interrupt;
unsigned long timeout, flags;
- u32 completion_value;
+ u64 completion_value;
long completion_rc;
int rc = 0;
@@ -2780,15 +2808,17 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
/* We check for completion value as interrupt could have been received
* before we added the node to the wait list
*/
- if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
+ if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
dev_err(hdev->dev, "Failed to copy completion value from user\n");
rc = -EFAULT;
goto remove_pending_user_interrupt;
}
- if (completion_value >= target_value)
+ if (completion_value >= target_value) {
*status = CS_WAIT_STATUS_COMPLETED;
- else
+ /* There was no interrupt, we assume the completion is now. */
+ pend->fence.timestamp = ktime_get();
+ } else
*status = CS_WAIT_STATUS_BUSY;
if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED))
@@ -2812,7 +2842,7 @@ wait_again:
reinit_completion(&pend->fence.completion);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
- if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
+ if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
dev_err(hdev->dev, "Failed to copy completion value from user\n");
rc = -EFAULT;
@@ -2839,6 +2869,8 @@ remove_pending_user_interrupt:
list_del(&pend->wait_list_node);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
+ *timestamp = ktime_to_ns(pend->fence.timestamp);
+
kfree(pend);
hl_ctx_put(ctx);
@@ -2852,6 +2884,7 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
struct asic_fixed_properties *prop;
union hl_wait_cs_args *args = data;
enum hl_cs_wait_status status;
+ u64 timestamp;
int rc;
prop = &hdev->asic_prop;
@@ -2881,7 +2914,8 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx,
args->in.interrupt_timeout_us, args->in.addr,
- args->in.target, interrupt_offset, &status);
+ args->in.target, interrupt_offset, &status,
+ &timestamp);
if (rc) {
if (rc != -EINTR)
@@ -2893,6 +2927,11 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
memset(args, 0, sizeof(*args));
+ if (timestamp) {
+ args->out.timestamp_nsec = timestamp;
+ args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
+ }
+
switch (status) {
case CS_WAIT_STATUS_COMPLETED:
args->out.status = HL_WAIT_CS_STATUS_COMPLETED;