Skip to content

Commit

Permalink
cuda/hip: Fix measuring kernel times.
Browse files (browse the repository at this point in the history)
  • Loading branch information
fweig committed Sep 19, 2024
1 parent f719f8a commit bf8dfdf
Showing 1 changed file with 9 additions and 13 deletions.
22 changes: 9 additions & 13 deletions src/xpu/detail/platform/hip_cuda/device.h
Original file line number | Diff line number | Diff line change
Expand Up @@ -611,27 +611,22 @@ struct action_runner<kernel_tag, K, void(K::*)(kernel_context<typename K::shared
cudaEvent_t start, end;
int err = 0;

cudaStream_t stream = static_cast<cudaStream_t>(launch_info.queue_handle);

if (measure_time) {
SAFE_CALL(cudaEventCreate(&start));
ON_ERROR_GOTO(err, cudaEventCreate(&end), cleanup_start_event);
}

if (measure_time) {
ON_ERROR_GOTO(err, cudaEventRecord(start), cleanup_events);
}

if (launch_info.queue_handle == nullptr) {
kernel_entry_bounded<K, K::block_size::value.linear(), Args...><<<grid_dim.as_cuda_grid(), block_dim.as_cuda_grid()>>>(args...);
} else {
cudaStream_t stream = static_cast<cudaStream_t>(launch_info.queue_handle);
kernel_entry_bounded<K, K::block_size::value.linear(), Args...><<<grid_dim.as_cuda_grid(), block_dim.as_cuda_grid(), 0, stream>>>(args...);
ON_ERROR_GOTO(err, cudaEventRecord(start, stream), cleanup_events);
}

kernel_entry_bounded<K, K::block_size::value.linear(), Args...><<<grid_dim.as_cuda_grid(), block_dim.as_cuda_grid(), 0, stream>>>(args...);

if (measure_time) {
ON_ERROR_GOTO(err, cudaEventRecord(end), cleanup_events);
ON_ERROR_GOTO(err, cudaEventRecord(end, stream), cleanup_events);
}
SAFE_CALL(cudaDeviceSynchronize());

if (measure_time) {
ON_ERROR_GOTO(err, cudaEventSynchronize(end), cleanup_events);
Expand Down Expand Up @@ -687,13 +682,14 @@ struct action_runner<kernel_tag, K, void(K::*)(kernel_context<typename K::shared
}

if (measure_time) {
ON_ERROR_GOTO(err, hipEventRecord(start), cleanup_events);
ON_ERROR_GOTO(err, hipEventRecord(start, stream), cleanup_events);
}

hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel_entry_bounded<K, K::block_size::value.linear(), Args...>), grid_dim.as_cuda_grid(), block_dim.as_cuda_grid(), 0, stream, std::forward<Args>(args)...);

if (measure_time) {
ON_ERROR_GOTO(err, hipEventRecord(end), cleanup_events);
ON_ERROR_GOTO(err, hipEventRecord(end, stream), cleanup_events);
}
SAFE_CALL(hipDeviceSynchronize());

if (measure_time) {
ON_ERROR_GOTO(err, hipEventSynchronize(end), cleanup_events);
Expand Down

0 comments on commit bf8dfdf

Please sign in to comment.