From a0c24d5cd627a6cd727774d1c6880e940ae9943e Mon Sep 17 00:00:00 2001 From: Brian Sumner Date: Tue, 20 Nov 2018 13:30:51 -0800 Subject: [PATCH] Fix alignment Change-Id: I020e6ef411bf1d5b4f0599363cbe4f7b0e1ec4fd --- opencl/src/devenq/enqueue.cl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/opencl/src/devenq/enqueue.cl b/opencl/src/devenq/enqueue.cl index ecf8a1c1..8f729403 100644 --- a/opencl/src/devenq/enqueue.cl +++ b/opencl/src/devenq/enqueue.cl @@ -314,7 +314,7 @@ __enqueue_kernel_varargs(queue_t q, kernel_enqueue_flags_t f, const ndrange_t r, __global AmdVQueueHeader *vq = __builtin_astype(q, __global AmdVQueueHeader *); if (lo > LSIZE_LIMIT || - align_up(csize, sizeof(uint)) + align_up(nl*sizeof(uint), sizeof(size_t)) + NUM_IMPLICIT_ARGS*sizeof(size_t) > vq->arg_size || + align_up(align_up(csize, sizeof(uint)) + nl*sizeof(uint), sizeof(size_t)) + NUM_IMPLICIT_ARGS*sizeof(size_t) > vq->arg_size || mul24(mul24((uint)r.localWorkSize[0], (uint)r.localWorkSize[1]), (uint)r.localWorkSize[2]) > CL_DEVICE_MAX_WORK_GROUP_SIZE) return CLK_ENQUEUE_FAILURE; @@ -334,7 +334,8 @@ __enqueue_kernel_varargs(queue_t q, kernel_enqueue_flags_t f, const ndrange_t r, for (uint il=0; ilaql.kernarg_address + + align_up(align_up(csize, sizeof(uint)) + nl*sizeof(uint), sizeof(size_t))); implicit[0] = r.globalWorkOffset[0]; implicit[1] = r.globalWorkOffset[1]; implicit[2] = r.globalWorkOffset[2]; @@ -383,7 +384,7 @@ __enqueue_kernel_events_varargs(queue_t q, kernel_enqueue_flags_t f, const ndran if (lo > LSIZE_LIMIT || nwl > vq->wait_size || - align_up(csize, sizeof(uint)) + align_up(nl*sizeof(uint), sizeof(size_t)) + NUM_IMPLICIT_ARGS*sizeof(size_t) > vq->arg_size || + align_up(align_up(csize, sizeof(uint)) + nl*sizeof(uint), sizeof(size_t)) + NUM_IMPLICIT_ARGS*sizeof(size_t) > vq->arg_size || mul24(mul24((uint)r.localWorkSize[0], (uint)r.localWorkSize[1]), (uint)r.localWorkSize[2]) > CL_DEVICE_MAX_WORK_GROUP_SIZE) return CLK_ENQUEUE_FAILURE; @@ -421,7 +422,8 @@ __enqueue_kernel_events_varargs(queue_t q, kernel_enqueue_flags_t f, const ndran for (uint il=0; ilaql.kernarg_address + + align_up(align_up(csize, sizeof(uint)) + nl*sizeof(uint), sizeof(size_t))); implicit[0] = r.globalWorkOffset[0]; implicit[1] = r.globalWorkOffset[1]; implicit[2] = r.globalWorkOffset[2];