diff --git a/ockl/src/wfaas.cl b/ockl/src/wfaas.cl
index 482c2519..67d8664d 100644
--- a/ockl/src/wfaas.cl
+++ b/ockl/src/wfaas.cl
@@ -13,15 +13,31 @@
 // XXX from llvm/include/llvm/IR/InstrTypes.h
 #define ICMP_NE 33
 
+// Hack to prevent incorrect hoisting of the comparison below. LLVM
+// currently has no proper way to prevent hoisting of operations whose
+// results depend on control flow. The value is passed through a
+// volatile inline-asm statement whose output is tied to its input
+// ("0" constraint), so the same value is returned unchanged.
+ATTR
+static int optimizationBarrierHack(int in_val)
+{
+    int out_val;
+    __asm__ volatile ("; ockl ballot hoisting hack %0" :
+                      "=v"(out_val) : "0"(in_val));
+    return out_val;
+}
+
 ATTR bool
 OCKL_MANGLE_I32(wfany)(int e)
 {
+    e = optimizationBarrierHack(e);
     return __builtin_amdgcn_sicmp(e, 0, ICMP_NE) != 0UL;
 }
 
 ATTR bool
 OCKL_MANGLE_I32(wfall)(int e)
 {
+    e = optimizationBarrierHack(e);
     return __builtin_amdgcn_sicmp(e, 0, ICMP_NE) == __builtin_amdgcn_read_exec();
 }
 
@@ -29,6 +45,9 @@ OCKL_MANGLE_I32(wfall)(int e)
 ATTR bool
 OCKL_MANGLE_I32(wfsame)(int e)
 {
-    ulong u = __builtin_amdgcn_sicmp(e, 0, ICMP_NE) != 0;
+    e = optimizationBarrierHack(e);
+    // Keep the full comparison result: collapsing it to 0/1 with "!= 0"
+    // would make the comparison against the exec mask below meaningless.
+    ulong u = __builtin_amdgcn_sicmp(e, 0, ICMP_NE);
     return (u == 0UL) | (u == __builtin_amdgcn_read_exec());
 }