From 692f84de9222c572644aa59726795d21da3fed63 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 31 May 2019 09:58:57 -0400 Subject: [PATCH] Use inline asm hack for ballots The problem of hoisting operations where the value depends on the control flow isn't going to be fixed in llvm any time soon. Use the same hack that mesa uses. Wrap the input in a no-op asm sideeffect statement. The asm won't be hoisted, so the value dependent intrinsic call can't be hoisted either. Change-Id: Ib62f5b2aff952ddbb67de63dbfae86a65e5b60a1 --- ockl/src/wfaas.cl | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ockl/src/wfaas.cl b/ockl/src/wfaas.cl index 482c2519..67d8664d 100644 --- a/ockl/src/wfaas.cl +++ b/ockl/src/wfaas.cl @@ -13,15 +13,29 @@ // XXX from llvm/include/llvm/IR/InstrTypes.h #define ICMP_NE 33 +// Hack to prevent incorrect hoisting of the operation. LLVM currently +// has no proper way to prevent hoisting of operations whose results +// depend on control flow, so the input is passed through a no-op asm. +ATTR +static int optimizationBarrierHack(int in_val) +{ + int out_val; + __asm__ volatile ("; ockl ballot hoisting hack %0" : + "=v"(out_val) : "0"(in_val)); + return out_val; +} + ATTR bool OCKL_MANGLE_I32(wfany)(int e) { + e = optimizationBarrierHack(e); return __builtin_amdgcn_sicmp(e, 0, ICMP_NE) != 0UL; } ATTR bool OCKL_MANGLE_I32(wfall)(int e) { + e = optimizationBarrierHack(e); return __builtin_amdgcn_sicmp(e, 0, ICMP_NE) == __builtin_amdgcn_read_exec(); } @@ -29,6 +43,7 @@ OCKL_MANGLE_I32(wfall)(int e) ATTR bool OCKL_MANGLE_I32(wfsame)(int e) { + e = optimizationBarrierHack(e); ulong u = __builtin_amdgcn_sicmp(e, 0, ICMP_NE) != 0; return (u == 0UL) | (u == __builtin_amdgcn_read_exec()); }