From e65b6f49e7786f7c67ed912647270c82c7236191 Mon Sep 17 00:00:00 2001 From: Brian Sumner Date: Wed, 26 Jan 2022 07:49:07 -0800 Subject: [PATCH 1/4] Add a means for the device library to determine the code object being targeted in a manner similar to how the ISA version is handled. Change-Id: I8045b78d5119acce015d4e2fb2d5db3af4408601 --- oclc/inc/oclc.h | 7 +++++++ oclc/src/abi_version_400.cl | 11 +++++++++++ oclc/src/abi_version_500.cl | 11 +++++++++++ 3 files changed, 29 insertions(+) create mode 100644 oclc/src/abi_version_400.cl create mode 100644 oclc/src/abi_version_500.cl diff --git a/oclc/inc/oclc.h b/oclc/inc/oclc.h index 42a74b7c..ae551977 100644 --- a/oclc/inc/oclc.h +++ b/oclc/inc/oclc.h @@ -25,9 +25,15 @@ // __constant bool __oclc_correctly_rounded_sqrt32(void) // - the application is expecting sqrt(float) to produce a correctly rounded result // +// __constant bool __oclc_wavefrontsize64 +// - the application is being compiled for a wavefront size of 64 +// // __constant int __oclc_ISA_version // - the ISA version of the target device // +// __constant int __oclc_ABI_version +// - the ABI version the application is being compiled for +// // it is expected that the implementation provides these as if declared from the following // C code: // @@ -41,5 +47,6 @@ extern const __constant bool __oclc_daz_opt; extern const __constant bool __oclc_correctly_rounded_sqrt32; extern const __constant bool __oclc_wavefrontsize64; extern const __constant int __oclc_ISA_version; +extern const __constant int __oclc_ABI_version; #endif // OCLC_H diff --git a/oclc/src/abi_version_400.cl b/oclc/src/abi_version_400.cl new file mode 100644 index 00000000..3d9f6c3f --- /dev/null +++ b/oclc/src/abi_version_400.cl @@ -0,0 +1,11 @@ +/*===-------------------------------------------------------------------------- + * ROCm Device Libraries + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. 
+ *===------------------------------------------------------------------------*/ + +#include "oclc.h" + +const __constant int __oclc_ABI_version = 400; + diff --git a/oclc/src/abi_version_500.cl b/oclc/src/abi_version_500.cl new file mode 100644 index 00000000..0a09ea20 --- /dev/null +++ b/oclc/src/abi_version_500.cl @@ -0,0 +1,11 @@ +/*===-------------------------------------------------------------------------- + * ROCm Device Libraries + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + *===------------------------------------------------------------------------*/ + +#include "oclc.h" + +const __constant int __oclc_ABI_version = 500; + From a84f9ae7b7654e0231a31760a7cf2d832892b9ce Mon Sep 17 00:00:00 2001 From: Brian Sumner Date: Tue, 8 Feb 2022 10:39:31 -0800 Subject: [PATCH 2/4] Set code version to none if available Change-Id: I2eb828a4b99ec66ad8d30baf79fabf0dfb2e0433 --- cmake/OCL.cmake | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/cmake/OCL.cmake b/cmake/OCL.cmake index 4e324e8c..cc533bee 100644 --- a/cmake/OCL.cmake +++ b/cmake/OCL.cmake @@ -29,6 +29,18 @@ if (WIN32) set(CLANG_OCL_FLAGS ${CLANG_OCL_FLAGS} -fshort-wchar) endif() +# Disable code object version module flag if available. 
+file(WRITE ${CMAKE_BINARY_DIR}/tmp.cl "") +execute_process ( + COMMAND ${LLVM_TOOLS_BINARY_DIR}/clang${EXE_SUFFIX} ${CLANG_OCL_FLAGS} -Xclang -mcode-object-version=none ${CMAKE_BINARY_DIR}/tmp.cl + RESULT_VARIABLE TEST_CODE_OBJECT_VERSION_NONE_RESULT + ERROR_QUIET +) +file(REMOVE ${CMAKE_BINARY_DIR}/tmp.cl) +if (NOT TEST_CODE_OBJECT_VERSION_NONE_RESULT) + set(CLANG_OCL_FLAGS ${CLANG_OCL_FLAGS} -Xclang -mcode-object-version=none) +endif() + set (BC_EXT .bc) set (LIB_SUFFIX ".lib${BC_EXT}") set (STRIP_SUFFIX ".strip${BC_EXT}") From d8410fac7841f3f8cabaeef6306389e387f3786f Mon Sep 17 00:00:00 2001 From: Brian Sumner Date: Wed, 9 Feb 2022 14:59:22 -0800 Subject: [PATCH 3/4] Cooperative groups sync fixes. Fixes SWDEV-320329 and SWDEV-322407 Change-Id: I4261a1b76c32e8223a7673c279d850b91997d419 --- ockl/src/cg.cl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ockl/src/cg.cl b/ockl/src/cg.cl index 53725ab4..1afc2362 100644 --- a/ockl/src/cg.cl +++ b/ockl/src/cg.cl @@ -82,6 +82,7 @@ __attribute__((convergent)) void __ockl_grid_sync(void) { __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent"); + __builtin_amdgcn_s_barrier(); if (choose_one_workgroup_workitem()) { uint nwm1 = (uint)__ockl_get_num_groups(0) * (uint)__ockl_get_num_groups(1) * (uint)__ockl_get_num_groups(2) - 1; __ockl_gws_barrier(nwm1, 0); @@ -125,14 +126,15 @@ __ockl_multi_grid_is_valid(void) __attribute__((convergent)) void __ockl_multi_grid_sync(void) { - __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent"); + __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, ""); + __builtin_amdgcn_s_barrier(); uint nwm1 = (uint)__ockl_get_num_groups(0) * (uint)__ockl_get_num_groups(1) * (uint)__ockl_get_num_groups(2) - 1; bool cwwi = choose_one_workgroup_workitem(); if (cwwi) __ockl_gws_barrier(nwm1, 0); - __builtin_amdgcn_s_barrier(); + // Need another barrier here if the following choose doesn't see cwwi set if (choose_one_grid_workitem()) { __constant struct mg_info *m = (__constant struct mg_info 
*)get_mg_info_arg(); From 4dfb181ef56dfac98f921b55e4fdd4acbe377b63 Mon Sep 17 00:00:00 2001 From: pvellien Date: Tue, 1 Feb 2022 08:14:26 +0000 Subject: [PATCH 4/4] Add missing __attribute__(optnone) to asan interface functions. Change-Id: Id986f85ae97a853b84fb4b1330e588e98b3660e2 --- asanrtl/src/globals.cl | 4 ++-- asanrtl/src/preserve.cl | 7 +++++-- asanrtl/src/shadow_mapping.cl | 2 +- asanrtl/src/stubs.cl | 6 +++--- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/asanrtl/src/globals.cl b/asanrtl/src/globals.cl index 21acfe7e..d2fe8da7 100644 --- a/asanrtl/src/globals.cl +++ b/asanrtl/src/globals.cl @@ -45,7 +45,7 @@ poison_redzones(__global const struct device_global *g) { } // This function is called by one-workitem constructor kernel. -NO_SANITIZE_ADDR +OPT_NONE NO_SANITIZE_ADDR void __asan_register_globals(uptr globals, uptr n) { __global struct device_global *dglobals = (__global struct device_global*) globals; @@ -65,7 +65,7 @@ unpoison_global(__global const struct device_global *g) { } // This function is called by one-workitem destructor kernel. -NO_SANITIZE_ADDR +OPT_NONE NO_SANITIZE_ADDR void __asan_unregister_globals(uptr globals, uptr n) { __global struct device_global* dglobals = (__global struct device_global*) globals; diff --git a/asanrtl/src/preserve.cl b/asanrtl/src/preserve.cl index 49e9ceec..e49bb0a6 100644 --- a/asanrtl/src/preserve.cl +++ b/asanrtl/src/preserve.cl @@ -5,7 +5,7 @@ * License. See LICENSE.TXT for details. 
*===------------------------------------------------------------------------*/ -typedef ulong uptr; +#include "asan_util.h" extern void __asan_report_load1 (uptr addr); extern void __asan_report_load1_noabort (uptr addr); extern void __asan_report_load2 (uptr addr); @@ -72,7 +72,10 @@ extern void __asan_unregister_elf_globals(uptr flag, uptr start, uptr stop); extern void __asan_init(void); extern void __asan_version_mismatch_check_v8(void); -void +// Functions called within the function below must not be inlined, and their +// names must be preserved at higher opt levels to enable linking between +// modules compiled with asan instrumentation and the asan device rtl. +OPT_NONE void __amdgpu_device_library_preserve_asan_functions(void) { __asan_report_load1(0); diff --git a/asanrtl/src/shadow_mapping.cl b/asanrtl/src/shadow_mapping.cl index e18c65ac..f59450f8 100644 --- a/asanrtl/src/shadow_mapping.cl +++ b/asanrtl/src/shadow_mapping.cl @@ -30,7 +30,7 @@ range_check(uptr beg, uptr end) { } //check all application bytes in [beg,beg+size) range are accessible -NO_SANITIZE_ADDR +OPT_NONE NO_SANITIZE_ADDR uptr __asan_region_is_poisoned(uptr beg, uptr size) { diff --git a/asanrtl/src/stubs.cl b/asanrtl/src/stubs.cl index c68fe1ac..71405579 100644 --- a/asanrtl/src/stubs.cl +++ b/asanrtl/src/stubs.cl @@ -5,7 +5,7 @@ * License. See LICENSE.TXT for details. *===------------------------------------------------------------------------*/ -typedef ulong uptr; +#include "asan_util.h" void __asan_handle_no_return(void) {} @@ -25,7 +25,7 @@ void __asan_register_elf_globals(uptr flag, uptr start, uptr stop) {} void __asan_unregister_elf_globals(uptr flag, uptr start, uptr stop) {} -void __asan_init(void) {} +OPT_NONE NO_SANITIZE_ADDR void __asan_init(void) {} -void __asan_version_mismatch_check_v8(void) {} +OPT_NONE NO_SANITIZE_ADDR void __asan_version_mismatch_check_v8(void) {}