From d5eb1e52fa57d65d7cc8ab581a725a6dc1a03e24 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Fri, 28 Jun 2024 09:16:38 +0200 Subject: [PATCH 1/3] [SYCL][E2E] Reenable in_order_profiling_queue for L0 (#14328) Due to some confusion about the output from the in_order_profiling_queue test on L0, the test was disabled. However, the test can be safely reenabled for that target, while keeping it disabled for FPGA. Additionally, the failure in profiling_queue is believed to be due to the same issue, so the JIRA has been added to it and the note in in_order_profiling_queue has been updated to reflect the known information about the failure. Signed-off-by: Larsen, Steffen --- sycl/test-e2e/ProfilingTag/in_order_profiling_queue.cpp | 4 ++-- sycl/test-e2e/ProfilingTag/profiling_queue.cpp | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sycl/test-e2e/ProfilingTag/in_order_profiling_queue.cpp b/sycl/test-e2e/ProfilingTag/in_order_profiling_queue.cpp index f34be43ae9587..2b8871ef5a3b6 100644 --- a/sycl/test-e2e/ProfilingTag/in_order_profiling_queue.cpp +++ b/sycl/test-e2e/ProfilingTag/in_order_profiling_queue.cpp @@ -21,9 +21,9 @@ // https://github.com/intel/llvm/issues/14053 // UNSUPPORTED: cuda -// Fails on FPGA and level_zero too +// FPGA emulator seems to return unexpected start time for the fallback barrier. // https://github.com/intel/llvm/issues/14315 -// UNSUPPORTED: accelerator || level_zero +// UNSUPPORTED: accelerator #include "common.hpp" diff --git a/sycl/test-e2e/ProfilingTag/profiling_queue.cpp b/sycl/test-e2e/ProfilingTag/profiling_queue.cpp index d0da7612d4ea9..a028278ed957a 100644 --- a/sycl/test-e2e/ProfilingTag/profiling_queue.cpp +++ b/sycl/test-e2e/ProfilingTag/profiling_queue.cpp @@ -17,6 +17,7 @@ // UNSUPPORTED: hip // FPGA emulator seems to return unexpected start time for the fallback barrier. +// https://github.com/intel/llvm/issues/14315 // UNSUPPORTED: accelerator // Flaky on CUDA From df0dc3b397c13648ebecddfbbaf71cd8109cabd7 Mon Sep 17 00:00:00 2001 From: Yang Zhao Date: Fri, 28 Jun 2024 20:35:14 +0800 Subject: [PATCH 2/3] [DeviceSanitizer] Support out-of-bounds on private memory (#13935) UR: https://github.com/oneapi-src/unified-runtime/pull/1676 Instrument "__asan_mem_to_shadow" to convert private address to its shadow memory address Other steps are same with ASan on stack. --- libdevice/include/asan_libdevice.hpp | 6 +- libdevice/sanitizer_utils.cpp | 53 ++++++++- .../Instrumentation/AddressSanitizer.cpp | 102 ++++++++++-------- .../SPIRV/instrument_private_address_space.ll | 32 ++++++ sycl/plugins/unified_runtime/CMakeLists.txt | 12 +-- .../AddressSanitizer/common/kernel-debug.cpp | 16 ++- .../USM/parallel_no_local_size.cpp | 41 +++++++ .../private/multiple_private.cpp | 77 +++++++++++++ .../out-of-bounds/private/single_private.cpp | 32 ++++++ 9 files changed, 308 insertions(+), 63 deletions(-) create mode 100644 llvm/test/Instrumentation/AddressSanitizer/SPIRV/instrument_private_address_space.ll create mode 100644 sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_no_local_size.cpp create mode 100644 sycl/test-e2e/AddressSanitizer/out-of-bounds/private/multiple_private.cpp create mode 100644 sycl/test-e2e/AddressSanitizer/out-of-bounds/private/single_private.cpp diff --git a/libdevice/include/asan_libdevice.hpp b/libdevice/include/asan_libdevice.hpp index 21ddb7478173f..5f04b557e5acd 100644 --- a/libdevice/include/asan_libdevice.hpp +++ b/libdevice/include/asan_libdevice.hpp @@ -66,8 +66,8 @@ struct LocalArgsInfo { constexpr std::size_t ASAN_MAX_NUM_REPORTS = 10; struct LaunchInfo { - // Don't move this field, we use it in AddressSanitizerPass uintptr_t PrivateShadowOffset = 0; + uintptr_t PrivateShadowOffsetEnd = 0; uintptr_t LocalShadowOffset = 0; uintptr_t LocalShadowOffsetEnd = 0; @@ -82,8 +82,8 @@ constexpr unsigned ASAN_SHADOW_SCALE = 4; constexpr unsigned ASAN_SHADOW_GRANULARITY = 1ULL << ASAN_SHADOW_SCALE; // Based on the observation, only the last 24 bits of the address of the private -// variable have changed, we use 31 bits(2G) to be safe. -constexpr std::size_t ASAN_PRIVATE_SIZE = 0x7fffffffULL + 1; +// variable have changed +constexpr std::size_t ASAN_PRIVATE_SIZE = 0xffffffULL + 1; // These magic values are written to shadow for better error // reporting. diff --git a/libdevice/sanitizer_utils.cpp b/libdevice/sanitizer_utils.cpp index e63c634e30930..651067be69851 100644 --- a/libdevice/sanitizer_utils.cpp +++ b/libdevice/sanitizer_utils.cpp @@ -65,6 +65,9 @@ static const __SYCL_CONSTANT__ char __global_shadow_out_of_bound[] = static const __SYCL_CONSTANT__ char __local_shadow_out_of_bound[] = "[kernel] Local shadow memory out-of-bound (ptr: %p -> %p, wg: %d, base: " "%p)\n"; +static const __SYCL_CONSTANT__ char __private_shadow_out_of_bound[] = + "[kernel] Private shadow memory out-of-bound (ptr: %p -> %p, wg: %d, base: " + "%p)\n"; static const __SYCL_CONSTANT__ char __asan_print_unsupport_device_type[] = "[kernel] Unsupport device type: %d\n"; @@ -123,7 +126,7 @@ inline uptr MemToShadow_DG2(uptr addr, uint32_t as) { } if (shadow_ptr > __AsanShadowMemoryGlobalEnd) { - if (__asan_report_out_of_shadow_bounds() && __AsanDebug) { + if (__asan_report_out_of_shadow_bounds()) { __spirv_ocl_printf(__global_shadow_out_of_bound, addr, shadow_ptr); } } @@ -171,7 +174,7 @@ inline uptr MemToShadow_PVC(uptr addr, uint32_t as) { } if (shadow_ptr > __AsanShadowMemoryGlobalEnd) { - if (__asan_report_out_of_shadow_bounds() && __AsanDebug) { + if (__asan_report_out_of_shadow_bounds()) { __spirv_ocl_printf(__global_shadow_out_of_bound, addr, shadow_ptr, (uptr)__AsanShadowMemoryGlobalStart); } @@ -207,13 +210,46 @@ inline uptr MemToShadow_PVC(uptr addr, uint32_t as) { ((addr & (SLM_SIZE - 1)) >> ASAN_SHADOW_SCALE); if (shadow_ptr > shadow_offset_end) { - if (__asan_report_out_of_shadow_bounds() && __AsanDebug) { + if (__asan_report_out_of_shadow_bounds()) { __spirv_ocl_printf(__local_shadow_out_of_bound, addr, shadow_ptr, wg_lid, (uptr)shadow_offset); } return 0; } return shadow_ptr; + } else if (as == ADDRESS_SPACE_PRIVATE) { // private + // work-group linear id + const auto WG_LID = + __spirv_BuiltInWorkgroupId.x * __spirv_BuiltInNumWorkgroups.y * + __spirv_BuiltInNumWorkgroups.z + + __spirv_BuiltInWorkgroupId.y * __spirv_BuiltInNumWorkgroups.z + + __spirv_BuiltInWorkgroupId.z; + + auto launch_info = (__SYCL_GLOBAL__ const LaunchInfo *)__AsanLaunchInfo; + const auto shadow_offset = launch_info->PrivateShadowOffset; + const auto shadow_offset_end = launch_info->PrivateShadowOffsetEnd; + + if (shadow_offset == 0) { + return 0; + } + + if (__AsanDebug) + __spirv_ocl_printf(__mem_launch_info, launch_info, + launch_info->PrivateShadowOffset, 0, + launch_info->NumLocalArgs, launch_info->LocalArgs); + + uptr shadow_ptr = shadow_offset + + ((WG_LID * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE) + + ((addr & (ASAN_PRIVATE_SIZE - 1)) >> ASAN_SHADOW_SCALE); + + if (shadow_ptr > shadow_offset_end) { + if (__asan_report_out_of_shadow_bounds()) { + __spirv_ocl_printf(__private_shadow_out_of_bound, addr, shadow_ptr, + WG_LID, (uptr)shadow_offset); + } + return 0; + } + return shadow_ptr; } return 0; @@ -233,6 +269,8 @@ inline uptr MemToShadow(uptr addr, uint32_t as) { return shadow_ptr; } +// FIXME: OCL "O2" optimizer doesn't work well with following code +#if 0 if (__AsanDebug) { if (shadow_ptr) { if (as == ADDRESS_SPACE_PRIVATE) @@ -244,6 +282,7 @@ inline uptr MemToShadow(uptr addr, uint32_t as) { __spirv_ocl_printf(__asan_print_shadow_value2, addr, as, shadow_ptr); } } +#endif return shadow_ptr; } @@ -606,6 +645,14 @@ ASAN_REPORT_ERROR(store, true, 16) ASAN_REPORT_ERROR_N(load, false) ASAN_REPORT_ERROR_N(store, true) +/// +/// ASAN convert memory address to shadow memory address +/// + +DEVICE_EXTERN_C_NOINLINE uptr __asan_mem_to_shadow(uptr ptr, uint32_t as) { + return MemToShadow(ptr, as); +} + /// /// ASAN initialize shdadow memory of local memory /// diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 92d72184f759a..9e893938b3432 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -179,6 +179,8 @@ const char kAMDGPUAddressPrivateName[] = "llvm.amdgcn.is.private"; const char kAMDGPUBallotName[] = "llvm.amdgcn.ballot.i64"; const char kAMDGPUUnreachableName[] = "llvm.amdgcn.unreachable"; +const char kAsanMemToShadow[] = "__asan_mem_to_shadow"; + // Accesses sizes are powers of two: 1, 2, 4, 8, 16. static const size_t kNumberOfAccessSizes = 5; @@ -447,7 +449,7 @@ static cl::opt ClOverrideDestructorKind( static cl::opt ClSpirOffloadPrivates("asan-spir-privates", cl::desc("instrument private pointer"), cl::Hidden, - cl::init(false)); + cl::init(true)); static cl::opt ClSpirOffloadGlobals("asan-spir-globals", cl::desc("instrument global pointer"), @@ -820,14 +822,15 @@ struct AddressSanitizer { Value *SizeArgument, uint32_t Exp, RuntimeCallInserter &RTCI); void instrumentMemIntrinsic(MemIntrinsic *MI, RuntimeCallInserter &RTCI); - Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); + Value *memToShadow(Value *Shadow, IRBuilder<> &IRB, + uint32_t AddressSpace = kSpirOffloadPrivateAS); bool suppressInstrumentationSiteForDebug(int &Instrumented); bool instrumentFunction(Function &F, const TargetLibraryInfo *TLI); bool maybeInsertAsanInitAtFunctionEntry(Function &F); bool maybeInsertDynamicShadowAtFunctionEntry(Function &F); void markEscapedLocalAllocas(Function &F); void instrumentSyclStaticLocalMemory(CallInst *CI); - void instrumentSyclDynamicLocalMemory(Function &F); + bool instrumentSyclDynamicLocalMemory(Function &F); GlobalVariable *GetOrCreateGlobalString(Module &M, StringRef Name, StringRef Value, @@ -899,6 +902,8 @@ struct AddressSanitizer { FunctionCallee AMDGPUAddressPrivate; int InstrumentationWithCallsThreshold; uint32_t MaxInlinePoisoningSize; + + FunctionCallee AsanMemToShadow; }; class ModuleAddressSanitizer { @@ -1067,7 +1072,7 @@ struct FunctionStackPoisoner : public InstVisitor { DIB(*F.getParent(), /*AllowUnresolved*/ false), C(ASan.C), IntptrTy(ASan.IntptrTy), IntptrPtrTy(PointerType::get(IntptrTy, 0)), Mapping(ASan.Mapping), - PoisonStack(ClStack && + PoisonStack((ClStack || ClSpirOffloadPrivates) && !Triple(F.getParent()->getTargetTriple()).isAMDGPU()) {} bool runOnFunction() { @@ -1350,7 +1355,7 @@ static void ExtendSpirKernelArgs(Module &M, FunctionAnalysisManager &FAM) { } // Fixup all users - for (auto [F, NewF] : SpirFuncs) { + for (auto &[F, NewF] : SpirFuncs) { SmallVector Users(F->users()); for (User *U : Users) { if (auto *CI = dyn_cast(U)) { @@ -1544,13 +1549,13 @@ void AddressSanitizer::AppendDebugInfoToArgs(Instruction *InsertBefore, Args.push_back(ConstantExpr::getPointerCast(FuncNameGV, ConstASPtrTy)); } -Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) { +Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB, + uint32_t AddressSpace) { if (TargetTriple.isSPIR()) { - // ((Shadow & 0xffffffff) >> 3) + __AsanShadowMemoryPrivateStart; - Shadow = IRB.CreateAnd(Shadow, ConstantInt::get(IntptrTy, 0xffffffff)); - Shadow = IRB.CreateLShr(Shadow, Mapping.Scale); - Value *ShadowBase = IRB.CreateLoad(IntptrTy, AsanShadowDevicePrivate); - return IRB.CreateAdd(Shadow, ShadowBase); + return IRB.CreateCall( + AsanMemToShadow, + {Shadow, ConstantInt::get(IRB.getInt32Ty(), AddressSpace)}, + "shadow_ptr"); } // Shadow >> scale Shadow = IRB.CreateLShr(Shadow, Mapping.Scale); @@ -1619,7 +1624,7 @@ void AddressSanitizer::instrumentSyclStaticLocalMemory(CallInst *CI) { } // Instument dynamic local memory -void AddressSanitizer::instrumentSyclDynamicLocalMemory(Function &F) { +bool AddressSanitizer::instrumentSyclDynamicLocalMemory(Function &F) { InstrumentationIRBuilder IRB(F.getEntryBlock().getFirstNonPHI()); // Save "__asan_launch" into local memory "__AsanLaunchInfo" @@ -1631,13 +1636,12 @@ void AddressSanitizer::instrumentSyclDynamicLocalMemory(Function &F) { SmallVector LocalArgs; for (auto &Arg : F.args()) { Type *PtrTy = dyn_cast(Arg.getType()->getScalarType()); - // Local address space - if (PtrTy && PtrTy->getPointerAddressSpace() == 3) + if (PtrTy && PtrTy->getPointerAddressSpace() == kSpirOffloadLocalAS) LocalArgs.push_back(&Arg); } if (LocalArgs.empty()) - return; + return false; AllocaInst *ArgsArray = IRB.CreateAlloca( IntptrTy, ConstantInt::get(Int32Ty, LocalArgs.size()), "local_args"); @@ -1649,6 +1653,7 @@ void AddressSanitizer::instrumentSyclDynamicLocalMemory(Function &F) { IRB.CreateCall(AsanSetShadowDynamicLocalFunc, {IRB.CreatePointerCast(ArgsArray, IntptrTy), ConstantInt::get(Int32Ty, LocalArgs.size())}); + return true; } // Instrument memset/memmove/memcpy @@ -3232,14 +3237,6 @@ void AddressSanitizer::initializeCallbacks(Module &M, const TargetLibraryInfo *T ArrayType::get(IRB.getInt8Ty(), 0)); if (TargetTriple.isSPIR()) { - AsanShadowDevicePrivate = - M.getOrInsertGlobal("__AsanShadowMemoryPrivateStart", IntptrTy, [&] { - return new GlobalVariable(M, IntptrTy, true, - GlobalVariable::ExternalLinkage, nullptr, - "__AsanShadowMemoryPrivateStart", nullptr, - GlobalVariable::NotThreadLocal, 1); - }); - // __asan_set_shadow_static_local( // uptr ptr, // size_t size, @@ -3263,6 +3260,9 @@ void AddressSanitizer::initializeCallbacks(Module &M, const TargetLibraryInfo *T GlobalVariable::ExternalLinkage, nullptr, "__AsanLaunchInfo", nullptr, GlobalVariable::NotThreadLocal, kSpirOffloadLocalAS); }); + + AsanMemToShadow = M.getOrInsertFunction(kAsanMemToShadow, IntptrTy, + IntptrTy, Type::getInt32Ty(*C)); } AMDGPUAddressShared = @@ -3391,10 +3391,6 @@ bool AddressSanitizer::instrumentFunction(Function &F, // can be passed to that intrinsic. markEscapedLocalAllocas(F); - if (F.getCallingConv() == CallingConv::SPIR_KERNEL) { - instrumentSyclDynamicLocalMemory(F); - } - // We want to instrument every address only once per basic block (unless there // are calls between uses). SmallPtrSet TempsToInstrument; @@ -3514,6 +3510,11 @@ bool AddressSanitizer::instrumentFunction(Function &F, if (ChangedStack || !NoReturnCalls.empty()) FunctionModified = true; + // We need to instrument dynamic local arguments after stack poisoner + if (F.getCallingConv() == CallingConv::SPIR_KERNEL) { + FunctionModified |= instrumentSyclDynamicLocalMemory(F); + } + LLVM_DEBUG(dbgs() << "ASAN done instrumenting: " << FunctionModified << " " << F << "\n"); @@ -3999,32 +4000,39 @@ void FunctionStackPoisoner::processStaticAllocas() { AI->replaceAllUsesWith(NewAllocaPtr); } + auto TargetTriple = Triple(F.getParent()->getTargetTriple()); + // The left-most redzone has enough space for at least 4 pointers. - // Write the Magic value to redzone[0]. Value *BasePlus0 = IRB.CreateIntToPtr(LocalStackBase, IntptrPtrTy); - IRB.CreateStore(ConstantInt::get(IntptrTy, kCurrentStackFrameMagic), - BasePlus0); - // Write the frame description constant to redzone[1]. - Value *BasePlus1 = IRB.CreateIntToPtr( - IRB.CreateAdd(LocalStackBase, - ConstantInt::get(IntptrTy, ASan.LongSize / 8)), - IntptrPtrTy); - GlobalVariable *StackDescriptionGlobal = - createPrivateGlobalForString(*F.getParent(), DescriptionString, - /*AllowMerging*/ true, kAsanGenPrefix); - Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy); - IRB.CreateStore(Description, BasePlus1); - // Write the PC to redzone[2]. - Value *BasePlus2 = IRB.CreateIntToPtr( - IRB.CreateAdd(LocalStackBase, - ConstantInt::get(IntptrTy, 2 * ASan.LongSize / 8)), - IntptrPtrTy); - IRB.CreateStore(IRB.CreatePointerCast(&F, IntptrTy), BasePlus2); + // SPIRV doesn't use the following metadata + if (!TargetTriple.isSPIR()) { + // Write the Magic value to redzone[0]. + IRB.CreateStore(ConstantInt::get(IntptrTy, kCurrentStackFrameMagic), + BasePlus0); + // Write the frame description constant to redzone[1]. + Value *BasePlus1 = IRB.CreateIntToPtr( + IRB.CreateAdd(LocalStackBase, + ConstantInt::get(IntptrTy, ASan.LongSize / 8)), + IntptrPtrTy); + GlobalVariable *StackDescriptionGlobal = + createPrivateGlobalForString(*F.getParent(), DescriptionString, + /*AllowMerging*/ true, kAsanGenPrefix); + Value *Description = + IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy); + IRB.CreateStore(Description, BasePlus1); + // Write the PC to redzone[2]. + Value *BasePlus2 = IRB.CreateIntToPtr( + IRB.CreateAdd(LocalStackBase, + ConstantInt::get(IntptrTy, 2 * ASan.LongSize / 8)), + IntptrPtrTy); + IRB.CreateStore(IRB.CreatePointerCast(&F, IntptrTy), BasePlus2); + } const auto &ShadowAfterScope = GetShadowBytesAfterScope(SVD, L); // Poison the stack red zones at the entry. - Value *ShadowBase = ASan.memToShadow(LocalStackBase, IRB); + Value *ShadowBase = + ASan.memToShadow(LocalStackBase, IRB, kSpirOffloadPrivateAS); // As mask we must use most poisoned case: red zones and after scope. // As bytes we can use either the same or just red zones only. copyToShadow(ShadowAfterScope, ShadowAfterScope, IRB, ShadowBase); diff --git a/llvm/test/Instrumentation/AddressSanitizer/SPIRV/instrument_private_address_space.ll b/llvm/test/Instrumentation/AddressSanitizer/SPIRV/instrument_private_address_space.ll new file mode 100644 index 0000000000000..bf412b8225b79 --- /dev/null +++ b/llvm/test/Instrumentation/AddressSanitizer/SPIRV/instrument_private_address_space.ll @@ -0,0 +1,32 @@ +; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -asan-stack=0 -asan-globals=0 -asan-constructor-kind=none -asan-spir-privates=1 -asan-use-after-return=never -S | FileCheck %s + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +%"class.sycl::_V1::range" = type { %"class.sycl::_V1::detail::array" } +%"class.sycl::_V1::detail::array" = type { [1 x i64] } +%"class.sycl::_V1::id" = type { %"class.sycl::_V1::detail::array" } + +@__const._ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv.p = private unnamed_addr addrspace(1) constant [4 x i32] [i32 1, i32 2, i32 3, i32 4], align 4 + +define spir_func i32 @_Z3fooPii(ptr addrspace(4) %p) { +entry: + %arrayidx = getelementptr inbounds i32, ptr addrspace(4) %p, i64 0 + %0 = load i32, ptr addrspace(4) %arrayidx, align 4 + ret i32 %0 +} + +define spir_kernel void @kernel() #0 { +; CHECK-LABEL: define spir_kernel void @kernel +entry: + %p.i = alloca [4 x i32], align 4 + ; CHECK: %shadow_ptr = call i64 @__asan_mem_to_shadow(i64 %0, i32 0) + call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %p.i) + call void @llvm.memcpy.p0.p1.i64(ptr align 4 %p.i, ptr addrspace(1) align 4 @__const._ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv.p, i64 16, i1 false) + %arraydecay.i = getelementptr inbounds [4 x i32], ptr %p.i, i64 0, i64 0 + %0 = addrspacecast ptr %arraydecay.i to ptr addrspace(4) + %call.i = call spir_func i32 @_Z3fooPii(ptr addrspace(4) %0) + ret void +} + +attributes #0 = { mustprogress norecurse nounwind sanitize_address uwtable } diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index bb36a41b78ed6..0cbbcf4a574be 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -100,13 +100,11 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) endfunction() set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") - # commit 396fb20498c315a526c961d7cb645b42795acd2c - # Merge: 719bb9cd e2ffea69 - # Author: Kenneth Benzie (Benie) - # Date: Thu May 23 10:53:03 2024 +0100 - # Merge pull request #1501 from RossBrunton/ross/kerneltests - # [Testing] Spec clarifications and testing updates for kernel - set(UNIFIED_RUNTIME_TAG 764b75c9087930799963a30be726ac76fcf1ac11) + # commit 58ca3a34dea0f559b8d950bdfe7d5d8a610d3a94 + # Author: Yang Zhao + # Date: Thu Jun 27 20:26:17 2024 +0800 + # [DeviceSanitizer] Support out-of-bounds on private memory (#1676) + set(UNIFIED_RUNTIME_TAG 58ca3a34dea0f559b8d950bdfe7d5d8a610d3a94) fetch_adapter_source(level_zero ${UNIFIED_RUNTIME_REPO} diff --git a/sycl/test-e2e/AddressSanitizer/common/kernel-debug.cpp b/sycl/test-e2e/AddressSanitizer/common/kernel-debug.cpp index b4ae8b2b30e12..3783de97bd6c1 100644 --- a/sycl/test-e2e/AddressSanitizer/common/kernel-debug.cpp +++ b/sycl/test-e2e/AddressSanitizer/common/kernel-debug.cpp @@ -4,12 +4,22 @@ // RUN: env SYCL_PREFER_UR=1 UR_LAYER_ASAN_OPTIONS=debug:0 %{run} %t 2>&1 | FileCheck %s #include +/// This test is used to check enabling/disabling kernel debug message +/// We always use "[kernel]" prefix in kernel debug message + +constexpr std::size_t N = 4; +constexpr std::size_t group_size = 1; + int main() { sycl::queue Q; - int *array = sycl::malloc_device(1, Q); + int *array = sycl::malloc_device(N, Q); - Q.submit([&](sycl::handler &h) { - h.single_task([=]() { *array = 0; }); + Q.submit([&](sycl::handler &cgh) { + auto acc = sycl::local_accessor(group_size, cgh); + cgh.parallel_for( + sycl::nd_range<1>(N, group_size), [=](sycl::nd_item<1> item) { + array[item.get_global_id()] = acc[item.get_local_id()]; + }); }); Q.wait(); // CHECK-DEBUG: [kernel] diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_no_local_size.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_no_local_size.cpp new file mode 100644 index 0000000000000..2e10143fdad8c --- /dev/null +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_no_local_size.cpp @@ -0,0 +1,41 @@ +// REQUIRES: linux, cpu +// RUN: %{build} %device_asan_flags -DMALLOC_DEVICE -O0 -g -o %t +// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s +// RUN: %{build} %device_asan_flags -DMALLOC_DEVICE -O1 -g -o %t +// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s +// RUN: %{build} %device_asan_flags -DMALLOC_DEVICE -O2 -g -o %t +// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s +// RUN: %{build} %device_asan_flags -DMALLOC_HOST -O2 -g -o %t +// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-HOST %s +// RUN: %{build} %device_asan_flags -DMALLOC_SHARED -O2 -g -o %t +// RUN: env SYCL_PREFER_UR=1 %{run} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-SHARED --input-file %t.txt %s + +#include + +#include + +int main() { + sycl::queue Q; + constexpr std::size_t N = 12345; +#if defined(MALLOC_HOST) + auto *array = sycl::malloc_host(N, Q); +#elif defined(MALLOC_SHARED) + auto *array = sycl::malloc_shared(N, Q); +#else // defined(MALLOC_DEVICE) + auto *array = sycl::malloc_device(N, Q); +#endif + + Q.submit([&](sycl::handler &h) { + h.parallel_for(sycl::range<1>(N + 1), + [=](sycl::id<1> i) { ++array[i]; }); + }); + Q.wait(); + // CHECK-DEVICE: ERROR: DeviceSanitizer: out-of-bounds-access on Device USM + // CHECK-HOST: ERROR: DeviceSanitizer: out-of-bounds-access on Host USM + // CHECK-SHARED: ERROR: DeviceSanitizer: out-of-bounds-access on Shared USM + // CHECK: READ of size 1 at kernel {{<.*MyKernel.*>}} LID({{.*}}, 0, 0) GID(12345, 0, 0) + // CHECK: {{ #0 .* .*parallel_no_local_size.cpp:}}[[@LINE-7]] + + sycl::free(array, Q); + return 0; +} diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/private/multiple_private.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/private/multiple_private.cpp new file mode 100644 index 0000000000000..1c8ba6915abc8 --- /dev/null +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/private/multiple_private.cpp @@ -0,0 +1,77 @@ +// REQUIRES: linux, cpu +// RUN: %{build} %device_asan_flags -DVAR=1 -O2 -g -o %t1 +// RUN: env SYCL_PREFER_UR=1 %{run} not %t1 2>&1 | FileCheck --check-prefixes CHECK,CHECK-VAR1 %s +// RUN: %{build} %device_asan_flags -DVAR=2 -O2 -g -o %t2 +// RUN: env SYCL_PREFER_UR=1 %{run} not %t2 2>&1 | FileCheck --check-prefixes CHECK,CHECK-VAR2 %s +// RUN: %{build} %device_asan_flags -DVAR=3 -O2 -g -o %t3 +// RUN: env SYCL_PREFER_UR=1 %{run} not %t3 2>&1 | FileCheck --check-prefixes CHECK,CHECK-VAR3 %s +// RUN: %{build} %device_asan_flags -DVAR=4 -O2 -g -o %t4 +// RUN: env SYCL_PREFER_UR=1 %{run} not %t4 2>&1 | FileCheck --check-prefixes CHECK,CHECK-VAR4 %s +// RUN: %{build} %device_asan_flags -DVAR=5 -O2 -g -o %t5 +// RUN: env SYCL_PREFER_UR=1 %{run} not %t5 2>&1 | FileCheck --check-prefixes CHECK,CHECK-VAR5 %s +// RUN: %{build} %device_asan_flags -DVAR=6 -O2 -g -o %t6 +// RUN: env SYCL_PREFER_UR=1 %{run} not %t6 2>&1 | FileCheck --check-prefixes CHECK,CHECK-VAR6 %s + +#include +#include + +// CHECK: ERROR: DeviceSanitizer: out-of-bounds-access on Private Memory +template __attribute__((noinline)) T foo(T *p) { return *p; } +template __attribute__((noinline)) T foo1(T *p) { return *p; } +// CHECK-VAR1: READ of size 2 at kernel {{<.*MyKernel>}} LID(0, 0, 0) GID(0, 0, 0) +// CHECK-VAR1: #0 {{.*}} {{.*multiple_private.cpp}}:[[@LINE-2]] +template __attribute__((noinline)) T foo2(T *p) { return *p; } +// CHECK-VAR2: READ of size 2 at kernel {{<.*MyKernel>}} LID(0, 0, 0) GID(0, 0, 0) +// CHECK-VAR2: #0 {{.*}} {{.*multiple_private.cpp}}:[[@LINE-2]] +template __attribute__((noinline)) T foo3(T *p) { return *p; } +// CHECK-VAR3: READ of size 4 at kernel {{<.*MyKernel>}} LID(0, 0, 0) GID(0, 0, 0) +// CHECK-VAR3: #0 {{.*}} {{.*multiple_private.cpp}}:[[@LINE-2]] +template __attribute__((noinline)) T foo4(T *p) { return *p; } +// CHECK-VAR4: READ of size 4 at kernel {{<.*MyKernel>}} LID(0, 0, 0) GID(0, 0, 0) +// CHECK-VAR4: #0 {{.*}} {{.*multiple_private.cpp}}:[[@LINE-2]] +template __attribute__((noinline)) T foo5(T *p) { return *p; } +// CHECK-VAR5: READ of size 8 at kernel {{<.*MyKernel>}} LID(0, 0, 0) GID(0, 0, 0) +// CHECK-VAR5: #0 {{.*}} {{.*multiple_private.cpp}}:[[@LINE-2]] +template __attribute__((noinline)) T foo6(T *p) { return *p; } +// CHECK-VAR6: READ of size 1 at kernel {{<.*MyKernel>}} LID(0, 0, 0) GID(0, 0, 0) +// CHECK-VAR6: #0 {{.*}} {{.*multiple_private.cpp}}:[[@LINE-2]] + +int main() { + sycl::queue Q; + auto *array = sycl::malloc_device(5, Q); + + Q.submit([&](sycl::handler &h) { + h.single_task([=]() { + short p1[] = {1}; + int p2[] = {1}; + int p3[10] = {8, 1, 10, 1, 0, 10}; + long p4[] = {5111LL}; + char p5[] = {'c'}; + + array[0] = foo(&p1[0]); + array[1] = foo(&p2[0]); + for (int i = 0; i < 10; ++i) + array[2] += foo(&p3[i]); + array[3] = foo(&p4[0]); + array[4] = foo(&p5[0]); + +#if VAR == 1 + array[0] = foo1(&p1[-4]); +#elif VAR == 2 + array[0] = foo2(&p1[4]); +#elif VAR == 3 + array[0] = foo3(&p2[1]); +#elif VAR == 4 + array[0] = foo4(&p3[10]); +#elif VAR == 5 + array[0] = foo5(&p4[1]); +#else + array[0] = foo6(&p5[1]); +#endif + }); + }); + Q.wait(); + sycl::free(array, Q); + + return 0; +} diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/private/single_private.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/private/single_private.cpp new file mode 100644 index 0000000000000..50f1f763233d6 --- /dev/null +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/private/single_private.cpp @@ -0,0 +1,32 @@ +// REQUIRES: linux, cpu +// RUN: %{build} %device_asan_flags -O0 -g -o %t +// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck %s +// RUN: %{build} %device_asan_flags -O1 -g -o %t +// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck %s +// RUN: %{build} %device_asan_flags -O2 -g -o %t +// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck %s + +#include +#include + +__attribute__((noinline)) int foo(int p[], int i) { return p[i]; } +// CHECK: ERROR: DeviceSanitizer: out-of-bounds-access on Private Memory +// CHECK: READ of size 4 at kernel {{<.*MyKernel>}} LID(0, 0, 0) GID({{.*}}, 0, 0) +// CHECK: #0 {{.*}} {{.*single_private.cpp}}:[[@LINE-3]] + +int main() { + sycl::queue Q; + auto *array = sycl::malloc_device(1, Q); + + Q.submit([&](sycl::handler &h) { + h.single_task([=]() { + int p[] = {1, 2, 3, 4}; + for (int i = 0; i < 5; ++i) + array[0] = foo(p, i); + }); + }); + Q.wait(); + sycl::free(array, Q); + + return 0; +} From c9842c1a78bb85149d96654da6e1ae85c2e86624 Mon Sep 17 00:00:00 2001 From: aelovikov-intel Date: Fri, 28 Jun 2024 07:40:32 -0700 Subject: [PATCH 3/3] [SYCL] Remove deprecated shuffles from the sub-group class (#13666) Re-lands https://github.com/intel/llvm/pull/13236 and reverts https://github.com/intel/llvm/pull/13463. --- sycl/include/sycl/sub_group.hpp | 58 ----- sycl/test-e2e/SubGroup/generic-shuffle.cpp | 239 --------------------- sycl/test-e2e/SubGroup/shuffle.cpp | 54 ----- sycl/test-e2e/SubGroup/shuffle_fp16.cpp | 26 --- sycl/test-e2e/SubGroup/shuffle_fp64.cpp | 25 --- 5 files changed, 402 deletions(-) delete mode 100644 sycl/test-e2e/SubGroup/generic-shuffle.cpp delete mode 100644 sycl/test-e2e/SubGroup/shuffle.cpp delete mode 100644 sycl/test-e2e/SubGroup/shuffle_fp16.cpp delete mode 100644 sycl/test-e2e/SubGroup/shuffle_fp64.cpp diff --git a/sycl/include/sycl/sub_group.hpp b/sycl/include/sycl/sub_group.hpp index c405f436a9fe4..f80b0876a65a3 100644 --- a/sycl/include/sycl/sub_group.hpp +++ b/sycl/include/sycl/sub_group.hpp @@ -209,64 +209,6 @@ struct sub_group { #endif } - template - using EnableIfIsScalarArithmetic = - std::enable_if_t::value, T>; - - /* --- one-input shuffles --- */ - /* indices in [0 , sub_group size) */ - template - __SYCL_DEPRECATED("Shuffles in the sub-group class are deprecated.") - T shuffle(T x, id_type local_id) const { -#ifdef __SYCL_DEVICE_ONLY__ - return sycl::detail::spirv::Shuffle(*this, x, local_id); -#else - (void)x; - (void)local_id; - throw sycl::exception(make_error_code(errc::feature_not_supported), - "Sub-groups are not supported on host."); -#endif - } - - template - __SYCL_DEPRECATED("Shuffles in the sub-group class are deprecated.") - T shuffle_down(T x, uint32_t delta) const { -#ifdef __SYCL_DEVICE_ONLY__ - return sycl::detail::spirv::ShuffleDown(*this, x, delta); -#else - (void)x; - (void)delta; - throw sycl::exception(make_error_code(errc::feature_not_supported), - "Sub-groups are not supported on host."); -#endif - } - - template - __SYCL_DEPRECATED("Shuffles in the sub-group class are deprecated.") - T shuffle_up(T x, uint32_t delta) const { -#ifdef __SYCL_DEVICE_ONLY__ - return sycl::detail::spirv::ShuffleUp(*this, x, delta); -#else - (void)x; - (void)delta; - throw sycl::exception(make_error_code(errc::feature_not_supported), - "Sub-groups are not supported on host."); -#endif - } - - template - __SYCL_DEPRECATED("Shuffles in the sub-group class are deprecated.") - T shuffle_xor(T x, id_type value) const { -#ifdef __SYCL_DEVICE_ONLY__ - return sycl::detail::spirv::ShuffleXor(*this, x, value); -#else - (void)x; - (void)value; - throw sycl::exception(make_error_code(errc::feature_not_supported), - "Sub-groups are not supported on host."); -#endif - } - /* --- sub_group load/stores --- */ /* these can map to SIMD or block read/write hardware where available */ #ifdef __SYCL_DEVICE_ONLY__ diff --git a/sycl/test-e2e/SubGroup/generic-shuffle.cpp b/sycl/test-e2e/SubGroup/generic-shuffle.cpp deleted file mode 100644 index cb9f1a720b1b7..0000000000000 --- a/sycl/test-e2e/SubGroup/generic-shuffle.cpp +++ /dev/null @@ -1,239 +0,0 @@ -// RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out -// RUN: %{run} %t.out -// -//==-- generic_shuffle.cpp - SYCL sub_group generic shuffle test *- C++ -*--==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "helper.hpp" -#include -#include -#include -template class pointer_kernel; - -using namespace sycl; - -template -void check_pointer(queue &Queue, size_t G = 256, size_t L = 64) { - try { - nd_range<1> NdRange(G, L); - buffer buf(G); - buffer buf_up(G); - buffer buf_down(G); - buffer buf_xor(G); - buffer sgsizebuf(1); - Queue.submit([&](handler &cgh) { - auto acc = buf.template get_access(cgh); - auto acc_up = buf_up.template get_access(cgh); - auto acc_down = - buf_down.template get_access(cgh); - auto acc_xor = buf_xor.template get_access(cgh); - auto sgsizeacc = sgsizebuf.get_access(cgh); - - cgh.parallel_for( - NdRange, [=](nd_item<1> NdItem) { - sycl::sub_group SG = NdItem.get_sub_group(); - uint32_t wggid = NdItem.get_global_id(0); - uint32_t sgid = SG.get_group_id().get(0); - if (wggid == 0) - sgsizeacc[0] = SG.get_max_local_range()[0]; - - T *ptr = static_cast(0x0) + wggid; - - /*GID of middle element in every subgroup*/ - acc[NdItem.get_global_id()] = - SG.shuffle(ptr, SG.get_max_local_range()[0] / 2); - - /* Save GID-SGID */ - acc_up[NdItem.get_global_id()] = SG.shuffle_up(ptr, sgid); - - /* Save GID+SGID */ - acc_down[NdItem.get_global_id()] = SG.shuffle_down(ptr, sgid); - - /* Save GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */ - acc_xor[NdItem.get_global_id()] = - SG.shuffle_xor(ptr, sgid % SG.get_max_local_range()[0]); - }); - }); - host_accessor acc(buf); - host_accessor acc_up(buf_up); - host_accessor acc_down(buf_down); - host_accessor acc_xor(buf_xor); - host_accessor sgsizeacc(sgsizebuf); - - size_t sg_size = sgsizeacc[0]; - int SGid = 0; - int SGLid = 0; - int SGBeginGid = 0; - for (int j = 0; j < G; j++) { - if (j % L % sg_size == 0) { - SGid++; - SGLid = 0; - SGBeginGid = j; - } - if (j % L == 0) { - SGid = 0; - SGLid = 0; - SGBeginGid = j; - } - - /*GID of middle element in every subgroup*/ - exit_if_not_equal(acc[j], - static_cast(0x0) + - (j / L * L + SGid * sg_size + sg_size / 2), - "shuffle"); - - /* Value GID+SGID for all element except last SGID in SG*/ - if (j % L % sg_size + SGid < sg_size && j % L + SGid < L) { - exit_if_not_equal(acc_down[j], static_cast(0x0) + (j + SGid), - "shuffle_down"); - } - - /* Value GID-SGID for all element except first SGID in SG*/ - if (j % L % sg_size >= SGid) { - exit_if_not_equal(acc_up[j], static_cast(0x0) + (j - SGid), - "shuffle_up"); - } - - /* Value GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */ - exit_if_not_equal(acc_xor[j], - static_cast(0x0) + - (SGBeginGid + (SGLid ^ (SGid % sg_size))), - "shuffle_xor"); - SGLid++; - } - } catch (exception e) { - std::cout << "SYCL exception caught: " << e.what(); - exit(1); - } -} - -template -void check_struct(queue &Queue, Generator &Gen, size_t G = 256, size_t L = 64) { - - // Fill a vector with values that will be shuffled - std::vector values(G); - std::generate(values.begin(), values.end(), Gen); - - try { - nd_range<1> NdRange(G, L); - buffer buf(G); - buffer buf_up(G); - buffer buf_down(G); - buffer buf_xor(G); - buffer sgsizebuf(1); - buffer buf_in(values.data(), values.size()); - Queue.submit([&](handler &cgh) { - auto acc = buf.template get_access(cgh); - auto acc_up = buf_up.template get_access(cgh); - auto acc_down = - buf_down.template get_access(cgh); - auto acc_xor = buf_xor.template get_access(cgh); - auto sgsizeacc = sgsizebuf.get_access(cgh); - auto in = buf_in.template get_access(cgh); - - cgh.parallel_for( - NdRange, [=](nd_item<1> NdItem) { - sycl::sub_group SG = NdItem.get_sub_group(); - uint32_t wggid = NdItem.get_global_id(0); - uint32_t sgid = SG.get_group_id().get(0); - if (wggid == 0) - sgsizeacc[0] = SG.get_max_local_range()[0]; - - T val = in[wggid]; - - /*GID of middle element in every subgroup*/ - acc[NdItem.get_global_id()] = - SG.shuffle(val, SG.get_max_local_range()[0] / 2); - - /* Save GID-SGID */ - acc_up[NdItem.get_global_id()] = SG.shuffle_up(val, sgid); - - /* Save GID+SGID */ - acc_down[NdItem.get_global_id()] = SG.shuffle_down(val, sgid); - - /* Save GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */ - acc_xor[NdItem.get_global_id()] = - SG.shuffle_xor(val, sgid % SG.get_max_local_range()[0]); - }); - }); - host_accessor acc(buf); - host_accessor acc_up(buf_up); - host_accessor acc_down(buf_down); - host_accessor acc_xor(buf_xor); - host_accessor sgsizeacc(sgsizebuf); - - size_t sg_size = sgsizeacc[0]; - int SGid = 0; - int SGLid = 0; - int SGBeginGid = 0; - for (int j = 0; j < G; j++) { - if (j % L % sg_size == 0) { - SGid++; - SGLid = 0; - SGBeginGid = j; - } - if (j % L == 0) { - SGid = 0; - SGLid = 0; - SGBeginGid = j; - } - - /*GID of middle element in every subgroup*/ - exit_if_not_equal( - acc[j], values[j / L * L + SGid * sg_size + sg_size / 2], "shuffle"); - - /* Value GID+SGID for all element except last SGID in SG*/ - if (j % L % sg_size + SGid < sg_size && j % L + SGid < L) { - exit_if_not_equal(acc_down[j], values[j + SGid], "shuffle_down"); - } - - /* Value GID-SGID for all element except first SGID in SG*/ - if (j % L % sg_size >= SGid) { - exit_if_not_equal(acc_up[j], values[j - SGid], "shuffle_up"); - } - - /* Value GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */ - exit_if_not_equal(acc_xor[j], - values[SGBeginGid + (SGLid ^ (SGid % sg_size))], - "shuffle_xor"); - SGLid++; - } - } catch (exception e) { - std::cout << "SYCL exception caught: " << e.what(); - exit(1); - } -} - -int main() { - queue Queue; - - // Test shuffle of pointer types - check_pointer(Queue); - - // Test shuffle of non-native types - auto ComplexFloatGenerator = [state = std::complex(0, 1)]() mutable { - return state += std::complex(2, 2); - }; - check_struct>( - Queue, ComplexFloatGenerator); - - if (Queue.get_device().has(sycl::aspect::fp64)) { - auto ComplexDoubleGenerator = [state = - std::complex(0, 1)]() mutable { - return state += std::complex(2, 2); - }; - check_struct>( - Queue, ComplexDoubleGenerator); - } else { - std::cout << "fp64 tests were skipped due to the device not supporting the " - "aspect."; - } - - std::cout << "Test passed." << std::endl; - return 0; -} diff --git a/sycl/test-e2e/SubGroup/shuffle.cpp b/sycl/test-e2e/SubGroup/shuffle.cpp deleted file mode 100644 index 2baf624ce804c..0000000000000 --- a/sycl/test-e2e/SubGroup/shuffle.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out - -//==------------ shuffle.cpp - SYCL sub_group shuffle test -----*- C++ -*---==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "shuffle.hpp" -#include - -int main() { - queue Queue; - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - - // Check long long and unsigned long long because they differ from - // long and unsigned long according to C++ rules even if they have the same - // size at some system. - check(Queue); - check(Queue); - check(Queue); - check(Queue); - std::cout << "Test passed." << std::endl; - return 0; -} diff --git a/sycl/test-e2e/SubGroup/shuffle_fp16.cpp b/sycl/test-e2e/SubGroup/shuffle_fp16.cpp deleted file mode 100644 index b2fea85d0126f..0000000000000 --- a/sycl/test-e2e/SubGroup/shuffle_fp16.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// REQUIRES: aspect-fp16 -// REQUIRES: gpu - -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out - -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "shuffle.hpp" -#include - -int main() { - queue Queue; - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - std::cout << "Test passed." << std::endl; - return 0; -} diff --git a/sycl/test-e2e/SubGroup/shuffle_fp64.cpp b/sycl/test-e2e/SubGroup/shuffle_fp64.cpp deleted file mode 100644 index 1440de1d25a8a..0000000000000 --- a/sycl/test-e2e/SubGroup/shuffle_fp64.cpp +++ /dev/null @@ -1,25 +0,0 @@ -// REQUIRES: aspect-fp64 -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out -// -//==------- shuffle_fp64.cpp - SYCL sub_group shuffle test -----*- C++ -*---==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "shuffle.hpp" -#include - -int main() { - queue Queue; - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - std::cout << "Test passed." << std::endl; - return 0; -}