From a2830d6aa2e8ad0da0e58f1642ed09723602e838 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Thu, 15 Aug 2024 02:34:04 -0700 Subject: [PATCH 001/441] Revert "Remove empty line." Accidental commit. This reverts commit 894d3eebe2109f7dec488d3415d5c6236e55a0da. --- clang/lib/Sema/SemaExpr.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index c4aa02ff0c217d2..0f58eb2840211de 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -6593,6 +6593,7 @@ ExprResult Sema::BuildCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc, "should only occur in error-recovery path."); return CallExpr::Create(Context, Fn, ArgExprs, Context.DependentTy, VK_PRValue, RParenLoc, CurFPFeatureOverrides()); + } return BuildResolvedCallExpr(Fn, NDecl, LParenLoc, ArgExprs, RParenLoc, ExecConfig, IsExecConfig); } From ed8cfb651327a00f6ccf2b26890ee921f16f64a2 Mon Sep 17 00:00:00 2001 From: Andrey Timonin <112198242+EtoAndruwa@users.noreply.github.com> Date: Thu, 15 Aug 2024 12:40:55 +0300 Subject: [PATCH 002/441] [NFC][mlir][scf] Fix misspelling of replace (#101683) --- mlir/lib/Dialect/SCF/IR/SCF.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp index 4de8dacc0edbf33..e92d9503372cdf2 100644 --- a/mlir/lib/Dialect/SCF/IR/SCF.cpp +++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp @@ -4322,7 +4322,7 @@ struct FoldConstantCase : OpRewritePattern { rewriter.inlineBlockBefore(&source, op); rewriter.eraseOp(terminator); - // Repalce the operation with a potentially empty list of results. + // Replace the operation with a potentially empty list of results. // Fold mechanism doesn't support the case where the result list is empty. 
rewriter.replaceOp(op, results); From 5e95571a90b1ec193b735b7312c5c2559d7ee5ea Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Thu, 15 Aug 2024 12:42:27 +0300 Subject: [PATCH 003/441] [analyzer] Do not reason about locations passed as inline asm input (#103714) If a pointer is passed as an input operand to inline assembly, it's possible that the asm block will change the memory behind this pointer. So if a pointer is passed into an inline asm block, it's better not to guess and to assume the memory has an unknown state. Without this change, we observed a lot of false positives (FPs) with hand-written `memcpy` and friends. --- clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 8 +++++ clang/test/Analysis/asm.cpp | 32 +++++++++++++++++++- 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 686310d38ebd58f..493914f88ba9a50 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -3807,6 +3807,14 @@ void ExprEngine::VisitGCCAsmStmt(const GCCAsmStmt *A, ExplodedNode *Pred, state = state->bindLoc(*LV, UnknownVal(), Pred->getLocationContext()); } + // Do not reason about locations passed inside inline assembly. 
+ for (const Expr *I : A->inputs()) { + SVal X = state->getSVal(I, Pred->getLocationContext()); + + if (std::optional LV = X.getAs()) + state = state->bindLoc(*LV, UnknownVal(), Pred->getLocationContext()); + } + Bldr.generateNode(A, Pred, state); } diff --git a/clang/test/Analysis/asm.cpp b/clang/test/Analysis/asm.cpp index 1180063502168f8..3181aea870c8aad 100644 --- a/clang/test/Analysis/asm.cpp +++ b/clang/test/Analysis/asm.cpp @@ -1,4 +1,5 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker debug.ExprInspection -fheinous-gnu-extensions -w %s -verify +// RUN: %clang_analyze_cc1 -triple=x86_64-unknown-unknown \ +// RUN: -analyzer-checker debug.ExprInspection,core -fheinous-gnu-extensions -w %s -verify int clang_analyzer_eval(int); @@ -10,3 +11,32 @@ void testRValueOutput() { clang_analyzer_eval(global == 1); // expected-warning{{UNKNOWN}} clang_analyzer_eval(ref == 1); // expected-warning{{UNKNOWN}} } + +void *MyMemcpy(void *d, const void *s, const int n) { + asm volatile ( + "cld\n rep movsb\n" + :: "S" (s), "D" (d), "c" (n) : "memory" + ); + return d; +} + +void testInlineAsmMemcpy(void) +{ + int a, b = 10, c; + MyMemcpy(&a, &b, sizeof(b)); + c = a; // no-warning +} + +void testInlineAsmMemcpyArray(void) +{ + int a[10], b[10] = {}, c; + MyMemcpy(&a, &b, sizeof(b)); + c = a[8]; // no-warning +} + +void testInlineAsmMemcpyUninit(void) +{ + int a[10], b[10] = {}, c; + MyMemcpy(&a[1], &b[1], sizeof(b) - sizeof(b[1])); + c = a[0]; // expected-warning{{Assigned value is garbage or undefined}} +} From 05f663081513c6293f80469132d083e2603ed036 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 15 Aug 2024 11:37:59 +0200 Subject: [PATCH 004/441] Revert "[Clang] [AST] Fix placeholder return type name mangling for MSVC 1920+ / VS2019+ (#102848)" It caused builds to start failing with Invalid type expected UNREACHABLE executed at clang/lib/AST/MicrosoftMangle.cpp:2551! See comments on the PR. > Partial fix for https://github.com/llvm/llvm-project/issues/92204. 
> This PR just fixes VS2019+ since that is the suite of compilers that I > require link compatibility with at the moment. > I still intend to fix VS2017 and to update llvm-undname in future PRs. > Once those are also finished and merged I'll close out > https://github.com/llvm/llvm-project/issues/92204. > I am hoping to get the llvm-undname PR up in a couple of weeks to be > able to demangle the VS2019+ name mangling. > > MSVC 1920+ mangles placeholder return types for non-templated functions > with "@". > For example `auto foo() { return 0; }` is mangled as `?foo@@YA@XZ`. > > MSVC 1920+ mangles placeholder return types for templated functions as > the qualifiers of the AutoType followed by "_P" for `auto` and "_T" for > `decltype(auto)`. > For example `template auto foo() { return 0; }` is mangled as > `??$foo@H@@YA?A_PXZ` when `foo` is instantiated as follows `foo()`. > > Lambdas with placeholder return types are still mangled with clang's > custom mangling since MSVC lambda mangling hasn't been deciphered yet. > Similarly any pointers in the return type with an address space are > mangled with clang's custom mangling since that is a clang extension. > > We cannot augment `mangleType` to support this mangling scheme as the > mangling schemes for variables and functions differ. > auto variables are encoded with the fully deduced type where auto return > types are not. > The following two functions with a static variable are mangled the same > ``` > template > int test() > { > static int i = 0; // "?i@?1???$test@H@@YAHXZ@4HA" > return i; > } > > template > int test() > { > static auto i = 0; // "?i@?1???$test@H@@YAHXZ@4HA" > return i; > } > ``` > Inside `mangleType` once we get to mangling the `AutoType` we have no > context if we are from a variable encoding or some other encoding. > Therefore it was easier to handle any special casing for `AutoType` > return types with a separate function instead of using the `mangleType` > infrastructure. 
This reverts commit e0d173d44161bf9b68243845666d58999e74f759 and the follow-up fa343be414f9364911b947f109f3df5539e23068. --- clang/docs/ReleaseNotes.rst | 2 - clang/lib/AST/MicrosoftMangle.cpp | 162 +------- .../test/CodeGenCXX/mangle-ms-auto-return.cpp | 369 ------------------ .../mangle-ms-auto-templates-memptrs.cpp | 12 +- .../mangle-ms-auto-templates-nullptr.cpp | 2 +- .../CodeGenCXX/mangle-ms-auto-templates.cpp | 6 +- 6 files changed, 19 insertions(+), 534 deletions(-) delete mode 100644 clang/test/CodeGenCXX/mangle-ms-auto-return.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index b1864901e7bddbc..f5696d6ce15dc7c 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -77,8 +77,6 @@ C++ Specific Potentially Breaking Changes ABI Changes in This Version --------------------------- -- Fixed Microsoft name mangling of placeholder, auto and decltype(auto), return types for MSVC 1920+. This change resolves incompatibilities with code compiled by MSVC 1920+ but will introduce incompatibilities with code compiled by earlier versions of Clang unless such code is built with the compiler option -fms-compatibility-version=19.14 to imitate the MSVC 1914 mangling behavior. 
- AST Dumping Potentially Breaking Changes ---------------------------------------- diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index db8000e25dc7cc6..ed8d1cf1b98dd80 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -408,8 +408,6 @@ class MicrosoftCXXNameMangler { void mangleSourceName(StringRef Name); void mangleNestedName(GlobalDecl GD); - void mangleAutoReturnType(QualType T, QualifierMangleMode QMM); - private: bool isStructorDecl(const NamedDecl *ND) const { return ND == Structor || getStructor(ND) == Structor; @@ -479,11 +477,6 @@ class MicrosoftCXXNameMangler { SourceRange Range); void mangleObjCKindOfType(const ObjCObjectType *T, Qualifiers Quals, SourceRange Range); - - void mangleAutoReturnType(const MemberPointerType *T, Qualifiers Quals); - void mangleAutoReturnType(const PointerType *T, Qualifiers Quals); - void mangleAutoReturnType(const LValueReferenceType *T, Qualifiers Quals); - void mangleAutoReturnType(const RValueReferenceType *T, Qualifiers Quals); }; } @@ -2501,57 +2494,6 @@ void MicrosoftCXXNameMangler::mangleAddressSpaceType(QualType T, mangleArtificialTagType(TagTypeKind::Struct, ASMangling, {"__clang"}); } -void MicrosoftCXXNameMangler::mangleAutoReturnType(QualType T, - QualifierMangleMode QMM) { - assert(getASTContext().getLangOpts().isCompatibleWithMSVC( - LangOptions::MSVC2019) && - "Cannot mangle MSVC 2017 auto return types!"); - - if (isa(T)) { - const auto *AT = T->getContainedAutoType(); - Qualifiers Quals = T.getLocalQualifiers(); - - if (QMM == QMM_Result) - Out << '?'; - if (QMM != QMM_Drop) - mangleQualifiers(Quals, false); - Out << (AT->isDecltypeAuto() ? 
"_T" : "_P"); - return; - } - - T = T.getDesugaredType(getASTContext()); - Qualifiers Quals = T.getLocalQualifiers(); - - switch (QMM) { - case QMM_Drop: - case QMM_Result: - break; - case QMM_Mangle: - mangleQualifiers(Quals, false); - break; - default: - llvm_unreachable("QMM_Escape unexpected"); - } - - const Type *ty = T.getTypePtr(); - switch (ty->getTypeClass()) { - case Type::MemberPointer: - mangleAutoReturnType(cast(ty), Quals); - break; - case Type::Pointer: - mangleAutoReturnType(cast(ty), Quals); - break; - case Type::LValueReference: - mangleAutoReturnType(cast(ty), Quals); - break; - case Type::RValueReference: - mangleAutoReturnType(cast(ty), Quals); - break; - default: - llvm_unreachable("Invalid type expected"); - } -} - void MicrosoftCXXNameMangler::mangleType(QualType T, SourceRange Range, QualifierMangleMode QMM) { // Don't use the canonical types. MSVC includes things like 'const' on @@ -2965,52 +2907,17 @@ void MicrosoftCXXNameMangler::mangleFunctionType(const FunctionType *T, // can differ by their calling convention and are typically deduced. So // we make sure that this type gets mangled properly. 
mangleType(ResultType, Range, QMM_Result); - } else if (IsInLambda) { - if (const auto *AT = ResultType->getContainedAutoType()) { - assert(AT->getKeyword() == AutoTypeKeyword::Auto && - "should only need to mangle auto!"); - (void)AT; - Out << '?'; - mangleQualifiers(ResultType.getLocalQualifiers(), /*IsMember=*/false); - Out << '?'; - mangleSourceName(""); - Out << '@'; - } else { - Out << '@'; - } - } else if (const auto *AT = ResultType->getContainedAutoType()) { + } else if (const auto *AT = dyn_cast_or_null( + ResultType->getContainedAutoType())) { + Out << '?'; + mangleQualifiers(ResultType.getLocalQualifiers(), /*IsMember=*/false); + Out << '?'; assert(AT->getKeyword() != AutoTypeKeyword::GNUAutoType && "shouldn't need to mangle __auto_type!"); - - // If we have any pointer types with the clang address space extension - // then defer to the custom clang mangling to keep backwards - // compatibility. See `mangleType(const PointerType *T, Qualifiers Quals, - // SourceRange Range)` for details. - auto UseClangMangling = [](QualType ResultType) { - QualType T = ResultType; - while (isa(T.getTypePtr())) { - T = T->getPointeeType(); - if (T.getQualifiers().hasAddressSpace()) - return true; - } - return false; - }; - - if (getASTContext().getLangOpts().isCompatibleWithMSVC( - LangOptions::MSVC2019) && - !UseClangMangling(ResultType)) { - if (D && !D->getPrimaryTemplate()) { - Out << '@'; - } else { - mangleAutoReturnType(ResultType, QMM_Result); - } - } else { - Out << '?'; - mangleQualifiers(ResultType.getLocalQualifiers(), /*IsMember=*/false); - Out << '?'; - mangleSourceName(AT->isDecltypeAuto() ? "" : ""); - Out << '@'; - } + mangleSourceName(AT->isDecltypeAuto() ? 
"" : ""); + Out << '@'; + } else if (IsInLambda) { + Out << '@'; } else { if (ResultType->isVoidType()) ResultType = ResultType.getUnqualifiedType(); @@ -4313,57 +4220,6 @@ void MicrosoftMangleContextImpl::mangleStringLiteral(const StringLiteral *SL, Mangler.getStream() << '@'; } -void MicrosoftCXXNameMangler::mangleAutoReturnType(const MemberPointerType *T, - Qualifiers Quals) { - QualType PointeeType = T->getPointeeType(); - manglePointerCVQualifiers(Quals); - manglePointerExtQualifiers(Quals, PointeeType); - if (const FunctionProtoType *FPT = PointeeType->getAs()) { - Out << '8'; - mangleName(T->getClass()->castAs()->getDecl()); - mangleFunctionType(FPT, nullptr, true); - } else { - mangleQualifiers(PointeeType.getQualifiers(), true); - mangleName(T->getClass()->castAs()->getDecl()); - mangleAutoReturnType(PointeeType, QMM_Drop); - } -} - -void MicrosoftCXXNameMangler::mangleAutoReturnType(const PointerType *T, - Qualifiers Quals) { - QualType PointeeType = T->getPointeeType(); - assert(!PointeeType.getQualifiers().hasAddressSpace() && - "Unexpected address space mangling required"); - - manglePointerCVQualifiers(Quals); - manglePointerExtQualifiers(Quals, PointeeType); - - if (const FunctionProtoType *FPT = PointeeType->getAs()) { - Out << '6'; - mangleFunctionType(FPT); - } else { - mangleAutoReturnType(PointeeType, QMM_Mangle); - } -} - -void MicrosoftCXXNameMangler::mangleAutoReturnType(const LValueReferenceType *T, - Qualifiers Quals) { - QualType PointeeType = T->getPointeeType(); - assert(!Quals.hasConst() && !Quals.hasVolatile() && "unexpected qualifier!"); - Out << 'A'; - manglePointerExtQualifiers(Quals, PointeeType); - mangleAutoReturnType(PointeeType, QMM_Mangle); -} - -void MicrosoftCXXNameMangler::mangleAutoReturnType(const RValueReferenceType *T, - Qualifiers Quals) { - QualType PointeeType = T->getPointeeType(); - assert(!Quals.hasConst() && !Quals.hasVolatile() && "unexpected qualifier!"); - Out << "$$Q"; - manglePointerExtQualifiers(Quals, 
PointeeType); - mangleAutoReturnType(PointeeType, QMM_Mangle); -} - MicrosoftMangleContext *MicrosoftMangleContext::create(ASTContext &Context, DiagnosticsEngine &Diags, bool IsAux) { diff --git a/clang/test/CodeGenCXX/mangle-ms-auto-return.cpp b/clang/test/CodeGenCXX/mangle-ms-auto-return.cpp deleted file mode 100644 index 737c9c407f47037..000000000000000 --- a/clang/test/CodeGenCXX/mangle-ms-auto-return.cpp +++ /dev/null @@ -1,369 +0,0 @@ -// RUN: %clang_cc1 -std=c++17 -fms-compatibility-version=19.20 -emit-llvm %s -o - -fms-extensions -fdelayed-template-parsing -triple=x86_64-pc-windows-msvc | FileCheck %s - -struct StructA {}; - -template -auto AutoT() { return T(); } - -template -const auto AutoConstT() { return T(); } - -template -volatile auto AutoVolatileT() { return T(); } - -template -const volatile auto AutoConstVolatileT() { return T(); } - -// The qualifiers of the return type should always be emitted even for void types. -// Void types usually have their qualifers stripped in the mangled name for MSVC ABI. 
-void test_template_auto_void() { - AutoT(); - // CHECK: call {{.*}} @"??$AutoT@X@@YA?A_PXZ" - - AutoT(); - // CHECK: call {{.*}} @"??$AutoT@$$CBX@@YA?A_PXZ" - - AutoT(); - // CHECK: call {{.*}} @"??$AutoT@$$CCX@@YA?A_PXZ" - - AutoT(); - // CHECK: call {{.*}} @"??$AutoT@$$CDX@@YA?A_PXZ" - - AutoConstT(); - // CHECK: call {{.*}} @"??$AutoConstT@X@@YA?B_PXZ" - - AutoVolatileT(); - // CHECK: call {{.*}} @"??$AutoVolatileT@X@@YA?C_PXZ" - - AutoConstVolatileT(); - // CHECK: call {{.*}} @"??$AutoConstVolatileT@X@@YA?D_PXZ" -} - -void test_template_auto_int() { - AutoT(); - // CHECK: call {{.*}} @"??$AutoT@H@@YA?A_PXZ" - - AutoT(); - // CHECK: call {{.*}} @"??$AutoT@$$CBH@@YA?A_PXZ" - - AutoT(); - // CHECK: call {{.*}} @"??$AutoT@$$CCH@@YA?A_PXZ" - - AutoT(); - // CHECK: call {{.*}} @"??$AutoT@$$CDH@@YA?A_PXZ" - - AutoConstT(); - // CHECK: call {{.*}} @"??$AutoConstT@H@@YA?B_PXZ" - - AutoVolatileT(); - // CHECK: call {{.*}} @"??$AutoVolatileT@H@@YA?C_PXZ" - - AutoConstVolatileT(); - // CHECK: call {{.*}} @"??$AutoConstVolatileT@H@@YA?D_PXZ" -} - -void test_template_auto_struct() { - AutoT(); - // CHECK: call {{.*}} @"??$AutoT@UStructA@@@@YA?A_PXZ" - - AutoT(); - // CHECK: call {{.*}} @"??$AutoT@$$CBUStructA@@@@YA?A_PXZ" - - AutoConstT(); - // CHECK: call {{.*}} @"??$AutoConstT@UStructA@@@@YA?B_PXZ" - - AutoVolatileT(); - // CHECK: call {{.*}} @"??$AutoVolatileT@UStructA@@@@YA?C_PXZ" - - AutoConstVolatileT(); - // CHECK: call {{.*}} @"??$AutoConstVolatileT@UStructA@@@@YA?D_PXZ" -} - -void test_template_auto_ptr() { - AutoT(); - // CHECK: call {{.*}} @"??$AutoT@PEAH@@YA?A_PXZ" - - AutoT(); - // CHECK: call {{.*}} @"??$AutoT@PEBH@@YA?A_PXZ" - - AutoT(); - // CHECK: call {{.*}} @"??$AutoT@QEBH@@YA?A_PXZ" - - AutoConstT(); - // CHECK: call {{.*}} @"??$AutoConstT@PEAH@@YA?B_PXZ" - - AutoVolatileT(); - // CHECK: call {{.*}} @"??$AutoVolatileT@PEAH@@YA?C_PXZ" - - AutoConstVolatileT(); - // CHECK: call {{.*}} @"??$AutoConstVolatileT@PEAH@@YA?D_PXZ" -} - -template -auto* PtrAutoT() 
{ return T(); } - -template -const auto* PtrAutoConstT() { return T(); } - -template -volatile auto* PtrAutoVolatileT() { return T(); } - -template -const volatile auto* PtrAutoConstVolatileT() { return T(); } - -void test_template_ptr_auto() { - PtrAutoT(); - // CHECK: call {{.*}} @"??$PtrAutoT@PEAH@@YAPEA_PXZ" - - PtrAutoT(); - // CHECK: call {{.*}} @"??$PtrAutoT@PEBH@@YAPEA_PXZ" - - PtrAutoT(); - // CHECK: call {{.*}} @"??$PtrAutoT@QEBH@@YAPEA_PXZ" - - PtrAutoConstT(); - // CHECK: call {{.*}} @"??$PtrAutoConstT@PEAH@@YAPEB_PXZ" - - PtrAutoVolatileT(); - // CHECK: call {{.*}} @"??$PtrAutoVolatileT@PEAH@@YAPEC_PXZ" - - PtrAutoConstVolatileT(); - // CHECK: call {{.*}} @"??$PtrAutoConstVolatileT@PEAH@@YAPED_PXZ" -} - -int func_int(); -const int func_constint(); -void func_void(); -int* func_intptr(); - -template -auto (*FuncPtrAutoT())() { return v; } - -void test_template_func_ptr_auto() { - FuncPtrAutoT(); - // CHECK: call {{.*}} @"??$FuncPtrAutoT@P6AHXZ$1?func_int@@YAHXZ@@YAP6A?A_PXZXZ" - - FuncPtrAutoT(); - // CHECK: call {{.*}} @"??$FuncPtrAutoT@P6A?BHXZ$1?func_constint@@YA?BHXZ@@YAP6A?A_PXZXZ" - - FuncPtrAutoT(); - // CHECK: call {{.*}} @"??$FuncPtrAutoT@P6AXXZ$1?func_void@@YAXXZ@@YAP6A?A_PXZXZ" - - FuncPtrAutoT(); - // CHECK: call {{.*}} @"??$FuncPtrAutoT@P6APEAHXZ$1?func_intptr@@YAPEAHXZ@@YAP6A?A_PXZXZ" -} - -template -auto& RefAutoT(T& x) { return x; } - -template -const auto& ConstRefAutoT(T& x) { return x; } - -template -auto&& RRefAutoT(T& x) { return static_cast(x); } - -void test_template_ref_auto() { - int x; - - RefAutoT(x); - // CHECK: call {{.*}} @"??$RefAutoT@H@@YAAEA_PAEAH@Z" - - ConstRefAutoT(x); - // CHECK: call {{.*}} @"??$ConstRefAutoT@H@@YAAEB_PAEAH@Z" - - RRefAutoT(x); - // CHECK: call {{.*}} @"??$RRefAutoT@H@@YA$$QEA_PAEAH@Z" -} - -template -decltype(auto) DecltypeAutoT() { return T(); } - -template -decltype(auto) DecltypeAutoT2(T& x) { return static_cast(x); } - -void test_template_decltypeauto() { - DecltypeAutoT(); - // CHECK: call 
{{.*}} @"??$DecltypeAutoT@X@@YA?A_TXZ" - - DecltypeAutoT(); - // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CBX@@YA?A_TXZ" - - DecltypeAutoT(); - // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CCX@@YA?A_TXZ" - - DecltypeAutoT(); - // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CDX@@YA?A_TXZ" - - DecltypeAutoT(); - // CHECK: call {{.*}} @"??$DecltypeAutoT@H@@YA?A_TXZ" - - DecltypeAutoT(); - // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CBH@@YA?A_TXZ" - - DecltypeAutoT(); - // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CCH@@YA?A_TXZ" - - DecltypeAutoT(); - // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CDH@@YA?A_TXZ" - - int x; - - DecltypeAutoT2(x); - // CHECK: call {{.*}} @"??$DecltypeAutoT2@H@@YA?A_TAEAH@Z" -} - -// Still want to use clang's custom mangling for lambdas to keep backwards compatibility until -// MSVC lambda name mangling has been deciphered. -void test_lambda() { - auto lambdaIntRetAuto = []() { return 0; }; - lambdaIntRetAuto(); - // CHECK: call {{.*}} @"??R@?0??test_lambda@@YAXXZ@QEBA?A?@@XZ" - - auto lambdaIntRet = []() -> int { return 0; }; - lambdaIntRet(); - // CHECK: call {{.*}} @"??R@?0??test_lambda@@YAXXZ@QEBA@XZ" - - auto lambdaGenericIntIntRetAuto = [](auto a) { return a; }; - lambdaGenericIntIntRetAuto(0); - // CHECK: call {{.*}} @"??$?RH@@?0??test_lambda@@YAXXZ@QEBA?A?@@H@Z" -} - -auto TestTrailingInt() -> int { - return 0; -} - -auto TestTrailingConstVolatileVoid() -> const volatile void { -} - -auto TestTrailingStructA() -> StructA { - return StructA{}; -} - -void test_trailing_return() { - TestTrailingInt(); - // CHECK: call {{.*}} @"?TestTrailingInt@@YAHXZ" - - TestTrailingConstVolatileVoid(); - // CHECK: call {{.*}} @"?TestTrailingConstVolatileVoid@@YAXXZ" - - TestTrailingStructA(); - // CHECK: call {{.*}} @"?TestTrailingStructA@@YA?AUStructA@@XZ" -} - -auto TestNonTemplateAutoInt() { - return 0; -} - -auto TestNonTemplateAutoVoid() { - return; -} - -auto TestNonTemplateAutoStructA() { - return StructA{}; -} - -const auto TestNonTemplateConstAutoInt() 
{ - return 0; -} - -const auto TestNonTemplateConstAutoVoid() { - return; -} - -const auto TestNonTemplateConstAutoStructA() { - return StructA{}; -} - -void test_nontemplate_auto() { - TestNonTemplateAutoInt(); - // CHECK: call {{.*}} @"?TestNonTemplateAutoInt@@YA@XZ" - - TestNonTemplateAutoVoid(); - // CHECK: call {{.*}} @"?TestNonTemplateAutoVoid@@YA@XZ" - - TestNonTemplateAutoStructA(); - // CHECK: call {{.*}} @"?TestNonTemplateAutoStructA@@YA@XZ" - - TestNonTemplateConstAutoInt(); - // CHECK: call {{.*}} @"?TestNonTemplateConstAutoInt@@YA@XZ" - - TestNonTemplateConstAutoVoid(); - // CHECK: call {{.*}} @"?TestNonTemplateConstAutoVoid@@YA@XZ" - - TestNonTemplateConstAutoStructA(); - // CHECK: call {{.*}} @"?TestNonTemplateConstAutoStructA@@YA@XZ" -} - -decltype(auto) TestNonTemplateDecltypeAutoInt() { - return 0; -} - -decltype(auto) TestNonTemplateDecltypeAutoVoid() { - return; -} - -decltype(auto) TestNonTemplateDecltypeAutoStructA() { - return StructA{}; -} - -void test_nontemplate_decltypeauto() { - TestNonTemplateDecltypeAutoInt(); - // CHECK: call {{.*}} @"?TestNonTemplateDecltypeAutoInt@@YA@XZ" - - TestNonTemplateDecltypeAutoVoid(); - // CHECK: call {{.*}} @"?TestNonTemplateDecltypeAutoVoid@@YA@XZ" - - TestNonTemplateDecltypeAutoStructA(); - // CHECK: call {{.*}} @"?TestNonTemplateDecltypeAutoStructA@@YA@XZ" -} - -struct StructB { - int x; -}; - -template -auto StructB::* AutoMemberDataPtrT(T x) { return x; } - -template -const auto StructB::* AutoConstMemberDataPtrT(T x) { return x; } - -void test_template_auto_member_data_ptr() { - AutoMemberDataPtrT(&StructB::x); - // CHECK: call {{.*}} @"??$AutoMemberDataPtrT@PEQStructB@@H@@YAPEQStructB@@_PPEQ0@H@Z" - - AutoConstMemberDataPtrT(&StructB::x); - // CHECK: call {{.*}} @"??$AutoConstMemberDataPtrT@PEQStructB@@H@@YAPERStructB@@_PPEQ0@H@Z" -} - -struct StructC { - void test() {} -}; - -struct StructD { - const int test() { return 0; } -}; - -template -auto (StructC::*AutoMemberFuncPtrT(T x))() { return x; } 
- -template -const auto (StructD::*AutoConstMemberFuncPtrT(T x))() { return x; } - -void test_template_auto_member_func_ptr() { - AutoMemberFuncPtrT(&StructC::test); - // CHECK: call {{.*}} @"??$AutoMemberFuncPtrT@P8StructC@@EAAXXZ@@YAP8StructC@@EAA?A_PXZP80@EAAXXZ@Z" - - AutoConstMemberFuncPtrT(&StructD::test); - // CHECK: call {{.*}} @"??$AutoConstMemberFuncPtrT@P8StructD@@EAA?BHXZ@@YAP8StructD@@EAA?B_PXZP80@EAA?BHXZ@Z" -} - -template -auto * __attribute__((address_space(1))) * AutoPtrAddressSpaceT() { - T * __attribute__((address_space(1))) * p = nullptr; - return p; -} - -void test_template_auto_address_space_ptr() { - AutoPtrAddressSpaceT(); - // CHECK: call {{.*}} @"??$AutoPtrAddressSpaceT@H@@YA?A?@@XZ" -} diff --git a/clang/test/CodeGenCXX/mangle-ms-auto-templates-memptrs.cpp b/clang/test/CodeGenCXX/mangle-ms-auto-templates-memptrs.cpp index b7bc3953f0b4380..360ebdecc5562be 100644 --- a/clang/test/CodeGenCXX/mangle-ms-auto-templates-memptrs.cpp +++ b/clang/test/CodeGenCXX/mangle-ms-auto-templates-memptrs.cpp @@ -34,15 +34,15 @@ void template_mangling() { // BEFORE: call {{.*}} @"??0?$AutoParmTemplate@$I?f@V@@QEAAXXZA@A@@@QEAA@XZ" AutoFunc<&S::f>(); - // AFTER: call {{.*}} @"??$AutoFunc@$MP8S@@EAAXXZ1?f@1@QEAAXXZ@@YA?A_PXZ" + // AFTER: call {{.*}} @"??$AutoFunc@$MP8S@@EAAXXZ1?f@1@QEAAXXZ@@YA?A?@@XZ" // BEFORE: call {{.*}} @"??$AutoFunc@$1?f@S@@QEAAXXZ@@YA?A?@@XZ" AutoFunc<&M::f>(); - // AFTER: call {{.*}} @"??$AutoFunc@$MP8M@@EAAXXZH?f@1@QEAAXXZA@@@YA?A_PXZ" + // AFTER: call {{.*}} @"??$AutoFunc@$MP8M@@EAAXXZH?f@1@QEAAXXZA@@@YA?A?@@XZ" // BEFORE: call {{.*}} @"??$AutoFunc@$H?f@M@@QEAAXXZA@@@YA?A?@@XZ" AutoFunc<&V::f>(); - // AFTER: call {{.*}} @"??$AutoFunc@$MP8V@@EAAXXZI?f@1@QEAAXXZA@A@@@YA?A_PXZ" + // AFTER: call {{.*}} @"??$AutoFunc@$MP8V@@EAAXXZI?f@1@QEAAXXZA@A@@@YA?A?@@XZ" // BEFORE: call {{.*}} @"??$AutoFunc@$I?f@V@@QEAAXXZA@A@@@YA?A?@@XZ" AutoParmTemplate<&S::a> auto_data_single_inheritance; @@ -58,14 +58,14 @@ void template_mangling() { // BEFORE: 
call {{.*}} @"??0?$AutoParmTemplate@$FBA@A@@@QEAA@XZ" AutoFunc<&S::a>(); - // AFTER: call {{.*}} @"??$AutoFunc@$MPEQS@@H07@@YA?A_PXZ" + // AFTER: call {{.*}} @"??$AutoFunc@$MPEQS@@H07@@YA?A?@@XZ" // BEFORE: call {{.*}} @"??$AutoFunc@$07@@YA?A?@@XZ" AutoFunc<&M::a>(); - // AFTER: call {{.*}} @"??$AutoFunc@$MPEQM@@H0M@@@YA?A_PXZ" + // AFTER: call {{.*}} @"??$AutoFunc@$MPEQM@@H0M@@@YA?A?@@XZ" // BEFORE: call {{.*}} @"??$AutoFunc@$0M@@@YA?A?@@XZ" AutoFunc<&V::a>(); - // AFTER: call {{.*}} @"??$AutoFunc@$MPEQV@@HFBA@A@@@YA?A_PXZ" + // AFTER: call {{.*}} @"??$AutoFunc@$MPEQV@@HFBA@A@@@YA?A?@@XZ" // BEFORE: call {{.*}} @"??$AutoFunc@$FBA@A@@@YA?A?@@XZ" } diff --git a/clang/test/CodeGenCXX/mangle-ms-auto-templates-nullptr.cpp b/clang/test/CodeGenCXX/mangle-ms-auto-templates-nullptr.cpp index 251d9219c01ce28..8f98c1e59f73d79 100644 --- a/clang/test/CodeGenCXX/mangle-ms-auto-templates-nullptr.cpp +++ b/clang/test/CodeGenCXX/mangle-ms-auto-templates-nullptr.cpp @@ -19,6 +19,6 @@ void template_mangling() { // BEFORE: call {{.*}} @"??0?$AutoParmTemplate@$0A@@@QEAA@XZ" AutoFunc(); - // AFTER: call {{.*}} @"??$AutoFunc@$M$$T0A@@@YA?A_PXZ" + // AFTER: call {{.*}} @"??$AutoFunc@$M$$T0A@@@YA?A?@@XZ" // BEFORE: call {{.*}} @"??$AutoFunc@$0A@@@YA?A?@@XZ" } diff --git a/clang/test/CodeGenCXX/mangle-ms-auto-templates.cpp b/clang/test/CodeGenCXX/mangle-ms-auto-templates.cpp index effcc31ee311038..ff5395cea75eb77 100644 --- a/clang/test/CodeGenCXX/mangle-ms-auto-templates.cpp +++ b/clang/test/CodeGenCXX/mangle-ms-auto-templates.cpp @@ -26,7 +26,7 @@ int j; void template_mangling() { AutoFunc<1>(); - // AFTER: call {{.*}} @"??$AutoFunc@$MH00@@YA?A_PXZ" + // AFTER: call {{.*}} @"??$AutoFunc@$MH00@@YA?A?@@XZ" // BEFORE: call {{.*}} @"??$AutoFunc@$00@@YA?A?@@XZ" AutoParmTemplate<0> auto_int; // AFTER: call {{.*}} @"??0?$AutoParmTemplate@$MH0A@@@QEAA@XZ" @@ -52,7 +52,7 @@ void template_mangling() { // BEFORE: call {{.*}} @"??0?$AutoParmsTemplate@$00$0HPPPPPPPPPPPPPPP@@@QEAA@XZ" AutoFunc<&i>(); 
- // AFTER: call {{.*}} @"??$AutoFunc@$MPEAH1?i@@3HA@@YA?A_PXZ" + // AFTER: call {{.*}} @"??$AutoFunc@$MPEAH1?i@@3HA@@YA?A?@@XZ" // BEFORE: call {{.*}} @"??$AutoFunc@$1?i@@3HA@@YA?A?@@XZ" AutoParmTemplate<&i> auto_int_ptr; @@ -64,7 +64,7 @@ void template_mangling() { // BEFORE: call {{.*}} @"??0?$AutoParmsTemplate@$1?i@@3HA$1?j@@3HA@@QEAA@XZ" AutoFunc<&Func>(); - // AFTER: call {{.*}} @"??$AutoFunc@$MP6AHXZ1?Func@@YAHXZ@@YA?A_PXZ" + // AFTER: call {{.*}} @"??$AutoFunc@$MP6AHXZ1?Func@@YAHXZ@@YA?A?@@XZ" // BEFORE: call {{.*}} @"??$AutoFunc@$1?Func@@YAHXZ@@YA?A?@@XZ" AutoParmTemplate<&Func> auto_func_ptr; From 36231a5b5525b950daf1b7430859061b31a8e01e Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 15 Aug 2024 11:20:20 +0100 Subject: [PATCH 005/441] [AArch64] Add verification for MemOp immediate ranges (#97561) This adds an implementation of AArch64InstrInfo::verifyInstruction for AArch64, and adds some basic verification of the immediate ranges of memory operations using the information from getMemOpInfo. Some extra memory operations have been added to getMemOpInfo, along with the equivalent opcodes to getLoadStoreImmIdx to ensure we use the correct index. Please let us know if this starts reporting verification failures. Thanks. 
--- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 394 +++++++++++++----- llvm/lib/Target/AArch64/AArch64InstrInfo.h | 4 + .../machine-outliner-unsafe-stack-call.mir | 16 +- llvm/test/CodeGen/AArch64/verify-memop.mir | 51 +++ 4 files changed, 359 insertions(+), 106 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/verify-memop.mir diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index e425984a6778443..5f8ed9cb6180a2c 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2344,132 +2344,258 @@ std::optional AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) { unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) { switch (Opc) { default: + llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx"); + case AArch64::ADDG: + case AArch64::LDAPURBi: + case AArch64::LDAPURHi: + case AArch64::LDAPURi: + case AArch64::LDAPURSBWi: + case AArch64::LDAPURSBXi: + case AArch64::LDAPURSHWi: + case AArch64::LDAPURSHXi: + case AArch64::LDAPURSWi: + case AArch64::LDAPURXi: + case AArch64::LDR_PPXI: + case AArch64::LDR_PXI: + case AArch64::LDR_ZXI: + case AArch64::LDR_ZZXI: + case AArch64::LDR_ZZZXI: + case AArch64::LDR_ZZZZXI: + case AArch64::LDRBBui: + case AArch64::LDRBui: + case AArch64::LDRDui: + case AArch64::LDRHHui: + case AArch64::LDRHui: + case AArch64::LDRQui: + case AArch64::LDRSBWui: + case AArch64::LDRSBXui: + case AArch64::LDRSHWui: + case AArch64::LDRSHXui: + case AArch64::LDRSui: + case AArch64::LDRSWui: + case AArch64::LDRWui: + case AArch64::LDRXui: + case AArch64::LDURBBi: + case AArch64::LDURBi: + case AArch64::LDURDi: + case AArch64::LDURHHi: + case AArch64::LDURHi: + case AArch64::LDURQi: + case AArch64::LDURSBWi: + case AArch64::LDURSBXi: + case AArch64::LDURSHWi: + case AArch64::LDURSHXi: + case AArch64::LDURSi: + case AArch64::LDURSWi: + case AArch64::LDURWi: + case AArch64::LDURXi: + case AArch64::PRFMui: + case AArch64::PRFUMi: + 
case AArch64::ST2Gi: + case AArch64::STGi: + case AArch64::STLURBi: + case AArch64::STLURHi: + case AArch64::STLURWi: + case AArch64::STLURXi: + case AArch64::StoreSwiftAsyncContext: + case AArch64::STR_PPXI: + case AArch64::STR_PXI: + case AArch64::STR_ZXI: + case AArch64::STR_ZZXI: + case AArch64::STR_ZZZXI: + case AArch64::STR_ZZZZXI: + case AArch64::STRBBui: + case AArch64::STRBui: + case AArch64::STRDui: + case AArch64::STRHHui: + case AArch64::STRHui: + case AArch64::STRQui: + case AArch64::STRSui: + case AArch64::STRWui: + case AArch64::STRXui: + case AArch64::STURBBi: + case AArch64::STURBi: + case AArch64::STURDi: + case AArch64::STURHHi: + case AArch64::STURHi: + case AArch64::STURQi: + case AArch64::STURSi: + case AArch64::STURWi: + case AArch64::STURXi: + case AArch64::STZ2Gi: + case AArch64::STZGi: + case AArch64::TAGPstack: return 2; - case AArch64::LDPXi: - case AArch64::LDPDi: - case AArch64::STPXi: - case AArch64::STPDi: - case AArch64::LDNPXi: - case AArch64::LDNPDi: - case AArch64::STNPXi: - case AArch64::STNPDi: - case AArch64::LDPQi: - case AArch64::STPQi: - case AArch64::LDNPQi: - case AArch64::STNPQi: - case AArch64::LDPWi: - case AArch64::LDPSi: - case AArch64::STPWi: - case AArch64::STPSi: - case AArch64::LDNPWi: - case AArch64::LDNPSi: - case AArch64::STNPWi: - case AArch64::STNPSi: - case AArch64::LDG: - case AArch64::STGPi: - - case AArch64::LD1B_IMM: + case AArch64::LD1B_D_IMM: case AArch64::LD1B_H_IMM: + case AArch64::LD1B_IMM: case AArch64::LD1B_S_IMM: - case AArch64::LD1B_D_IMM: - case AArch64::LD1SB_H_IMM: - case AArch64::LD1SB_S_IMM: - case AArch64::LD1SB_D_IMM: + case AArch64::LD1D_IMM: + case AArch64::LD1H_D_IMM: case AArch64::LD1H_IMM: case AArch64::LD1H_S_IMM: - case AArch64::LD1H_D_IMM: - case AArch64::LD1SH_S_IMM: + case AArch64::LD1RB_D_IMM: + case AArch64::LD1RB_H_IMM: + case AArch64::LD1RB_IMM: + case AArch64::LD1RB_S_IMM: + case AArch64::LD1RD_IMM: + case AArch64::LD1RH_D_IMM: + case AArch64::LD1RH_IMM: + case 
AArch64::LD1RH_S_IMM: + case AArch64::LD1RSB_D_IMM: + case AArch64::LD1RSB_H_IMM: + case AArch64::LD1RSB_S_IMM: + case AArch64::LD1RSH_D_IMM: + case AArch64::LD1RSH_S_IMM: + case AArch64::LD1RSW_IMM: + case AArch64::LD1RW_D_IMM: + case AArch64::LD1RW_IMM: + case AArch64::LD1SB_D_IMM: + case AArch64::LD1SB_H_IMM: + case AArch64::LD1SB_S_IMM: case AArch64::LD1SH_D_IMM: - case AArch64::LD1W_IMM: - case AArch64::LD1W_D_IMM: + case AArch64::LD1SH_S_IMM: case AArch64::LD1SW_D_IMM: - case AArch64::LD1D_IMM: - + case AArch64::LD1W_D_IMM: + case AArch64::LD1W_IMM: case AArch64::LD2B_IMM: + case AArch64::LD2D_IMM: case AArch64::LD2H_IMM: case AArch64::LD2W_IMM: - case AArch64::LD2D_IMM: case AArch64::LD3B_IMM: + case AArch64::LD3D_IMM: case AArch64::LD3H_IMM: case AArch64::LD3W_IMM: - case AArch64::LD3D_IMM: case AArch64::LD4B_IMM: + case AArch64::LD4D_IMM: case AArch64::LD4H_IMM: case AArch64::LD4W_IMM: - case AArch64::LD4D_IMM: - - case AArch64::ST1B_IMM: + case AArch64::LDG: + case AArch64::LDNF1B_D_IMM: + case AArch64::LDNF1B_H_IMM: + case AArch64::LDNF1B_IMM: + case AArch64::LDNF1B_S_IMM: + case AArch64::LDNF1D_IMM: + case AArch64::LDNF1H_D_IMM: + case AArch64::LDNF1H_IMM: + case AArch64::LDNF1H_S_IMM: + case AArch64::LDNF1SB_D_IMM: + case AArch64::LDNF1SB_H_IMM: + case AArch64::LDNF1SB_S_IMM: + case AArch64::LDNF1SH_D_IMM: + case AArch64::LDNF1SH_S_IMM: + case AArch64::LDNF1SW_D_IMM: + case AArch64::LDNF1W_D_IMM: + case AArch64::LDNF1W_IMM: + case AArch64::LDNPDi: + case AArch64::LDNPQi: + case AArch64::LDNPSi: + case AArch64::LDNPWi: + case AArch64::LDNPXi: + case AArch64::LDNT1B_ZRI: + case AArch64::LDNT1D_ZRI: + case AArch64::LDNT1H_ZRI: + case AArch64::LDNT1W_ZRI: + case AArch64::LDPDi: + case AArch64::LDPQi: + case AArch64::LDPSi: + case AArch64::LDPWi: + case AArch64::LDPXi: + case AArch64::LDRBBpost: + case AArch64::LDRBBpre: + case AArch64::LDRBpost: + case AArch64::LDRBpre: + case AArch64::LDRDpost: + case AArch64::LDRDpre: + case AArch64::LDRHHpost: + case 
AArch64::LDRHHpre: + case AArch64::LDRHpost: + case AArch64::LDRHpre: + case AArch64::LDRQpost: + case AArch64::LDRQpre: + case AArch64::LDRSpost: + case AArch64::LDRSpre: + case AArch64::LDRWpost: + case AArch64::LDRWpre: + case AArch64::LDRXpost: + case AArch64::LDRXpre: + case AArch64::ST1B_D_IMM: case AArch64::ST1B_H_IMM: + case AArch64::ST1B_IMM: case AArch64::ST1B_S_IMM: - case AArch64::ST1B_D_IMM: + case AArch64::ST1D_IMM: + case AArch64::ST1H_D_IMM: case AArch64::ST1H_IMM: case AArch64::ST1H_S_IMM: - case AArch64::ST1H_D_IMM: - case AArch64::ST1W_IMM: case AArch64::ST1W_D_IMM: - case AArch64::ST1D_IMM: - + case AArch64::ST1W_IMM: case AArch64::ST2B_IMM: + case AArch64::ST2D_IMM: case AArch64::ST2H_IMM: case AArch64::ST2W_IMM: - case AArch64::ST2D_IMM: case AArch64::ST3B_IMM: + case AArch64::ST3D_IMM: case AArch64::ST3H_IMM: case AArch64::ST3W_IMM: - case AArch64::ST3D_IMM: case AArch64::ST4B_IMM: + case AArch64::ST4D_IMM: case AArch64::ST4H_IMM: case AArch64::ST4W_IMM: - case AArch64::ST4D_IMM: - - case AArch64::LD1RB_IMM: - case AArch64::LD1RB_H_IMM: - case AArch64::LD1RB_S_IMM: - case AArch64::LD1RB_D_IMM: - case AArch64::LD1RSB_H_IMM: - case AArch64::LD1RSB_S_IMM: - case AArch64::LD1RSB_D_IMM: - case AArch64::LD1RH_IMM: - case AArch64::LD1RH_S_IMM: - case AArch64::LD1RH_D_IMM: - case AArch64::LD1RSH_S_IMM: - case AArch64::LD1RSH_D_IMM: - case AArch64::LD1RW_IMM: - case AArch64::LD1RW_D_IMM: - case AArch64::LD1RSW_IMM: - case AArch64::LD1RD_IMM: - - case AArch64::LDNT1B_ZRI: - case AArch64::LDNT1H_ZRI: - case AArch64::LDNT1W_ZRI: - case AArch64::LDNT1D_ZRI: + case AArch64::STGPi: + case AArch64::STNPDi: + case AArch64::STNPQi: + case AArch64::STNPSi: + case AArch64::STNPWi: + case AArch64::STNPXi: case AArch64::STNT1B_ZRI: + case AArch64::STNT1D_ZRI: case AArch64::STNT1H_ZRI: case AArch64::STNT1W_ZRI: - case AArch64::STNT1D_ZRI: - - case AArch64::LDNF1B_IMM: - case AArch64::LDNF1B_H_IMM: - case AArch64::LDNF1B_S_IMM: - case AArch64::LDNF1B_D_IMM: - case 
AArch64::LDNF1SB_H_IMM: - case AArch64::LDNF1SB_S_IMM: - case AArch64::LDNF1SB_D_IMM: - case AArch64::LDNF1H_IMM: - case AArch64::LDNF1H_S_IMM: - case AArch64::LDNF1H_D_IMM: - case AArch64::LDNF1SH_S_IMM: - case AArch64::LDNF1SH_D_IMM: - case AArch64::LDNF1W_IMM: - case AArch64::LDNF1W_D_IMM: - case AArch64::LDNF1SW_D_IMM: - case AArch64::LDNF1D_IMM: + case AArch64::STPDi: + case AArch64::STPQi: + case AArch64::STPSi: + case AArch64::STPWi: + case AArch64::STPXi: + case AArch64::STRBBpost: + case AArch64::STRBBpre: + case AArch64::STRBpost: + case AArch64::STRBpre: + case AArch64::STRDpost: + case AArch64::STRDpre: + case AArch64::STRHHpost: + case AArch64::STRHHpre: + case AArch64::STRHpost: + case AArch64::STRHpre: + case AArch64::STRQpost: + case AArch64::STRQpre: + case AArch64::STRSpost: + case AArch64::STRSpre: + case AArch64::STRWpost: + case AArch64::STRWpre: + case AArch64::STRXpost: + case AArch64::STRXpre: return 3; - case AArch64::ADDG: - case AArch64::STGi: - case AArch64::LDR_PXI: - case AArch64::STR_PXI: - return 2; + case AArch64::LDPDpost: + case AArch64::LDPDpre: + case AArch64::LDPQpost: + case AArch64::LDPQpre: + case AArch64::LDPSpost: + case AArch64::LDPSpre: + case AArch64::LDPWpost: + case AArch64::LDPWpre: + case AArch64::LDPXpost: + case AArch64::LDPXpre: + case AArch64::STPDpost: + case AArch64::STPDpre: + case AArch64::STPQpost: + case AArch64::STPQpre: + case AArch64::STPSpost: + case AArch64::STPSpre: + case AArch64::STPWpost: + case AArch64::STPWpre: + case AArch64::STPXpost: + case AArch64::STPXpre: + return 4; } } @@ -3834,22 +3960,58 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, MinOffset = -256; MaxOffset = 255; break; - case AArch64::STRXpre: - case AArch64::STRDpre: - case AArch64::LDRXpost: case AArch64::LDRDpost: + case AArch64::LDRDpre: + case AArch64::LDRXpost: + case AArch64::LDRXpre: + case AArch64::STRDpost: + case AArch64::STRDpre: + case AArch64::STRXpost: + case AArch64::STRXpre: Scale = 
TypeSize::getFixed(1); Width = TypeSize::getFixed(8); MinOffset = -256; MaxOffset = 255; break; case AArch64::STRWpost: + case AArch64::STRWpre: case AArch64::LDRWpost: + case AArch64::LDRWpre: + case AArch64::STRSpost: + case AArch64::STRSpre: + case AArch64::LDRSpost: + case AArch64::LDRSpre: Scale = TypeSize::getFixed(1); Width = TypeSize::getFixed(4); MinOffset = -256; MaxOffset = 255; break; + case AArch64::LDRHpost: + case AArch64::LDRHpre: + case AArch64::STRHpost: + case AArch64::STRHpre: + case AArch64::LDRHHpost: + case AArch64::LDRHHpre: + case AArch64::STRHHpost: + case AArch64::STRHHpre: + Scale = TypeSize::getFixed(1); + Width = TypeSize::getFixed(2); + MinOffset = -256; + MaxOffset = 255; + break; + case AArch64::LDRBpost: + case AArch64::LDRBpre: + case AArch64::STRBpost: + case AArch64::STRBpre: + case AArch64::LDRBBpost: + case AArch64::LDRBBpre: + case AArch64::STRBBpost: + case AArch64::STRBBpre: + Scale = TypeSize::getFixed(1); + Width = TypeSize::getFixed(1); + MinOffset = -256; + MaxOffset = 255; + break; // Unscaled case AArch64::LDURQi: case AArch64::STURQi: @@ -3918,8 +4080,10 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, case AArch64::LDNPQi: case AArch64::STPQi: case AArch64::STNPQi: - case AArch64::STPQpre: case AArch64::LDPQpost: + case AArch64::LDPQpre: + case AArch64::STPQpost: + case AArch64::STPQpre: Scale = TypeSize::getFixed(16); Width = TypeSize::getFixed(16 * 2); MinOffset = -64; @@ -3933,10 +4097,14 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, case AArch64::STPDi: case AArch64::STNPXi: case AArch64::STNPDi: - case AArch64::STPXpre: + case AArch64::LDPDpost: + case AArch64::LDPDpre: case AArch64::LDPXpost: + case AArch64::LDPXpre: + case AArch64::STPDpost: case AArch64::STPDpre: - case AArch64::LDPDpost: + case AArch64::STPXpost: + case AArch64::STPXpre: Scale = TypeSize::getFixed(8); Width = TypeSize::getFixed(8 * 2); MinOffset = -64; @@ -3950,6 +4118,14 @@ bool 
AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, case AArch64::STPSi: case AArch64::STNPWi: case AArch64::STNPSi: + case AArch64::LDPSpost: + case AArch64::LDPSpre: + case AArch64::LDPWpost: + case AArch64::LDPWpre: + case AArch64::STPSpost: + case AArch64::STPSpre: + case AArch64::STPWpost: + case AArch64::STPWpre: Scale = TypeSize::getFixed(4); Width = TypeSize::getFixed(4 * 2); MinOffset = -64; @@ -10250,6 +10426,28 @@ AArch64InstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const { Init, IsUpdatePriorComp, Cond); } +/// verifyInstruction - Perform target specific instruction verification. +bool AArch64InstrInfo::verifyInstruction(const MachineInstr &MI, + StringRef &ErrInfo) const { + + // Verify that immediate offsets on load/store instructions are within range. + // Stack objects with an FI operand are excluded as they can be fixed up + // during PEI. + TypeSize Scale(0U, false), Width(0U, false); + int64_t MinOffset, MaxOffset; + if (getMemOpInfo(MI.getOpcode(), Scale, Width, MinOffset, MaxOffset)) { + unsigned ImmIdx = getLoadStoreImmIdx(MI.getOpcode()); + if (MI.getOperand(ImmIdx).isImm() && !MI.getOperand(ImmIdx - 1).isFI()) { + int64_t Imm = MI.getOperand(ImmIdx).getImm(); + if (Imm < MinOffset || Imm > MaxOffset) { + ErrInfo = "Unexpected immediate on load/store instruction"; + return false; + } + } + } + return true; +} + #define GET_INSTRINFO_HELPERS #define GET_INSTRMAP_INFO #include "AArch64GenInstrInfo.inc" diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 24a500aa190abbc..1d2f69bd8530263 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -583,6 +583,10 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { std::optional canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask, MachineInstr *Pred, const MachineRegisterInfo *MRI) const; + + /// verifyInstruction - Perform target specific 
instruction verification. + bool verifyInstruction(const MachineInstr &MI, + StringRef &ErrInfo) const override; }; struct UsedNZCV { diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-unsafe-stack-call.mir b/llvm/test/CodeGen/AArch64/machine-outliner-unsafe-stack-call.mir index 0853b23e52916d5..679c9687e66f039 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-unsafe-stack-call.mir +++ b/llvm/test/CodeGen/AArch64/machine-outliner-unsafe-stack-call.mir @@ -25,10 +25,10 @@ body: | ; CHECK-DAG: baz $lr = ORRXri $xzr, 1 BL @foo, implicit-def dead $lr, implicit $sp - $x20, $x19 = LDPXi $sp, 255 - $x20, $x19 = LDPXi $sp, 255 - $x20, $x19 = LDPXi $sp, 255 - $x20, $x19 = LDPXi $sp, 255 + $x20, $x19 = LDPXi $sp, 63 + $x20, $x19 = LDPXi $sp, 63 + $x20, $x19 = LDPXi $sp, 63 + $x20, $x19 = LDPXi $sp, 63 bb.1: BL @bar, implicit-def dead $lr, implicit $sp $x11 = ADDXri $sp, 48, 0; @@ -55,10 +55,10 @@ body: | ; CHECK-DAG: baz $lr = ORRXri $xzr, 1 BL @foo, implicit-def dead $lr, implicit $sp - $x20, $x19 = LDPXi $sp, 255 - $x20, $x19 = LDPXi $sp, 255 - $x20, $x19 = LDPXi $sp, 255 - $x20, $x19 = LDPXi $sp, 255 + $x20, $x19 = LDPXi $sp, 63 + $x20, $x19 = LDPXi $sp, 63 + $x20, $x19 = LDPXi $sp, 63 + $x20, $x19 = LDPXi $sp, 63 bb.1: BL @bar, implicit-def dead $lr, implicit $sp $x11 = ADDXri $sp, 48, 0; diff --git a/llvm/test/CodeGen/AArch64/verify-memop.mir b/llvm/test/CodeGen/AArch64/verify-memop.mir new file mode 100644 index 000000000000000..90aa5e64a7f7209 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/verify-memop.mir @@ -0,0 +1,51 @@ +# RUN: not --crash llc -mtriple=aarch64 -run-pass machineverifier -o /dev/null %s 2>&1 | FileCheck -implicit-check-not="Bad machine code" %s + +# CHECK: *** Bad machine code: Unexpected immediate on load/store instruction *** +# CHECK: - instruction: STRSui $s1, $x0, 4096 +# CHECK: *** Bad machine code: Unexpected immediate on load/store instruction *** +# CHECK: - instruction: STRSui $s1, $x0, -1 +# CHECK: *** Bad machine code: 
Unexpected immediate on load/store instruction *** +# CHECK: - instruction: early-clobber $x0, $w1 = LDRWpre $x0(tied-def 0), 256 +# CHECK: *** Bad machine code: Unexpected immediate on load/store instruction *** +# CHECK: - instruction: early-clobber $x0, $w1 = LDRWpre $x0(tied-def 0), -257 +# CHECK: *** Bad machine code: Unexpected immediate on load/store instruction *** +# CHECK: - instruction: early-clobber $x0, $w1 = LDRWpre $x0(tied-def 0), 256 +# CHECK: *** Bad machine code: Unexpected immediate on load/store instruction *** +# CHECK: - instruction: early-clobber $x0, $w1 = LDRWpre $x0(tied-def 0), -257 +# CHECK: *** Bad machine code: Unexpected immediate on load/store instruction *** +# CHECK: - instruction: STRBBui $w1, $x0, 4096 +# CHECK: *** Bad machine code: Unexpected immediate on load/store instruction *** +# CHECK: - instruction: STRBBui $w1, $x0, -1 +# CHECK: *** Bad machine code: Unexpected immediate on load/store instruction *** +# CHECK: - instruction: STRHHui $w1, $x0, 4096 +# CHECK: *** Bad machine code: Unexpected immediate on load/store instruction *** +# CHECK: - instruction: STRHHui $w1, $x0, -1 +# CHECK: *** Bad machine code: Unexpected immediate on load/store instruction *** +# CHECK: - instruction: early-clobber $x0 = STRSpost $s1, $x0(tied-def 0), 256 +# CHECK: *** Bad machine code: Unexpected immediate on load/store instruction *** +# CHECK: - instruction: early-clobber $x0 = STRSpost $s1, $x0(tied-def 0), -257 + +--- +name: testoffsets +alignment: 4 +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $d0, $s1, $q2, $x0, $w1 + + STRSui $s1, $x0, 4095 + STRSui $s1, $x0, 4096 + STRSui $s1, $x0, 0 + STRSui $s1, $x0, -1 + $x0, $w1 = LDRWpre $x0, 256 + $x0, $w1 = LDRWpre $x0, -257 + $x0, $w1 = LDRWpre $x0, 256 + $x0, $w1 = LDRWpre $x0, -257 + STRBBui $w1, $x0, 4096 + STRBBui $w1, $x0, -1 + STRHHui $w1, $x0, 4096 + STRHHui $w1, $x0, -1 + $x0 = STRSpost $s1, $x0, 256 + $x0 = STRSpost $s1, $x0, -257 + RET undef $lr, implicit $x0 +... 
From 33190490c667aaf8b08d5af8b8ce84524f856e80 Mon Sep 17 00:00:00 2001 From: zhongyunde 00443407 Date: Fri, 10 Nov 2023 07:29:03 -0500 Subject: [PATCH 006/441] [AArch64] merge index address with large offset into base address A case for this transformation, https://gcc.godbolt.org/z/nhYcWq1WE Fold mov w8, #56952 movk w8, #15, lsl #16 ldrb w0, [x0, x8] into add x0, x0, 1036288 ldrb w0, [x0, 3704] Only LDRBBroX is supported in this first version. Fixes https://github.com/llvm/llvm-project/issues/71917 Note: This PR relands commit 32878c2065 with a fix for the crash reported in PR79756. The crash was exposed when a MOVKWi instruction is at the head of a block without a preceding MOVZWi. --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 10 + llvm/lib/Target/AArch64/AArch64InstrInfo.h | 3 + .../AArch64/AArch64LoadStoreOptimizer.cpp | 232 ++++++++++++++++++ llvm/test/CodeGen/AArch64/arm64-addrmode.ll | 15 +- .../AArch64/large-offset-ldr-merge.mir | 25 +- 5 files changed, 273 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 5f8ed9cb6180a2c..ff17986208d8aab 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -4516,6 +4516,16 @@ AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) { return MI.getOperand(Idx); } +const MachineOperand & +AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) { + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected opcode"); + case AArch64::LDRBBroX: + return MI.getOperand(4); + } +} + static const TargetRegisterClass *getRegClass(const MachineInstr &MI, Register Reg) { if (MI.getParent() == nullptr) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 1d2f69bd8530263..a1f2fbff016312a 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -254,6 +254,9 @@ class AArch64InstrInfo 
final : public AArch64GenInstrInfo { /// Returns whether the physical register is FP or NEON. static bool isFpOrNEON(Register Reg); + /// Returns the shift amount operator of a load/store. + static const MachineOperand &getLdStAmountOp(const MachineInstr &MI); + /// Returns whether the instruction is FP or NEON. static bool isFpOrNEON(const MachineInstr &MI); diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index ff3a21cd0da3b7c..8de3f8db84ae2b7 100644 --- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -64,6 +64,8 @@ STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted"); STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted"); STATISTIC(NumFailedAlignmentCheck, "Number of load/store pair transformation " "not passed the alignment check"); +STATISTIC(NumConstOffsetFolded, + "Number of const offset of index address folded"); DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming", "Controls which pairs are considered for renaming"); @@ -77,6 +79,11 @@ static cl::opt LdStLimit("aarch64-load-store-scan-limit", static cl::opt UpdateLimit("aarch64-update-scan-limit", cl::init(100), cl::Hidden); +// The LdStConstLimit limits how far we search for const offset instructions +// when we form index address load/store instructions. +static cl::opt LdStConstLimit("aarch64-load-store-const-scan-limit", + cl::init(10), cl::Hidden); + // Enable register renaming to find additional store pairing opportunities. 
static cl::opt EnableRenaming("aarch64-load-store-renaming", cl::init(true), cl::Hidden); @@ -173,6 +180,13 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit); + // Scan the instruction list to find a register assigned with a const + // value that can be combined with the current instruction (a load or store) + // using base addressing with writeback. Scan backwards. + MachineBasicBlock::iterator + findMatchingConstOffsetBackward(MachineBasicBlock::iterator I, unsigned Limit, + unsigned &Offset); + // Scan the instruction list to find a base register update that can // be combined with the current instruction (a load or store) using // pre or post indexed addressing with writeback. Scan backwards. @@ -184,11 +198,19 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI, unsigned BaseReg, int Offset); + bool isMatchingMovConstInsn(MachineInstr &MemMI, MachineInstr &MI, + unsigned IndexReg, unsigned &Offset); + // Merge a pre- or post-index base register update into a ld/st instruction. MachineBasicBlock::iterator mergeUpdateInsn(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update, bool IsPreIdx); + MachineBasicBlock::iterator + mergeConstOffsetInsn(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator Update, unsigned Offset, + int Scale); + // Find and merge zero store instructions. bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI); @@ -201,6 +223,9 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { // Find and merge a base register updates before or after a ld/st instruction. bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI); + // Find and merge an index ldr/st instruction into a base ld/st instruction. 
+ bool tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI, int Scale); + bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt); bool runOnMachineFunction(MachineFunction &Fn) override; @@ -483,6 +508,16 @@ static unsigned getPreIndexedOpcode(unsigned Opc) { } } +static unsigned getBaseAddressOpcode(unsigned Opc) { + // TODO: Add more index address loads/stores. + switch (Opc) { + default: + llvm_unreachable("Opcode has no base address equivalent!"); + case AArch64::LDRBBroX: + return AArch64::LDRBBui; + } +} + static unsigned getPostIndexedOpcode(unsigned Opc) { switch (Opc) { default: @@ -724,6 +759,20 @@ static bool isMergeableLdStUpdate(MachineInstr &MI) { } } +// Make sure this is a reg+reg Ld/St +static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) { + unsigned Opc = MI.getOpcode(); + switch (Opc) { + default: + return false; + // Scaled instructions. + // TODO: Add more index address loads/stores. + case AArch64::LDRBBroX: + Scale = 1; + return true; + } +} + static bool isRewritableImplicitDef(unsigned Opc) { switch (Opc) { default: @@ -2053,6 +2102,63 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I, return NextI; } +MachineBasicBlock::iterator +AArch64LoadStoreOpt::mergeConstOffsetInsn(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator Update, + unsigned Offset, int Scale) { + assert((Update->getOpcode() == AArch64::MOVKWi) && + "Unexpected const mov instruction to merge!"); + MachineBasicBlock::iterator E = I->getParent()->end(); + MachineBasicBlock::iterator NextI = next_nodbg(I, E); + MachineBasicBlock::iterator PrevI = prev_nodbg(Update, E); + MachineInstr &MemMI = *I; + unsigned Mask = (1 << 12) * Scale - 1; + unsigned Low = Offset & Mask; + unsigned High = Offset - Low; + Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg(); + Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg(); + MachineInstrBuilder AddMIB, MemMIB; + + // Add IndexReg, BaseReg, High (the 
BaseReg may be SP) + AddMIB = + BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(AArch64::ADDXri)) + .addDef(IndexReg) + .addUse(BaseReg) + .addImm(High >> 12) // shifted value + .addImm(12); // shift 12 + (void)AddMIB; + // Ld/St DestReg, IndexReg, Imm12 + unsigned NewOpc = getBaseAddressOpcode(I->getOpcode()); + MemMIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) + .add(getLdStRegOp(MemMI)) + .add(AArch64InstrInfo::getLdStOffsetOp(MemMI)) + .addImm(Low / Scale) + .setMemRefs(I->memoperands()) + .setMIFlags(I->mergeFlagsWith(*Update)); + (void)MemMIB; + + ++NumConstOffsetFolded; + LLVM_DEBUG(dbgs() << "Creating base address load/store.\n"); + LLVM_DEBUG(dbgs() << " Replacing instructions:\n "); + LLVM_DEBUG(PrevI->print(dbgs())); + LLVM_DEBUG(dbgs() << " "); + LLVM_DEBUG(Update->print(dbgs())); + LLVM_DEBUG(dbgs() << " "); + LLVM_DEBUG(I->print(dbgs())); + LLVM_DEBUG(dbgs() << " with instruction:\n "); + LLVM_DEBUG(((MachineInstr *)AddMIB)->print(dbgs())); + LLVM_DEBUG(dbgs() << " "); + LLVM_DEBUG(((MachineInstr *)MemMIB)->print(dbgs())); + LLVM_DEBUG(dbgs() << "\n"); + + // Erase the old instructions for the block. + I->eraseFromParent(); + PrevI->eraseFromParent(); + Update->eraseFromParent(); + + return NextI; +} + bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI, unsigned BaseReg, int Offset) { @@ -2100,6 +2206,34 @@ bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI, return false; } +bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI, + MachineInstr &MI, + unsigned IndexReg, + unsigned &Offset) { + // The update instruction source and destination register must be the + // same as the load/store index register. + if (MI.getOpcode() == AArch64::MOVKWi && + TRI->isSuperOrSubRegisterEq(IndexReg, MI.getOperand(1).getReg())) { + + // movz + movk hold a large offset of a Ld/St instruction. 
+ MachineBasicBlock::iterator B = MI.getParent()->begin(); + MachineBasicBlock::iterator MBBI = &MI; + // Skip the scene when the MI is the first instruction of a block. + if (MBBI == B) + return false; + MBBI = prev_nodbg(MBBI, B); + MachineInstr &MovzMI = *MBBI; + if (MovzMI.getOpcode() == AArch64::MOVZWi) { + unsigned Low = MovzMI.getOperand(1).getImm(); + unsigned High = MI.getOperand(2).getImm() << MI.getOperand(3).getImm(); + Offset = High + Low; + // 12-bit optionally shifted immediates are legal for adds. + return Offset >> 24 == 0; + } + } + return false; +} + MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) { MachineBasicBlock::iterator E = I->getParent()->end(); @@ -2255,6 +2389,60 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( return E; } +MachineBasicBlock::iterator +AArch64LoadStoreOpt::findMatchingConstOffsetBackward( + MachineBasicBlock::iterator I, unsigned Limit, unsigned &Offset) { + MachineBasicBlock::iterator B = I->getParent()->begin(); + MachineBasicBlock::iterator E = I->getParent()->end(); + MachineInstr &MemMI = *I; + MachineBasicBlock::iterator MBBI = I; + + // If the load is the first instruction in the block, there's obviously + // not any matching load or store. + if (MBBI == B) + return E; + + // Make sure the IndexReg is killed and the shift amount is zero. + // TODO: Relex this restriction to extend, simplify processing now. + if (!AArch64InstrInfo::getLdStOffsetOp(MemMI).isKill() || + !AArch64InstrInfo::getLdStAmountOp(MemMI).isImm() || + (AArch64InstrInfo::getLdStAmountOp(MemMI).getImm() != 0)) + return E; + + Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg(); + + // Track which register units have been modified and used between the first + // insn (inclusive) and the second insn. 
+ ModifiedRegUnits.clear(); + UsedRegUnits.clear(); + unsigned Count = 0; + do { + MBBI = prev_nodbg(MBBI, B); + MachineInstr &MI = *MBBI; + + // Don't count transient instructions towards the search limit since there + // may be different numbers of them if e.g. debug information is present. + if (!MI.isTransient()) + ++Count; + + // If we found a match, return it. + if (isMatchingMovConstInsn(*I, MI, IndexReg, Offset)) { + return MBBI; + } + + // Update the status of what the instruction clobbered and used. + LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); + + // Otherwise, if the index register is used or modified, we have no match, + // so return early. + if (!ModifiedRegUnits.available(IndexReg) || + !UsedRegUnits.available(IndexReg)) + return E; + + } while (MBBI != B && Count < Limit); + return E; +} + bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore( MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; @@ -2443,6 +2631,34 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate return false; } +bool AArch64LoadStoreOpt::tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI, + int Scale) { + MachineInstr &MI = *MBBI; + MachineBasicBlock::iterator E = MI.getParent()->end(); + MachineBasicBlock::iterator Update; + + // Don't know how to handle unscaled pre/post-index versions below, so bail. + if (TII->hasUnscaledLdStOffset(MI.getOpcode())) + return false; + + // Look back to try to find a const offset for index LdSt instruction. For + // example, + // mov x8, #LargeImm ; = a * (1<<12) + imm12 + // ldr x1, [x0, x8] + // merged into: + // add x8, x0, a * (1<<12) + // ldr x1, [x8, imm12] + unsigned Offset; + Update = findMatchingConstOffsetBackward(MBBI, LdStConstLimit, Offset); + if (Update != E && (Offset & (Scale - 1)) == 0) { + // Merge the imm12 into the ld/st. 
+ MBBI = mergeConstOffsetInsn(MBBI, Update, Offset, Scale); + return true; + } + + return false; +} + bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt) { @@ -2521,6 +2737,22 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, ++MBBI; } + // 5) Find a register assigned with a const value that can be combined with + // into the load or store. e.g., + // mov x8, #LargeImm ; = a * (1<<12) + imm12 + // ldr x1, [x0, x8] + // ; becomes + // add x8, x0, a * (1<<12) + // ldr x1, [x8, imm12] + for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + MBBI != E;) { + int Scale; + if (isMergeableIndexLdSt(*MBBI, Scale) && tryToMergeIndexLdSt(MBBI, Scale)) + Modified = true; + else + ++MBBI; + } + return Modified; } diff --git a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll index d39029163a47aa6..2181eaaee7db686 100644 --- a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll +++ b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll @@ -214,9 +214,8 @@ define void @t17(i64 %a) { define i8 @LdOffset_i8(ptr %a) { ; CHECK-LABEL: LdOffset_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #56952 // =0xde78 -; CHECK-NEXT: movk w8, #15, lsl #16 -; CHECK-NEXT: ldrb w0, [x0, x8] +; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288 +; CHECK-NEXT: ldrb w0, [x8, #3704] ; CHECK-NEXT: ret %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992 %val = load i8, ptr %arrayidx, align 1 @@ -227,9 +226,8 @@ define i8 @LdOffset_i8(ptr %a) { define i32 @LdOffset_i8_zext32(ptr %a) { ; CHECK-LABEL: LdOffset_i8_zext32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #56952 // =0xde78 -; CHECK-NEXT: movk w8, #15, lsl #16 -; CHECK-NEXT: ldrb w0, [x0, x8] +; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288 +; CHECK-NEXT: ldrb w0, [x8, #3704] ; CHECK-NEXT: ret %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992 %val = load i8, ptr %arrayidx, align 1 @@ -255,9 +253,8 @@ define i32 @LdOffset_i8_sext32(ptr %a) { 
define i64 @LdOffset_i8_zext64(ptr %a) { ; CHECK-LABEL: LdOffset_i8_zext64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #56952 // =0xde78 -; CHECK-NEXT: movk w8, #15, lsl #16 -; CHECK-NEXT: ldrb w0, [x0, x8] +; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288 +; CHECK-NEXT: ldrb w0, [x8, #3704] ; CHECK-NEXT: ret %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992 %val = load i8, ptr %arrayidx, align 1 diff --git a/llvm/test/CodeGen/AArch64/large-offset-ldr-merge.mir b/llvm/test/CodeGen/AArch64/large-offset-ldr-merge.mir index 488f1ffdb52f3b2..473c74323d93998 100644 --- a/llvm/test/CodeGen/AArch64/large-offset-ldr-merge.mir +++ b/llvm/test/CodeGen/AArch64/large-offset-ldr-merge.mir @@ -14,9 +14,8 @@ body: | ; CHECK-LABEL: name: LdOffset ; CHECK: liveins: $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = MOVZWi 56952, 0 - ; CHECK-NEXT: renamable $w8 = MOVKWi $w8, 15, 16, implicit-def $x8 - ; CHECK-NEXT: renamable $w0 = LDRBBroX killed renamable $x0, killed renamable $x8, 0, 0 + ; CHECK-NEXT: $x8 = ADDXri $x0, 253, 12 + ; CHECK-NEXT: renamable $w0 = LDRBBui killed renamable $x8, 3704 ; CHECK-NEXT: RET undef $lr, implicit $w0 renamable $w8 = MOVZWi 56952, 0 renamable $w8 = MOVKWi $w8, 15, 16, implicit-def $x8 @@ -46,3 +45,23 @@ body: | renamable $w0 = LDRBBroX killed renamable $x0, renamable $x8, 0, 0 RET undef $lr, implicit $w0 ... 
+ +# Negative test: No MOVZWi used for the const offset +--- +name: LdOffset_missing_MOVZ +tracksRegLiveness: true +liveins: + - { reg: '$x0', virtual-reg: '' } +body: | + bb.0.entry: + liveins: $x0 + + ; CHECK-LABEL: name: LdOffset_missing_MOVZ + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $w8 = MOVKWi $w8, 15, 16, implicit-def $x8 + ; CHECK-NEXT: renamable $w0 = LDRBBroX killed renamable $x0, killed renamable $x8, 0, 0 + ; CHECK-NEXT: RET undef $lr, implicit $w0 + renamable $w8 = MOVKWi $w8, 15, 16, implicit-def $x8 + renamable $w0 = LDRBBroX killed renamable $x0, killed renamable $x8, 0, 0 + RET undef $lr, implicit $w0 From 43ffe2eed0d9f73789dbe213023733d164999306 Mon Sep 17 00:00:00 2001 From: zhongyunde 00443407 Date: Mon, 18 Dec 2023 00:51:48 -0500 Subject: [PATCH 007/441] [AArch64] Fold more load.x into load.i with large offset The list of load.x opcodes follows canFoldIntoAddrMode from D152828. Also support LDRSroX, which canFoldIntoAddrMode misses. --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 13 +++ .../AArch64/AArch64LoadStoreOptimizer.cpp | 51 ++++++++++- llvm/test/CodeGen/AArch64/arm64-addrmode.ll | 85 ++++++++----------- 3 files changed, 96 insertions(+), 53 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index ff17986208d8aab..d818d3ba51c59d2 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -4521,7 +4521,20 @@ AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) { switch (MI.getOpcode()) { default: llvm_unreachable("Unexpected opcode"); + case AArch64::LDRBroX: case AArch64::LDRBBroX: + case AArch64::LDRSBXroX: + case AArch64::LDRSBWroX: + case AArch64::LDRHroX: + case AArch64::LDRHHroX: + case AArch64::LDRSHXroX: + case AArch64::LDRSHWroX: + case AArch64::LDRWroX: + case AArch64::LDRSroX: + case AArch64::LDRSWroX: + case AArch64::LDRDroX: + case AArch64::LDRXroX: + case AArch64::LDRQroX: 
return MI.getOperand(4); } } diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index 8de3f8db84ae2b7..de1727aa6ec70f7 100644 --- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -509,12 +509,38 @@ static unsigned getPreIndexedOpcode(unsigned Opc) { } static unsigned getBaseAddressOpcode(unsigned Opc) { - // TODO: Add more index address loads/stores. + // TODO: Add more index address stores. switch (Opc) { default: llvm_unreachable("Opcode has no base address equivalent!"); + case AArch64::LDRBroX: + return AArch64::LDRBui; case AArch64::LDRBBroX: return AArch64::LDRBBui; + case AArch64::LDRSBXroX: + return AArch64::LDRSBXui; + case AArch64::LDRSBWroX: + return AArch64::LDRSBWui; + case AArch64::LDRHroX: + return AArch64::LDRHui; + case AArch64::LDRHHroX: + return AArch64::LDRHHui; + case AArch64::LDRSHXroX: + return AArch64::LDRSHXui; + case AArch64::LDRSHWroX: + return AArch64::LDRSHWui; + case AArch64::LDRWroX: + return AArch64::LDRWui; + case AArch64::LDRSroX: + return AArch64::LDRSui; + case AArch64::LDRSWroX: + return AArch64::LDRSWui; + case AArch64::LDRDroX: + return AArch64::LDRDui; + case AArch64::LDRXroX: + return AArch64::LDRXui; + case AArch64::LDRQroX: + return AArch64::LDRQui; } } @@ -766,10 +792,31 @@ static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) { default: return false; // Scaled instructions. - // TODO: Add more index address loads/stores. + // TODO: Add more index address stores. 
+ case AArch64::LDRBroX: case AArch64::LDRBBroX: + case AArch64::LDRSBXroX: + case AArch64::LDRSBWroX: Scale = 1; return true; + case AArch64::LDRHroX: + case AArch64::LDRHHroX: + case AArch64::LDRSHXroX: + case AArch64::LDRSHWroX: + Scale = 2; + return true; + case AArch64::LDRWroX: + case AArch64::LDRSroX: + case AArch64::LDRSWroX: + Scale = 4; + return true; + case AArch64::LDRDroX: + case AArch64::LDRXroX: + Scale = 8; + return true; + case AArch64::LDRQroX: + Scale = 16; + return true; } } diff --git a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll index 2181eaaee7db686..bfef61abd8c129f 100644 --- a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll +++ b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll @@ -239,9 +239,8 @@ define i32 @LdOffset_i8_zext32(ptr %a) { define i32 @LdOffset_i8_sext32(ptr %a) { ; CHECK-LABEL: LdOffset_i8_sext32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #56952 // =0xde78 -; CHECK-NEXT: movk w8, #15, lsl #16 -; CHECK-NEXT: ldrsb w0, [x0, x8] +; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288 +; CHECK-NEXT: ldrsb w0, [x8, #3704] ; CHECK-NEXT: ret %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992 %val = load i8, ptr %arrayidx, align 1 @@ -266,9 +265,8 @@ define i64 @LdOffset_i8_zext64(ptr %a) { define i64 @LdOffset_i8_sext64(ptr %a) { ; CHECK-LABEL: LdOffset_i8_sext64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #56952 // =0xde78 -; CHECK-NEXT: movk w8, #15, lsl #16 -; CHECK-NEXT: ldrsb x0, [x0, x8] +; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288 +; CHECK-NEXT: ldrsb x0, [x8, #3704] ; CHECK-NEXT: ret %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992 %val = load i8, ptr %arrayidx, align 1 @@ -280,9 +278,8 @@ define i64 @LdOffset_i8_sext64(ptr %a) { define i16 @LdOffset_i16(ptr %a) { ; CHECK-LABEL: LdOffset_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #48368 // =0xbcf0 -; CHECK-NEXT: movk w8, #31, lsl #16 -; CHECK-NEXT: ldrh w0, [x0, x8] +; CHECK-NEXT: add x8, x0, #506, lsl #12 // 
=2072576 +; CHECK-NEXT: ldrh w0, [x8, #7408] ; CHECK-NEXT: ret %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992 %val = load i16, ptr %arrayidx, align 2 @@ -293,9 +290,8 @@ define i16 @LdOffset_i16(ptr %a) { define i32 @LdOffset_i16_zext32(ptr %a) { ; CHECK-LABEL: LdOffset_i16_zext32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #48368 // =0xbcf0 -; CHECK-NEXT: movk w8, #31, lsl #16 -; CHECK-NEXT: ldrh w0, [x0, x8] +; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576 +; CHECK-NEXT: ldrh w0, [x8, #7408] ; CHECK-NEXT: ret %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992 %val = load i16, ptr %arrayidx, align 2 @@ -307,9 +303,8 @@ define i32 @LdOffset_i16_zext32(ptr %a) { define i32 @LdOffset_i16_sext32(ptr %a) { ; CHECK-LABEL: LdOffset_i16_sext32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #48368 // =0xbcf0 -; CHECK-NEXT: movk w8, #31, lsl #16 -; CHECK-NEXT: ldrsh w0, [x0, x8] +; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576 +; CHECK-NEXT: ldrsh w0, [x8, #7408] ; CHECK-NEXT: ret %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992 %val = load i16, ptr %arrayidx, align 2 @@ -321,9 +316,8 @@ define i32 @LdOffset_i16_sext32(ptr %a) { define i64 @LdOffset_i16_zext64(ptr %a) { ; CHECK-LABEL: LdOffset_i16_zext64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #48368 // =0xbcf0 -; CHECK-NEXT: movk w8, #31, lsl #16 -; CHECK-NEXT: ldrh w0, [x0, x8] +; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576 +; CHECK-NEXT: ldrh w0, [x8, #7408] ; CHECK-NEXT: ret %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992 %val = load i16, ptr %arrayidx, align 2 @@ -335,9 +329,8 @@ define i64 @LdOffset_i16_zext64(ptr %a) { define i64 @LdOffset_i16_sext64(ptr %a) { ; CHECK-LABEL: LdOffset_i16_sext64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #48368 // =0xbcf0 -; CHECK-NEXT: movk w8, #31, lsl #16 -; CHECK-NEXT: ldrsh x0, [x0, x8] +; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576 +; CHECK-NEXT: ldrsh x0, [x8, #7408] ; CHECK-NEXT: ret %arrayidx = getelementptr 
inbounds i16, ptr %a, i64 1039992 %val = load i16, ptr %arrayidx, align 2 @@ -349,9 +342,8 @@ define i64 @LdOffset_i16_sext64(ptr %a) { define i32 @LdOffset_i32(ptr %a) { ; CHECK-LABEL: LdOffset_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #31200 // =0x79e0 -; CHECK-NEXT: movk w8, #63, lsl #16 -; CHECK-NEXT: ldr w0, [x0, x8] +; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152 +; CHECK-NEXT: ldr w0, [x8, #14816] ; CHECK-NEXT: ret %arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992 %val = load i32, ptr %arrayidx, align 4 @@ -362,9 +354,8 @@ define i32 @LdOffset_i32(ptr %a) { define i64 @LdOffset_i32_zext64(ptr %a) { ; CHECK-LABEL: LdOffset_i32_zext64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #31200 // =0x79e0 -; CHECK-NEXT: movk w8, #63, lsl #16 -; CHECK-NEXT: ldr w0, [x0, x8] +; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152 +; CHECK-NEXT: ldr w0, [x8, #14816] ; CHECK-NEXT: ret %arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992 %val = load i32, ptr %arrayidx, align 2 @@ -376,9 +367,8 @@ define i64 @LdOffset_i32_zext64(ptr %a) { define i64 @LdOffset_i32_sext64(ptr %a) { ; CHECK-LABEL: LdOffset_i32_sext64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #31200 // =0x79e0 -; CHECK-NEXT: movk w8, #63, lsl #16 -; CHECK-NEXT: ldrsw x0, [x0, x8] +; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152 +; CHECK-NEXT: ldrsw x0, [x8, #14816] ; CHECK-NEXT: ret %arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992 %val = load i32, ptr %arrayidx, align 2 @@ -390,9 +380,8 @@ define i64 @LdOffset_i32_sext64(ptr %a) { define i64 @LdOffset_i64(ptr %a) { ; CHECK-LABEL: LdOffset_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #62400 // =0xf3c0 -; CHECK-NEXT: movk w8, #126, lsl #16 -; CHECK-NEXT: ldr x0, [x0, x8] +; CHECK-NEXT: add x8, x0, #2024, lsl #12 // =8290304 +; CHECK-NEXT: ldr x0, [x8, #29632] ; CHECK-NEXT: ret %arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992 %val = load i64, ptr %arrayidx, align 4 @@ -403,9 +392,8 @@ define i64 
@LdOffset_i64(ptr %a) { define <2 x i32> @LdOffset_v2i32(ptr %a) { ; CHECK-LABEL: LdOffset_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #62400 // =0xf3c0 -; CHECK-NEXT: movk w8, #126, lsl #16 -; CHECK-NEXT: ldr d0, [x0, x8] +; CHECK-NEXT: add x8, x0, #2024, lsl #12 // =8290304 +; CHECK-NEXT: ldr d0, [x8, #29632] ; CHECK-NEXT: ret %arrayidx = getelementptr inbounds <2 x i32>, ptr %a, i64 1039992 %val = load <2 x i32>, ptr %arrayidx, align 4 @@ -416,9 +404,8 @@ define <2 x i32> @LdOffset_v2i32(ptr %a) { define <2 x i64> @LdOffset_v2i64(ptr %a) { ; CHECK-LABEL: LdOffset_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #59264 // =0xe780 -; CHECK-NEXT: movk w8, #253, lsl #16 -; CHECK-NEXT: ldr q0, [x0, x8] +; CHECK-NEXT: add x8, x0, #4048, lsl #12 // =16580608 +; CHECK-NEXT: ldr q0, [x8, #59264] ; CHECK-NEXT: ret %arrayidx = getelementptr inbounds <2 x i64>, ptr %a, i64 1039992 %val = load <2 x i64>, ptr %arrayidx, align 4 @@ -429,9 +416,8 @@ define <2 x i64> @LdOffset_v2i64(ptr %a) { define double @LdOffset_i8_f64(ptr %a) { ; CHECK-LABEL: LdOffset_i8_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #56952 // =0xde78 -; CHECK-NEXT: movk w8, #15, lsl #16 -; CHECK-NEXT: ldrsb w8, [x0, x8] +; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288 +; CHECK-NEXT: ldrsb w8, [x8, #3704] ; CHECK-NEXT: scvtf d0, w8 ; CHECK-NEXT: ret %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992 @@ -444,9 +430,8 @@ define double @LdOffset_i8_f64(ptr %a) { define double @LdOffset_i16_f64(ptr %a) { ; CHECK-LABEL: LdOffset_i16_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #48368 // =0xbcf0 -; CHECK-NEXT: movk w8, #31, lsl #16 -; CHECK-NEXT: ldrsh w8, [x0, x8] +; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576 +; CHECK-NEXT: ldrsh w8, [x8, #7408] ; CHECK-NEXT: scvtf d0, w8 ; CHECK-NEXT: ret %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992 @@ -459,9 +444,8 @@ define double @LdOffset_i16_f64(ptr %a) { define double @LdOffset_i32_f64(ptr %a) { ; CHECK-LABEL: LdOffset_i32_f64: ; 
CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #31200 // =0x79e0 -; CHECK-NEXT: movk w8, #63, lsl #16 -; CHECK-NEXT: ldr s0, [x0, x8] +; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152 +; CHECK-NEXT: ldr s0, [x8, #14816] ; CHECK-NEXT: ucvtf d0, d0 ; CHECK-NEXT: ret %arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992 @@ -474,9 +458,8 @@ define double @LdOffset_i32_f64(ptr %a) { define double @LdOffset_i64_f64(ptr %a) { ; CHECK-LABEL: LdOffset_i64_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #62400 // =0xf3c0 -; CHECK-NEXT: movk w8, #126, lsl #16 -; CHECK-NEXT: ldr d0, [x0, x8] +; CHECK-NEXT: add x8, x0, #2024, lsl #12 // =8290304 +; CHECK-NEXT: ldr d0, [x8, #29632] ; CHECK-NEXT: scvtf d0, d0 ; CHECK-NEXT: ret %arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992 From 79658d65c3c7a075382b74d81e74714e2ea9bd2d Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 15 Aug 2024 14:42:08 +0400 Subject: [PATCH 008/441] InferAddressSpaces: Make getPredicatedAddrSpace less confusing (#104052) This takes a pointer value and the user instruction. Name them as such, and remove the null check which should be dead. 
--- .../Transforms/Scalar/InferAddressSpaces.cpp | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp index a5571409bba6824..0c8aee8a494c031 100644 --- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -240,7 +240,8 @@ class InferAddressSpacesImpl { SmallVectorImpl *PoisonUsesToFix) const; unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) const; - unsigned getPredicatedAddrSpace(const Value &V, Value *Opnd) const; + unsigned getPredicatedAddrSpace(const Value &PtrV, + const Instruction *UserCtxI) const; public: InferAddressSpacesImpl(AssumptionCache &AC, const DominatorTree *DT, @@ -909,18 +910,14 @@ void InferAddressSpacesImpl::inferAddressSpaces( } } -unsigned InferAddressSpacesImpl::getPredicatedAddrSpace(const Value &V, - Value *Opnd) const { - const Instruction *I = dyn_cast(&V); - if (!I) - return UninitializedAddressSpace; - - Opnd = Opnd->stripInBoundsOffsets(); - for (auto &AssumeVH : AC.assumptionsFor(Opnd)) { +unsigned InferAddressSpacesImpl::getPredicatedAddrSpace( + const Value &Ptr, const Instruction *UserCtxI) const { + const Value *StrippedPtr = Ptr.stripInBoundsOffsets(); + for (auto &AssumeVH : AC.assumptionsFor(StrippedPtr)) { if (!AssumeVH) continue; CallInst *CI = cast(AssumeVH); - if (!isValidAssumeForContext(CI, I, DT)) + if (!isValidAssumeForContext(CI, UserCtxI, DT)) continue; const Value *Ptr; @@ -989,7 +986,8 @@ bool InferAddressSpacesImpl::updateAddressSpace( OperandAS = PtrOperand->getType()->getPointerAddressSpace(); if (OperandAS == FlatAddrSpace) { // Check AC for assumption dominating V. 
- unsigned AS = getPredicatedAddrSpace(V, PtrOperand); + unsigned AS = + getPredicatedAddrSpace(*PtrOperand, &cast(V)); if (AS != UninitializedAddressSpace) { LLVM_DEBUG(dbgs() << " deduce operand AS from the predicate addrspace " From 100c9c019cebf49427d9f3ea93db65f7e448a102 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Thu, 15 Aug 2024 13:52:25 +0300 Subject: [PATCH 009/441] [DataLayout] Add helper predicates to sort specifications (NFC) (#104417) --- llvm/lib/IR/DataLayout.cpp | 47 +++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp index 44cd1e69818953d..5f9a0fbc61905fc 100644 --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -153,6 +153,24 @@ bool DataLayout::PointerSpec::operator==(const PointerSpec &Other) const { IndexBitWidth == Other.IndexBitWidth; } +namespace { +/// Predicate to sort primitive specs by bit width. +struct LessPrimitiveBitWidth { + bool operator()(const DataLayout::PrimitiveSpec &LHS, + unsigned RHSBitWidth) const { + return LHS.BitWidth < RHSBitWidth; + } +}; + +/// Predicate to sort pointer specs by address space number. 
+struct LessPointerAddrSpace { + bool operator()(const DataLayout::PointerSpec &LHS, + unsigned RHSAddrSpace) const { + return LHS.AddrSpace < RHSAddrSpace; + } +}; +} // namespace + const char *DataLayout::getManglingComponent(const Triple &T) { if (T.isOSBinFormatGOFF()) return "-m:l"; @@ -581,15 +599,6 @@ Error DataLayout::parseSpecifier(StringRef Desc) { return Error::success(); } -static SmallVectorImpl::const_iterator -findPrimitiveSpecLowerBound( - const SmallVectorImpl &Specs, - uint32_t BitWidth) { - return partition_point(Specs, [BitWidth](const DataLayout::PrimitiveSpec &E) { - return E.BitWidth < BitWidth; - }); -} - Error DataLayout::setPrimitiveSpec(TypeSpecifier Specifier, uint32_t BitWidth, Align ABIAlign, Align PrefAlign) { // AlignmentsTy::ABIAlign and AlignmentsTy::PrefAlign were once stored as @@ -620,9 +629,7 @@ Error DataLayout::setPrimitiveSpec(TypeSpecifier Specifier, uint32_t BitWidth, break; } - auto I = partition_point(*Specs, [BitWidth](const PrimitiveSpec &E) { - return E.BitWidth < BitWidth; - }); + auto I = lower_bound(*Specs, BitWidth, LessPrimitiveBitWidth()); if (I != Specs->end() && I->BitWidth == BitWidth) { // Update the abi, preferred alignments. 
I->ABIAlign = ABIAlign; @@ -637,10 +644,7 @@ Error DataLayout::setPrimitiveSpec(TypeSpecifier Specifier, uint32_t BitWidth, const DataLayout::PointerSpec & DataLayout::getPointerSpec(uint32_t AddrSpace) const { if (AddrSpace != 0) { - auto I = lower_bound(PointerSpecs, AddrSpace, - [](const PointerSpec &Spec, uint32_t AddrSpace) { - return Spec.AddrSpace < AddrSpace; - }); + auto I = lower_bound(PointerSpecs, AddrSpace, LessPointerAddrSpace()); if (I != PointerSpecs.end() && I->AddrSpace == AddrSpace) return *I; } @@ -658,10 +662,7 @@ Error DataLayout::setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth, if (IndexBitWidth > BitWidth) return reportError("Index width cannot be larger than pointer width"); - auto I = lower_bound(PointerSpecs, AddrSpace, - [](const PointerSpec &A, uint32_t AddrSpace) { - return A.AddrSpace < AddrSpace; - }); + auto I = lower_bound(PointerSpecs, AddrSpace, LessPointerAddrSpace()); if (I == PointerSpecs.end() || I->AddrSpace != AddrSpace) { PointerSpecs.insert(I, PointerSpec{AddrSpace, BitWidth, ABIAlign, PrefAlign, IndexBitWidth}); @@ -676,7 +677,7 @@ Error DataLayout::setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth, Align DataLayout::getIntegerAlignment(uint32_t BitWidth, bool abi_or_pref) const { - auto I = findPrimitiveSpecLowerBound(IntSpecs, BitWidth); + auto I = lower_bound(IntSpecs, BitWidth, LessPrimitiveBitWidth()); // If we don't have an exact match, use alignment of next larger integer // type. If there is none, use alignment of largest integer type by going // back one element. @@ -792,7 +793,7 @@ Align DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const { case Type::FP128TyID: case Type::X86_FP80TyID: { unsigned BitWidth = getTypeSizeInBits(Ty).getFixedValue(); - auto I = findPrimitiveSpecLowerBound(FloatSpecs, BitWidth); + auto I = lower_bound(FloatSpecs, BitWidth, LessPrimitiveBitWidth()); if (I != FloatSpecs.end() && I->BitWidth == BitWidth) return abi_or_pref ? 
I->ABIAlign : I->PrefAlign; @@ -807,7 +808,7 @@ Align DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const { case Type::FixedVectorTyID: case Type::ScalableVectorTyID: { unsigned BitWidth = getTypeSizeInBits(Ty).getKnownMinValue(); - auto I = findPrimitiveSpecLowerBound(VectorSpecs, BitWidth); + auto I = lower_bound(VectorSpecs, BitWidth, LessPrimitiveBitWidth()); if (I != VectorSpecs.end() && I->BitWidth == BitWidth) return abi_or_pref ? I->ABIAlign : I->PrefAlign; From 43ba1097ee747b4ec5e757762ed0c9df6255a292 Mon Sep 17 00:00:00 2001 From: Hua Tian Date: Thu, 15 Aug 2024 19:03:27 +0800 Subject: [PATCH 010/441] [llvm][CodeGen] Resolve issues when updating live intervals in window scheduler (#101945) Corrupted live interval information can cause window scheduling to crash in some cases. By adding the missing MBB's live interval information in the ModuloScheduleExpander, the information can be correctly analyzed in the window scheduler. --- llvm/lib/CodeGen/ModuloSchedule.cpp | 3 + .../CodeGen/Hexagon/swp-ws-live-intervals.mir | 217 ++++++++++++++++++ 2 files changed, 220 insertions(+) create mode 100644 llvm/test/CodeGen/Hexagon/swp-ws-live-intervals.mir diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp index dae0cb2c900e5be..78201d9bfb79a97 100644 --- a/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -130,6 +130,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() { // Generate the prolog instructions that set up the pipeline. generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs); MF.insert(BB->getIterator(), KernelBB); + LIS.insertMBBInMaps(KernelBB); // Rearrange the instructions to generate the new, pipelined loop, // and update register names as needed. 
@@ -210,6 +211,7 @@ void ModuloScheduleExpander::generateProlog(unsigned LastStage, NewBB->transferSuccessors(PredBB); PredBB->addSuccessor(NewBB); PredBB = NewBB; + LIS.insertMBBInMaps(NewBB); // Generate instructions for each appropriate stage. Process instructions // in original program order. @@ -283,6 +285,7 @@ void ModuloScheduleExpander::generateEpilog( PredBB->replaceSuccessor(LoopExitBB, NewBB); NewBB->addSuccessor(LoopExitBB); + LIS.insertMBBInMaps(NewBB); if (EpilogStart == LoopExitBB) EpilogStart = NewBB; diff --git a/llvm/test/CodeGen/Hexagon/swp-ws-live-intervals.mir b/llvm/test/CodeGen/Hexagon/swp-ws-live-intervals.mir new file mode 100644 index 000000000000000..7fa3cdf62d09025 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/swp-ws-live-intervals.mir @@ -0,0 +1,217 @@ +# REQUIRES: asserts +# +# RUN: llc --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \ +# RUN: -window-sched=force -filetype=null -window-search-num=100 \ +# RUN: -window-search-ratio=100 -window-diff-limit=0 -verify-machineinstrs \ +# RUN: 2>&1 | FileCheck %s + +# The bug was reported at https://github.com/llvm/llvm-project/pull/99454. +# It is caused by the corruption of live intervals in certain scenarios. +# +# We check the newly generated MBBs after successful scheduling here. +# CHECK: Best window offset is {{[0-9]+}} and Best II is {{[0-9]+}}. +# CHECK: prolog: +# CHECK: bb.5: +# CHECK: New block +# CHECK: bb.6: +# CHECK: epilog: +# CHECK: bb.7: +# CHECK: Best window offset is {{[0-9]+}} and Best II is {{[0-9]+}}. 
+# CHECK: prolog: +# CHECK: bb.8: +# CHECK: New block +# CHECK: bb.9: +# CHECK: epilog: +# CHECK: bb.10: + +--- | + target triple = "hexagon" + + @_dp_ctrl_calc_tu_temp2_fp = global i64 0 + @_dp_ctrl_calc_tu_temp1_fp = global i32 0 + @dp_panel_update_tu_timings___trans_tmp_5 = global i64 0 + @_dp_ctrl_calc_tu___trans_tmp_8 = global i64 0 + + declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) + declare i8 @div64_u64_rem(i32, ptr) + declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) + + define void @dp_ctrl_calc_tu_parameters() { + if.end.i: + %rem.i11.i = alloca i64, align 8 + %rem.i.i = alloca i64, align 8 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %rem.i11.i) + %call.i.i = call i8 @div64_u64_rem(i32 0, ptr nonnull %rem.i11.i) + %conv1.i.i = zext i8 %call.i.i to i64 + %rem.promoted.i.i = load i64, ptr %rem.i11.i, align 8 + br label %do.body.i.i + + do.body.i.i: + %lsr.iv1 = phi i32 [ %lsr.iv.next2, %do.body.i.i ], [ -32, %if.end.i ] + %sub9.i.i = phi i64 [ %rem.promoted.i.i, %if.end.i ], [ %sub8.i.i.7, %do.body.i.i ] + %res_abs.0.i.i = phi i64 [ %conv1.i.i, %if.end.i ], [ %res_abs.1.i.i.7, %do.body.i.i ] + %cmp.not.i.i = icmp ne i64 %sub9.i.i, 0 + %sub.i.neg.i = sext i1 %cmp.not.i.i to i64 + %sub8.i.i = add i64 %sub9.i.i, %sub.i.neg.i + %0 = shl i64 %res_abs.0.i.i, 2 + %1 = select i1 %cmp.not.i.i, i64 2, i64 0 + %shl.i.i.5 = or disjoint i64 %0, %1 + %cmp.not.i.i.5 = icmp ne i64 %sub8.i.i, 0 + %sub.i.neg.i.5 = sext i1 %cmp.not.i.i.5 to i64 + %sub8.i.i.5 = add i64 %sub8.i.i, %sub.i.neg.i.5 + %or.i.i.5 = zext i1 %cmp.not.i.i.5 to i64 + %res_abs.1.i.i.5 = or disjoint i64 %shl.i.i.5, %or.i.i.5 + %cmp.not.i.i.6 = icmp ne i64 %sub8.i.i.5, 0 + %sub.i.neg.i.6 = sext i1 %cmp.not.i.i.6 to i64 + %sub8.i.i.6 = add i64 %sub8.i.i.5, %sub.i.neg.i.6 + %2 = shl i64 %res_abs.1.i.i.5, 2 + %3 = select i1 %cmp.not.i.i.6, i64 2, i64 0 + %shl.i.i.7 = or disjoint i64 %2, %3 + %cmp.not.i.i.7 = icmp ne i64 %sub8.i.i.6, 0 + %sub.i.neg.i.7 = sext i1 
%cmp.not.i.i.7 to i64 + %sub8.i.i.7 = add i64 %sub8.i.i.6, %sub.i.neg.i.7 + %or.i.i.7 = zext i1 %cmp.not.i.i.7 to i64 + %res_abs.1.i.i.7 = or disjoint i64 %shl.i.i.7, %or.i.i.7 + %lsr.iv.next2 = add nsw i32 %lsr.iv1, 8 + %tobool.not.i.i.7 = icmp eq i32 %lsr.iv.next2, 0 + br i1 %tobool.not.i.i.7, label %fec_check.i, label %do.body.i.i + + fec_check.i: + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %rem.i11.i) + store i64 %res_abs.1.i.i.7, ptr @_dp_ctrl_calc_tu_temp2_fp, align 8 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %rem.i11.i) + %call.i12.i = call i8 @div64_u64_rem(i32 0, ptr nonnull %rem.i11.i) + %conv1.i13.i = zext i8 %call.i12.i to i64 + %rem.promoted.i14.i = load i64, ptr %rem.i11.i, align 8 + br label %do.body.i15.i + + do.body.i15.i: + %lsr.iv = phi i32 [ %lsr.iv.next, %do.body.i15.i ], [ -32, %fec_check.i ] + %sub9.i16.i = phi i64 [ %rem.promoted.i14.i, %fec_check.i ], [ %sub8.i22.i.7, %do.body.i15.i ] + %res_abs.0.i17.i = phi i64 [ %conv1.i13.i, %fec_check.i ], [ %res_abs.1.i24.i.7, %do.body.i15.i ] + %cmp.not.i20.i = icmp ugt i64 %sub9.i16.i, 999 + %sub.i21.neg.i = select i1 %cmp.not.i20.i, i64 -1000, i64 0 + %sub8.i22.i = add i64 %sub.i21.neg.i, %sub9.i16.i + %4 = shl i64 %res_abs.0.i17.i, 2 + %5 = select i1 %cmp.not.i20.i, i64 2, i64 0 + %shl.i19.i.7 = or disjoint i64 %4, %5 + %cmp.not.i20.i.7 = icmp ugt i64 %sub8.i22.i, 999 + %sub.i21.neg.i.7 = select i1 %cmp.not.i20.i.7, i64 -1000, i64 0 + %sub8.i22.i.7 = add i64 %sub.i21.neg.i.7, %sub8.i22.i + %or.i23.i.7 = zext i1 %cmp.not.i20.i.7 to i64 + %res_abs.1.i24.i.7 = or disjoint i64 %shl.i19.i.7, %or.i23.i.7 + %lsr.iv.next = add nsw i32 %lsr.iv, 8 + %tobool.not.i26.i.7 = icmp eq i32 %lsr.iv.next, 0 + br i1 %tobool.not.i26.i.7, label %_dp_ctrl_calc_tu.exit, label %do.body.i15.i + + _dp_ctrl_calc_tu.exit: + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %rem.i11.i) + %conv.i = trunc i64 %res_abs.1.i24.i.7 to i32 + store i32 %conv.i, ptr @_dp_ctrl_calc_tu_temp1_fp, align 4 + %conv5.i 
= and i64 %res_abs.1.i24.i.7, 4294967295 + store i64 %conv5.i, ptr @dp_panel_update_tu_timings___trans_tmp_5, align 8 + store i64 %res_abs.1.i.i.7, ptr @_dp_ctrl_calc_tu___trans_tmp_8, align 8 + ret void + } + +... +--- +name: dp_ctrl_calc_tu_parameters +tracksRegLiveness: true +stack: + - { id: 0, name: rem.i11.i, type: default, offset: 0, size: 8, alignment: 8} +body: | + bb.0: + successors: %bb.1(0x80000000) + + %0:intregs = A2_tfrsi 0 + %1:intregs = PS_fi %stack.0.rem.i11.i, 0 + %2:intregs = A2_tfrsi 0 + %3:doubleregs = A4_combineir 0, %2 + %4:doubleregs = L2_loadrd_io %stack.0.rem.i11.i, 0 + %5:doubleregs = A2_tfrpi 0 + J2_loop0i %bb.1, 4, implicit-def $lc0, implicit-def $sa0, implicit-def $usr + + bb.1 (machine-block-address-taken): + successors: %bb.2(0x04000000), %bb.1(0x7c000000) + + %6:doubleregs = PHI %4, %bb.0, %7, %bb.1 + %8:doubleregs = PHI %3, %bb.0, %9, %bb.1 + %10:predregs = C2_cmpeqp %6, %5 + %11:intregs = C2_muxii %10, 0, -1 + %12:doubleregs = A2_addsp %11, %6 + %13:doubleregs = S2_asl_i_p %8, 2 + %14:intregs = S2_setbit_i %13.isub_lo, 1 + %15:intregs = C2_mux %10, %13.isub_lo, %14 + %16:predregs = C2_cmpeqp %12, %5 + %17:intregs = C2_muxii %16, 0, -1 + %18:doubleregs = A2_addsp %17, %12 + %19:intregs = S2_setbit_i %15, 0 + %20:intregs = C2_mux %16, %15, %19 + %21:predregs = C2_cmpeqp %18, %5 + %22:intregs = C2_muxii %21, 0, -1 + %23:doubleregs = A2_addsp %22, %18 + %24:intregs = S2_asl_i_r %20, 2 + %25:intregs = S2_extractu %8.isub_lo, 2, 28 + %26:intregs = S2_asl_i_r_or %25, %13.isub_hi, 2 + %27:intregs = S2_setbit_i %24, 1 + %28:intregs = C2_mux %21, %24, %27 + %29:predregs = C2_cmpeqp %23, %5 + %30:intregs = C2_muxii %29, 0, -1 + %7:doubleregs = A2_addsp %30, %23 + %31:intregs = S2_setbit_i %28, 0 + %32:intregs = C2_mux %29, %28, %31 + %9:doubleregs = REG_SEQUENCE %26, %subreg.isub_hi, %32, %subreg.isub_lo + ENDLOOP0 %bb.1, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0 + J2_jump %bb.2, implicit-def dead $pc + + bb.2: + 
successors: %bb.3(0x80000000) + + S2_storerdgp @_dp_ctrl_calc_tu_temp2_fp, %9, implicit $gp + %33:intregs = A2_tfrsi 0 + %34:intregs = PS_fi %stack.0.rem.i11.i, 0 + %35:intregs = A2_tfrsi 0 + %36:doubleregs = L2_loadrd_io %stack.0.rem.i11.i, 0 + %37:doubleregs = A2_tfrpi 124 + %38:intregs = A2_tfrsi -1000 + %39:intregs = A2_tfrsi -1 + J2_loop0i %bb.3, 4, implicit-def $lc0, implicit-def $sa0, implicit-def $usr + + bb.3 (machine-block-address-taken): + successors: %bb.4(0x04000000), %bb.3(0x7c000000) + + %40:doubleregs = PHI %36, %bb.2, %41, %bb.3 + %42:intregs = PHI %35, %bb.2, %43, %bb.3 + %44:intregs = PHI %33, %bb.2, %45, %bb.3 + %46:doubleregs = S2_lsr_i_p %40, 3 + %47:predregs = C2_cmpgtup %46, %37 + %48:intregs = C2_mux %47, %38, %33 + %49:intregs = C2_mux %47, %39, %33 + %50:doubleregs = REG_SEQUENCE %49, %subreg.isub_hi, %48, %subreg.isub_lo + %51:doubleregs = A2_addp %50, %40 + %52:intregs = S2_asl_i_r %42, 2 + %53:intregs = S2_extractu %42, 2, 30 + %45:intregs = S2_asl_i_r_or %53, %44, 2 + %54:intregs = S2_setbit_i %52, 1 + %55:intregs = C2_mux %47, %54, %52 + %56:doubleregs = S2_lsr_i_p %51, 3 + %57:predregs = C2_cmpgtup %56, %37 + %58:intregs = C2_mux %57, %38, %33 + %59:intregs = C2_mux %57, %39, %33 + %60:doubleregs = REG_SEQUENCE %59, %subreg.isub_hi, %58, %subreg.isub_lo + %41:doubleregs = A2_addp %60, %51 + %61:intregs = S2_setbit_i %55, 0 + %43:intregs = C2_mux %57, %61, %55 + ENDLOOP0 %bb.3, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0 + J2_jump %bb.4, implicit-def dead $pc + + bb.4: + S2_storerigp @_dp_ctrl_calc_tu_temp1_fp, %43, implicit $gp + %62:intregs = A2_tfrsi 0 + %63:doubleregs = REG_SEQUENCE %43, %subreg.isub_lo, %62, %subreg.isub_hi + S2_storerdgp @dp_panel_update_tu_timings___trans_tmp_5, %63, implicit $gp + S2_storerdgp @_dp_ctrl_calc_tu___trans_tmp_8, %9, implicit $gp + PS_jmpret $r31, implicit-def dead $pc + +... 
From 91ffc12a820164bdebb1a388b5d7f41a6f25ce04 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Thu, 15 Aug 2024 13:18:33 +0200 Subject: [PATCH 011/441] [clang][Interp] Pass callee decl to null_callee diagnostics (#104426) The callee is null, not the full call expression. --- clang/lib/AST/Interp/Interp.h | 4 ++-- clang/test/AST/Interp/functions.cpp | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h index 3eab0cfd871385a..c2d73f32f0b20c9 100644 --- a/clang/lib/AST/Interp/Interp.h +++ b/clang/lib/AST/Interp/Interp.h @@ -2702,9 +2702,9 @@ inline bool CallPtr(InterpState &S, CodePtr OpPC, uint32_t ArgSize, const Function *F = FuncPtr.getFunction(); if (!F) { - const Expr *E = S.Current->getExpr(OpPC); + const auto *E = cast(S.Current->getExpr(OpPC)); S.FFDiag(E, diag::note_constexpr_null_callee) - << const_cast(E) << E->getSourceRange(); + << const_cast(E->getCallee()) << E->getSourceRange(); return false; } diff --git a/clang/test/AST/Interp/functions.cpp b/clang/test/AST/Interp/functions.cpp index f190262ad3ebee4..b00f59a8d8d433d 100644 --- a/clang/test/AST/Interp/functions.cpp +++ b/clang/test/AST/Interp/functions.cpp @@ -221,6 +221,17 @@ namespace Comparison { static_assert(pg == &g, ""); } + constexpr int Double(int n) { return 2 * n; } + constexpr int Triple(int n) { return 3 * n; } + constexpr int Twice(int (*F)(int), int n) { return F(F(n)); } + constexpr int Quadruple(int n) { return Twice(Double, n); } + constexpr auto Select(int n) -> int (*)(int) { + return n == 2 ? &Double : n == 3 ? &Triple : n == 4 ? 
&Quadruple : 0; + } + constexpr int Apply(int (*F)(int), int n) { return F(n); } // both-note {{'F' evaluates to a null function pointer}} + + constexpr int Invalid = Apply(Select(0), 0); // both-error {{must be initialized by a constant expression}} \ + // both-note {{in call to 'Apply(nullptr, 0)'}} } struct F { From 7898866065f6c9b72b5fa3e45e565baf8a5e7609 Mon Sep 17 00:00:00 2001 From: Adrian Vogelsgesang Date: Thu, 15 Aug 2024 13:19:32 +0200 Subject: [PATCH 012/441] [lldb-dap] Expose log path in extension settings (#103482) lldb-dap already supports a log file which can be enabled by setting the `LLDBDAP_LOG` environment variable. With this commit, the log location can be set directly through the VS-Code extension settings. Also, this commit bumps the version number, such that the new VS Code extension gets published to the Marketplace. --- lldb/tools/lldb-dap/package-lock.json | 4 ++-- lldb/tools/lldb-dap/package.json | 7 ++++++- lldb/tools/lldb-dap/src-ts/extension.ts | 26 ++++++++++++++++++++----- 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/lldb/tools/lldb-dap/package-lock.json b/lldb/tools/lldb-dap/package-lock.json index 8c70cc2d30e1443..96570e42dbfdc4b 100644 --- a/lldb/tools/lldb-dap/package-lock.json +++ b/lldb/tools/lldb-dap/package-lock.json @@ -1,12 +1,12 @@ { "name": "lldb-dap", - "version": "0.2.0", + "version": "0.2.4", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "lldb-dap", - "version": "0.2.0", + "version": "0.2.4", "license": "Apache 2.0 License with LLVM exceptions", "devDependencies": { "@types/node": "^18.11.18", diff --git a/lldb/tools/lldb-dap/package.json b/lldb/tools/lldb-dap/package.json index 97e4efe7bac19d3..4f4261d1718c018 100644 --- a/lldb/tools/lldb-dap/package.json +++ b/lldb/tools/lldb-dap/package.json @@ -1,7 +1,7 @@ { "name": "lldb-dap", "displayName": "LLDB DAP", - "version": "0.2.3", + "version": "0.2.4", "publisher": "llvm-vs-code-extensions", "homepage": "https://lldb.llvm.org", 
"description": "LLDB debugging from VSCode", @@ -73,6 +73,11 @@ "scope": "resource", "type": "string", "description": "The path to the lldb-dap binary." + }, + "lldb-dap.log-path": { + "scope": "resource", + "type": "string", + "description": "The log path for lldb-dap (if any)" } } }, diff --git a/lldb/tools/lldb-dap/src-ts/extension.ts b/lldb/tools/lldb-dap/src-ts/extension.ts index 791175f7b46224e..7df09f7a29dad71 100644 --- a/lldb/tools/lldb-dap/src-ts/extension.ts +++ b/lldb/tools/lldb-dap/src-ts/extension.ts @@ -14,13 +14,29 @@ function createDefaultLLDBDapOptions(): LLDBDapOptions { session: vscode.DebugSession, packageJSONExecutable: vscode.DebugAdapterExecutable | undefined, ): Promise { - const path = vscode.workspace - .getConfiguration("lldb-dap", session.workspaceFolder) - .get("executable-path"); + const config = vscode.workspace + .getConfiguration("lldb-dap", session.workspaceFolder); + const path = config.get("executable-path"); + const log_path = config.get("log-path"); + + let env : { [key: string]: string } = {}; + if (log_path) { + env["LLDBDAP_LOG"] = log_path; + } + if (path) { - return new vscode.DebugAdapterExecutable(path, []); + return new vscode.DebugAdapterExecutable(path, [], {env}); + } else if (packageJSONExecutable) { + return new vscode.DebugAdapterExecutable(packageJSONExecutable.command, packageJSONExecutable.args, { + ...packageJSONExecutable.options, + env: { + ...packageJSONExecutable.options?.env, + ...env + } + }); + } else { + return undefined; } - return packageJSONExecutable; }, }; } From 57a19ac3365f1dc255e6f24fcb7afcde2ccef8f9 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Thu, 15 Aug 2024 13:34:03 +0200 Subject: [PATCH 013/441] Reapply "[lldb] Tolerate multiple compile units with the same DWO ID (#100577)" (#104041) The only change vs. the first version of the patch is that I've made DWARFUnit linking thread-safe/unit. 
This was necessary because, during the indexing step, two skeleton units could attempt to associate themselves with the split unit. The original commit message was: I ran into this when LTO completely emptied two compile units, so they ended up with the same hash (see #100375). Although, ideally, the compiler would try to ensure we don't end up with a hash collision even in this case, guaranteeing their absence is practically impossible. This patch ensures this situation does not bring down lldb. --- .../Plugins/SymbolFile/DWARF/DWARFUnit.cpp | 41 ++--- .../Plugins/SymbolFile/DWARF/DWARFUnit.h | 4 +- .../SymbolFile/DWARF/x86/dwp-hash-collision.s | 142 ++++++++++++++++++ 3 files changed, 168 insertions(+), 19 deletions(-) create mode 100644 lldb/test/Shell/SymbolFile/DWARF/x86/dwp-hash-collision.s diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp index 66a762bf9b68542..81f937762e35a65 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp @@ -97,12 +97,14 @@ void DWARFUnit::ExtractUnitDIEIfNeeded() { *m_dwo_id, m_first_die.GetOffset())); return; // Can't fetch the compile unit from the dwo file. } - // If the skeleton compile unit gets its unit DIE parsed first, then this - // will fill in the DWO file's back pointer to this skeleton compile unit. - // If the DWO files get parsed on their own first the skeleton back link - // can be done manually in DWARFUnit::GetSkeletonCompileUnit() which will - // do a reverse lookup and cache the result. - dwo_cu->SetSkeletonUnit(this); + + // Link the DWO unit to this object, if it hasn't been linked already (this + // can happen when we have an index, and the DWO unit is parsed first). 
+ if (!dwo_cu->LinkToSkeletonUnit(*this)) { + SetDwoError(Status::createWithFormat( + "multiple compile units with Dwo ID {0:x16}", *m_dwo_id)); + return; + } DWARFBaseDIE dwo_cu_die = dwo_cu->GetUnitDIEOnly(); if (!dwo_cu_die.IsValid()) { @@ -708,23 +710,28 @@ uint8_t DWARFUnit::GetAddressByteSize(const DWARFUnit *cu) { uint8_t DWARFUnit::GetDefaultAddressSize() { return 4; } DWARFCompileUnit *DWARFUnit::GetSkeletonUnit() { - if (m_skeleton_unit == nullptr && IsDWOUnit()) { + if (m_skeleton_unit.load() == nullptr && IsDWOUnit()) { SymbolFileDWARFDwo *dwo = llvm::dyn_cast_or_null(&GetSymbolFileDWARF()); // Do a reverse lookup if the skeleton compile unit wasn't set. - if (dwo) - m_skeleton_unit = dwo->GetBaseSymbolFile().GetSkeletonUnit(this); + DWARFUnit *candidate_skeleton_unit = + dwo ? dwo->GetBaseSymbolFile().GetSkeletonUnit(this) : nullptr; + if (candidate_skeleton_unit) + (void)LinkToSkeletonUnit(*candidate_skeleton_unit); + // Linking may fail due to a race, so be sure to return the actual value. } - return llvm::dyn_cast_or_null(m_skeleton_unit); + return llvm::dyn_cast_or_null(m_skeleton_unit.load()); } -void DWARFUnit::SetSkeletonUnit(DWARFUnit *skeleton_unit) { - // If someone is re-setting the skeleton compile unit backlink, make sure - // it is setting it to a valid value when it wasn't valid, or if the - // value in m_skeleton_unit was valid, it should be the same value. - assert(skeleton_unit); - assert(m_skeleton_unit == nullptr || m_skeleton_unit == skeleton_unit); - m_skeleton_unit = skeleton_unit; +bool DWARFUnit::LinkToSkeletonUnit(DWARFUnit &skeleton_unit) { + DWARFUnit *expected_unit = nullptr; + if (m_skeleton_unit.compare_exchange_strong(expected_unit, &skeleton_unit)) + return true; + if (expected_unit == &skeleton_unit) { + // Exchange failed because it already contained the right value. + return true; + } + return false; // Already linked to a different unit. 
} bool DWARFUnit::Supports_DW_AT_APPLE_objc_complete_type() { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h index 85c37971ced8e0f..148932d67b908c2 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h @@ -170,7 +170,7 @@ class DWARFUnit : public UserID { /// both cases correctly and avoids crashes. DWARFCompileUnit *GetSkeletonUnit(); - void SetSkeletonUnit(DWARFUnit *skeleton_unit); + bool LinkToSkeletonUnit(DWARFUnit &skeleton_unit); bool Supports_DW_AT_APPLE_objc_complete_type(); @@ -308,7 +308,7 @@ class DWARFUnit : public UserID { const llvm::DWARFAbbreviationDeclarationSet *m_abbrevs = nullptr; lldb_private::CompileUnit *m_lldb_cu = nullptr; // If this is a DWO file, we have a backlink to our skeleton compile unit. - DWARFUnit *m_skeleton_unit = nullptr; + std::atomic m_skeleton_unit = nullptr; // The compile unit debug information entry item DWARFDebugInfoEntry::collection m_die_array; mutable llvm::sys::RWMutex m_die_array_mutex; diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-hash-collision.s b/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-hash-collision.s new file mode 100644 index 000000000000000..d626b4602ad58f4 --- /dev/null +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-hash-collision.s @@ -0,0 +1,142 @@ +## Test that lldb handles (mainly, that it doesn't crash) the situation where +## two skeleton compile units have the same DWO ID (and try to claim the same +## split unit from the DWP file. This can sometimes happen when the compile unit +## is nearly empty (e.g. because LTO has optimized all of it away). 
+ +# RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj %s --defsym MAIN=0 > %t +# RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj %s > %t.dwp +# RUN: %lldb %t -o "image lookup -t my_enum_type" \ +# RUN: -o "image dump separate-debug-info" -o exit | FileCheck %s + +## Check that we're able to access the type within the split unit (no matter +## which skeleton unit it ends up associated with). Completely ignoring the unit +## might also be reasonable. +# CHECK: image lookup -t my_enum_type +# CHECK: 1 match found +# CHECK: name = "my_enum_type", byte-size = 4, compiler_type = "enum my_enum_type { +# CHECK-NEXT: }" +# +## Check that we get some indication of the error. +# CHECK: image dump separate-debug-info +# CHECK: Dwo ID Err Dwo Path +# CHECK: 0xdeadbeefbaadf00d E multiple compile units with Dwo ID 0xdeadbeefbaadf00d + +.set DWO_ID, 0xdeadbeefbaadf00d + +## The main file. +.ifdef MAIN + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 74 # DW_TAG_compile_unit + .byte 0 # DW_CHILDREN_no + .byte 0x76 # DW_AT_dwo_name + .byte 8 # DW_FORM_string + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + + + .section .debug_info,"",@progbits +.irpc I,01 +.Lcu_begin\I: + .long .Ldebug_info_end\I-.Ldebug_info_start\I # Length of Unit +.Ldebug_info_start\I: + .short 5 # DWARF version number + .byte 4 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. Section + .quad DWO_ID # DWO id + .byte 1 # Abbrev [1] DW_TAG_compile_unit + .ascii "foo" + .byte '0' + \I + .asciz ".dwo\0" # DW_AT_dwo_name +.Ldebug_info_end\I: +.endr + +.else +## DWP file starts here. 
+ + .section .debug_abbrev.dwo,"e",@progbits +.LAbbrevBegin: + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 8 # DW_FORM_string + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 4 # DW_TAG_enumeration_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 8 # DW_FORM_string + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 8 # DW_FORM_string + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) +.LAbbrevEnd: + .section .debug_info.dwo,"e",@progbits +.LCUBegin: +.Lcu_begin1: + .long .Ldebug_info_end1-.Ldebug_info_start1 # Length of Unit +.Ldebug_info_start1: + .short 5 # DWARF version number + .byte 5 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long 0 # Offset Into Abbrev. 
Section + .quad DWO_ID # DWO id + .byte 1 # Abbrev [1] DW_TAG_compile_unit + .asciz "Hand-written DWARF" # DW_AT_producer + .short 12 # DW_AT_language + .byte 2 # Abbrev [2] DW_TAG_enumeration_type + .asciz "my_enum_type" # DW_AT_name + .long .Lint-.Lcu_begin1 # DW_AT_type + .byte 4 # DW_AT_byte_size +.Lint: + .byte 4 # Abbrev [4] DW_TAG_base_type + .asciz "int" # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Ldebug_info_end1: +.LCUEnd: + .section .debug_cu_index, "", @progbits +## Header: + .short 5 # Version + .short 0 # Padding + .long 2 # Section count + .long 1 # Unit count + .long 2 # Slot count +## Hash Table of Signatures: + .quad 0 + .quad DWO_ID +## Parallel Table of Indexes: + .long 0 + .long 1 +## Table of Section Offsets: +## Row 0: + .long 1 # DW_SECT_INFO + .long 3 # DW_SECT_ABBREV +## Row 1: + .long 0 # Offset in .debug_info.dwo + .long 0 # Offset in .debug_abbrev.dwo +## Table of Section Sizes: + .long .LCUEnd-.LCUBegin # Size in .debug_info.dwo + .long .LAbbrevEnd-.LAbbrevBegin # Size in .debug_abbrev.dwo +.endif From 2e9f3f3b842538b55552aa22fdc0bf1966637ca8 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Thu, 15 Aug 2024 12:54:26 +0100 Subject: [PATCH 014/441] [AMDGPU][llvm-split] Fix another division by zero (#104421) Somehow I missed this in #98888. It requires a log file, or the debug flag to be passed. 
--- llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp | 3 ++- .../tools/llvm-split/AMDGPU/declarations-debug.ll | 12 ++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 llvm/test/tools/llvm-split/AMDGPU/declarations-debug.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp index bd0f0e048809bcd..df084cf41c47836 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp @@ -568,12 +568,13 @@ doPartitioning(SplitModuleLogger &SML, Module &M, unsigned NumParts, } if (SML) { + CostType ModuleCostOr1 = ModuleCost ? ModuleCost : 1; for (const auto &[Idx, Part] : enumerate(Partitions)) { CostType Cost = 0; for (auto *Fn : Part) Cost += FnCosts.at(Fn); SML << "P" << Idx << " has a total cost of " << Cost << " (" - << format("%0.2f", (float(Cost) / ModuleCost) * 100) + << format("%0.2f", (float(Cost) / ModuleCostOr1) * 100) << "% of source module)\n"; } diff --git a/llvm/test/tools/llvm-split/AMDGPU/declarations-debug.ll b/llvm/test/tools/llvm-split/AMDGPU/declarations-debug.ll new file mode 100644 index 000000000000000..66b89659f5d4f98 --- /dev/null +++ b/llvm/test/tools/llvm-split/AMDGPU/declarations-debug.ll @@ -0,0 +1,12 @@ +; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa --debug + +; REQUIRES: asserts + +; CHECK: --Partitioning Starts-- +; CHECK: P0 has a total cost of 0 (0.00% of source module) +; CHECK: P1 has a total cost of 0 (0.00% of source module) +; CHECK: --Partitioning Done-- + +declare void @A() + +declare void @B() From 2ccbf92f878e385ab0067e2f767e39b295906a47 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 15 Aug 2024 15:53:55 +0400 Subject: [PATCH 015/441] InferAddressSpaces: Restore non-instruction user check Fixes regression after 79658d65c3c7a075382b74d81e74714e2ea9bd2d. We were missing test coverage for the nested constant expression case. 
--- llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp | 14 +++++++++----- .../Transforms/InferAddressSpaces/AMDGPU/basic.ll | 8 ++++++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp index 0c8aee8a494c031..3e3e5bfe2d63327 100644 --- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -241,7 +241,7 @@ class InferAddressSpacesImpl { unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) const; unsigned getPredicatedAddrSpace(const Value &PtrV, - const Instruction *UserCtxI) const; + const Value *UserCtx) const; public: InferAddressSpacesImpl(AssumptionCache &AC, const DominatorTree *DT, @@ -910,8 +910,13 @@ void InferAddressSpacesImpl::inferAddressSpaces( } } -unsigned InferAddressSpacesImpl::getPredicatedAddrSpace( - const Value &Ptr, const Instruction *UserCtxI) const { +unsigned +InferAddressSpacesImpl::getPredicatedAddrSpace(const Value &Ptr, + const Value *UserCtx) const { + const Instruction *UserCtxI = dyn_cast(UserCtx); + if (!UserCtxI) + return UninitializedAddressSpace; + const Value *StrippedPtr = Ptr.stripInBoundsOffsets(); for (auto &AssumeVH : AC.assumptionsFor(StrippedPtr)) { if (!AssumeVH) @@ -986,8 +991,7 @@ bool InferAddressSpacesImpl::updateAddressSpace( OperandAS = PtrOperand->getType()->getPointerAddressSpace(); if (OperandAS == FlatAddrSpace) { // Check AC for assumption dominating V. 
- unsigned AS = - getPredicatedAddrSpace(*PtrOperand, &cast(V)); + unsigned AS = getPredicatedAddrSpace(*PtrOperand, &V); if (AS != UninitializedAddressSpace) { LLVM_DEBUG(dbgs() << " deduce operand AS from the predicate addrspace " diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll index e6f26aeb98b1412..eb39684a98b5f52 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll @@ -190,6 +190,14 @@ define i32 @atomicrmw_add_global_to_flat_preserve_amdgpu_md(ptr addrspace(1) %gl ret i32 %ret } +; Make sure there's no assert +; CHECK-LABEL: @try_infer_getelementptr_constant_null( +; CHECK-NEXT: %ce = getelementptr i8, ptr getelementptr inbounds (i8, ptr null, i64 8), i64 0 +define ptr @try_infer_getelementptr_constant_null() { + %ce = getelementptr i8, ptr getelementptr inbounds (i8, ptr null, i64 8), i64 0 + ret ptr %ce +} + attributes #0 = { nounwind } !0 = !{} From d2c26d82b0395b8b555be384ed778361ec176c14 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Thu, 15 Aug 2024 13:57:38 +0200 Subject: [PATCH 016/441] [clang][Interp] Use first field decl for Record field lookup (#104412) --- clang/lib/AST/Interp/Program.cpp | 1 + clang/lib/AST/Interp/Record.cpp | 2 +- clang/test/Modules/enum-codegen.cpp | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/Interp/Program.cpp b/clang/lib/AST/Interp/Program.cpp index 5dd59d969853cce..9109956dc652cb2 100644 --- a/clang/lib/AST/Interp/Program.cpp +++ b/clang/lib/AST/Interp/Program.cpp @@ -329,6 +329,7 @@ Record *Program::getOrCreateRecord(const RecordDecl *RD) { // Reserve space for fields. Record::FieldList Fields; for (const FieldDecl *FD : RD->fields()) { + assert(FD == FD->getFirstDecl()); // Note that we DO create fields and descriptors // for unnamed bitfields here, even though we later ignore // them everywhere. 
That's so the FieldDecl's getFieldIndex() matches. diff --git a/clang/lib/AST/Interp/Record.cpp b/clang/lib/AST/Interp/Record.cpp index ac01524e1caf0d7..e7b741f2d458c84 100644 --- a/clang/lib/AST/Interp/Record.cpp +++ b/clang/lib/AST/Interp/Record.cpp @@ -37,7 +37,7 @@ const std::string Record::getName() const { } const Record::Field *Record::getField(const FieldDecl *FD) const { - auto It = FieldMap.find(FD); + auto It = FieldMap.find(FD->getFirstDecl()); assert(It != FieldMap.end() && "Missing field"); return It->second; } diff --git a/clang/test/Modules/enum-codegen.cpp b/clang/test/Modules/enum-codegen.cpp index 4397b457801da4e..0028ab00c15290f 100644 --- a/clang/test/Modules/enum-codegen.cpp +++ b/clang/test/Modules/enum-codegen.cpp @@ -1,5 +1,6 @@ // RUN: rm -rf %t // RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fmodules-cache-path=%t %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fmodules-cache-path=%t %s -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s // CHECK: @{{.*var.*}} = {{.*}} %union.union_type { i8 1 }, From 42555cdba48b7d6d27c9a7d5c730733e66dec9a1 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 15 Aug 2024 13:05:50 +0100 Subject: [PATCH 017/441] [VPlan] Run VPlan optimizations on plans in native path. Update buildVPlans (used in native path) to also run general VPlan optimizations in another small step to align both codepaths. 
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 4 +++- .../AArch64/outer_loop_prefer_scalable.ll | 2 -- .../Transforms/LoopVectorize/dbg-outer-loop-vect.ll | 12 ++++-------- .../outer-loop-vec-phi-predecessor-order.ll | 2 -- .../LoopVectorize/outer_loop_hcfg_construction.ll | 4 ---- .../Transforms/LoopVectorize/outer_loop_scalable.ll | 2 -- .../vplan-vectorize-inner-loop-reduction.ll | 2 -- .../LoopVectorize/vplan-widen-call-instruction.ll | 2 -- 8 files changed, 7 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index fdf8f7042c4fb86..58ef665e86d65ea 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7843,7 +7843,9 @@ void LoopVectorizationPlanner::buildVPlans(ElementCount MinVF, auto MaxVFTimes2 = MaxVF * 2; for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) { VFRange SubRange = {VF, MaxVFTimes2}; - VPlans.push_back(buildVPlan(SubRange)); + auto Plan = buildVPlan(SubRange); + VPlanTransforms::optimize(*Plan, *PSE.getSE()); + VPlans.push_back(std::move(Plan)); VF = SubRange.End; } } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll index 933b46733f9d1b6..736be4995c575fc 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll @@ -49,8 +49,6 @@ define void @foo() { ; CHECK: vector.latch: ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi [ [[TMP12]], [[INNER_LOOP1]] ] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0( [[VEC_PHI5]], [[TMP10]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP16:%.*]] = add nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; 
CHECK-NEXT: [[TMP17:%.*]] = icmp eq [[TMP16]], shufflevector ( insertelement ( poison, i64 1024, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll index 3f56361409b02f1..3bf2591391121ea 100644 --- a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll +++ b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll @@ -13,7 +13,6 @@ define void @foo(ptr %h) !dbg !4 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]], !dbg [[DBG21]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_COND_CLEANUP32:%.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR_COND_CLEANUP32]] ] ; CHECK-NEXT: br label [[FOR_COND5_PREHEADER1:%.*]], !dbg [[DBG21]] ; CHECK: for.cond5.preheader1: ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ], !dbg [[DBG21]] @@ -30,10 +29,7 @@ define void @foo(ptr %h) !dbg !4 { ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0, !dbg [[DBG26:![0-9]+]] ; CHECK-NEXT: br i1 [[TMP6]], label [[FOR_COND_CLEANUP32]], label [[FOR_COND5_PREHEADER1]], !dbg [[DBG26]] ; CHECK: vector.latch: -; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], , !dbg [[DBG27:![0-9]+]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[TMP7]], , !dbg [[DBG28:![0-9]+]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] ; CHECK: middle.block: @@ 
-59,9 +55,9 @@ define void @foo(ptr %h) !dbg !4 { ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC10]], 5, !dbg [[DBG25]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_COND5_PREHEADER]], !dbg [[DBG26]] ; CHECK: for.cond.cleanup3: -; CHECK-NEXT: [[INC13]] = add nuw nsw i64 [[I_023]], 1, !dbg [[DBG27]] +; CHECK-NEXT: [[INC13]] = add nuw nsw i64 [[I_023]], 1, !dbg [[DBG27:![0-9]+]] ; CHECK-NEXT: #dbg_value(i64 [[INC13]], [[META11]], !DIExpression(), [[META20]]) -; CHECK-NEXT: [[EXITCOND24_NOT:%.*]] = icmp eq i64 [[INC13]], 23, !dbg [[DBG28]] +; CHECK-NEXT: [[EXITCOND24_NOT:%.*]] = icmp eq i64 [[INC13]], 23, !dbg [[DBG28:![0-9]+]] ; CHECK-NEXT: br i1 [[EXITCOND24_NOT]], label [[EXIT]], label [[FOR_COND1_PREHEADER]], !dbg [[DBG21]], !llvm.loop [[LOOP34:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void, !dbg [[DBG35:![0-9]+]] @@ -164,13 +160,13 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) ; CHECK: [[DBG24]] = !DILocation(line: 11, column: 32, scope: [[META19]]) ; CHECK: [[DBG25]] = !DILocation(line: 11, column: 26, scope: [[META19]]) ; CHECK: [[DBG26]] = !DILocation(line: 11, column: 5, scope: [[META15]]) -; CHECK: [[DBG27]] = !DILocation(line: 10, column: 30, scope: [[META16]]) -; CHECK: [[DBG28]] = !DILocation(line: 10, column: 24, scope: [[META16]]) ; CHECK: [[LOOP29]] = distinct !{[[LOOP29]], [[DBG21]], [[META30:![0-9]+]], [[META31:![0-9]+]], [[META32:![0-9]+]]} ; CHECK: [[META30]] = !DILocation(line: 13, column: 13, scope: [[META12]]) ; CHECK: [[META31]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[META32]] = !{!"llvm.loop.unroll.runtime.disable"} ; CHECK: [[DBG33]] = !DILocation(line: 13, column: 2, scope: [[META23]]) +; CHECK: [[DBG27]] = !DILocation(line: 10, column: 30, scope: [[META16]]) +; CHECK: [[DBG28]] = !DILocation(line: 10, column: 24, scope: [[META16]]) ; CHECK: [[LOOP34]] = distinct !{[[LOOP34]], [[DBG21]], [[META30]], [[META31]]} ; CHECK: [[DBG35]] = !DILocation(line: 14, column: 1, scope: 
[[DBG4]]) ;. diff --git a/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll b/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll index 260f59c5c97e274..b6f2ae3c14893a5 100644 --- a/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll +++ b/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll @@ -41,8 +41,6 @@ define void @test(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP_1_LATCH5]], label [[LOOP_2_HEADER1]] ; CHECK: vector.latch: -; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[TMP8]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll index 986a6082a6891db..5df4fce2190a3a6 100644 --- a/llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll +++ b/llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll @@ -52,8 +52,6 @@ define void @non_outermost_loop_hcfg_construction(i64 %n, ptr %a) { ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_LOOP_LATCH4]], label [[INNERMOST_LOOP1]] ; CHECK: vector.latch: -; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[TMP8]], [[BROADCAST_SPLAT3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -197,8 +195,6 @@ define void @non_outermost_loop_hcfg_construction_other_loops_at_same_level(i64 ; CHECK-NEXT: 
[[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_LOOP_J0_CLEANUP4]], label [[INNERMOST_LOOP1]] ; CHECK: vector.latch: -; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[TMP8]], [[BROADCAST_SPLAT3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll index 6cd7e3714201b7a..08f2b823815b1ce 100644 --- a/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll @@ -55,8 +55,6 @@ define void @foo() { ; CHECK: vector.latch: ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi [ [[TMP12]], [[INNER_LOOP1]] ] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0( [[VEC_PHI5]], [[TMP10]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP16:%.*]] = add nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq [[TMP16]], shufflevector ( insertelement ( poison, i64 1024, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll b/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll index fdf73963e86464a..1552af3140e3837 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll @@ -40,8 +40,6 @@ define void 
@inner_loop_reduction(ptr noalias nocapture readonly %a.in, ptr noal ; CHECK-NEXT: %[[REDUCTION:.*]] = phi <4 x double> [ %[[REDUCTION_NEXT]], %[[FOR2_HEADER]] ] ; CHECK-NEXT: %[[C_PTR:.*]] = getelementptr inbounds double, ptr %c.out, <4 x i64> %[[VEC_INDEX]] ; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %[[REDUCTION]], <4 x ptr> %[[C_PTR]], i32 8, <4 x i1> ) -; CHECK-NEXT: %[[VEC_INDEX_NEXT:.*]] = add nuw nsw <4 x i64> %[[VEC_INDEX]], -; CHECK-NEXT: %[[VEC_PTR:.*]] = icmp eq <4 x i64> %[[VEC_INDEX_NEXT]], ; CHECK-NEXT: %[[FOR1_INDEX_NEXT:.*]] = add nuw i64 %[[FOR1_INDEX]], 4 ; CHECK-NEXT: %{{.*}} = add <4 x i64> %[[VEC_INDEX]], ; CHECK-NEXT: %[[EXIT_COND:.*]] = icmp eq i64 %[[FOR1_INDEX_NEXT]], 1000 diff --git a/llvm/test/Transforms/LoopVectorize/vplan-widen-call-instruction.ll b/llvm/test/Transforms/LoopVectorize/vplan-widen-call-instruction.ll index 3335c21c8d74571..4241409d3935fd6 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-widen-call-instruction.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-widen-call-instruction.ll @@ -30,8 +30,6 @@ define void @widen_call_instruction(ptr noalias nocapture readonly %a.in, ptr no ; CHECK-NEXT: %[[REDUCTION:.*]] = phi <4 x double> [ %[[REDUCTION_NEXT]], %[[FOR2_HEADER]] ] ; CHECK-NEXT: %[[C_PTR:.*]] = getelementptr inbounds double, ptr %c.out, <4 x i64> %[[VEC_INDEX]] ; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %[[REDUCTION]], <4 x ptr> %[[C_PTR]], i32 8, <4 x i1> ) -; CHECK-NEXT: %[[VEC_INDEX_NEXT:.*]] = add nuw nsw <4 x i64> %[[VEC_INDEX]], -; CHECK-NEXT: %[[VEC_PTR:.*]] = icmp eq <4 x i64> %[[VEC_INDEX_NEXT]], ; CHECK-NEXT: %[[FOR1_INDEX_NEXT:.*]] = add nuw i64 %[[FOR1_INDEX]], 4 ; CHECK-NEXT: %{{.*}} = add <4 x i64> %[[VEC_INDEX]], ; CHECK-NEXT: %[[EXIT_COND:.*]] = icmp eq i64 %[[FOR1_INDEX_NEXT]], 1000 From f71b63865140cf3c286baf3a77ba3e467f929504 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 15 Aug 2024 07:11:31 -0500 Subject: [PATCH 018/441] [libcxx] 
Use `aligned_alloc` for testing instead of `posix_memalign` (#101748) Summary: The `aligned_alloc` function is the C11 replacement for `posix_memalign`. We should favor the C standard over the POSIX standard so more C library implementations can run the tests. --- .../support.dynamic/new_faligned_allocation.pass.cpp | 2 +- libcxx/test/support/count_new.h | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/libcxx/test/libcxx/language.support/support.dynamic/new_faligned_allocation.pass.cpp b/libcxx/test/libcxx/language.support/support.dynamic/new_faligned_allocation.pass.cpp index 69c46f00fb65dc7..87f4783e12973e5 100644 --- a/libcxx/test/libcxx/language.support/support.dynamic/new_faligned_allocation.pass.cpp +++ b/libcxx/test/libcxx/language.support/support.dynamic/new_faligned_allocation.pass.cpp @@ -76,7 +76,7 @@ int main(int, char**) { test_allocations(64, 64); // Size being a multiple of alignment also needs to be supported. test_allocations(64, 32); - // When aligned allocation is implemented using posix_memalign, + // When aligned allocation is implemented using aligned_alloc, // that function requires a minimum alignment of sizeof(void*). // Check that we can also create overaligned allocations with // an alignment argument less than sizeof(void*). 
diff --git a/libcxx/test/support/count_new.h b/libcxx/test/support/count_new.h index 2298c4fd63e845b..61c8ca16ab0d043 100644 --- a/libcxx/test/support/count_new.h +++ b/libcxx/test/support/count_new.h @@ -455,11 +455,23 @@ void operator delete[](void* p, std::nothrow_t const&) TEST_NOEXCEPT { # define USE_ALIGNED_ALLOC # endif +# if defined(__APPLE__) +# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && \ + __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101500) +# define TEST_HAS_NO_C11_ALIGNED_ALLOC +# endif +# elif defined(__ANDROID__) && __ANDROID_API__ < 28 +# define TEST_HAS_NO_C11_ALIGNED_ALLOC +# endif + inline void* allocate_aligned_impl(std::size_t size, std::align_val_t align) { const std::size_t alignment = static_cast(align); void* ret = nullptr; # ifdef USE_ALIGNED_ALLOC ret = _aligned_malloc(size, alignment); +# elif TEST_STD_VER >= 17 && !defined(TEST_HAS_NO_C11_ALIGNED_ALLOC) + size_t rounded_size = (size + alignment - 1) & ~(alignment - 1); + ret = aligned_alloc(alignment, size > rounded_size ? size : rounded_size); # else assert(posix_memalign(&ret, std::max(alignment, sizeof(void*)), size) != EINVAL); # endif From 846f790216e1a0c40f8890d489904c3d716cc998 Mon Sep 17 00:00:00 2001 From: Tobias Stadler Date: Thu, 15 Aug 2024 14:40:16 +0200 Subject: [PATCH 019/441] [GlobalISel] Combiner: Observer-based DCE and retrying of combines Continues the work for disabling fixed-point iteration in the Combiner (#94291). This introduces improved Observer-based heuristics in the GISel Combiner to retry combining defs/uses of modified instructions and for performing sparse dead code elimination. I have experimented a lot with the heuristics and this seems to be the minimal set of heuristics that allows disabling fixed-point iteration for AArch64 CTMark O2 without regressions. 
Enabling this globally would pass all regression tests for all official targets (apart from small benign diffs), but I have made this fully opt-in for now, because I can't quantify the impact for other targets. This should mostly be on-par with how the WorkList-aware functions in the InstCombiner and DAGCombiner handle rescheduling instructions for recombining. For performance numbers see my follow-up patch for AArch64 (#102167) Pull Request: https://github.com/llvm/llvm-project/pull/102163 --- .../llvm/CodeGen/GlobalISel/Combiner.h | 10 +- .../llvm/CodeGen/GlobalISel/CombinerInfo.h | 25 ++ llvm/lib/CodeGen/GlobalISel/Combiner.cpp | 219 +++++++++++++++--- 3 files changed, 214 insertions(+), 40 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h b/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h index f826601544932d1..fa6a7be6cf6c321 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h @@ -15,13 +15,13 @@ #ifndef LLVM_CODEGEN_GLOBALISEL_COMBINER_H #define LLVM_CODEGEN_GLOBALISEL_COMBINER_H +#include "llvm/CodeGen/GlobalISel/CombinerInfo.h" #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" namespace llvm { class MachineRegisterInfo; -struct CombinerInfo; class GISelCSEInfo; class TargetPassConfig; class MachineFunction; @@ -33,8 +33,12 @@ class MachineIRBuilder; /// TODO: Is it worth making this module-wide? class Combiner : public GIMatchTableExecutor { private: + using WorkListTy = GISelWorkList<512>; + class WorkListMaintainer; - GISelWorkList<512> WorkList; + template class WorkListMaintainerImpl; + + WorkListTy WorkList; // We have a little hack here where keep the owned pointers private, and only // expose a reference. 
This has two purposes: @@ -48,6 +52,8 @@ class Combiner : public GIMatchTableExecutor { bool HasSetupMF = false; + static bool tryDCE(MachineInstr &MI, MachineRegisterInfo &MRI); + public: /// If CSEInfo is not null, then the Combiner will use CSEInfo as the observer /// and also create a CSEMIRBuilder. Pass nullptr if CSE is not needed. diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h index 2b0eb71f8808277..67f95c962c58246 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h @@ -53,6 +53,31 @@ struct CombinerInfo { /// The maximum number of times the Combiner will iterate over the /// MachineFunction. Setting this to 0 enables fixed-point iteration. unsigned MaxIterations = 0; + + enum class ObserverLevel { + /// Only retry combining created/changed instructions. + /// This replicates the legacy default Observer behavior for use with + /// fixed-point iteration. + Basic, + /// Enables Observer-based detection of dead instructions. This can save + /// some compile-time if full disabling of fixed-point iteration is not + /// desired. If the input IR doesn't contain dead instructions, consider + /// disabling \p EnableFullDCE. + DCE, + /// Enables Observer-based DCE and additional heuristics that retry + /// combining defined and used instructions of modified instructions. + /// This provides a good balance between compile-time and completeness of + /// combining without needing fixed-point iteration. + SinglePass, + }; + + /// Select how the Combiner acts on MIR changes. + ObserverLevel ObserverLvl = ObserverLevel::Basic; + + /// Whether dead code elimination is performed before each Combiner iteration. + /// If Observer-based DCE is enabled, this controls if a full DCE pass is + /// performed before the first Combiner iteration. 
+ bool EnableFullDCE = true; }; } // namespace llvm diff --git a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp index 49842e5fd65da6f..75b2525e368af68 100644 --- a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp @@ -45,61 +45,190 @@ cl::OptionCategory GICombinerOptionCategory( ); } // end namespace llvm -/// This class acts as the glue the joins the CombinerHelper to the overall +/// This class acts as the glue that joins the CombinerHelper to the overall /// Combine algorithm. The CombinerHelper is intended to report the /// modifications it makes to the MIR to the GISelChangeObserver and the -/// observer subclass will act on these events. In this case, instruction -/// erasure will cancel any future visits to the erased instruction and -/// instruction creation will schedule that instruction for a future visit. -/// Other Combiner implementations may require more complex behaviour from -/// their GISelChangeObserver subclass. +/// observer subclass will act on these events. class Combiner::WorkListMaintainer : public GISelChangeObserver { - using WorkListTy = GISelWorkList<512>; - WorkListTy &WorkList; +protected: +#ifndef NDEBUG /// The instructions that have been created but we want to report once they /// have their operands. This is only maintained if debug output is requested. 
-#ifndef NDEBUG - SetVector CreatedInstrs; + SmallSetVector CreatedInstrs; #endif + using Level = CombinerInfo::ObserverLevel; public: - WorkListMaintainer(WorkListTy &WorkList) : WorkList(WorkList) {} + static std::unique_ptr + create(Level Lvl, WorkListTy &WorkList, MachineRegisterInfo &MRI); + virtual ~WorkListMaintainer() = default; + void reportFullyCreatedInstrs() { + LLVM_DEBUG({ + for (auto *MI : CreatedInstrs) { + dbgs() << "Created: " << *MI; + } + CreatedInstrs.clear(); + }); + } + + virtual void reset() = 0; + virtual void appliedCombine() = 0; +}; + +/// A configurable WorkListMaintainer implementation. +/// The ObserverLevel determines how the WorkListMaintainer reacts to MIR +/// changes. +template +class Combiner::WorkListMaintainerImpl : public Combiner::WorkListMaintainer { + WorkListTy &WorkList; + MachineRegisterInfo &MRI; + + // Defer handling these instructions until the combine finishes. + SmallSetVector DeferList; + + // Track VRegs that (might) have lost a use. + SmallSetVector LostUses; + +public: + WorkListMaintainerImpl(WorkListTy &WorkList, MachineRegisterInfo &MRI) + : WorkList(WorkList), MRI(MRI) {} + + virtual ~WorkListMaintainerImpl() = default; + + void reset() override { + DeferList.clear(); + LostUses.clear(); + } + void erasingInstr(MachineInstr &MI) override { - LLVM_DEBUG(dbgs() << "Erasing: " << MI << "\n"); + // MI will become dangling, remove it from all lists. 
+ LLVM_DEBUG(dbgs() << "Erasing: " << MI; CreatedInstrs.remove(&MI)); WorkList.remove(&MI); + if constexpr (Lvl != Level::Basic) { + DeferList.remove(&MI); + noteLostUses(MI); + } } + void createdInstr(MachineInstr &MI) override { - LLVM_DEBUG(dbgs() << "Creating: " << MI << "\n"); - WorkList.insert(&MI); - LLVM_DEBUG(CreatedInstrs.insert(&MI)); + LLVM_DEBUG(dbgs() << "Creating: " << MI; CreatedInstrs.insert(&MI)); + if constexpr (Lvl == Level::Basic) + WorkList.insert(&MI); + else + // Defer handling newly created instructions, because they don't have + // operands yet. We also insert them into the WorkList in reverse + // order so that they will be combined top down. + DeferList.insert(&MI); } + void changingInstr(MachineInstr &MI) override { - LLVM_DEBUG(dbgs() << "Changing: " << MI << "\n"); - WorkList.insert(&MI); + LLVM_DEBUG(dbgs() << "Changing: " << MI); + // Some uses might get dropped when MI is changed. + // For now, overapproximate by assuming all uses will be dropped. + // TODO: Is a more precise heuristic or manual tracking of use count + // decrements worth it? + if constexpr (Lvl != Level::Basic) + noteLostUses(MI); } + void changedInstr(MachineInstr &MI) override { - LLVM_DEBUG(dbgs() << "Changed: " << MI << "\n"); - WorkList.insert(&MI); + LLVM_DEBUG(dbgs() << "Changed: " << MI); + if constexpr (Lvl == Level::Basic) + WorkList.insert(&MI); + else + // Defer this for DCE + DeferList.insert(&MI); } - void reportFullyCreatedInstrs() { - LLVM_DEBUG(for (const auto *MI - : CreatedInstrs) { - dbgs() << "Created: "; - MI->print(dbgs()); - }); - LLVM_DEBUG(CreatedInstrs.clear()); + // Only track changes during the combine and then walk the def/use-chains once + // the combine is finished, because: + // - instructions might have multiple defs during the combine. + // - use counts aren't accurate during the combine. 
+ void appliedCombine() override { + if constexpr (Lvl == Level::Basic) + return; + + // DCE deferred instructions and add them to the WorkList bottom up. + while (!DeferList.empty()) { + MachineInstr &MI = *DeferList.pop_back_val(); + if (tryDCE(MI, MRI)) + continue; + + if constexpr (Lvl >= Level::SinglePass) + addUsersToWorkList(MI); + + WorkList.insert(&MI); + } + + // Handle instructions that have lost a user. + while (!LostUses.empty()) { + Register Use = LostUses.pop_back_val(); + MachineInstr *UseMI = MRI.getVRegDef(Use); + if (!UseMI) + continue; + + // If DCE succeeds, UseMI's uses are added back to LostUses by + // erasingInstr. + if (tryDCE(*UseMI, MRI)) + continue; + + if constexpr (Lvl >= Level::SinglePass) { + // OneUse checks are relatively common, so we might be able to combine + // the single remaining user of this Reg. + if (MRI.hasOneNonDBGUser(Use)) + WorkList.insert(&*MRI.use_instr_nodbg_begin(Use)); + + WorkList.insert(UseMI); + } + } + } + + void noteLostUses(MachineInstr &MI) { + for (auto &Use : MI.explicit_uses()) { + if (!Use.isReg() || !Use.getReg().isVirtual()) + continue; + LostUses.insert(Use.getReg()); + } + } + + void addUsersToWorkList(MachineInstr &MI) { + for (auto &Def : MI.defs()) { + Register DefReg = Def.getReg(); + if (!DefReg.isVirtual()) + continue; + for (auto &UseMI : MRI.use_nodbg_instructions(DefReg)) { + WorkList.insert(&UseMI); + } + } } }; +std::unique_ptr +Combiner::WorkListMaintainer::create(Level Lvl, WorkListTy &WorkList, + MachineRegisterInfo &MRI) { + switch (Lvl) { + case Level::Basic: + return std::make_unique>(WorkList, + MRI); + case Level::DCE: + return std::make_unique>(WorkList, MRI); + case Level::SinglePass: + return std::make_unique>(WorkList, + MRI); + default: + llvm_unreachable("Illegal ObserverLevel"); + } +} + Combiner::Combiner(MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC, GISelKnownBits *KB, GISelCSEInfo *CSEInfo) : Builder(CSEInfo ? 
std::make_unique() : std::make_unique()), - WLObserver(std::make_unique(WorkList)), + WLObserver(WorkListMaintainer::create(CInfo.ObserverLvl, WorkList, + MF.getRegInfo())), ObserverWrapper(std::make_unique()), CInfo(CInfo), Observer(*ObserverWrapper), B(*Builder), MF(MF), MRI(MF.getRegInfo()), KB(KB), TPC(TPC), CSEInfo(CSEInfo) { @@ -115,6 +244,15 @@ Combiner::Combiner(MachineFunction &MF, CombinerInfo &CInfo, Combiner::~Combiner() = default; +bool Combiner::tryDCE(MachineInstr &MI, MachineRegisterInfo &MRI) { + if (!isTriviallyDead(MI, MRI)) + return false; + LLVM_DEBUG(dbgs() << "Dead: " << MI); + llvm::salvageDebugInfo(MRI, MI); + MI.eraseFromParent(); + return true; +} + bool Combiner::combineMachineInstrs() { // If the ISel pipeline failed, do not bother running this pass. // FIXME: Should this be here or in individual combiner passes. @@ -141,27 +279,29 @@ bool Combiner::combineMachineInstrs() { ++Iteration; LLVM_DEBUG(dbgs() << "\n\nCombiner iteration #" << Iteration << '\n'); + Changed = false; WorkList.clear(); + WLObserver->reset(); ObserverWrapper->clearObservers(); if (CSEInfo) ObserverWrapper->addObserver(CSEInfo); + // If Observer-based DCE is enabled, perform full DCE only before the first + // iteration. + bool EnableDCE = CInfo.ObserverLvl >= CombinerInfo::ObserverLevel::DCE + ? CInfo.EnableFullDCE && Iteration == 1 + : CInfo.EnableFullDCE; + // Collect all instructions. Do a post order traversal for basic blocks and // insert with list bottom up, so while we pop_back_val, we'll traverse top // down RPOT. - Changed = false; - RAIIMFObsDelInstaller DelInstall(MF, *ObserverWrapper); for (MachineBasicBlock *MBB : post_order(&MF)) { for (MachineInstr &CurMI : llvm::make_early_inc_range(llvm::reverse(*MBB))) { // Erase dead insts before even adding to the list. 
- if (isTriviallyDead(CurMI, MRI)) { - LLVM_DEBUG(dbgs() << CurMI << "Is dead; erasing.\n"); - llvm::salvageDebugInfo(MRI, CurMI); - CurMI.eraseFromParent(); + if (EnableDCE && tryDCE(CurMI, MRI)) continue; - } WorkList.deferred_insert(&CurMI); } } @@ -171,10 +311,13 @@ bool Combiner::combineMachineInstrs() { ObserverWrapper->addObserver(WLObserver.get()); // Main Loop. Process the instructions here. while (!WorkList.empty()) { - MachineInstr *CurrInst = WorkList.pop_back_val(); - LLVM_DEBUG(dbgs() << "\nTry combining " << *CurrInst;); - Changed |= tryCombineAll(*CurrInst); - WLObserver->reportFullyCreatedInstrs(); + MachineInstr &CurrInst = *WorkList.pop_back_val(); + LLVM_DEBUG(dbgs() << "\nTry combining " << CurrInst); + bool AppliedCombine = tryCombineAll(CurrInst); + LLVM_DEBUG(WLObserver->reportFullyCreatedInstrs()); + Changed |= AppliedCombine; + if (AppliedCombine) + WLObserver->appliedCombine(); } MFChanged |= Changed; From bfce1aae76de05dd6ac9fbbd997295fe5aa280c0 Mon Sep 17 00:00:00 2001 From: Janek van Oirschot Date: Thu, 15 Aug 2024 13:43:13 +0100 Subject: [PATCH 020/441] [AMDGPU] MCExpr printing helper with KnownBits support (#95951) Walks over the MCExpr and uses KnownBits to deduce whether an expression is known and if so, prints said known value. Should support the most common MCExpr cases for AMDGPU metadata. 
--- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 10 +- .../AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp | 374 ++++++++++++++++++ .../Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h | 13 + .../MCTargetDesc/AMDGPUTargetStreamer.cpp | 28 +- .../AMDGPU/Utils/AMDKernelCodeTUtils.cpp | 46 +-- .../Target/AMDGPU/Utils/AMDKernelCodeTUtils.h | 5 +- llvm/test/MC/AMDGPU/amd_kernel_code_t.s | 36 +- llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s | 64 +-- llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s | 60 +-- llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s | 58 +-- llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s | 54 +-- llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s | 54 +-- llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s | 46 +-- 13 files changed, 611 insertions(+), 237 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index e64e28e01d3d185..b90d245b7bd3944 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -395,12 +395,10 @@ void AMDGPUAsmPrinter::emitCommonFunctionComments( SmallString<128> AMDGPUAsmPrinter::getMCExprStr(const MCExpr *Value) { SmallString<128> Str; raw_svector_ostream OSS(Str); - int64_t IVal; - if (Value->evaluateAsAbsolute(IVal)) { - OSS << static_cast(IVal); - } else { - Value->print(OSS, MAI); - } + auto &Streamer = getTargetStreamer()->getStreamer(); + auto &Context = Streamer.getContext(); + const MCExpr *New = foldAMDGPUMCExpr(Value, Context); + printAMDGPUMCExpr(New, OSS, MAI); return Str; } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp index b2a9667f7c23ea2..4fbd7d0f889457d 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp @@ -10,12 +10,14 @@ #include "GCNSubtarget.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/IR/Function.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" 
#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" #include @@ -303,3 +305,375 @@ const AMDGPUMCExpr *AMDGPUMCExpr::createOccupancy(unsigned InitOcc, CreateExpr(InitOcc), NumSGPRs, NumVGPRs}, Ctx); } + +static KnownBits fromOptionalToKnownBits(std::optional CompareResult) { + static constexpr unsigned BitWidth = 64; + const APInt True(BitWidth, 1); + const APInt False(BitWidth, 0); + if (CompareResult) { + return *CompareResult ? KnownBits::makeConstant(True) + : KnownBits::makeConstant(False); + } + + KnownBits UnknownBool(/*BitWidth=*/1); + return UnknownBool.zext(BitWidth); +} + +using KnownBitsMap = DenseMap; +static void knownBitsMapHelper(const MCExpr *Expr, KnownBitsMap &KBM, + unsigned Depth = 0); + +static void binaryOpKnownBitsMapHelper(const MCExpr *Expr, KnownBitsMap &KBM, + unsigned Depth) { + static constexpr unsigned BitWidth = 64; + const MCBinaryExpr *BExpr = cast(Expr); + const MCExpr *LHS = BExpr->getLHS(); + const MCExpr *RHS = BExpr->getRHS(); + + knownBitsMapHelper(LHS, KBM, Depth + 1); + knownBitsMapHelper(RHS, KBM, Depth + 1); + KnownBits LHSKnown = KBM[LHS]; + KnownBits RHSKnown = KBM[RHS]; + + switch (BExpr->getOpcode()) { + default: + KBM[Expr] = KnownBits(BitWidth); + return; + case MCBinaryExpr::Opcode::Add: + KBM[Expr] = KnownBits::add(LHSKnown, RHSKnown); + return; + case MCBinaryExpr::Opcode::And: + KBM[Expr] = LHSKnown & RHSKnown; + return; + case MCBinaryExpr::Opcode::Div: + KBM[Expr] = KnownBits::sdiv(LHSKnown, RHSKnown); + return; + case MCBinaryExpr::Opcode::EQ: { + std::optional CompareRes = KnownBits::eq(LHSKnown, RHSKnown); + KBM[Expr] = fromOptionalToKnownBits(CompareRes); + return; + } + case MCBinaryExpr::Opcode::NE: { + std::optional CompareRes = KnownBits::ne(LHSKnown, RHSKnown); + KBM[Expr] = fromOptionalToKnownBits(CompareRes); + return; + } + case 
MCBinaryExpr::Opcode::GT: { + std::optional CompareRes = KnownBits::sgt(LHSKnown, RHSKnown); + KBM[Expr] = fromOptionalToKnownBits(CompareRes); + return; + } + case MCBinaryExpr::Opcode::GTE: { + std::optional CompareRes = KnownBits::sge(LHSKnown, RHSKnown); + KBM[Expr] = fromOptionalToKnownBits(CompareRes); + return; + } + case MCBinaryExpr::Opcode::LAnd: { + std::optional CompareRes; + const APInt False(BitWidth, 0); + std::optional LHSBool = + KnownBits::ne(LHSKnown, KnownBits::makeConstant(False)); + std::optional RHSBool = + KnownBits::ne(RHSKnown, KnownBits::makeConstant(False)); + if (LHSBool && RHSBool) + CompareRes = *LHSBool && *RHSBool; + KBM[Expr] = fromOptionalToKnownBits(CompareRes); + return; + } + case MCBinaryExpr::Opcode::LOr: { + const APInt False(BitWidth, 0); + KnownBits Bits = LHSKnown | RHSKnown; + std::optional CompareRes = + KnownBits::ne(Bits, KnownBits::makeConstant(False)); + KBM[Expr] = fromOptionalToKnownBits(CompareRes); + return; + } + case MCBinaryExpr::Opcode::LT: { + std::optional CompareRes = KnownBits::slt(LHSKnown, RHSKnown); + KBM[Expr] = fromOptionalToKnownBits(CompareRes); + return; + } + case MCBinaryExpr::Opcode::LTE: { + std::optional CompareRes = KnownBits::sle(LHSKnown, RHSKnown); + KBM[Expr] = fromOptionalToKnownBits(CompareRes); + return; + } + case MCBinaryExpr::Opcode::Mod: + KBM[Expr] = KnownBits::srem(LHSKnown, RHSKnown); + return; + case MCBinaryExpr::Opcode::Mul: + KBM[Expr] = KnownBits::mul(LHSKnown, RHSKnown); + return; + case MCBinaryExpr::Opcode::Or: + KBM[Expr] = LHSKnown | RHSKnown; + return; + case MCBinaryExpr::Opcode::Shl: + KBM[Expr] = KnownBits::shl(LHSKnown, RHSKnown); + return; + case MCBinaryExpr::Opcode::AShr: + KBM[Expr] = KnownBits::ashr(LHSKnown, RHSKnown); + return; + case MCBinaryExpr::Opcode::LShr: + KBM[Expr] = KnownBits::lshr(LHSKnown, RHSKnown); + return; + case MCBinaryExpr::Opcode::Sub: + KBM[Expr] = KnownBits::sub(LHSKnown, RHSKnown); + return; + case MCBinaryExpr::Opcode::Xor: + 
KBM[Expr] = LHSKnown ^ RHSKnown; + return; + } +} + +static void unaryOpKnownBitsMapHelper(const MCExpr *Expr, KnownBitsMap &KBM, + unsigned Depth) { + static constexpr unsigned BitWidth = 64; + const MCUnaryExpr *UExpr = cast(Expr); + knownBitsMapHelper(UExpr->getSubExpr(), KBM, Depth + 1); + KnownBits KB = KBM[UExpr->getSubExpr()]; + + switch (UExpr->getOpcode()) { + default: + KBM[Expr] = KnownBits(BitWidth); + return; + case MCUnaryExpr::Opcode::Minus: { + KB.makeNegative(); + KBM[Expr] = KB; + return; + } + case MCUnaryExpr::Opcode::Not: { + KnownBits AllOnes(BitWidth); + AllOnes.setAllOnes(); + KBM[Expr] = KB ^ AllOnes; + return; + } + case MCUnaryExpr::Opcode::Plus: { + KB.makeNonNegative(); + KBM[Expr] = KB; + return; + } + } +} + +static void targetOpKnownBitsMapHelper(const MCExpr *Expr, KnownBitsMap &KBM, + unsigned Depth) { + static constexpr unsigned BitWidth = 64; + const AMDGPUMCExpr *AGVK = cast(Expr); + + switch (AGVK->getKind()) { + default: + KBM[Expr] = KnownBits(BitWidth); + return; + case AMDGPUMCExpr::VariantKind::AGVK_Or: { + knownBitsMapHelper(AGVK->getSubExpr(0), KBM, Depth + 1); + KnownBits KB = KBM[AGVK->getSubExpr(0)]; + for (const MCExpr *Arg : AGVK->getArgs()) { + knownBitsMapHelper(Arg, KBM, Depth + 1); + KB |= KBM[Arg]; + } + KBM[Expr] = KB; + return; + } + case AMDGPUMCExpr::VariantKind::AGVK_Max: { + knownBitsMapHelper(AGVK->getSubExpr(0), KBM, Depth + 1); + KnownBits KB = KBM[AGVK->getSubExpr(0)]; + for (const MCExpr *Arg : AGVK->getArgs()) { + knownBitsMapHelper(Arg, KBM, Depth + 1); + KB = KnownBits::umax(KB, KBM[Arg]); + } + KBM[Expr] = KB; + return; + } + case AMDGPUMCExpr::VariantKind::AGVK_ExtraSGPRs: + case AMDGPUMCExpr::VariantKind::AGVK_TotalNumVGPRs: + case AMDGPUMCExpr::VariantKind::AGVK_AlignTo: + case AMDGPUMCExpr::VariantKind::AGVK_Occupancy: { + int64_t Val; + if (AGVK->evaluateAsAbsolute(Val)) { + APInt APValue(BitWidth, Val); + KBM[Expr] = KnownBits::makeConstant(APValue); + return; + } + KBM[Expr] = 
KnownBits(BitWidth); + return; + } + } +} + +static void knownBitsMapHelper(const MCExpr *Expr, KnownBitsMap &KBM, + unsigned Depth) { + static constexpr unsigned BitWidth = 64; + + int64_t Val; + if (Expr->evaluateAsAbsolute(Val)) { + APInt APValue(BitWidth, Val, /*isSigned=*/true); + KBM[Expr] = KnownBits::makeConstant(APValue); + return; + } + + if (Depth == 16) { + KBM[Expr] = KnownBits(BitWidth); + return; + } + + switch (Expr->getKind()) { + case MCExpr::ExprKind::Binary: { + binaryOpKnownBitsMapHelper(Expr, KBM, Depth); + return; + } + case MCExpr::ExprKind::Constant: { + const MCConstantExpr *CE = cast(Expr); + APInt APValue(BitWidth, CE->getValue(), /*isSigned=*/true); + KBM[Expr] = KnownBits::makeConstant(APValue); + return; + } + case MCExpr::ExprKind::SymbolRef: { + const MCSymbolRefExpr *RExpr = cast(Expr); + const MCSymbol &Sym = RExpr->getSymbol(); + if (!Sym.isVariable()) { + KBM[Expr] = KnownBits(BitWidth); + return; + } + + // Variable value retrieval is not for actual use but only for knownbits + // analysis. + knownBitsMapHelper(Sym.getVariableValue(/*SetUsed=*/false), KBM, Depth + 1); + KBM[Expr] = KBM[Sym.getVariableValue(/*SetUsed=*/false)]; + return; + } + case MCExpr::ExprKind::Unary: { + unaryOpKnownBitsMapHelper(Expr, KBM, Depth); + return; + } + case MCExpr::ExprKind::Target: { + targetOpKnownBitsMapHelper(Expr, KBM, Depth); + return; + } + } +} + +static const MCExpr *tryFoldHelper(const MCExpr *Expr, KnownBitsMap &KBM, + MCContext &Ctx) { + if (!KBM.count(Expr)) + return Expr; + + auto ValueCheckKnownBits = [](KnownBits &KB, unsigned Value) -> bool { + if (!KB.isConstant()) + return false; + + return Value == KB.getConstant(); + }; + + if (Expr->getKind() == MCExpr::ExprKind::Constant) + return Expr; + + // Resolving unary operations to constants may make the value more ambiguous. + // For example, `~62` becomes `-63`; however, to me it's more ambiguous if a + // bit mask value is represented through a negative number. 
+ if (Expr->getKind() != MCExpr::ExprKind::Unary) { + if (KBM[Expr].isConstant()) { + APInt ConstVal = KBM[Expr].getConstant(); + return MCConstantExpr::create(ConstVal.getSExtValue(), Ctx); + } + + int64_t EvalValue; + if (Expr->evaluateAsAbsolute(EvalValue)) + return MCConstantExpr::create(EvalValue, Ctx); + } + + switch (Expr->getKind()) { + default: + return Expr; + case MCExpr::ExprKind::Binary: { + const MCBinaryExpr *BExpr = cast(Expr); + const MCExpr *LHS = BExpr->getLHS(); + const MCExpr *RHS = BExpr->getRHS(); + + switch (BExpr->getOpcode()) { + default: + return Expr; + case MCBinaryExpr::Opcode::Sub: { + if (ValueCheckKnownBits(KBM[RHS], 0)) + return tryFoldHelper(LHS, KBM, Ctx); + break; + } + case MCBinaryExpr::Opcode::Add: + case MCBinaryExpr::Opcode::Or: { + if (ValueCheckKnownBits(KBM[LHS], 0)) + return tryFoldHelper(RHS, KBM, Ctx); + if (ValueCheckKnownBits(KBM[RHS], 0)) + return tryFoldHelper(LHS, KBM, Ctx); + break; + } + case MCBinaryExpr::Opcode::Mul: { + if (ValueCheckKnownBits(KBM[LHS], 1)) + return tryFoldHelper(RHS, KBM, Ctx); + if (ValueCheckKnownBits(KBM[RHS], 1)) + return tryFoldHelper(LHS, KBM, Ctx); + break; + } + case MCBinaryExpr::Opcode::Shl: + case MCBinaryExpr::Opcode::AShr: + case MCBinaryExpr::Opcode::LShr: { + if (ValueCheckKnownBits(KBM[RHS], 0)) + return tryFoldHelper(LHS, KBM, Ctx); + if (ValueCheckKnownBits(KBM[LHS], 0)) + return MCConstantExpr::create(0, Ctx); + break; + } + case MCBinaryExpr::Opcode::And: { + if (ValueCheckKnownBits(KBM[LHS], 0) || ValueCheckKnownBits(KBM[RHS], 0)) + return MCConstantExpr::create(0, Ctx); + break; + } + } + const MCExpr *NewLHS = tryFoldHelper(LHS, KBM, Ctx); + const MCExpr *NewRHS = tryFoldHelper(RHS, KBM, Ctx); + if (NewLHS != LHS || NewRHS != RHS) + return MCBinaryExpr::create(BExpr->getOpcode(), NewLHS, NewRHS, Ctx, + BExpr->getLoc()); + return Expr; + } + case MCExpr::ExprKind::Unary: { + const MCUnaryExpr *UExpr = cast(Expr); + const MCExpr *SubExpr = UExpr->getSubExpr(); + const 
MCExpr *NewSubExpr = tryFoldHelper(SubExpr, KBM, Ctx); + if (SubExpr != NewSubExpr) + return MCUnaryExpr::create(UExpr->getOpcode(), NewSubExpr, Ctx, + UExpr->getLoc()); + return Expr; + } + case MCExpr::ExprKind::Target: { + const AMDGPUMCExpr *AGVK = cast(Expr); + SmallVector NewArgs; + bool Changed = false; + for (const MCExpr *Arg : AGVK->getArgs()) { + const MCExpr *NewArg = tryFoldHelper(Arg, KBM, Ctx); + NewArgs.push_back(NewArg); + Changed |= Arg != NewArg; + } + return Changed ? AMDGPUMCExpr::create(AGVK->getKind(), NewArgs, Ctx) : Expr; + } + } + return Expr; +} + +const MCExpr *llvm::AMDGPU::foldAMDGPUMCExpr(const MCExpr *Expr, + MCContext &Ctx) { + KnownBitsMap KBM; + knownBitsMapHelper(Expr, KBM); + const MCExpr *NewExpr = tryFoldHelper(Expr, KBM, Ctx); + + return Expr != NewExpr ? NewExpr : Expr; +} + +void llvm::AMDGPU::printAMDGPUMCExpr(const MCExpr *Expr, raw_ostream &OS, + const MCAsmInfo *MAI) { + int64_t Val; + if (Expr->evaluateAsAbsolute(Val)) { + OS << Val; + return; + } + + Expr->print(OS, MAI); +} diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h index 970802dab69b60a..a16843f404b8f62 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h @@ -90,6 +90,7 @@ class AMDGPUMCExpr : public MCTargetExpr { const GCNSubtarget &STM, MCContext &Ctx); + ArrayRef getArgs() const { return Args; } VariantKind getKind() const { return Kind; } const MCExpr *getSubExpr(size_t Index) const; @@ -105,6 +106,18 @@ class AMDGPUMCExpr : public MCTargetExpr { } }; +namespace AMDGPU { +// Tries to leverage KnownBits for MCExprs to reduce and limit any composed +// MCExprs printing. E.g., an expression such as +// ((unevaluatable_sym | 1) & 1) won't evaluate due to unevaluatable_sym and +// would be printed verbosely in full; however, KnownBits should deduce +// the value to be 1.
Particularly useful for AMDGPU metadata MCExprs. +void printAMDGPUMCExpr(const MCExpr *Expr, raw_ostream &OS, + const MCAsmInfo *MAI); + +const MCExpr *foldAMDGPUMCExpr(const MCExpr *Expr, MCContext &Ctx); + +} // end namespace AMDGPU } // end namespace llvm #endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCEXPR_H diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 8c78db8e83c42d6..73d466abc66f7b7 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPUTargetStreamer.h" +#include "AMDGPUMCExpr.h" #include "AMDGPUMCKernelDescriptor.h" #include "AMDGPUPTNote.h" #include "Utils/AMDGPUBaseInfo.h" @@ -244,8 +245,13 @@ void AMDGPUTargetAsmStreamer::EmitDirectiveAMDHSACodeObjectVersion( } void AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) { + auto FoldAndPrint = [&](const MCExpr *Expr, raw_ostream &OS, + const MCAsmInfo *MAI) { + printAMDGPUMCExpr(foldAMDGPUMCExpr(Expr, getContext()), OS, MAI); + }; + OS << "\t.amd_kernel_code_t\n"; - Header.EmitKernelCodeT(OS, getContext()); + Header.EmitKernelCodeT(OS, getContext(), FoldAndPrint); OS << "\t.end_amd_kernel_code_t\n"; } @@ -329,24 +335,17 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( auto PrintField = [&](const MCExpr *Expr, uint32_t Shift, uint32_t Mask, StringRef Directive) { - int64_t IVal; OS << "\t\t" << Directive << ' '; - const MCExpr *pgm_rsrc1_bits = + const MCExpr *ShiftedAndMaskedExpr = MCKernelDescriptor::bits_get(Expr, Shift, Mask, getContext()); - if (pgm_rsrc1_bits->evaluateAsAbsolute(IVal)) - OS << static_cast(IVal); - else - pgm_rsrc1_bits->print(OS, MAI); + const MCExpr *New = foldAMDGPUMCExpr(ShiftedAndMaskedExpr, getContext()); + printAMDGPUMCExpr(New, OS, MAI); OS << 
'\n'; }; auto EmitMCExpr = [&](const MCExpr *Value) { - int64_t evaluatableValue; - if (Value->evaluateAsAbsolute(evaluatableValue)) { - OS << static_cast(evaluatableValue); - } else { - Value->print(OS, MAI); - } + const MCExpr *NewExpr = foldAMDGPUMCExpr(Value, getContext()); + printAMDGPUMCExpr(NewExpr, OS, MAI); }; OS << "\t\t.amdhsa_group_segment_fixed_size "; @@ -462,7 +461,8 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( accum_bits = MCBinaryExpr::createMul( accum_bits, MCConstantExpr::create(4, getContext()), getContext()); OS << "\t\t.amdhsa_accum_offset "; - EmitMCExpr(accum_bits); + const MCExpr *New = foldAMDGPUMCExpr(accum_bits, getContext()); + printAMDGPUMCExpr(New, OS, MAI); OS << '\n'; } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp index 720d5a1853dbbd7..7b88ddb7b0e95d1 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp @@ -226,35 +226,35 @@ class PrintField { template , T> * = nullptr> static void printField(StringRef Name, const AMDGPUMCKernelCodeT &C, - raw_ostream &OS, MCContext &Ctx) { + raw_ostream &OS, MCContext &Ctx, + AMDGPUMCKernelCodeT::PrintHelper Helper) { OS << Name << " = "; const MCExpr *Value = C.*ptr; - int64_t Val; - if (Value->evaluateAsAbsolute(Val)) - OS << Val; - else - Value->print(OS, Ctx.getAsmInfo()); + Helper(Value, OS, Ctx.getAsmInfo()); } template , T> * = nullptr> static void printField(StringRef Name, const AMDGPUMCKernelCodeT &C, - raw_ostream &OS, MCContext &) { + raw_ostream &OS, MCContext &, + AMDGPUMCKernelCodeT::PrintHelper) { OS << Name << " = " << (int)(C.*ptr); } }; template static void printBitField(StringRef Name, const AMDGPUMCKernelCodeT &C, - raw_ostream &OS, MCContext &) { + raw_ostream &OS, MCContext &, + AMDGPUMCKernelCodeT::PrintHelper) { const auto Mask = (static_cast(1) << width) - 1; OS << Name << " = " << (int)((C.*ptr >> shift) & 
Mask); } using PrintFx = void (*)(StringRef, const AMDGPUMCKernelCodeT &, raw_ostream &, - MCContext &); + MCContext &, AMDGPUMCKernelCodeT::PrintHelper Helper); -static ArrayRef getPrinterTable() { +static ArrayRef +getPrinterTable(AMDGPUMCKernelCodeT::PrintHelper Helper) { static const PrintFx Table[] = { #define COMPPGM1(name, aname, AccMacro) \ COMPPGM(name, aname, C_00B848_##AccMacro, S_00B848_##AccMacro, 0) @@ -263,7 +263,7 @@ static ArrayRef getPrinterTable() { #define PRINTFIELD(sname, aname, name) PrintField::printField #define PRINTCOMP(Complement, PGMType) \ [](StringRef Name, const AMDGPUMCKernelCodeT &C, raw_ostream &OS, \ - MCContext &Ctx) { \ + MCContext &Ctx, AMDGPUMCKernelCodeT::PrintHelper Helper) { \ OS << Name << " = "; \ auto [Shift, Mask] = getShiftMask(Complement); \ const MCExpr *Value; \ @@ -274,11 +274,7 @@ static ArrayRef getPrinterTable() { Value = \ maskShiftGet(C.compute_pgm_resource2_registers, Mask, Shift, Ctx); \ } \ - int64_t Val; \ - if (Value->evaluateAsAbsolute(Val)) \ - OS << Val; \ - else \ - Value->print(OS, Ctx.getAsmInfo()); \ + Helper(Value, OS, Ctx.getAsmInfo()); \ } #define RECORD(name, altName, print, parse) print #include "Utils/AMDKernelCodeTInfo.h" @@ -379,10 +375,11 @@ static ArrayRef getParserTable() { } static void printAmdKernelCodeField(const AMDGPUMCKernelCodeT &C, int FldIndex, - raw_ostream &OS, MCContext &Ctx) { - auto Printer = getPrinterTable()[FldIndex]; + raw_ostream &OS, MCContext &Ctx, + AMDGPUMCKernelCodeT::PrintHelper Helper) { + auto Printer = getPrinterTable(Helper)[FldIndex]; if (Printer) - Printer(get_amd_kernel_code_t_FldNames()[FldIndex + 1], C, OS, Ctx); + Printer(get_amd_kernel_code_t_FldNames()[FldIndex + 1], C, OS, Ctx, Helper); } void AMDGPUMCKernelCodeT::initDefault(const MCSubtargetInfo *STI, @@ -459,20 +456,17 @@ bool AMDGPUMCKernelCodeT::ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser, return Parser ? 
Parser(*this, MCParser, Err) : false; } -void AMDGPUMCKernelCodeT::EmitKernelCodeT(raw_ostream &OS, MCContext &Ctx) { +void AMDGPUMCKernelCodeT::EmitKernelCodeT(raw_ostream &OS, MCContext &Ctx, + PrintHelper Helper) { const int Size = hasMCExprVersionTable().size(); for (int i = 0; i < Size; ++i) { OS << "\t\t"; if (hasMCExprVersionTable()[i]) { OS << get_amd_kernel_code_t_FldNames()[i + 1] << " = "; - int64_t Val; const MCExpr *Value = getMCExprForIndex(i); - if (Value->evaluateAsAbsolute(Val)) - OS << Val; - else - Value->print(OS, Ctx.getAsmInfo()); + Helper(Value, OS, Ctx.getAsmInfo()); } else { - printAmdKernelCodeField(*this, i, OS, Ctx); + printAmdKernelCodeField(*this, i, OS, Ctx, Helper); } OS << '\n'; } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h index 6aeb98f1ce147b8..39acd5c43053bf5 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h @@ -27,6 +27,7 @@ class MCExpr; class MCStreamer; class MCSubtargetInfo; class raw_ostream; +class MCAsmInfo; namespace AMDGPU { struct AMDGPUMCKernelCodeT { @@ -79,8 +80,10 @@ struct AMDGPUMCKernelCodeT { const MCExpr *&getMCExprForIndex(int Index); + using PrintHelper = + function_ref; bool ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser, raw_ostream &Err); - void EmitKernelCodeT(raw_ostream &OS, MCContext &Ctx); + void EmitKernelCodeT(raw_ostream &OS, MCContext &Ctx, PrintHelper Helper); void EmitKernelCodeT(MCStreamer &OS, MCContext &Ctx); }; diff --git a/llvm/test/MC/AMDGPU/amd_kernel_code_t.s b/llvm/test/MC/AMDGPU/amd_kernel_code_t.s index 052ec0bfabb849d..507001a2657c5f5 100644 --- a/llvm/test/MC/AMDGPU/amd_kernel_code_t.s +++ b/llvm/test/MC/AMDGPU/amd_kernel_code_t.s @@ -131,17 +131,17 @@ unknown_workitem_private_segment_byte_size: s_endpgm ; ASM-LABEL: unknown_granulated_workitem_vgpr_count: -; ASM: granulated_workitem_vgpr_count = ((0&4294967232)|(unknown&63))&63 -; 
ASM: granulated_wavefront_sgpr_count = (((0&4294967232)|(unknown&63))>>6)&15 -; ASM: priority = (((0&4294967232)|(unknown&63))>>10)&3 -; ASM: float_mode = (((0&4294967232)|(unknown&63))>>12)&255 -; ASM: priv = (((0&4294967232)|(unknown&63))>>20)&1 -; ASM: enable_dx10_clamp = (((0&4294967232)|(unknown&63))>>21)&1 -; ASM: debug_mode = (((0&4294967232)|(unknown&63))>>22)&1 -; ASM: enable_ieee_mode = (((0&4294967232)|(unknown&63))>>23)&1 -; ASM: enable_wgp_mode = (((0&4294967232)|(unknown&63))>>29)&1 -; ASM: enable_mem_ordered = (((0&4294967232)|(unknown&63))>>30)&1 -; ASM: enable_fwd_progress = (((0&4294967232)|(unknown&63))>>31)&1 +; ASM: granulated_workitem_vgpr_count = (unknown&63)&63 +; ASM: granulated_wavefront_sgpr_count = 0 +; ASM: priority = 0 +; ASM: float_mode = 0 +; ASM: priv = 0 +; ASM: enable_dx10_clamp = 0 +; ASM: debug_mode = 0 +; ASM: enable_ieee_mode = 0 +; ASM: enable_wgp_mode = 0 +; ASM: enable_mem_ordered = 0 +; ASM: enable_fwd_progress = 0 .section .unknown_granulated_workitem_vgpr_count unknown_granulated_workitem_vgpr_count: .amd_kernel_code_t @@ -150,17 +150,9 @@ unknown_granulated_workitem_vgpr_count: s_endpgm ; ASM-LABEL: unknown_enable_sgpr_workgroup_id_x: -; ASM: enable_sgpr_private_segment_wave_byte_offset = ((0&4294967167)|((unknown&1)<<7))&1 -; ASM: user_sgpr_count = (((0&4294967167)|((unknown&1)<<7))>>1)&31 -; ASM: enable_trap_handler = (((0&4294967167)|((unknown&1)<<7))>>6)&1 -; ASM: enable_sgpr_workgroup_id_x = (((0&4294967167)|((unknown&1)<<7))>>7)&1 -; ASM: enable_sgpr_workgroup_id_y = (((0&4294967167)|((unknown&1)<<7))>>8)&1 -; ASM: enable_sgpr_workgroup_id_z = (((0&4294967167)|((unknown&1)<<7))>>9)&1 -; ASM: enable_sgpr_workgroup_info = (((0&4294967167)|((unknown&1)<<7))>>10)&1 -; ASM: enable_vgpr_workitem_id = (((0&4294967167)|((unknown&1)<<7))>>11)&3 -; ASM: enable_exception_msb = (((0&4294967167)|((unknown&1)<<7))>>13)&3 -; ASM: granulated_lds_size = (((0&4294967167)|((unknown&1)<<7))>>15)&511 -; ASM: enable_exception = 
(((0&4294967167)|((unknown&1)<<7))>>24)&127 +; ASM: enable_sgpr_workgroup_id_x = (((unknown&1)<<7)>>7)&1 +; ASM: enable_sgpr_workgroup_id_y = 0 +; ASM: enable_sgpr_workgroup_id_z = 0 .section .unknown_enable_sgpr_workgroup_id_x unknown_enable_sgpr_workgroup_id_x: .amd_kernel_code_t diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s index af4cb1a008f9d92..bec717e4137df2b 100644 --- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s +++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s @@ -106,44 +106,44 @@ expr_defined: // ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2 // ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3 // ASM-NEXT: .amdhsa_kernarg_size 0 -// ASM-NEXT: .amdhsa_user_sgpr_count (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&62)>>1 -// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&1)>>0 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&2)>>1 -// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&4)>>2 -// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&8)>>3 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&16)>>4 -// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&32)>>5 -// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 
(((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&64)>>6 -// ASM-NEXT: .amdhsa_wavefront_size32 (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&1024)>>10 -// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1)>>0 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&128)>>7 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&256)>>8 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&512)>>9 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1024)>>10 -// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&6144)>>11 +// ASM-NEXT: .amdhsa_user_sgpr_count 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +// ASM-NEXT: .amdhsa_wavefront_size32 1 +// ASM-NEXT: 
.amdhsa_system_sgpr_private_segment_wavefront_offset ((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&128)>>7 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&256)>>8 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&512)>>9 +// ASM-NEXT: 
.amdhsa_system_sgpr_workgroup_info (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&1024)>>10 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&6144)>>11 // ASM-NEXT: .amdhsa_next_free_vgpr defined_value+4 // ASM-NEXT: .amdhsa_next_free_sgpr defined_value+5 // ASM-NEXT: .amdhsa_reserve_vcc defined_boolean // ASM-NEXT: .amdhsa_reserve_flat_scratch defined_boolean // ASM-NEXT: .amdhsa_reserve_xnack_mask 1 -// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|((((alignto(max(defined_value+4, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max(0, 1), 8))/8)-1)<<6))&12288)>>12 -// ASM-NEXT: .amdhsa_float_round_mode_16_64 
(((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|((((alignto(max(defined_value+4, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max(0, 1), 8))/8)-1)<<6))&49152)>>14 -// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|((((alignto(max(defined_value+4, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max(0, 1), 8))/8)-1)<<6))&196608)>>16 -// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|((((alignto(max(defined_value+4, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max(0, 1), 8))/8)-1)<<6))&786432)>>18 -// ASM-NEXT: .amdhsa_dx10_clamp 
(((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|((((alignto(max(defined_value+4, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max(0, 1), 8))/8)-1)<<6))&2097152)>>21 -// ASM-NEXT: .amdhsa_ieee_mode (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|((((alignto(max(defined_value+4, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max(0, 1), 8))/8)-1)<<6))&8388608)>>23 -// ASM-NEXT: .amdhsa_fp16_overflow (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|((((alignto(max(defined_value+4, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max(0, 1), 8))/8)-1)<<6))&67108864)>>26 -// ASM-NEXT: .amdhsa_workgroup_processor_mode 
(((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|((((alignto(max(defined_value+4, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max(0, 1), 8))/8)-1)<<6))&536870912)>>29 -// ASM-NEXT: .amdhsa_memory_ordered (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|((((alignto(max(defined_value+4, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max(0, 1), 8))/8)-1)<<6))&1073741824)>>30 -// ASM-NEXT: .amdhsa_forward_progress (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|((((alignto(max(defined_value+4, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max(0, 1), 8))/8)-1)<<6))&2147483648)>>31 +// ASM-NEXT: .amdhsa_float_round_mode_32 
(((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&12288)>>12 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&49152)>>14 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&196608)>>16 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&786432)>>18 +// ASM-NEXT: .amdhsa_dx10_clamp (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 
8))/8)-1))&(~960))&2097152)>>21 +// ASM-NEXT: .amdhsa_ieee_mode (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&8388608)>>23 +// ASM-NEXT: .amdhsa_fp16_overflow (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&67108864)>>26 +// ASM-NEXT: .amdhsa_workgroup_processor_mode (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&536870912)>>29 +// ASM-NEXT: .amdhsa_memory_ordered (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&1073741824)>>30 +// ASM-NEXT: .amdhsa_forward_progress 
(((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&2147483648)>>31 // ASM-NEXT: .amdhsa_shared_vgpr_count 0 -// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&16777216)>>24 -// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&33554432)>>25 -// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&67108864)>>26 -// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&134217728)>>27 -// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&268435456)>>28 -// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&536870912)>>29 -// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1073741824)>>30 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&16777216)>>24 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src 
(((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&33554432)>>25 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&67108864)>>26 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&134217728)>>27 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&268435456)>>28 +// ASM-NEXT: 
.amdhsa_exception_fp_ieee_inexact (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&536870912)>>29 +// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&1073741824)>>30 // ASM-NEXT: .end_amdhsa_kernel // ASM: .set defined_value, 41 diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s index b6e4ddde3d7f9e2..85a7ad05b00f484 100644 --- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s +++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s @@ -106,40 +106,40 @@ expr_defined: // ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2 // ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3 // ASM-NEXT: .amdhsa_kernarg_size 0 -// ASM-NEXT: .amdhsa_user_sgpr_count 
(((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&62)>>1 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&2)>>1 -// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&4)>>2 -// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&8)>>3 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&16)>>4 -// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&64)>>6 -// ASM-NEXT: .amdhsa_wavefront_size32 (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&1024)>>10 -// ASM-NEXT: .amdhsa_enable_private_segment (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1)>>0 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 
(((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&128)>>7 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&256)>>8 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&512)>>9 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 
(((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1024)>>10 -// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&6144)>>11 +// ASM-NEXT: .amdhsa_user_sgpr_count 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +// ASM-NEXT: .amdhsa_wavefront_size32 1 +// ASM-NEXT: .amdhsa_enable_private_segment ((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 
(((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&128)>>7 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&256)>>8 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&512)>>9 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 
(((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&1024)>>10 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&6144)>>11 // ASM-NEXT: .amdhsa_next_free_vgpr defined_value+4 // ASM-NEXT: .amdhsa_next_free_sgpr defined_value+5 // ASM-NEXT: .amdhsa_reserve_vcc defined_boolean -// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|((((alignto(max(defined_value+4, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max(0, 1), 8))/8)-1)<<6))&12288)>>12 -// ASM-NEXT: .amdhsa_float_round_mode_16_64 
(((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|((((alignto(max(defined_value+4, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max(0, 1), 8))/8)-1)<<6))&49152)>>14 -// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|((((alignto(max(defined_value+4, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max(0, 1), 8))/8)-1)<<6))&196608)>>16 -// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|((((alignto(max(defined_value+4, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max(0, 1), 8))/8)-1)<<6))&786432)>>18 -// ASM-NEXT: .amdhsa_dx10_clamp 
(((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|((((alignto(max(defined_value+4, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max(0, 1), 8))/8)-1)<<6))&2097152)>>21 -// ASM-NEXT: .amdhsa_ieee_mode (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|((((alignto(max(defined_value+4, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max(0, 1), 8))/8)-1)<<6))&8388608)>>23 -// ASM-NEXT: .amdhsa_fp16_overflow (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|((((alignto(max(defined_value+4, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max(0, 1), 8))/8)-1)<<6))&67108864)>>26 -// ASM-NEXT: .amdhsa_workgroup_processor_mode 
(((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|((((alignto(max(defined_value+4, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max(0, 1), 8))/8)-1)<<6))&536870912)>>29 -// ASM-NEXT: .amdhsa_memory_ordered (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|((((alignto(max(defined_value+4, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max(0, 1), 8))/8)-1)<<6))&1073741824)>>30 -// ASM-NEXT: .amdhsa_forward_progress (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|((((alignto(max(defined_value+4, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max(0, 1), 8))/8)-1)<<6))&2147483648)>>31 +// ASM-NEXT: .amdhsa_float_round_mode_32 
(((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&12288)>>12 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&49152)>>14 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&196608)>>16 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&786432)>>18 +// ASM-NEXT: .amdhsa_dx10_clamp (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 
8))/8)-1))&(~960))&2097152)>>21 +// ASM-NEXT: .amdhsa_ieee_mode (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&8388608)>>23 +// ASM-NEXT: .amdhsa_fp16_overflow (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&67108864)>>26 +// ASM-NEXT: .amdhsa_workgroup_processor_mode (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&536870912)>>29 +// ASM-NEXT: .amdhsa_memory_ordered (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&1073741824)>>30 +// ASM-NEXT: .amdhsa_forward_progress 
(((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&2147483648)>>31 // ASM-NEXT: .amdhsa_shared_vgpr_count 0 -// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&16777216)>>24 -// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&33554432)>>25 -// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 
(((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&67108864)>>26 -// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&134217728)>>27 -// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&268435456)>>28 -// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 
(((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&536870912)>>29 -// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1073741824)>>30 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&16777216)>>24 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src 
(((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&33554432)>>25 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&67108864)>>26 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&134217728)>>27 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 
(((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&268435456)>>28 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&536870912)>>29 +// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&1073741824)>>30 // ASM-NEXT: .end_amdhsa_kernel // ASM: .set defined_value, 41 diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s index a80000dc44dac0d..51d0fb30b320c52 100644 --- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s +++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s @@ -108,38 +108,38 @@ expr_defined: // ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2 // ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3 // 
ASM-NEXT: .amdhsa_kernarg_size 0 -// ASM-NEXT: .amdhsa_user_sgpr_count (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&62)>>1 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&2)>>1 -// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&4)>>2 -// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&8)>>3 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&16)>>4 -// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&64)>>6 -// ASM-NEXT: .amdhsa_wavefront_size32 (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&1024)>>10 -// ASM-NEXT: .amdhsa_enable_private_segment (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1)>>0 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 
(((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&128)>>7 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&256)>>8 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&512)>>9 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 
(((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1024)>>10 -// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&6144)>>11 +// ASM-NEXT: .amdhsa_user_sgpr_count 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +// ASM-NEXT: .amdhsa_wavefront_size32 1 +// ASM-NEXT: .amdhsa_enable_private_segment ((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 
(((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&128)>>7 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&256)>>8 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&512)>>9 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 
(((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&1024)>>10 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&6144)>>11 // ASM-NEXT: .amdhsa_next_free_vgpr defined_value+4 // ASM-NEXT: .amdhsa_next_free_sgpr defined_value+5 // ASM-NEXT: .amdhsa_reserve_vcc defined_boolean -// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|((((alignto(max(defined_value+4, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max(0, 1), 8))/8)-1)<<6))&12288)>>12 -// ASM-NEXT: .amdhsa_float_round_mode_16_64 
(((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|((((alignto(max(defined_value+4, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max(0, 1), 8))/8)-1)<<6))&49152)>>14 -// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|((((alignto(max(defined_value+4, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max(0, 1), 8))/8)-1)<<6))&196608)>>16 -// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|((((alignto(max(defined_value+4, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max(0, 1), 8))/8)-1)<<6))&786432)>>18 -// ASM-NEXT: .amdhsa_fp16_overflow 
(((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|((((alignto(max(defined_value+4, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max(0, 1), 8))/8)-1)<<6))&67108864)>>26 -// ASM-NEXT: .amdhsa_workgroup_processor_mode (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|((((alignto(max(defined_value+4, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max(0, 1), 8))/8)-1)<<6))&536870912)>>29 -// ASM-NEXT: .amdhsa_memory_ordered (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|((((alignto(max(defined_value+4, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max(0, 1), 8))/8)-1)<<6))&1073741824)>>30 -// ASM-NEXT: .amdhsa_forward_progress 
(((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|((((alignto(max(defined_value+4, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max(0, 1), 8))/8)-1)<<6))&2147483648)>>31 -// ASM-NEXT: .amdhsa_round_robin_scheduling (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|((((alignto(max(defined_value+4, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max(0, 1), 8))/8)-1)<<6))&2097152)>>21 -// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&16777216)>>24 -// ASM-NEXT: .amdhsa_exception_fp_denorm_src 
(((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&33554432)>>25 -// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&67108864)>>26 -// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&134217728)>>27 -// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 
(((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&268435456)>>28 -// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&536870912)>>29 -// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1073741824)>>30 +// ASM-NEXT: .amdhsa_float_round_mode_32 
(((((((((((((((((((((1611399168|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&12288)>>12 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((((((1611399168|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&49152)>>14 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((((1611399168|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&196608)>>16 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((((1611399168|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&786432)>>18 +// ASM-NEXT: .amdhsa_fp16_overflow 
(((((((((((((((((((((1611399168|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&67108864)>>26 +// ASM-NEXT: .amdhsa_workgroup_processor_mode (((((((((((((((((((((1611399168|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&536870912)>>29 +// ASM-NEXT: .amdhsa_memory_ordered (((((((((((((((((((((1611399168|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&1073741824)>>30 +// ASM-NEXT: .amdhsa_forward_progress (((((((((((((((((((((1611399168|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&2147483648)>>31 +// ASM-NEXT: .amdhsa_round_robin_scheduling 
(((((((((((((((((((((1611399168|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&2097152)>>21 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&16777216)>>24 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&33554432)>>25 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&67108864)>>26 +// ASM-NEXT: 
.amdhsa_exception_fp_ieee_overflow (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&134217728)>>27 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&268435456)>>28 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&536870912)>>29 +// ASM-NEXT: .amdhsa_exception_int_div_zero 
(((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&1073741824)>>30 // ASM-NEXT: .end_amdhsa_kernel // ASM: .set defined_value, 41 diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s index 7ab2e2b28a0e6ec..485f48c695c4dea 100644 --- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s +++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s @@ -98,37 +98,37 @@ expr_defined: // ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2 // ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3 // ASM-NEXT: .amdhsa_kernarg_size 0 -// ASM-NEXT: .amdhsa_user_sgpr_count (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&62)>>1 -// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer (((0&(~2048))|(defined_boolean<<11))&1)>>0 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((0&(~2048))|(defined_boolean<<11))&2)>>1 -// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((0&(~2048))|(defined_boolean<<11))&4)>>2 -// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((0&(~2048))|(defined_boolean<<11))&8)>>3 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((0&(~2048))|(defined_boolean<<11))&16)>>4 -// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 
(((0&(~2048))|(defined_boolean<<11))&32)>>5 -// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((0&(~2048))|(defined_boolean<<11))&64)>>6 -// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1)>>0 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&128)>>7 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&256)>>8 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&512)>>9 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1024)>>10 -// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&6144)>>11 +// ASM-NEXT: .amdhsa_user_sgpr_count 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +// ASM-NEXT: 
.amdhsa_system_sgpr_private_segment_wavefront_offset ((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&128)>>7 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&256)>>8 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&512)>>9 +// ASM-NEXT: 
.amdhsa_system_sgpr_workgroup_info (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&1024)>>10 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&6144)>>11 // ASM-NEXT: .amdhsa_next_free_vgpr defined_value+4 // ASM-NEXT: .amdhsa_next_free_sgpr defined_value+5 // ASM-NEXT: .amdhsa_reserve_vcc defined_boolean // ASM-NEXT: .amdhsa_reserve_flat_scratch defined_boolean -// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|((((alignto(max(defined_value+4, 1), 4))/4)-1)<<0))&(~960))|((((alignto(max((defined_value+5)+(extrasgprs(defined_boolean, defined_boolean, 0)), 1), 8))/8)-1)<<6))&12288)>>12 -// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|((((alignto(max(defined_value+4, 1), 
4))/4)-1)<<0))&(~960))|((((alignto(max((defined_value+5)+(extrasgprs(defined_boolean, defined_boolean, 0)), 1), 8))/8)-1)<<6))&49152)>>14 -// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|((((alignto(max(defined_value+4, 1), 4))/4)-1)<<0))&(~960))|((((alignto(max((defined_value+5)+(extrasgprs(defined_boolean, defined_boolean, 0)), 1), 8))/8)-1)<<6))&196608)>>16 -// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|((((alignto(max(defined_value+4, 1), 4))/4)-1)<<0))&(~960))|((((alignto(max((defined_value+5)+(extrasgprs(defined_boolean, defined_boolean, 0)), 1), 8))/8)-1)<<6))&786432)>>18 -// ASM-NEXT: .amdhsa_dx10_clamp (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|((((alignto(max(defined_value+4, 1), 4))/4)-1)<<0))&(~960))|((((alignto(max((defined_value+5)+(extrasgprs(defined_boolean, defined_boolean, 0)), 1), 8))/8)-1)<<6))&2097152)>>21 -// ASM-NEXT: .amdhsa_ieee_mode (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|((((alignto(max(defined_value+4, 1), 4))/4)-1)<<0))&(~960))|((((alignto(max((defined_value+5)+(extrasgprs(defined_boolean, defined_boolean, 0)), 1), 8))/8)-1)<<6))&8388608)>>23 -// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&16777216)>>24 -// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&33554432)>>25 -// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&67108864)>>26 -// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&134217728)>>27 -// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&268435456)>>28 -// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&536870912)>>29 -// ASM-NEXT: .amdhsa_exception_int_div_zero 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1073741824)>>30 +// ASM-NEXT: .amdhsa_float_round_mode_32 ((((((((((((11272192|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(((alignto(max(defined_value+4, 1), 4))/4)-1))&(~960))|((((alignto(max((defined_value+5)+(extrasgprs(defined_boolean, defined_boolean, 0)), 1), 8))/8)-1)<<6))&12288)>>12 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 ((((((((((((11272192|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(((alignto(max(defined_value+4, 1), 4))/4)-1))&(~960))|((((alignto(max((defined_value+5)+(extrasgprs(defined_boolean, defined_boolean, 0)), 1), 8))/8)-1)<<6))&49152)>>14 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 ((((((((((((11272192|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(((alignto(max(defined_value+4, 1), 4))/4)-1))&(~960))|((((alignto(max((defined_value+5)+(extrasgprs(defined_boolean, defined_boolean, 0)), 1), 8))/8)-1)<<6))&196608)>>16 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 ((((((((((((11272192|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(((alignto(max(defined_value+4, 1), 4))/4)-1))&(~960))|((((alignto(max((defined_value+5)+(extrasgprs(defined_boolean, defined_boolean, 0)), 1), 8))/8)-1)<<6))&786432)>>18 +// ASM-NEXT: .amdhsa_dx10_clamp 1 +// ASM-NEXT: 
.amdhsa_ieee_mode 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&16777216)>>24 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&33554432)>>25 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&67108864)>>26 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 
(((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&134217728)>>27 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&268435456)>>28 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&536870912)>>29 +// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&1073741824)>>30 // ASM-NEXT: .end_amdhsa_kernel // ASM: .set 
defined_value, 41 diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s index caccde7ba0e3b8e..0d2e066113ee861 100644 --- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s +++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s @@ -99,38 +99,38 @@ expr_defined: // ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2 // ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3 // ASM-NEXT: .amdhsa_kernarg_size 0 -// ASM-NEXT: .amdhsa_user_sgpr_count (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&62)>>1 -// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer (((0&(~2048))|(defined_boolean<<11))&1)>>0 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((0&(~2048))|(defined_boolean<<11))&2)>>1 -// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((0&(~2048))|(defined_boolean<<11))&4)>>2 -// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((0&(~2048))|(defined_boolean<<11))&8)>>3 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((0&(~2048))|(defined_boolean<<11))&16)>>4 -// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init (((0&(~2048))|(defined_boolean<<11))&32)>>5 -// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((0&(~2048))|(defined_boolean<<11))&64)>>6 -// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1)>>0 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&128)>>7 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&256)>>8 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&512)>>9 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1024)>>10 -// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&6144)>>11 +// ASM-NEXT: .amdhsa_user_sgpr_count 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +// ASM-NEXT: 
.amdhsa_system_sgpr_private_segment_wavefront_offset ((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&128)>>7 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&256)>>8 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&512)>>9 +// ASM-NEXT: 
.amdhsa_system_sgpr_workgroup_info (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&1024)>>10 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&6144)>>11 // ASM-NEXT: .amdhsa_next_free_vgpr defined_value+4 // ASM-NEXT: .amdhsa_next_free_sgpr defined_value+5 // ASM-NEXT: .amdhsa_reserve_vcc defined_boolean // ASM-NEXT: .amdhsa_reserve_flat_scratch defined_boolean // ASM-NEXT: .amdhsa_reserve_xnack_mask 1 -// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|((((alignto(max(defined_value+4, 1), 4))/4)-1)<<0))&(~960))|((((alignto(max((defined_value+5)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&12288)>>12 -// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|((((alignto(max(defined_value+4, 1), 
4))/4)-1)<<0))&(~960))|((((alignto(max((defined_value+5)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&49152)>>14 -// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|((((alignto(max(defined_value+4, 1), 4))/4)-1)<<0))&(~960))|((((alignto(max((defined_value+5)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&196608)>>16 -// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|((((alignto(max(defined_value+4, 1), 4))/4)-1)<<0))&(~960))|((((alignto(max((defined_value+5)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&786432)>>18 -// ASM-NEXT: .amdhsa_dx10_clamp (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|((((alignto(max(defined_value+4, 1), 4))/4)-1)<<0))&(~960))|((((alignto(max((defined_value+5)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&2097152)>>21 -// ASM-NEXT: .amdhsa_ieee_mode (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|((((alignto(max(defined_value+4, 1), 4))/4)-1)<<0))&(~960))|((((alignto(max((defined_value+5)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&8388608)>>23 -// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&16777216)>>24 -// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&33554432)>>25 -// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&67108864)>>26 -// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&134217728)>>27 -// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&268435456)>>28 -// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&536870912)>>29 -// ASM-NEXT: .amdhsa_exception_int_div_zero 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1073741824)>>30 +// ASM-NEXT: .amdhsa_float_round_mode_32 ((((((((((((11272192|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(((alignto(max(defined_value+4, 1), 4))/4)-1))&(~960))|((((alignto(max((defined_value+5)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&12288)>>12 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 ((((((((((((11272192|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(((alignto(max(defined_value+4, 1), 4))/4)-1))&(~960))|((((alignto(max((defined_value+5)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&49152)>>14 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 ((((((((((((11272192|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(((alignto(max(defined_value+4, 1), 4))/4)-1))&(~960))|((((alignto(max((defined_value+5)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&196608)>>16 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 ((((((((((((11272192|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(((alignto(max(defined_value+4, 1), 4))/4)-1))&(~960))|((((alignto(max((defined_value+5)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&786432)>>18 +// ASM-NEXT: .amdhsa_dx10_clamp 1 +// ASM-NEXT: 
.amdhsa_ieee_mode 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&16777216)>>24 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&33554432)>>25 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&67108864)>>26 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 
(((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&134217728)>>27 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&268435456)>>28 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&536870912)>>29 +// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&1073741824)>>30 // ASM-NEXT: .end_amdhsa_kernel // ASM: .set 
defined_value, 41 diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s index 216ae4c42a3d9a9..88b5e23a6f2c5f1 100644 --- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s +++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s @@ -70,7 +70,7 @@ expr_defined: // ASM-NEXT: .amdhsa_group_segment_fixed_size 0 // ASM-NEXT: .amdhsa_private_segment_fixed_size 0 // ASM-NEXT: .amdhsa_kernarg_size 0 -// ASM-NEXT: .amdhsa_user_sgpr_count (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&62)>>1 +// ASM-NEXT: .amdhsa_user_sgpr_count 0 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 // ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 // ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0 @@ -80,33 +80,33 @@ expr_defined: // ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length 0 // ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset 0 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0 -// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1)>>0 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&128)>>7 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&256)>>8 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&512)>>9 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1024)>>10 -// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&6144)>>11 +// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset ((128|defined_boolean)&(~62))&1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((128|defined_boolean)&(~62))&256)>>8 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 
(((128|defined_boolean)&(~62))&512)>>9 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((128|defined_boolean)&(~62))&1024)>>10 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((128|defined_boolean)&(~62))&6144)>>11 // ASM-NEXT: .amdhsa_next_free_vgpr defined_boolean+1 // ASM-NEXT: .amdhsa_next_free_sgpr defined_boolean+2 -// ASM-NEXT: .amdhsa_accum_offset (((((((0&(~65536))|(defined_boolean<<16))&(~63))|(((4/4)-1)<<0))&63)>>0)+1)*4 +// ASM-NEXT: .amdhsa_accum_offset 4 // ASM-NEXT: .amdhsa_reserve_vcc defined_boolean // ASM-NEXT: .amdhsa_reserve_flat_scratch defined_boolean // ASM-NEXT: .amdhsa_reserve_xnack_mask 1 -// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|((((alignto(max(defined_boolean+1, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max((defined_boolean+2)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&12288)>>12 -// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|((((alignto(max(defined_boolean+1, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max((defined_boolean+2)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&49152)>>14 -// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|((((alignto(max(defined_boolean+1, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max((defined_boolean+2)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&196608)>>16 -// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 
(((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|((((alignto(max(defined_boolean+1, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max((defined_boolean+2)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&786432)>>18 -// ASM-NEXT: .amdhsa_dx10_clamp (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|((((alignto(max(defined_boolean+1, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max((defined_boolean+2)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&2097152)>>21 -// ASM-NEXT: .amdhsa_ieee_mode (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|((((alignto(max(defined_boolean+1, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max((defined_boolean+2)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&8388608)>>23 -// ASM-NEXT: .amdhsa_fp16_overflow (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|((((alignto(max(defined_boolean+1, 1), 8))/8)-1)<<0))&(~960))|((((alignto(max((defined_boolean+2)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&67108864)>>26 -// ASM-NEXT: .amdhsa_tg_split (((((0&(~65536))|(defined_boolean<<16))&(~63))|(((4/4)-1)<<0))&65536)>>16 -// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&16777216)>>24 -// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&33554432)>>25 -// ASM-NEXT: 
.amdhsa_exception_fp_ieee_div_zero (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&67108864)>>26 -// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&134217728)>>27 -// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&268435456)>>28 -// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&536870912)>>29 -// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1073741824)>>30 +// ASM-NEXT: .amdhsa_float_round_mode_32 ((((((((((9175040|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(((alignto(max(defined_boolean+1, 1), 8))/8)-1))&(~960))|((((alignto(max((defined_boolean+2)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&12288)>>12 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 ((((((((((9175040|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(((alignto(max(defined_boolean+1, 1), 8))/8)-1))&(~960))|((((alignto(max((defined_boolean+2)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&49152)>>14 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 ((((((((((9175040|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(((alignto(max(defined_boolean+1, 1), 8))/8)-1))&(~960))|((((alignto(max((defined_boolean+2)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&196608)>>16 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 3 +// ASM-NEXT: .amdhsa_dx10_clamp ((((((((((9175040|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(((alignto(max(defined_boolean+1, 1), 8))/8)-1))&(~960))|((((alignto(max((defined_boolean+2)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 
8))/8)-1)<<6))&2097152)>>21 +// ASM-NEXT: .amdhsa_ieee_mode ((((((((((9175040|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(((alignto(max(defined_boolean+1, 1), 8))/8)-1))&(~960))|((((alignto(max((defined_boolean+2)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&8388608)>>23 +// ASM-NEXT: .amdhsa_fp16_overflow ((((((((((9175040|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(((alignto(max(defined_boolean+1, 1), 8))/8)-1))&(~960))|((((alignto(max((defined_boolean+2)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&67108864)>>26 +// ASM-NEXT: .amdhsa_tg_split (((defined_boolean<<16)&(~63))&65536)>>16 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((128|defined_boolean)&(~62))&16777216)>>24 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((128|defined_boolean)&(~62))&33554432)>>25 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((128|defined_boolean)&(~62))&67108864)>>26 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((128|defined_boolean)&(~62))&134217728)>>27 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((128|defined_boolean)&(~62))&268435456)>>28 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((128|defined_boolean)&(~62))&536870912)>>29 +// ASM-NEXT: .amdhsa_exception_int_div_zero (((128|defined_boolean)&(~62))&1073741824)>>30 // ASM-NEXT: .end_amdhsa_kernel // ASM: .set defined_boolean, 1 From d7aeea626dac64449fc67cf8ddf8f326a0157d91 Mon Sep 17 00:00:00 2001 From: Lukacma Date: Thu, 15 Aug 2024 13:52:35 +0100 Subject: [PATCH 021/441] [AArch64] optimise SVE prefetch intrinsics with no active lanes (#103052) This patch extends https://github.com/llvm/llvm-project/pull/73964 and optimises away SVE prefetch intrinsics when predicate is zero. 
--- .../AArch64/AArch64TargetTransformInfo.cpp | 18 ++ .../sve-intrinsic-comb-no-active-lanes-prf.ll | 156 ++++++++++++++++++ 2 files changed, 174 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-prf.ll diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index a416565392eabec..a782c9c43512379 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -2165,6 +2165,24 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset: case Intrinsic::aarch64_sve_ldnt1_gather_uxtw: return instCombineSVENoActiveUnaryZero(IC, II); + case Intrinsic::aarch64_sve_prf: + case Intrinsic::aarch64_sve_prfb_gather_index: + case Intrinsic::aarch64_sve_prfb_gather_scalar_offset: + case Intrinsic::aarch64_sve_prfb_gather_sxtw_index: + case Intrinsic::aarch64_sve_prfb_gather_uxtw_index: + case Intrinsic::aarch64_sve_prfd_gather_index: + case Intrinsic::aarch64_sve_prfd_gather_scalar_offset: + case Intrinsic::aarch64_sve_prfd_gather_sxtw_index: + case Intrinsic::aarch64_sve_prfd_gather_uxtw_index: + case Intrinsic::aarch64_sve_prfh_gather_index: + case Intrinsic::aarch64_sve_prfh_gather_scalar_offset: + case Intrinsic::aarch64_sve_prfh_gather_sxtw_index: + case Intrinsic::aarch64_sve_prfh_gather_uxtw_index: + case Intrinsic::aarch64_sve_prfw_gather_index: + case Intrinsic::aarch64_sve_prfw_gather_scalar_offset: + case Intrinsic::aarch64_sve_prfw_gather_sxtw_index: + case Intrinsic::aarch64_sve_prfw_gather_uxtw_index: + return instCombineSVENoActiveUnaryErase(IC, II, 0); case Intrinsic::aarch64_neon_fmaxnm: case Intrinsic::aarch64_neon_fminnm: return instCombineMaxMinNM(IC, II); diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-prf.ll 
b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-prf.ll new file mode 100644 index 000000000000000..1de582dff52dad5 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-prf.ll @@ -0,0 +1,156 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=instcombine < %s | FileCheck %s +target triple = "aarch64-unknown-linux-gnu" + +define void @test_prf(ptr %base){ +; CHECK-LABEL: define void @test_prf( +; CHECK-SAME: ptr [[BASE:%.*]]) { +; CHECK-NEXT: ret void +; + tail call void @llvm.aarch64.sve.prf.nxv16i1( zeroinitializer, ptr %base, i32 1) + ret void +} + +define void @test_prfb_gather_index(ptr %base, %indexes){ +; CHECK-LABEL: define void @test_prfb_gather_index( +; CHECK-SAME: ptr [[BASE:%.*]], [[INDEXES:%.*]]) { +; CHECK-NEXT: ret void +; + call void @llvm.aarch64.sve.prfb.gather.index.nx2vi64( zeroinitializer, ptr %base, %indexes, i32 1) + ret void +} + +define void @test_prfb_gather_scalar_offset( %bases){ +; CHECK-LABEL: define void @test_prfb_gather_scalar_offset( +; CHECK-SAME: [[BASES:%.*]]) { +; CHECK-NEXT: ret void +; + call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nx4vi32( zeroinitializer, %bases, i64 7, i32 1) + ret void +} + +define void @test_prfb_gather_sxtw_index(ptr %base, %indexes){ +; CHECK-LABEL: define void @test_prfb_gather_sxtw_index( +; CHECK-SAME: ptr [[BASE:%.*]], [[INDEXES:%.*]]) { +; CHECK-NEXT: ret void +; + call void @llvm.aarch64.sve.prfb.gather.sxtw.index.nx4vi32( zeroinitializer, ptr %base, %indexes, i32 1) + ret void +} + +define void @test_prfb_gather_uxtw_index(ptr %base, %indexes){ +; CHECK-LABEL: define void @test_prfb_gather_uxtw_index( +; CHECK-SAME: ptr [[BASE:%.*]], [[INDEXES:%.*]]) { +; CHECK-NEXT: ret void +; + call void @llvm.aarch64.sve.prfb.gather.uxtw.index.nx4vi32( zeroinitializer, ptr %base, %indexes, i32 1) + ret void +} + +define void @test_prfd_gather_index(ptr %base, 
%indexes){ +; CHECK-LABEL: define void @test_prfd_gather_index( +; CHECK-SAME: ptr [[BASE:%.*]], [[INDEXES:%.*]]) { +; CHECK-NEXT: ret void +; + call void @llvm.aarch64.sve.prfd.gather.index.nx2vi64( zeroinitializer, ptr %base, %indexes, i32 1) + ret void +} + +define void @test_prfd_gather_scalar_offset( %bases){ +; CHECK-LABEL: define void @test_prfd_gather_scalar_offset( +; CHECK-SAME: [[BASES:%.*]]) { +; CHECK-NEXT: ret void +; + call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx4vi32( zeroinitializer, %bases, i64 7, i32 1) + ret void +} + +define void @test_prfd_gather_sxtw_index(ptr %base, %indexes){ +; CHECK-LABEL: define void @test_prfd_gather_sxtw_index( +; CHECK-SAME: ptr [[BASE:%.*]], [[INDEXES:%.*]]) { +; CHECK-NEXT: ret void +; + call void @llvm.aarch64.sve.prfd.gather.sxtw.index.nx4vi32( zeroinitializer, ptr %base, %indexes, i32 1) + ret void +} + +define void @test_prfd_gather_uxtw_index(ptr %base, %indexes){ +; CHECK-LABEL: define void @test_prfd_gather_uxtw_index( +; CHECK-SAME: ptr [[BASE:%.*]], [[INDEXES:%.*]]) { +; CHECK-NEXT: ret void +; + call void @llvm.aarch64.sve.prfd.gather.uxtw.index.nx4vi32( zeroinitializer, ptr %base, %indexes, i32 1) + ret void +} + +define void @test_prfh_gather_index(ptr %base, %indexes){ +; CHECK-LABEL: define void @test_prfh_gather_index( +; CHECK-SAME: ptr [[BASE:%.*]], [[INDEXES:%.*]]) { +; CHECK-NEXT: ret void +; + call void @llvm.aarch64.sve.prfh.gather.index.nx2vi64( zeroinitializer, ptr %base, %indexes, i32 1) + ret void +} + +define void @test_prfh_gather_scalar_offset( %bases){ +; CHECK-LABEL: define void @test_prfh_gather_scalar_offset( +; CHECK-SAME: [[BASES:%.*]]) { +; CHECK-NEXT: ret void +; + call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx4vi32( zeroinitializer, %bases, i64 7, i32 1) + ret void +} + +define void @test_prfh_gather_sxtw_index(ptr %base, %indexes){ +; CHECK-LABEL: define void @test_prfh_gather_sxtw_index( +; CHECK-SAME: ptr [[BASE:%.*]], [[INDEXES:%.*]]) { +; CHECK-NEXT: 
ret void +; + call void @llvm.aarch64.sve.prfh.gather.sxtw.index.nx4vi32( zeroinitializer, ptr %base, %indexes, i32 1) + ret void +} + +define void @test_prfh_gather_uxtw_index(ptr %base, %indexes){ +; CHECK-LABEL: define void @test_prfh_gather_uxtw_index( +; CHECK-SAME: ptr [[BASE:%.*]], [[INDEXES:%.*]]) { +; CHECK-NEXT: ret void +; + call void @llvm.aarch64.sve.prfh.gather.uxtw.index.nx4vi32( zeroinitializer, ptr %base, %indexes, i32 1) + ret void +} + +define void @test_prfw_gather_index(ptr %base, %indexes){ +; CHECK-LABEL: define void @test_prfw_gather_index( +; CHECK-SAME: ptr [[BASE:%.*]], [[INDEXES:%.*]]) { +; CHECK-NEXT: ret void +; + call void @llvm.aarch64.sve.prfw.gather.index.nx2vi64( zeroinitializer, ptr %base, %indexes, i32 1) + ret void +} + +define void @test_prfw_gather_scalar_offset( %bases){ +; CHECK-LABEL: define void @test_prfw_gather_scalar_offset( +; CHECK-SAME: [[BASES:%.*]]) { +; CHECK-NEXT: ret void +; + call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx4vi32( zeroinitializer, %bases, i64 7, i32 1) + ret void +} + +define void @test_prfw_gather_sxtw_index(ptr %base, %indexes){ +; CHECK-LABEL: define void @test_prfw_gather_sxtw_index( +; CHECK-SAME: ptr [[BASE:%.*]], [[INDEXES:%.*]]) { +; CHECK-NEXT: ret void +; + call void @llvm.aarch64.sve.prfw.gather.sxtw.index.nx4vi32( zeroinitializer, ptr %base, %indexes, i32 1) + ret void +} + +define void @test_prfw_gather_uxtw_index(ptr %base, %indexes){ +; CHECK-LABEL: define void @test_prfw_gather_uxtw_index( +; CHECK-SAME: ptr [[BASE:%.*]], [[INDEXES:%.*]]) { +; CHECK-NEXT: ret void +; + call void @llvm.aarch64.sve.prfw.gather.uxtw.index.nx4vi32( zeroinitializer, ptr %base, %indexes, i32 1) + ret void +} From 82cf6558e50ea7fe024264cc2fb76ca20450fb82 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Thu, 15 Aug 2024 17:14:48 +0800 Subject: [PATCH 022/441] [LoongArch] Pre-commit tests for validating the merge base offset in vecotrs. 
NFC --- .../CodeGen/LoongArch/merge-base-offset.ll | 201 ++++++++++++++++-- 1 file changed, 188 insertions(+), 13 deletions(-) diff --git a/llvm/test/CodeGen/LoongArch/merge-base-offset.ll b/llvm/test/CodeGen/LoongArch/merge-base-offset.ll index 32a4c4bdd1508a9..58a8e5d77c63fe7 100644 --- a/llvm/test/CodeGen/LoongArch/merge-base-offset.ll +++ b/llvm/test/CodeGen/LoongArch/merge-base-offset.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch32 --mattr=+d --verify-machineinstrs < %s \ +; RUN: llc --mtriple=loongarch32 --mattr=+lasx --verify-machineinstrs < %s \ ; RUN: | FileCheck --check-prefix=LA32 %s -; RUN: llc --mtriple=loongarch64 --mattr=+d --verify-machineinstrs < %s \ +; RUN: llc --mtriple=loongarch64 --mattr=+lasx --verify-machineinstrs < %s \ ; RUN: | FileCheck --check-prefix=LA64 %s -; RUN: llc --mtriple=loongarch64 --mattr=+d --verify-machineinstrs \ +; RUN: llc --mtriple=loongarch64 --mattr=+lasx --verify-machineinstrs \ ; RUN: --code-model=large < %s | FileCheck --check-prefix=LA64-LARGE %s @g_i8 = dso_local global i8 0 @@ -405,10 +405,7 @@ define dso_local void @store_f64() nounwind { ; LA32-LABEL: store_f64: ; LA32: # %bb.0: # %entry ; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_f64) -; LA32-NEXT: addi.w $a1, $zero, 1 -; LA32-NEXT: movgr2fr.w $fa0, $a1 -; LA32-NEXT: ffint.s.w $fa0, $fa0 -; LA32-NEXT: fcvt.d.s $fa0, $fa0 +; LA32-NEXT: vldi $vr0, -912 ; LA32-NEXT: fst.d $fa0, $a0, %pc_lo12(g_f64) ; LA32-NEXT: ret ; @@ -538,6 +535,184 @@ entry: ret void } +@g_i32x4_src = dso_local global [4 x i32] zeroinitializer, align 16 +@g_i32x4_dst = dso_local global [4 x i32] zeroinitializer, align 16 + +define dso_local void @copy_i32x4() nounwind { +; LA32-LABEL: copy_i32x4: +; LA32: # %bb.0: # %entry +; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_src) +; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(g_i32x4_src) +; LA32-NEXT: vld $vr0, $a0, 0 +; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_dst) +; 
LA32-NEXT: addi.w $a0, $a0, %pc_lo12(g_i32x4_dst) +; LA32-NEXT: vst $vr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: copy_i32x4: +; LA64: # %bb.0: # %entry +; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_src) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(g_i32x4_src) +; LA64-NEXT: vld $vr0, $a0, 0 +; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_dst) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(g_i32x4_dst) +; LA64-NEXT: vst $vr0, $a0, 0 +; LA64-NEXT: ret +; +; LA64-LARGE-LABEL: copy_i32x4: +; LA64-LARGE: # %bb.0: # %entry +; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_src) +; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_i32x4_src) +; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_i32x4_src) +; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_i32x4_src) +; LA64-LARGE-NEXT: add.d $a0, $a1, $a0 +; LA64-LARGE-NEXT: vld $vr0, $a0, 0 +; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_dst) +; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_i32x4_dst) +; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_i32x4_dst) +; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_i32x4_dst) +; LA64-LARGE-NEXT: add.d $a0, $a1, $a0 +; LA64-LARGE-NEXT: vst $vr0, $a0, 0 +; LA64-LARGE-NEXT: ret +entry: + %0 = load <4 x i32>, ptr @g_i32x4_src, align 16 + store <4 x i32> %0, ptr @g_i32x4_dst, align 16 + ret void +} + +@g_i32x8_src = dso_local global [8 x i32] zeroinitializer, align 32 +@g_i32x8_dst = dso_local global [8 x i32] zeroinitializer, align 32 + +define dso_local void @copy_i32x8() nounwind { +; LA32-LABEL: copy_i32x8: +; LA32: # %bb.0: # %entry +; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_src) +; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(g_i32x4_src) +; LA32-NEXT: xvld $xr0, $a0, 0 +; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_dst) +; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(g_i32x4_dst) +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: copy_i32x8: +; LA64: # %bb.0: # %entry +; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_src) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(g_i32x4_src) +; LA64-NEXT: xvld 
$xr0, $a0, 0 +; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_dst) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(g_i32x4_dst) +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret +; +; LA64-LARGE-LABEL: copy_i32x8: +; LA64-LARGE: # %bb.0: # %entry +; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_src) +; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_i32x4_src) +; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_i32x4_src) +; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_i32x4_src) +; LA64-LARGE-NEXT: add.d $a0, $a1, $a0 +; LA64-LARGE-NEXT: xvld $xr0, $a0, 0 +; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_dst) +; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_i32x4_dst) +; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_i32x4_dst) +; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_i32x4_dst) +; LA64-LARGE-NEXT: add.d $a0, $a1, $a0 +; LA64-LARGE-NEXT: xvst $xr0, $a0, 0 +; LA64-LARGE-NEXT: ret +entry: + %0 = load <8 x i32>, ptr @g_i32x4_src, align 32 + store <8 x i32> %0, ptr @g_i32x4_dst, align 32 + ret void +} + +@g_i8x16 = dso_local global <16 x i8> zeroinitializer, align 16 + +define void @copy_i8_to_i8x16() { +; LA32-LABEL: copy_i8_to_i8x16: +; LA32: # %bb.0: # %entry +; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_i8) +; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(g_i8) +; LA32-NEXT: vldrepl.b $vr0, $a0, 0 +; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_i8x16) +; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(g_i8x16) +; LA32-NEXT: vst $vr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: copy_i8_to_i8x16: +; LA64: # %bb.0: # %entry +; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_i8) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(g_i8) +; LA64-NEXT: vldrepl.b $vr0, $a0, 0 +; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_i8x16) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(g_i8x16) +; LA64-NEXT: vst $vr0, $a0, 0 +; LA64-NEXT: ret +; +; LA64-LARGE-LABEL: copy_i8_to_i8x16: +; LA64-LARGE: # %bb.0: # %entry +; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_i8) +; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_i8) +; LA64-LARGE-NEXT: lu32i.d $a1, 
%pc64_lo20(g_i8) +; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_i8) +; LA64-LARGE-NEXT: add.d $a0, $a1, $a0 +; LA64-LARGE-NEXT: vldrepl.b $vr0, $a0, 0 +; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_i8x16) +; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_i8x16) +; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_i8x16) +; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_i8x16) +; LA64-LARGE-NEXT: add.d $a0, $a1, $a0 +; LA64-LARGE-NEXT: vst $vr0, $a0, 0 +; LA64-LARGE-NEXT: ret +entry: + %0 = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr @g_i8, i32 0) + store <16 x i8> %0, ptr @g_i8x16, align 16 + ret void +} + +@g_i8x32 = dso_local global <32 x i8> zeroinitializer, align 32 + +define void @copy_i8_to_i8x32() { +; LA32-LABEL: copy_i8_to_i8x32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_i8) +; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(g_i8) +; LA32-NEXT: xvldrepl.b $xr0, $a0, 0 +; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_i8x32) +; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(g_i8x32) +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: copy_i8_to_i8x32: +; LA64: # %bb.0: # %entry +; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_i8) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(g_i8) +; LA64-NEXT: xvldrepl.b $xr0, $a0, 0 +; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_i8x32) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(g_i8x32) +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret +; +; LA64-LARGE-LABEL: copy_i8_to_i8x32: +; LA64-LARGE: # %bb.0: # %entry +; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_i8) +; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_i8) +; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_i8) +; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_i8) +; LA64-LARGE-NEXT: add.d $a0, $a1, $a0 +; LA64-LARGE-NEXT: xvldrepl.b $xr0, $a0, 0 +; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_i8x32) +; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_i8x32) +; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_i8x32) +; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_i8x32) +; LA64-LARGE-NEXT: 
add.d $a0, $a1, $a0 +; LA64-LARGE-NEXT: xvst $xr0, $a0, 0 +; LA64-LARGE-NEXT: ret +entry: + %0 = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr @g_i8, i32 0) + store <32 x i8> %0, ptr @g_i8x32, align 32 + ret void +} + @g_rmw = dso_local global i64 0 define dso_local void @rmw() nounwind { @@ -659,11 +834,11 @@ define dso_local void @control_flow_with_mem_access() nounwind { ; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_a32+4) ; LA32-NEXT: ld.w $a1, $a0, %pc_lo12(g_a32+4) ; LA32-NEXT: ori $a2, $zero, 1 -; LA32-NEXT: blt $a1, $a2, .LBB21_2 +; LA32-NEXT: blt $a1, $a2, .LBB25_2 ; LA32-NEXT: # %bb.1: # %if.then ; LA32-NEXT: ori $a1, $zero, 10 ; LA32-NEXT: st.w $a1, $a0, %pc_lo12(g_a32+4) -; LA32-NEXT: .LBB21_2: # %if.end +; LA32-NEXT: .LBB25_2: # %if.end ; LA32-NEXT: ret ; ; LA64-LABEL: control_flow_with_mem_access: @@ -671,11 +846,11 @@ define dso_local void @control_flow_with_mem_access() nounwind { ; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_a32+4) ; LA64-NEXT: ld.w $a1, $a0, %pc_lo12(g_a32+4) ; LA64-NEXT: ori $a2, $zero, 1 -; LA64-NEXT: blt $a1, $a2, .LBB21_2 +; LA64-NEXT: blt $a1, $a2, .LBB25_2 ; LA64-NEXT: # %bb.1: # %if.then ; LA64-NEXT: ori $a1, $zero, 10 ; LA64-NEXT: st.w $a1, $a0, %pc_lo12(g_a32+4) -; LA64-NEXT: .LBB21_2: # %if.end +; LA64-NEXT: .LBB25_2: # %if.end ; LA64-NEXT: ret ; ; LA64-LARGE-LABEL: control_flow_with_mem_access: @@ -686,7 +861,7 @@ define dso_local void @control_flow_with_mem_access() nounwind { ; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a32+4) ; LA64-LARGE-NEXT: ldx.w $a0, $a1, $a0 ; LA64-LARGE-NEXT: ori $a1, $zero, 1 -; LA64-LARGE-NEXT: blt $a0, $a1, .LBB21_2 +; LA64-LARGE-NEXT: blt $a0, $a1, .LBB25_2 ; LA64-LARGE-NEXT: # %bb.1: # %if.then ; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a32+4) ; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a32+4) @@ -694,7 +869,7 @@ define dso_local void @control_flow_with_mem_access() nounwind { ; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a32+4) ; LA64-LARGE-NEXT: ori $a2, $zero, 10 ; 
LA64-LARGE-NEXT: stx.w $a2, $a1, $a0 -; LA64-LARGE-NEXT: .LBB21_2: # %if.end +; LA64-LARGE-NEXT: .LBB25_2: # %if.end ; LA64-LARGE-NEXT: ret entry: %0 = load i32, ptr getelementptr inbounds ([1 x i32], ptr @g_a32, i32 1), align 4 From 569698443d2b1dad04dc4daa4559d754deabe64e Mon Sep 17 00:00:00 2001 From: Andrea Faulds Date: Thu, 15 Aug 2024 15:11:31 +0200 Subject: [PATCH 023/441] [mlir][gpu] Fix typo in test filename (#104053) The word "redule" doesn't appear anywhere else in the MLIR codebase and seems to be a typo of "reduce". --- ...ubgroup-redule-lowering.mlir => subgroup-reduce-lowering.mlir} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename mlir/test/Dialect/GPU/{subgroup-redule-lowering.mlir => subgroup-reduce-lowering.mlir} (100%) diff --git a/mlir/test/Dialect/GPU/subgroup-redule-lowering.mlir b/mlir/test/Dialect/GPU/subgroup-reduce-lowering.mlir similarity index 100% rename from mlir/test/Dialect/GPU/subgroup-redule-lowering.mlir rename to mlir/test/Dialect/GPU/subgroup-reduce-lowering.mlir From d2a8351be2b0cc8572de3014f1bac1f03fa92617 Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Thu, 15 Aug 2024 06:16:27 -0700 Subject: [PATCH 024/441] [CompilerRT][Tests] Fix profile/darwin-proof-of-concept.c (#104237) Fix profile/darwin-proof-of-concept.c on AppleSilicon Mac where there is a different page alignment. The previous fix to drop alignment is actually breaking the tests on Apple Silicon Mac. Revert to the original section alignment and requires an ARM64 target for this test to run. 
--- .../ContinuousSyncMode/darwin-proof-of-concept.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/compiler-rt/test/profile/ContinuousSyncMode/darwin-proof-of-concept.c b/compiler-rt/test/profile/ContinuousSyncMode/darwin-proof-of-concept.c index 3ed7c1894b6d106..d163a28d0e90ba6 100644 --- a/compiler-rt/test/profile/ContinuousSyncMode/darwin-proof-of-concept.c +++ b/compiler-rt/test/profile/ContinuousSyncMode/darwin-proof-of-concept.c @@ -4,12 +4,12 @@ // mode to a new platform, but is not in and of itself a test of the profiling // runtime. -// REQUIRES: darwin +// REQUIRES: darwin, target={{arm64.*}} // Align counters and data to the maximum expected page size (16K). // RUN: %clang -g -o %t %s \ -// RUN: -Wl,-sectalign,__DATA,__pcnts,0x1000 \ -// RUN: -Wl,-sectalign,__DATA,__pdata,0x1000 +// RUN: -Wl,-sectalign,__DATA,__pcnts,0x4000 \ +// RUN: -Wl,-sectalign,__DATA,__pdata,0x4000 // Create a 'profile' using mmap() and validate it. // RUN: %run %t create %t.tmpfile @@ -24,7 +24,7 @@ __attribute__((section("__DATA,__pcnts"))) int counters[] = {0xbad}; extern int cnts_start __asm("section$start$__DATA$__pcnts"); -const size_t cnts_len = 0x1000; +const size_t cnts_len = 0x4000; __attribute__((section("__DATA,__pdata"))) int data[] = {1, 2, 3}; extern int data_start __asm("section$start$__DATA$__pdata"); @@ -131,12 +131,12 @@ int main(int argc, char **argv) { fprintf(stderr, "__pcnts is not page-aligned: 0x%lx.\n", cnts_start_int); return EXIT_FAILURE; } - if (data_start_int % 0x1000 != 0) { + if (data_start_int % 0x4000 != 0) { fprintf(stderr, "__pdata is not correctly aligned: 0x%lx.\n", data_start_int); return EXIT_FAILURE; } - if (cnts_start_int + 0x1000 != data_start_int) { + if (cnts_start_int + 0x4000 != data_start_int) { fprintf(stderr, "__pdata not ordered after __pcnts.\n"); return EXIT_FAILURE; } From a4525fcc8f127139a493164c360725ae4c6c87b3 Mon Sep 17 00:00:00 2001 From: Jie Fu Date: Thu, 15 Aug 2024 21:17:12 +0800 Subject: 
[PATCH 025/441] [CodeGen] Fix -Wcovered-switch-default in Combiner.cpp (NFC) /llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp:220:3: error: default label in switch which covers all enumeration values [-Werror,-Wcovered-switch-default] default: ^ 1 error generated. --- llvm/lib/CodeGen/GlobalISel/Combiner.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp index 75b2525e368af68..c5ec73cd5c65d83 100644 --- a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp @@ -217,9 +217,8 @@ Combiner::WorkListMaintainer::create(Level Lvl, WorkListTy &WorkList, case Level::SinglePass: return std::make_unique>(WorkList, MRI); - default: - llvm_unreachable("Illegal ObserverLevel"); } + llvm_unreachable("Illegal ObserverLevel"); } Combiner::Combiner(MachineFunction &MF, CombinerInfo &CInfo, From 026d963cb004689477d2b5798cbba5ad41c25a70 Mon Sep 17 00:00:00 2001 From: Daniel Grumberg Date: Thu, 15 Aug 2024 14:19:49 +0100 Subject: [PATCH 026/441] [clang][ExtractAPI] Compute inherited availability information (#103040) Additionally this computes availability information for all platforms ahead of possibly introducing a flag to enable this behavior. 
rdar://123513706 --- clang/include/clang/AST/Availability.h | 4 + clang/lib/AST/Availability.cpp | 103 +++++++++-- .../Serialization/SymbolGraphSerializer.cpp | 31 ++-- .../test/ExtractAPI/inherited_availability.m | 175 ++++++++++++++++++ 4 files changed, 283 insertions(+), 30 deletions(-) create mode 100644 clang/test/ExtractAPI/inherited_availability.m diff --git a/clang/include/clang/AST/Availability.h b/clang/include/clang/AST/Availability.h index 26ae622e5b4496f..60ca1383f0a44ea 100644 --- a/clang/include/clang/AST/Availability.h +++ b/clang/include/clang/AST/Availability.h @@ -97,6 +97,10 @@ struct AvailabilityInfo { return UnconditionallyUnavailable; } + /// Augments the existing information with additional constraints provided by + /// \c Other. + void mergeWith(AvailabilityInfo Other); + AvailabilityInfo(StringRef Domain, VersionTuple I, VersionTuple D, VersionTuple O, bool U, bool UD, bool UU) : Domain(Domain), Introduced(I), Deprecated(D), Obsoleted(O), diff --git a/clang/lib/AST/Availability.cpp b/clang/lib/AST/Availability.cpp index 238359a2dedfcf4..cf040fc727d11f4 100644 --- a/clang/lib/AST/Availability.cpp +++ b/clang/lib/AST/Availability.cpp @@ -16,33 +16,104 @@ #include "clang/AST/Decl.h" #include "clang/Basic/TargetInfo.h" -namespace clang { +namespace { + +/// Represents the availability of a symbol across platforms. +struct AvailabilitySet { + bool UnconditionallyDeprecated = false; + bool UnconditionallyUnavailable = false; + + void insert(clang::AvailabilityInfo &&Availability) { + auto *Found = getForPlatform(Availability.Domain); + if (Found) + Found->mergeWith(std::move(Availability)); + else + Availabilities.emplace_back(std::move(Availability)); + } + + clang::AvailabilityInfo *getForPlatform(llvm::StringRef Domain) { + auto *It = llvm::find_if(Availabilities, + [Domain](const clang::AvailabilityInfo &Info) { + return Domain.compare(Info.Domain) == 0; + }); + return It == Availabilities.end() ? 
nullptr : It; + } -AvailabilityInfo AvailabilityInfo::createFromDecl(const Decl *Decl) { - ASTContext &Context = Decl->getASTContext(); - StringRef PlatformName = Context.getTargetInfo().getPlatformName(); - AvailabilityInfo Availability; +private: + llvm::SmallVector Availabilities; +}; +static void createInfoForDecl(const clang::Decl *Decl, + AvailabilitySet &Availabilities) { // Collect availability attributes from all redeclarations. for (const auto *RD : Decl->redecls()) { - for (const auto *A : RD->specific_attrs()) { - if (A->getPlatform()->getName() != PlatformName) - continue; - Availability = AvailabilityInfo( + for (const auto *A : RD->specific_attrs()) { + Availabilities.insert(clang::AvailabilityInfo( A->getPlatform()->getName(), A->getIntroduced(), A->getDeprecated(), - A->getObsoleted(), A->getUnavailable(), false, false); - break; + A->getObsoleted(), A->getUnavailable(), false, false)); } - if (const auto *A = RD->getAttr()) + if (const auto *A = RD->getAttr()) if (!A->isImplicit()) - Availability.UnconditionallyUnavailable = true; + Availabilities.UnconditionallyUnavailable = true; - if (const auto *A = RD->getAttr()) + if (const auto *A = RD->getAttr()) if (!A->isImplicit()) - Availability.UnconditionallyDeprecated = true; + Availabilities.UnconditionallyDeprecated = true; } - return Availability; +} + +} // namespace + +namespace clang { + +void AvailabilityInfo::mergeWith(AvailabilityInfo Other) { + if (isDefault() && Other.isDefault()) + return; + + if (Domain.empty()) + Domain = Other.Domain; + + UnconditionallyUnavailable |= Other.UnconditionallyUnavailable; + UnconditionallyDeprecated |= Other.UnconditionallyDeprecated; + Unavailable |= Other.Unavailable; + + Introduced = std::max(Introduced, Other.Introduced); + + // Default VersionTuple is 0.0.0 so if both are non default let's pick the + // smallest version number, otherwise select the one that is non-zero if there + // is one. 
+ if (!Deprecated.empty() && !Other.Deprecated.empty()) + Deprecated = std::min(Deprecated, Other.Deprecated); + else + Deprecated = std::max(Deprecated, Other.Deprecated); + + if (!Obsoleted.empty() && !Other.Obsoleted.empty()) + Obsoleted = std::min(Obsoleted, Other.Obsoleted); + else + Obsoleted = std::max(Obsoleted, Other.Obsoleted); +} + +AvailabilityInfo AvailabilityInfo::createFromDecl(const Decl *D) { + AvailabilitySet Availabilities; + // Walk DeclContexts upwards starting from D to find the combined availability + // of the symbol. + for (const auto *Ctx = D; Ctx; + Ctx = llvm::cast_or_null(Ctx->getDeclContext())) + createInfoForDecl(Ctx, Availabilities); + + if (auto *Avail = Availabilities.getForPlatform( + D->getASTContext().getTargetInfo().getPlatformName())) { + Avail->UnconditionallyDeprecated = Availabilities.UnconditionallyDeprecated; + Avail->UnconditionallyUnavailable = + Availabilities.UnconditionallyUnavailable; + return std::move(*Avail); + } + + AvailabilityInfo Avail; + Avail.UnconditionallyDeprecated = Availabilities.UnconditionallyDeprecated; + Avail.UnconditionallyUnavailable = Availabilities.UnconditionallyUnavailable; + return Avail; } } // namespace clang diff --git a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp index 6e56ee5b573f66a..84ed5467dd2fb91 100644 --- a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp +++ b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp @@ -171,22 +171,25 @@ std::optional serializeAvailability(const AvailabilityInfo &Avail) { UnconditionallyDeprecated["isUnconditionallyDeprecated"] = true; AvailabilityArray.emplace_back(std::move(UnconditionallyDeprecated)); } - Object Availability; - - Availability["domain"] = Avail.Domain; - - if (Avail.isUnavailable()) { - Availability["isUnconditionallyUnavailable"] = true; - } else { - serializeObject(Availability, "introduced", - 
serializeSemanticVersion(Avail.Introduced)); - serializeObject(Availability, "deprecated", - serializeSemanticVersion(Avail.Deprecated)); - serializeObject(Availability, "obsoleted", - serializeSemanticVersion(Avail.Obsoleted)); + + if (Avail.Domain.str() != "") { + Object Availability; + Availability["domain"] = Avail.Domain; + + if (Avail.isUnavailable()) { + Availability["isUnconditionallyUnavailable"] = true; + } else { + serializeObject(Availability, "introduced", + serializeSemanticVersion(Avail.Introduced)); + serializeObject(Availability, "deprecated", + serializeSemanticVersion(Avail.Deprecated)); + serializeObject(Availability, "obsoleted", + serializeSemanticVersion(Avail.Obsoleted)); + } + + AvailabilityArray.emplace_back(std::move(Availability)); } - AvailabilityArray.emplace_back(std::move(Availability)); return AvailabilityArray; } diff --git a/clang/test/ExtractAPI/inherited_availability.m b/clang/test/ExtractAPI/inherited_availability.m new file mode 100644 index 000000000000000..c24e7fa8e208f03 --- /dev/null +++ b/clang/test/ExtractAPI/inherited_availability.m @@ -0,0 +1,175 @@ +// RUN: rm -rf %t +// RUN: %clang_cc1 -extract-api --pretty-sgf --emit-sgf-symbol-labels-for-testing -triple arm64-apple-macosx \ +// RUN: -x objective-c-header %s -o %t/output.symbols.json -verify + + +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix A +__attribute__((availability(macos, introduced=9.0, deprecated=12.0, obsoleted=20.0))) +@interface A +// A-LABEL: "!testLabel": "c:objc(cs)A" +// A: "availability": [ +// A-NEXT: { +// A-NEXT: "deprecated": { +// A-NEXT: "major": 12, +// A-NEXT: "minor": 0, +// A-NEXT: "patch": 0 +// A-NEXT: } +// A-NEXT: "domain": "macos" +// A-NEXT: "introduced": { +// A-NEXT: "major": 9, +// A-NEXT: "minor": 0, +// A-NEXT: "patch": 0 +// A-NEXT: } +// A-NEXT: "obsoleted": { +// A-NEXT: "major": 20, +// A-NEXT: "minor": 0, +// A-NEXT: "patch": 0 +// A-NEXT: } +// A-NEXT: } +// A-NEXT: ] + +// RUN: FileCheck %s 
--input-file %t/output.symbols.json --check-prefix CP +@property(class) int CP; +// CP-LABEL: "!testLabel": "c:objc(cs)A(cpy)CP" +// CP: "availability": [ +// CP-NEXT: { +// CP-NEXT: "deprecated": { +// CP-NEXT: "major": 12, +// CP-NEXT: "minor": 0, +// CP-NEXT: "patch": 0 +// CP-NEXT: } +// CP-NEXT: "domain": "macos" +// CP-NEXT: "introduced": { +// CP-NEXT: "major": 9, +// CP-NEXT: "minor": 0, +// CP-NEXT: "patch": 0 +// CP-NEXT: } +// CP-NEXT: "obsoleted": { +// CP-NEXT: "major": 20, +// CP-NEXT: "minor": 0, +// CP-NEXT: "patch": 0 +// CP-NEXT: } +// CP-NEXT: } +// CP-NEXT: ] + +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix IP +@property int IP; +// IP-LABEL: "!testLabel": "c:objc(cs)A(py)IP" +// IP: "availability": [ +// IP-NEXT: { +// IP-NEXT: "deprecated": { +// IP-NEXT: "major": 12, +// IP-NEXT: "minor": 0, +// IP-NEXT: "patch": 0 +// IP-NEXT: } +// IP-NEXT: "domain": "macos" +// IP-NEXT: "introduced": { +// IP-NEXT: "major": 9, +// IP-NEXT: "minor": 0, +// IP-NEXT: "patch": 0 +// IP-NEXT: } +// IP-NEXT: "obsoleted": { +// IP-NEXT: "major": 20, +// IP-NEXT: "minor": 0, +// IP-NEXT: "patch": 0 +// IP-NEXT: } +// IP-NEXT: } +// IP-NEXT: ] + +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix MR +@property int moreRestrictive __attribute__((availability(macos, introduced=10.0, deprecated=11.0, obsoleted=19.0))); +// MR-LABEL: "!testLabel": "c:objc(cs)A(py)moreRestrictive" +// MR: "availability": [ +// MR-NEXT: { +// MR-NEXT: "deprecated": { +// MR-NEXT: "major": 11, +// MR-NEXT: "minor": 0, +// MR-NEXT: "patch": 0 +// MR-NEXT: } +// MR-NEXT: "domain": "macos" +// MR-NEXT: "introduced": { +// MR-NEXT: "major": 10, +// MR-NEXT: "minor": 0, +// MR-NEXT: "patch": 0 +// MR-NEXT: } +// MR-NEXT: "obsoleted": { +// MR-NEXT: "major": 19, +// MR-NEXT: "minor": 0, +// MR-NEXT: "patch": 0 +// MR-NEXT: } +// MR-NEXT: } +// MR-NEXT: ] + +@end + +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix B 
+__attribute__((deprecated("B is deprecated"))) +@interface B +// B-LABEL: "!testLabel": "c:objc(cs)B" +// B: "availability": [ +// B-NEXT: { +// B-NEXT: "domain": "*" +// B-NEXT: "isUnconditionallyDeprecated": true +// B-NEXT: } +// B-NEXT: ] + +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix BIP +@property int BIP; +// BIP-LABEL: "!testLabel": "c:objc(cs)B(py)BIP" +// BIP: "availability": [ +// BIP-NEXT: { +// BIP-NEXT: "domain": "*" +// BIP-NEXT: "isUnconditionallyDeprecated": true +// BIP-NEXT: } +// BIP-NEXT: ] +@end + +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix C +__attribute__((availability(macos, unavailable))) +@interface C +// C-LABEL: "!testLabel": "c:objc(cs)C" +// C: "availability": [ +// C-NEXT: { +// C-NEXT: "domain": "macos" +// C-NEXT: "isUnconditionallyUnavailable": true +// C-NEXT: } +// C-NEXT: ] + +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix CIP +@property int CIP; +// CIP-LABEL: "!testLabel": "c:objc(cs)C(py)CIP" +// CIP: "availability": [ +// CIP-NEXT: { +// CIP-NEXT: "domain": "macos" +// CIP-NEXT: "isUnconditionallyUnavailable": true +// CIP-NEXT: } +// CIP-NEXT: ] +@end + +@interface D +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix DIP +@property int DIP __attribute__((availability(macos, introduced=10.0, deprecated=11.0, obsoleted=19.0))); +// DIP-LABEL: "!testLabel": "c:objc(cs)D(py)DIP" +// DIP: "availability": [ +// DIP-NEXT: { +// DIP-NEXT: "deprecated": { +// DIP-NEXT: "major": 11, +// DIP-NEXT: "minor": 0, +// DIP-NEXT: "patch": 0 +// DIP-NEXT: } +// DIP-NEXT: "domain": "macos" +// DIP-NEXT: "introduced": { +// DIP-NEXT: "major": 10, +// DIP-NEXT: "minor": 0, +// DIP-NEXT: "patch": 0 +// DIP-NEXT: } +// DIP-NEXT: "obsoleted": { +// DIP-NEXT: "major": 19, +// DIP-NEXT: "minor": 0, +// DIP-NEXT: "patch": 0 +// DIP-NEXT: } +// DIP-NEXT: } +// DIP-NEXT: ] +@end + +// expected-no-diagnostics From 
6543bd718e6a4e4d6a8473b478f8a46d3eb1562a Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Thu, 15 Aug 2024 16:41:52 +0300 Subject: [PATCH 027/441] [DataLayout] Extract loop body into a function to reduce nesting (NFC) (#104420) Also, use `iterator_range` version of `split`. Pull Request: https://github.com/llvm/llvm-project/pull/104420 --- llvm/include/llvm/IR/DataLayout.h | 10 +- llvm/lib/IR/DataLayout.cpp | 495 ++++++++++++++------------- llvm/unittests/IR/DataLayoutTest.cpp | 10 + 3 files changed, 267 insertions(+), 248 deletions(-) diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h index 1185939cd9c75b7..228b723ee663ff7 100644 --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -165,9 +165,11 @@ class DataLayout { /// Internal helper method that returns requested alignment for type. Align getAlignment(Type *Ty, bool abi_or_pref) const; - /// Attempts to parse a target data specification string and reports an error - /// if the string is malformed. - Error parseSpecifier(StringRef Desc); + /// Attempts to parse a single specification. + Error parseSpecification(StringRef Specification); + + /// Attempts to parse a data layout string. + Error parseLayoutString(StringRef LayoutString); public: /// Constructs a DataLayout with default values. @@ -188,7 +190,7 @@ class DataLayout { /// Parse a data layout string and return the layout. Return an error /// description on failure. - static Expected parse(StringRef LayoutDescription); + static Expected parse(StringRef LayoutString); /// Layout endianness... 
bool isLittleEndian() const { return !BigEndian; } diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp index 5f9a0fbc61905fc..24c29458abfa819 100644 --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -17,6 +17,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" @@ -215,7 +216,7 @@ DataLayout::DataLayout() PointerSpecs(ArrayRef(DefaultPointerSpecs)) {} DataLayout::DataLayout(StringRef LayoutString) : DataLayout() { - if (Error Err = parseSpecifier(LayoutString)) + if (Error Err = parseLayoutString(LayoutString)) report_fatal_error(std::move(Err)); } @@ -260,9 +261,9 @@ bool DataLayout::operator==(const DataLayout &Other) const { StructPrefAlignment == Other.StructPrefAlignment; } -Expected DataLayout::parse(StringRef LayoutDescription) { +Expected DataLayout::parse(StringRef LayoutString) { DataLayout Layout; - if (Error Err = Layout.parseSpecifier(LayoutDescription)) + if (Error Err = Layout.parseLayoutString(LayoutString)) return std::move(Err); return Layout; } @@ -311,289 +312,295 @@ static Error getAddrSpace(StringRef R, unsigned &AddrSpace) { return Error::success(); } -Error DataLayout::parseSpecifier(StringRef Desc) { - StringRepresentation = std::string(Desc); - while (!Desc.empty()) { - // Split at '-'. - std::pair Split; - if (Error Err = ::split(Desc, '-', Split)) - return Err; - Desc = Split.second; - - // Split at ':'. - if (Error Err = ::split(Split.first, ':', Split)) - return Err; +Error DataLayout::parseSpecification(StringRef Spec) { + // Split at ':'. + std::pair Split; + if (Error Err = ::split(Spec, ':', Split)) + return Err; - // Aliases used below. - StringRef &Tok = Split.first; // Current token. - StringRef &Rest = Split.second; // The rest of the string. + // Aliases used below. + StringRef &Tok = Split.first; // Current token. 
+ StringRef &Rest = Split.second; // The rest of the string. - if (Tok == "ni") { - do { - if (Error Err = ::split(Rest, ':', Split)) - return Err; - Rest = Split.second; - unsigned AS; - if (Error Err = getInt(Split.first, AS)) - return Err; - if (AS == 0) - return reportError("Address space 0 can never be non-integral"); - NonIntegralAddressSpaces.push_back(AS); - } while (!Rest.empty()); + if (Tok == "ni") { + do { + if (Error Err = ::split(Rest, ':', Split)) + return Err; + Rest = Split.second; + unsigned AS; + if (Error Err = getInt(Split.first, AS)) + return Err; + if (AS == 0) + return reportError("Address space 0 can never be non-integral"); + NonIntegralAddressSpaces.push_back(AS); + } while (!Rest.empty()); - continue; - } + return Error::success(); + } - char SpecifierChar = Tok.front(); - Tok = Tok.substr(1); + char SpecifierChar = Tok.front(); + Tok = Tok.substr(1); - switch (SpecifierChar) { - case 's': - // Deprecated, but ignoring here to preserve loading older textual llvm - // ASM file - break; - case 'E': - BigEndian = true; - break; - case 'e': - BigEndian = false; - break; - case 'p': { - // Address space. - unsigned AddrSpace = 0; - if (!Tok.empty()) - if (Error Err = getInt(Tok, AddrSpace)) - return Err; - if (!isUInt<24>(AddrSpace)) - return reportError("Invalid address space, must be a 24-bit integer"); - - // Size. - if (Rest.empty()) - return reportError( - "Missing size specification for pointer in datalayout string"); - if (Error Err = ::split(Rest, ':', Split)) - return Err; - unsigned PointerMemSize; - if (Error Err = getInt(Tok, PointerMemSize)) + switch (SpecifierChar) { + case 's': + // Deprecated, but ignoring here to preserve loading older textual llvm + // ASM file + break; + case 'E': + BigEndian = true; + break; + case 'e': + BigEndian = false; + break; + case 'p': { + // Address space. 
+ unsigned AddrSpace = 0; + if (!Tok.empty()) + if (Error Err = getInt(Tok, AddrSpace)) return Err; - if (!PointerMemSize) - return reportError("Invalid pointer size of 0 bytes"); + if (!isUInt<24>(AddrSpace)) + return reportError("Invalid address space, must be a 24-bit integer"); + + // Size. + if (Rest.empty()) + return reportError( + "Missing size specification for pointer in datalayout string"); + if (Error Err = ::split(Rest, ':', Split)) + return Err; + unsigned PointerMemSize; + if (Error Err = getInt(Tok, PointerMemSize)) + return Err; + if (!PointerMemSize) + return reportError("Invalid pointer size of 0 bytes"); + + // ABI alignment. + if (Rest.empty()) + return reportError( + "Missing alignment specification for pointer in datalayout string"); + if (Error Err = ::split(Rest, ':', Split)) + return Err; + unsigned PointerABIAlign; + if (Error Err = getIntInBytes(Tok, PointerABIAlign)) + return Err; + if (!isPowerOf2_64(PointerABIAlign)) + return reportError("Pointer ABI alignment must be a power of 2"); - // ABI alignment. - if (Rest.empty()) - return reportError( - "Missing alignment specification for pointer in datalayout string"); + // Size of index used in GEP for address calculation. + // The parameter is optional. By default it is equal to size of pointer. + unsigned IndexSize = PointerMemSize; + + // Preferred alignment. + unsigned PointerPrefAlign = PointerABIAlign; + if (!Rest.empty()) { if (Error Err = ::split(Rest, ':', Split)) return Err; - unsigned PointerABIAlign; - if (Error Err = getIntInBytes(Tok, PointerABIAlign)) + if (Error Err = getIntInBytes(Tok, PointerPrefAlign)) return Err; - if (!isPowerOf2_64(PointerABIAlign)) - return reportError("Pointer ABI alignment must be a power of 2"); + if (!isPowerOf2_64(PointerPrefAlign)) + return reportError("Pointer preferred alignment must be a power of 2"); - // Size of index used in GEP for address calculation. - // The parameter is optional. By default it is equal to size of pointer. 
- unsigned IndexSize = PointerMemSize; - - // Preferred alignment. - unsigned PointerPrefAlign = PointerABIAlign; + // Now read the index. It is the second optional parameter here. if (!Rest.empty()) { if (Error Err = ::split(Rest, ':', Split)) return Err; - if (Error Err = getIntInBytes(Tok, PointerPrefAlign)) + if (Error Err = getInt(Tok, IndexSize)) return Err; - if (!isPowerOf2_64(PointerPrefAlign)) - return reportError( - "Pointer preferred alignment must be a power of 2"); - - // Now read the index. It is the second optional parameter here. - if (!Rest.empty()) { - if (Error Err = ::split(Rest, ':', Split)) - return Err; - if (Error Err = getInt(Tok, IndexSize)) - return Err; - if (!IndexSize) - return reportError("Invalid index size of 0 bytes"); - } + if (!IndexSize) + return reportError("Invalid index size of 0 bytes"); } - if (Error Err = setPointerSpec( - AddrSpace, PointerMemSize, assumeAligned(PointerABIAlign), - assumeAligned(PointerPrefAlign), IndexSize)) - return Err; - break; } + if (Error Err = setPointerSpec(AddrSpace, PointerMemSize, + assumeAligned(PointerABIAlign), + assumeAligned(PointerPrefAlign), IndexSize)) + return Err; + break; + } + case 'i': + case 'v': + case 'f': + case 'a': { + TypeSpecifier Specifier; + switch (SpecifierChar) { + default: + llvm_unreachable("Unexpected specifier!"); case 'i': + Specifier = TypeSpecifier::Integer; + break; case 'v': + Specifier = TypeSpecifier::Vector; + break; case 'f': - case 'a': { - TypeSpecifier Specifier; - switch (SpecifierChar) { - default: - llvm_unreachable("Unexpected specifier!"); - case 'i': - Specifier = TypeSpecifier::Integer; - break; - case 'v': - Specifier = TypeSpecifier::Vector; - break; - case 'f': - Specifier = TypeSpecifier::Float; - break; - case 'a': - Specifier = TypeSpecifier::Aggregate; - break; - } + Specifier = TypeSpecifier::Float; + break; + case 'a': + Specifier = TypeSpecifier::Aggregate; + break; + } - // Bit size. 
- unsigned Size = 0; - if (!Tok.empty()) - if (Error Err = getInt(Tok, Size)) - return Err; + // Bit size. + unsigned Size = 0; + if (!Tok.empty()) + if (Error Err = getInt(Tok, Size)) + return Err; - if (Specifier == TypeSpecifier::Aggregate && Size != 0) - return reportError( - "Sized aggregate specification in datalayout string"); + if (Specifier == TypeSpecifier::Aggregate && Size != 0) + return reportError("Sized aggregate specification in datalayout string"); - // ABI alignment. - if (Rest.empty()) - return reportError( - "Missing alignment specification in datalayout string"); + // ABI alignment. + if (Rest.empty()) + return reportError( + "Missing alignment specification in datalayout string"); + if (Error Err = ::split(Rest, ':', Split)) + return Err; + unsigned ABIAlign; + if (Error Err = getIntInBytes(Tok, ABIAlign)) + return Err; + if (Specifier != TypeSpecifier::Aggregate && !ABIAlign) + return reportError( + "ABI alignment specification must be >0 for non-aggregate types"); + + if (!isUInt<16>(ABIAlign)) + return reportError("Invalid ABI alignment, must be a 16bit integer"); + if (ABIAlign != 0 && !isPowerOf2_64(ABIAlign)) + return reportError("Invalid ABI alignment, must be a power of 2"); + if (Specifier == TypeSpecifier::Integer && Size == 8 && ABIAlign != 1) + return reportError("Invalid ABI alignment, i8 must be naturally aligned"); + + // Preferred alignment. 
+ unsigned PrefAlign = ABIAlign; + if (!Rest.empty()) { if (Error Err = ::split(Rest, ':', Split)) return Err; - unsigned ABIAlign; - if (Error Err = getIntInBytes(Tok, ABIAlign)) + if (Error Err = getIntInBytes(Tok, PrefAlign)) return Err; - if (Specifier != TypeSpecifier::Aggregate && !ABIAlign) - return reportError( - "ABI alignment specification must be >0 for non-aggregate types"); + } - if (!isUInt<16>(ABIAlign)) - return reportError("Invalid ABI alignment, must be a 16bit integer"); - if (ABIAlign != 0 && !isPowerOf2_64(ABIAlign)) - return reportError("Invalid ABI alignment, must be a power of 2"); - if (Specifier == TypeSpecifier::Integer && Size == 8 && ABIAlign != 1) - return reportError( - "Invalid ABI alignment, i8 must be naturally aligned"); + if (!isUInt<16>(PrefAlign)) + return reportError( + "Invalid preferred alignment, must be a 16bit integer"); + if (PrefAlign != 0 && !isPowerOf2_64(PrefAlign)) + return reportError("Invalid preferred alignment, must be a power of 2"); - // Preferred alignment. - unsigned PrefAlign = ABIAlign; - if (!Rest.empty()) { - if (Error Err = ::split(Rest, ':', Split)) - return Err; - if (Error Err = getIntInBytes(Tok, PrefAlign)) - return Err; - } + if (Error Err = setPrimitiveSpec(Specifier, Size, assumeAligned(ABIAlign), + assumeAligned(PrefAlign))) + return Err; - if (!isUInt<16>(PrefAlign)) + break; + } + case 'n': // Native integer types. 
+ while (true) { + unsigned Width; + if (Error Err = getInt(Tok, Width)) + return Err; + if (Width == 0) return reportError( - "Invalid preferred alignment, must be a 16bit integer"); - if (PrefAlign != 0 && !isPowerOf2_64(PrefAlign)) - return reportError("Invalid preferred alignment, must be a power of 2"); - - if (Error Err = setPrimitiveSpec(Specifier, Size, assumeAligned(ABIAlign), - assumeAligned(PrefAlign))) + "Zero width native integer type in datalayout string"); + LegalIntWidths.push_back(Width); + if (Rest.empty()) + break; + if (Error Err = ::split(Rest, ':', Split)) return Err; - - break; } - case 'n': // Native integer types. - while (true) { - unsigned Width; - if (Error Err = getInt(Tok, Width)) - return Err; - if (Width == 0) - return reportError( - "Zero width native integer type in datalayout string"); - LegalIntWidths.push_back(Width); - if (Rest.empty()) - break; - if (Error Err = ::split(Rest, ':', Split)) - return Err; - } - break; - case 'S': { // Stack natural alignment. - uint64_t Alignment; - if (Error Err = getIntInBytes(Tok, Alignment)) - return Err; - if (Alignment != 0 && !llvm::isPowerOf2_64(Alignment)) - return reportError("Alignment is neither 0 nor a power of 2"); - StackNaturalAlign = MaybeAlign(Alignment); + break; + case 'S': { // Stack natural alignment. 
+ uint64_t Alignment; + if (Error Err = getIntInBytes(Tok, Alignment)) + return Err; + if (Alignment != 0 && !llvm::isPowerOf2_64(Alignment)) + return reportError("Alignment is neither 0 nor a power of 2"); + StackNaturalAlign = MaybeAlign(Alignment); + break; + } + case 'F': { + switch (Tok.front()) { + case 'i': + TheFunctionPtrAlignType = FunctionPtrAlignType::Independent; break; - } - case 'F': { - switch (Tok.front()) { - case 'i': - TheFunctionPtrAlignType = FunctionPtrAlignType::Independent; - break; - case 'n': - TheFunctionPtrAlignType = FunctionPtrAlignType::MultipleOfFunctionAlign; - break; - default: - return reportError("Unknown function pointer alignment type in " - "datalayout string"); - } - Tok = Tok.substr(1); - uint64_t Alignment; - if (Error Err = getIntInBytes(Tok, Alignment)) - return Err; - if (Alignment != 0 && !llvm::isPowerOf2_64(Alignment)) - return reportError("Alignment is neither 0 nor a power of 2"); - FunctionPtrAlign = MaybeAlign(Alignment); + case 'n': + TheFunctionPtrAlignType = FunctionPtrAlignType::MultipleOfFunctionAlign; break; + default: + return reportError("Unknown function pointer alignment type in " + "datalayout string"); } - case 'P': { // Function address space. - if (Error Err = getAddrSpace(Tok, ProgramAddrSpace)) - return Err; + Tok = Tok.substr(1); + uint64_t Alignment; + if (Error Err = getIntInBytes(Tok, Alignment)) + return Err; + if (Alignment != 0 && !llvm::isPowerOf2_64(Alignment)) + return reportError("Alignment is neither 0 nor a power of 2"); + FunctionPtrAlign = MaybeAlign(Alignment); + break; + } + case 'P': { // Function address space. + if (Error Err = getAddrSpace(Tok, ProgramAddrSpace)) + return Err; + break; + } + case 'A': { // Default stack/alloca address space. + if (Error Err = getAddrSpace(Tok, AllocaAddrSpace)) + return Err; + break; + } + case 'G': { // Default address space for global variables. 
+ if (Error Err = getAddrSpace(Tok, DefaultGlobalsAddrSpace)) + return Err; + break; + } + case 'm': + if (!Tok.empty()) + return reportError("Unexpected trailing characters after mangling " + "specifier in datalayout string"); + if (Rest.empty()) + return reportError("Expected mangling specifier in datalayout string"); + if (Rest.size() > 1) + return reportError("Unknown mangling specifier in datalayout string"); + switch (Rest[0]) { + default: + return reportError("Unknown mangling in datalayout string"); + case 'e': + ManglingMode = MM_ELF; break; - } - case 'A': { // Default stack/alloca address space. - if (Error Err = getAddrSpace(Tok, AllocaAddrSpace)) - return Err; + case 'l': + ManglingMode = MM_GOFF; break; - } - case 'G': { // Default address space for global variables. - if (Error Err = getAddrSpace(Tok, DefaultGlobalsAddrSpace)) - return Err; + case 'o': + ManglingMode = MM_MachO; break; - } case 'm': - if (!Tok.empty()) - return reportError("Unexpected trailing characters after mangling " - "specifier in datalayout string"); - if (Rest.empty()) - return reportError("Expected mangling specifier in datalayout string"); - if (Rest.size() > 1) - return reportError("Unknown mangling specifier in datalayout string"); - switch(Rest[0]) { - default: - return reportError("Unknown mangling in datalayout string"); - case 'e': - ManglingMode = MM_ELF; - break; - case 'l': - ManglingMode = MM_GOFF; - break; - case 'o': - ManglingMode = MM_MachO; - break; - case 'm': - ManglingMode = MM_Mips; - break; - case 'w': - ManglingMode = MM_WinCOFF; - break; - case 'x': - ManglingMode = MM_WinCOFFX86; - break; - case 'a': - ManglingMode = MM_XCOFF; - break; - } + ManglingMode = MM_Mips; break; - default: - return reportError("Unknown specifier in datalayout string"); + case 'w': + ManglingMode = MM_WinCOFF; + break; + case 'x': + ManglingMode = MM_WinCOFFX86; + break; + case 'a': + ManglingMode = MM_XCOFF; break; } + break; + default: + return reportError("Unknown 
specifier in datalayout string"); + } + + return Error::success(); +} + +Error DataLayout::parseLayoutString(StringRef LayoutString) { + StringRepresentation = std::string(LayoutString); + + if (LayoutString.empty()) + return Error::success(); + + // Split the data layout string into specifications separated by '-' and + // parse each specification individually, updating internal data structures. + for (StringRef Spec : split(LayoutString, '-')) { + if (Spec.empty()) + return createStringError("empty specification is not allowed"); + if (Error Err = parseSpecification(Spec)) + return Err; } return Error::success(); diff --git a/llvm/unittests/IR/DataLayoutTest.cpp b/llvm/unittests/IR/DataLayoutTest.cpp index dcb2e614f4c40de..f5c930ebdbb9c46 100644 --- a/llvm/unittests/IR/DataLayoutTest.cpp +++ b/llvm/unittests/IR/DataLayoutTest.cpp @@ -124,6 +124,16 @@ TEST(DataLayoutTest, ParseErrors) { FailedWithMessage("Alignment is neither 0 nor a power of 2")); } +TEST(DataLayout, LayoutStringFormat) { + for (StringRef Str : {"", "e", "m:e", "m:e-e"}) + EXPECT_THAT_EXPECTED(DataLayout::parse(Str), Succeeded()); + + for (StringRef Str : {"-", "e-", "-m:e", "m:e--e"}) + EXPECT_THAT_EXPECTED( + DataLayout::parse(Str), + FailedWithMessage("empty specification is not allowed")); +} + TEST(DataLayoutTest, CopyAssignmentInvalidatesStructLayout) { DataLayout DL1 = cantFail(DataLayout::parse("p:32:32")); DataLayout DL2 = cantFail(DataLayout::parse("p:64:64")); From aaab4fcf656df30a533848d06400544d01992393 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 15 Aug 2024 15:19:41 +0200 Subject: [PATCH 028/441] Revert "[SLP][NFC]Remove unused using declarations, reduce mem usage in containers, NFC" This reverts commit e1b15504a831e63af6fb9a6e83faaa10ef425ae6. 
This causes compile-time regressions, see: http://llvm-compile-time-tracker.com/compare.php?from=e687a9f2dd389a54a10456e57693f93df0c64c02&to=e1b15504a831e63af6fb9a6e83faaa10ef425ae6&stat=instructions:u Probably some of the new SmallVector sizes are sub-optimal. --- .../llvm/Transforms/Vectorize/SLPVectorizer.h | 4 +-- .../Transforms/Vectorize/SLPVectorizer.cpp | 34 ++++++++++--------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h index 809beadb5f7df3c..95531544a1c8176 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h +++ b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h @@ -56,9 +56,9 @@ class BoUpSLP; } // end namespace slpvectorizer struct SLPVectorizerPass : public PassInfoMixin { - using StoreList = SmallVector; + using StoreList = SmallVector; using StoreListMap = MapVector; - using GEPList = SmallVector; + using GEPList = SmallVector; using GEPListMap = MapVector; using InstSetVector = SmallSetVector; diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 81c4193cfe1081e..81841a8f692870f 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1243,11 +1243,13 @@ class BoUpSLP { StridedVectorize }; - using ValueList = SmallVector; - using ValueSet = SmallPtrSet; + using ValueList = SmallVector; + using InstrList = SmallVector; + using ValueSet = SmallPtrSet; + using StoreList = SmallVector; using ExtraValueToDebugLocsMap = MapVector>; - using OrdersType = SmallVector; + using OrdersType = SmallVector; BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti, TargetLibraryInfo *TLi, AAResults *Aa, LoopInfo *Li, @@ -1469,7 +1471,7 @@ class BoUpSLP { /// \param TryRecursiveCheck used to check if long masked gather can be /// represented as a serie of loads/insert subvector, 
if profitable. LoadsState canVectorizeLoads(ArrayRef VL, const Value *VL0, - OrdersType &Order, + SmallVectorImpl &Order, SmallVectorImpl &PointerOps, bool TryRecursiveCheck = true) const; @@ -2838,7 +2840,7 @@ class BoUpSLP { /// \param ResizeAllowed indicates whether it is allowed to handle subvector /// extract order. bool canReuseExtract(ArrayRef VL, Value *OpValue, - OrdersType &CurrentOrder, + SmallVectorImpl &CurrentOrder, bool ResizeAllowed = false) const; /// Vectorize a single entry in the tree. @@ -3082,10 +3084,10 @@ class BoUpSLP { CombinedOpcode CombinedOp = NotCombinedOp; /// Does this sequence require some shuffling? - SmallVector ReuseShuffleIndices; + SmallVector ReuseShuffleIndices; /// Does this entry require reordering? - OrdersType ReorderIndices; + SmallVector ReorderIndices; /// Points back to the VectorizableTree. /// @@ -3106,7 +3108,7 @@ class BoUpSLP { /// The operands of each instruction in each lane Operands[op_index][lane]. /// Note: This helps avoid the replication of the code that performs the /// reordering of operands during buildTree_rec() and vectorizeTree(). - SmallVector Operands; + SmallVector Operands; /// The main/alternate instruction. Instruction *MainOp = nullptr; @@ -3714,13 +3716,13 @@ class BoUpSLP { /// The dependent memory instructions. /// This list is derived on demand in calculateDependencies(). - SmallVector MemoryDependencies; + SmallVector MemoryDependencies; /// List of instructions which this instruction could be control dependent /// on. Allowing such nodes to be scheduled below this one could introduce /// a runtime fault which didn't exist in the original program. /// ex: this is a load or udiv following a readonly call which inf loops - SmallVector ControlDependencies; + SmallVector ControlDependencies; /// This ScheduleData is in the current scheduling region if this matches /// the current SchedulingRegionID of BlockScheduling. 
@@ -4298,12 +4300,12 @@ static void reorderReuses(SmallVectorImpl &Reuses, ArrayRef Mask) { /// the original order of the scalars. Procedure transforms the provided order /// in accordance with the given \p Mask. If the resulting \p Order is just an /// identity order, \p Order is cleared. -static void reorderOrder(BoUpSLP::OrdersType &Order, ArrayRef Mask, +static void reorderOrder(SmallVectorImpl &Order, ArrayRef Mask, bool BottomOrder = false) { assert(!Mask.empty() && "Expected non-empty mask."); unsigned Sz = Mask.size(); if (BottomOrder) { - BoUpSLP::OrdersType PrevOrder; + SmallVector PrevOrder; if (Order.empty()) { PrevOrder.resize(Sz); std::iota(PrevOrder.begin(), PrevOrder.end(), 0); @@ -4693,7 +4695,7 @@ getShuffleCost(const TargetTransformInfo &TTI, TTI::ShuffleKind Kind, } BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads( - ArrayRef VL, const Value *VL0, OrdersType &Order, + ArrayRef VL, const Value *VL0, SmallVectorImpl &Order, SmallVectorImpl &PointerOps, bool TryRecursiveCheck) const { // Check that a vectorized load would load the same memory as a scalar // load. For example, we don't want to vectorize loads that are smaller @@ -4821,7 +4823,7 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads( for (unsigned Cnt = 0, End = VL.size(); Cnt + VF <= End; Cnt += VF, ++VectorizedCnt) { ArrayRef Slice = VL.slice(Cnt, VF); - OrdersType Order; + SmallVector Order; SmallVector PointerOps; LoadsState LS = canVectorizeLoads(Slice, Slice.front(), Order, PointerOps, @@ -5395,7 +5397,7 @@ void BoUpSLP::reorderNodeWithReuses(TreeEntry &TE, ArrayRef Mask) const { TE.ReorderIndices.clear(); // Try to improve gathered nodes with clustered reuses, if possible. ArrayRef Slice = ArrayRef(NewMask).slice(0, Sz); - OrdersType NewOrder(Slice); + SmallVector NewOrder(Slice); inversePermutation(NewOrder, NewMask); reorderScalars(TE.Scalars, NewMask); // Fill the reuses mask with the identity submasks. 
@@ -7715,7 +7717,7 @@ unsigned BoUpSLP::canMapToVector(Type *T) const { } bool BoUpSLP::canReuseExtract(ArrayRef VL, Value *OpValue, - OrdersType &CurrentOrder, + SmallVectorImpl &CurrentOrder, bool ResizeAllowed) const { const auto *It = find_if(VL, IsaPred); assert(It != VL.end() && "Expected at least one extract instruction."); From 75cb7de404ee236d6297c551740a2681583d7e5e Mon Sep 17 00:00:00 2001 From: earnol Date: Thu, 15 Aug 2024 10:15:27 -0400 Subject: [PATCH 029/441] [ubsan] Display correct runtime messages for negative _BitInt (#96240) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this patch, the compiler-rt ubsan library has a bug displaying incorrect values for variables of the _BitInt (previously called _ExtInt) type. This patch affects both the generation of metadata inside the code generator and the runtime part. The runtime part is provided only for the i386 and x86_64 runtimes. Other runtimes should be updated to take full benefit of this patch. The patch is constructed to be backward compatible: int and float type runtime diagnostics should be unaffected for not yet updated runtimes. This patch fixes issue https://github.com/llvm/llvm-project/issues/64100. 
Co-authored-by: Eänolituri Lómitaurë Co-authored-by: Aaron Ballman Co-authored-by: Paul Kirth --- clang/lib/CodeGen/CGExpr.cpp | 59 +++++- clang/test/CodeGen/bit-int-ubsan.c | 96 ++++++++++ compiler-rt/lib/ubsan/ubsan_value.cpp | 17 +- compiler-rt/lib/ubsan/ubsan_value.h | 33 +++- .../ubsan/TestCases/Integer/bit-int-pass.c | 42 +++++ .../test/ubsan/TestCases/Integer/bit-int.c | 170 ++++++++++++++++++ 6 files changed, 404 insertions(+), 13 deletions(-) create mode 100644 clang/test/CodeGen/bit-int-ubsan.c create mode 100644 compiler-rt/test/ubsan/TestCases/Integer/bit-int-pass.c create mode 100644 compiler-rt/test/ubsan/TestCases/Integer/bit-int.c diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 0672861790633bb..48d9a3b8a5acb3b 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -42,6 +42,7 @@ #include "llvm/IR/MatrixBuilder.h" #include "llvm/Passes/OptimizationLevel.h" #include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" #include "llvm/Support/SaveAndRestore.h" @@ -65,6 +66,22 @@ static llvm::cl::opt ClSanitizeGuardChecks( "ubsan-guard-checks", llvm::cl::Optional, llvm::cl::desc("Guard UBSAN checks with `llvm.allow.ubsan.check()`.")); +//===--------------------------------------------------------------------===// +// Defines for metadata +//===--------------------------------------------------------------------===// + +// Those values are crucial to be the SAME as in ubsan runtime library. +enum VariableTypeDescriptorKind : uint16_t { + /// An integer type. + TK_Integer = 0x0000, + /// A floating-point type. + TK_Float = 0x0001, + /// An _BitInt(N) type. + TK_BitInt = 0x0002, + /// Any other type. The value representation is unspecified. 
+ TK_Unknown = 0xffff +}; + //===--------------------------------------------------------------------===// // Miscellaneous Helper Methods //===--------------------------------------------------------------------===// @@ -3288,22 +3305,40 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) { /// { i16 TypeKind, i16 TypeInfo } /// \endcode /// -/// followed by an array of i8 containing the type name. TypeKind is 0 for an -/// integer, 1 for a floating point value, and -1 for anything else. +/// followed by an array of i8 containing the type name with extra information +/// for BitInt. TypeKind is TK_Integer(0) for an integer, TK_Float(1) for a +/// floating point value, TK_BitInt(2) for BitInt and TK_Unknown(0xFFFF) for +/// anything else. llvm::Constant *CodeGenFunction::EmitCheckTypeDescriptor(QualType T) { // Only emit each type's descriptor once. if (llvm::Constant *C = CGM.getTypeDescriptorFromMap(T)) return C; - uint16_t TypeKind = -1; + uint16_t TypeKind = TK_Unknown; uint16_t TypeInfo = 0; + bool IsBitInt = false; if (T->isIntegerType()) { - TypeKind = 0; + TypeKind = TK_Integer; TypeInfo = (llvm::Log2_32(getContext().getTypeSize(T)) << 1) | (T->isSignedIntegerType() ? 1 : 0); + // Follow suggestion from discussion of issue 64100. + // So we can write the exact amount of bits in TypeName after '\0' + // making it .'\0'.<32-bit width>. + if (T->isSignedIntegerType() && T->getAs()) { + // Do a sanity checks as we are using 32-bit type to store bit length. + assert(getContext().getTypeSize(T) > 0 && + " non positive amount of bits in __BitInt type"); + assert(getContext().getTypeSize(T) <= 0xFFFFFFFF && + " too many bits in __BitInt type"); + + // Redefine TypeKind with the actual __BitInt type if we have signed + // BitInt. 
+ TypeKind = TK_BitInt; + IsBitInt = true; + } } else if (T->isFloatingType()) { - TypeKind = 1; + TypeKind = TK_Float; TypeInfo = getContext().getTypeSize(T); } @@ -3314,6 +3349,20 @@ llvm::Constant *CodeGenFunction::EmitCheckTypeDescriptor(QualType T) { DiagnosticsEngine::ak_qualtype, (intptr_t)T.getAsOpaquePtr(), StringRef(), StringRef(), std::nullopt, Buffer, std::nullopt); + if (IsBitInt) { + // The Structure is: 0 to end the string, 32 bit unsigned integer in target + // endianness, zero. + char S[6] = {'\0', '\0', '\0', '\0', '\0', '\0'}; + const auto *EIT = T->castAs(); + uint32_t Bits = EIT->getNumBits(); + llvm::support::endian::write32(S + 1, Bits, + getTarget().isBigEndian() + ? llvm::endianness::big + : llvm::endianness::little); + StringRef Str = StringRef(S, sizeof(S) / sizeof(decltype(S[0]))); + Buffer.append(Str); + } + llvm::Constant *Components[] = { Builder.getInt16(TypeKind), Builder.getInt16(TypeInfo), llvm::ConstantDataArray::getString(getLLVMContext(), Buffer) diff --git a/clang/test/CodeGen/bit-int-ubsan.c b/clang/test/CodeGen/bit-int-ubsan.c new file mode 100644 index 000000000000000..35f96963c181d15 --- /dev/null +++ b/clang/test/CodeGen/bit-int-ubsan.c @@ -0,0 +1,96 @@ +// REQUIRES: x86-registered-target +// RUN: %clang -Wno-constant-conversion -Wno-array-bounds -Wno-division-by-zero -Wno-shift-negative-value -Wno-shift-count-negative -Wno-int-to-pointer-cast -fsanitize=array-bounds,enum,float-cast-overflow,integer-divide-by-zero,implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change,unsigned-integer-overflow,signed-integer-overflow,shift-base,shift-exponent -O0 -S -emit-llvm -o - %s | FileCheck %s + +// The runtime test checking the _BitInt ubsan feature is located in compiler-rt/test/ubsan/TestCases/Integer/bit-int.c + +#include +#include + +uint32_t float_divide_by_zero() { + float f = 1.0f / 0.0f; + // CHECK: constant { i16, i16, [8 x i8] } { i16 1, i16 32, [8 x i8] c"'float'\00" } + 
_BitInt(37) r = (_BitInt(37))f; + // CHECK: constant { i16, i16, [20 x i8] } { i16 2, i16 13, [20 x i8] c"'_BitInt(37)'\00%\00\00\00\00\00" } + return r; +} + +uint32_t integer_divide_by_zero() __attribute__((no_sanitize("memory"))) { + _BitInt(37) x = 1 / 0; + // CHECK: constant { i16, i16, [32 x i8] } { i16 0, i16 10, [32 x i8] c"'uint32_t' (aka 'unsigned int')\00" } + return x; +} + +uint32_t implicit_unsigned_integer_truncation() { + unsigned _BitInt(37) x = 2U; + x += float_divide_by_zero(); + x += integer_divide_by_zero(); + x = x + 0xFFFFFFFFFFFFFFFFULL; + // CHECK: constant { i16, i16, [23 x i8] } { i16 0, i16 12, [23 x i8] c"'unsigned _BitInt(37)'\00" } + uint32_t r = x & 0xFFFFFFFF; + return r; +} + +uint32_t array_bounds() { + _BitInt(37) x[4]; + _BitInt(37) y = x[10]; + // CHECK: constant { i16, i16, [17 x i8] } { i16 -1, i16 0, [17 x i8] c"'_BitInt(37)[4]'\00" } + return (uint32_t)y; +} + +uint32_t float_cast_overflow() { + float a = 100000000.0f; + _BitInt(7) b = (_BitInt(7))a; + // CHECK: constant { i16, i16, [19 x i8] } { i16 2, i16 7, [19 x i8] c"'_BitInt(7)'\00\07\00\00\00\00\00" } + return b; +} + +_BitInt(13) implicit_signed_integer_truncation() { + _BitInt(73) x = (_BitInt(73)) ~((~0UL) >> 1); + return x; + // CHECK: constant { i16, i16, [20 x i8] } { i16 2, i16 {{([[:xdigit:]]{2})}}, [20 x i8] c"'_BitInt(73)'\00I\00\00\00\00\00" } + // CHECK: constant { i16, i16, [20 x i8] } { i16 2, i16 9, [20 x i8] c"'_BitInt(13)'\00\0D\00\00\00\00\00" } +} + +uint32_t negative_shift1(unsigned _BitInt(37) x) + __attribute__((no_sanitize("memory"))) { + _BitInt(9) c = -2; + return x >> c; + // CHECK: constant { i16, i16, [19 x i8] } { i16 2, i16 9, [19 x i8] c"'_BitInt(9)'\00\09\00\00\00\00\00" } +} + +uint32_t negative_shift2(unsigned _BitInt(37) x) + __attribute__((no_sanitize("memory"))) { + _BitInt(17) c = -2; + return x >> c; + // CHECK: constant { i16, i16, [20 x i8] } { i16 2, i16 11, [20 x i8] c"'_BitInt(17)'\00\11\00\00\00\00\00" } +} + +uint32_t 
negative_shift3(unsigned _BitInt(37) x) + __attribute__((no_sanitize("memory"))) { + _BitInt(34) c = -2; + return x >> c; + // CHECK: constant { i16, i16, [20 x i8] } { i16 2, i16 13, [20 x i8] c"'_BitInt(34)'\00\22\00\00\00\00\00" } +} + +uint32_t negative_shift5(unsigned _BitInt(37) x) + __attribute__((no_sanitize("memory"))) { + _BitInt(68) c = -2; + return x >> c; + // CHECK: constant { i16, i16, [20 x i8] } { i16 2, i16 {{([[:xdigit:]]{2})}}, [20 x i8] c"'_BitInt(68)'\00D\00\00\00\00\00" } +} + +int main(int argc, char **argv) { + // clang-format off + uint64_t result = + 1ULL + + implicit_unsigned_integer_truncation() + + (uint32_t)array_bounds() + + float_cast_overflow() + + (uint64_t)implicit_signed_integer_truncation() + + negative_shift1(5) + + negative_shift2(5) + + negative_shift3(5) + + negative_shift5(5); + // clang-format on + printf("%u\n", (uint32_t)(result & 0xFFFFFFFF)); +} diff --git a/compiler-rt/lib/ubsan/ubsan_value.cpp b/compiler-rt/lib/ubsan/ubsan_value.cpp index dc61e5b939d952b..6e88ebaf34d4be3 100644 --- a/compiler-rt/lib/ubsan/ubsan_value.cpp +++ b/compiler-rt/lib/ubsan/ubsan_value.cpp @@ -67,18 +67,21 @@ const char *__ubsan::getObjCClassName(ValueHandle Pointer) { SIntMax Value::getSIntValue() const { CHECK(getType().isSignedIntegerTy()); + // Val was zero-extended to ValueHandle. Sign-extend from original width + // to SIntMax. + const unsigned ExtraBits = + sizeof(SIntMax) * 8 - getType().getIntegerBitCount(); if (isInlineInt()) { - // Val was zero-extended to ValueHandle. Sign-extend from original width - // to SIntMax. 
- const unsigned ExtraBits = - sizeof(SIntMax) * 8 - getType().getIntegerBitWidth(); return SIntMax(UIntMax(Val) << ExtraBits) >> ExtraBits; } - if (getType().getIntegerBitWidth() == 64) - return *reinterpret_cast(Val); + if (getType().getIntegerBitWidth() == 64) { + return SIntMax(UIntMax(*reinterpret_cast(Val)) << ExtraBits) >> + ExtraBits; + } #if HAVE_INT128_T if (getType().getIntegerBitWidth() == 128) - return *reinterpret_cast(Val); + return SIntMax(UIntMax(*reinterpret_cast(Val)) << ExtraBits) >> + ExtraBits; #else if (getType().getIntegerBitWidth() == 128) UNREACHABLE("libclang_rt.ubsan was built without __int128 support"); diff --git a/compiler-rt/lib/ubsan/ubsan_value.h b/compiler-rt/lib/ubsan/ubsan_value.h index e0957276dd24197..430c9ea0dc8d150 100644 --- a/compiler-rt/lib/ubsan/ubsan_value.h +++ b/compiler-rt/lib/ubsan/ubsan_value.h @@ -103,6 +103,13 @@ class TypeDescriptor { /// representation is that of bitcasting the floating-point value to an /// integer type. TK_Float = 0x0001, + /// An _BitInt(N) type. Lowest bit is 1 for a signed value, 0 for an + /// unsigned value. Remaining bits are log_2(bit_width). The value + /// representation is the integer itself if it fits into a ValueHandle, and + /// a pointer to the integer otherwise. TypeName contains the true width + /// of the type for the signed _BitInt(N) type stored after zero bit after + /// TypeName as 32-bit unsigned integer. + TK_BitInt = 0x0002, /// Any other type. The value representation is unspecified. 
TK_Unknown = 0xffff }; @@ -113,10 +120,15 @@ class TypeDescriptor { return static_cast(TypeKind); } - bool isIntegerTy() const { return getKind() == TK_Integer; } + bool isIntegerTy() const { + return getKind() == TK_Integer || getKind() == TK_BitInt; + } + bool isBitIntTy() const { return getKind() == TK_BitInt; } + bool isSignedIntegerTy() const { return isIntegerTy() && (TypeInfo & 1); } + bool isSignedBitIntTy() const { return isBitIntTy() && (TypeInfo & 1); } bool isUnsignedIntegerTy() const { return isIntegerTy() && !(TypeInfo & 1); } @@ -125,6 +137,25 @@ class TypeDescriptor { return 1 << (TypeInfo >> 1); } + const char *getBitIntBitCountPointer() const { + DCHECK(isBitIntTy()); + DCHECK(isSignedBitIntTy()); + // Scan Name for zero and return the next address + const char *p = getTypeName(); + while (*p != '\0') + ++p; + // Return the next address + return p + 1; + } + + unsigned getIntegerBitCount() const { + DCHECK(isIntegerTy()); + if (isSignedBitIntTy()) + return *reinterpret_cast(getBitIntBitCountPointer()); + else + return getIntegerBitWidth(); + } + bool isFloatTy() const { return getKind() == TK_Float; } unsigned getFloatBitWidth() const { CHECK(isFloatTy()); diff --git a/compiler-rt/test/ubsan/TestCases/Integer/bit-int-pass.c b/compiler-rt/test/ubsan/TestCases/Integer/bit-int-pass.c new file mode 100644 index 000000000000000..a25428f0eb872f9 --- /dev/null +++ b/compiler-rt/test/ubsan/TestCases/Integer/bit-int-pass.c @@ -0,0 +1,42 @@ +// RUN: %clang -Wno-constant-conversion -Wno-array-bounds -Wno-division-by-zero -Wno-shift-negative-value -Wno-shift-count-negative -Wno-int-to-pointer-cast -O0 
-fsanitize=alignment,array-bounds,bool,float-cast-overflow,implicit-integer-sign-change,implicit-signed-integer-truncation,implicit-unsigned-integer-truncation,integer-divide-by-zero,nonnull-attribute,null,nullability-arg,nullability-assign,nullability-return,pointer-overflow,returns-nonnull-attribute,shift-base,shift-exponent,signed-integer-overflow,unreachable,unsigned-integer-overflow,unsigned-shift-base,vla-bound %s -o %t1 && %run %t1 2>&1 | FileCheck %s + +#include +#include + +// In this test there is an expectation of assignment of _BitInt not producing any output. +uint32_t nullability_arg(_BitInt(37) *_Nonnull x) + __attribute__((no_sanitize("address"))) + __attribute__((no_sanitize("memory"))) { + _BitInt(37) y = *(_BitInt(37) *)&x; + return (y > 0) ? y : 0; +} + +// In this test there is an expectation of ubsan not triggeting on returning random address which is inside address space of the process. +_BitInt(37) nonnull_attribute(__attribute__((nonnull)) _BitInt(37) * x) + __attribute__((no_sanitize("address"))) + __attribute__((no_sanitize("memory"))) { + return *(_BitInt(37) *)&x; +} + +// In this test there is an expectation of assignment of uint32_t from "invalid" _BitInt is not producing any output. +uint32_t nullability_assign(_BitInt(7) * x) + __attribute__((no_sanitize("address"))) + __attribute__((no_sanitize("memory"))) { + _BitInt(7) *_Nonnull y = x; + int32_t r = *(_BitInt(7) *)&y; + return (r > 0) ? 
r : 0; +} + +// In those examples the file is expected to compile & run with no diagnostics +// CHECK-NOT: runtime error: + +int main(int argc, char **argv) { + // clang-format off + uint64_t result = + 1ULL + + nullability_arg((_BitInt(37) *)argc) + + ((uint64_t)nonnull_attribute((_BitInt(37) *)argc) & 0xFFFFFFFF) + + nullability_assign((_BitInt(7) *)argc); + // clang-format on + printf("%u\n", (uint32_t)(result & 0xFFFFFFFF)); +} diff --git a/compiler-rt/test/ubsan/TestCases/Integer/bit-int.c b/compiler-rt/test/ubsan/TestCases/Integer/bit-int.c new file mode 100644 index 000000000000000..18dc10bf3a38899 --- /dev/null +++ b/compiler-rt/test/ubsan/TestCases/Integer/bit-int.c @@ -0,0 +1,170 @@ +// RUN: %clang -Wno-constant-conversion -Wno-array-bounds -Wno-division-by-zero -Wno-shift-negative-value -Wno-shift-count-negative -Wno-int-to-pointer-cast -O0 -fsanitize=array-bounds,float-cast-overflow,implicit-integer-sign-change,implicit-signed-integer-truncation,implicit-unsigned-integer-truncation,integer-divide-by-zero,pointer-overflow,shift-base,shift-exponent,signed-integer-overflow,unsigned-integer-overflow,unsigned-shift-base,vla-bound %s -o %t1 && %run %t1 2>&1 | FileCheck %s + +#include +#include + +uint32_t float_divide_by_zero() { + float f = 1.0f / 0.0f; + _BitInt(37) r = (_BitInt(37))f; + // CHECK: {{.*}}bit-int.c:[[#@LINE-1]]:19: runtime error: inf is outside the range of representable values of type + return r; +} + +uint32_t integer_divide_by_zero() __attribute__((no_sanitize("memory"))) { + _BitInt(37) x = 1 / 0; + // CHECK: {{.*}}bit-int.c:[[#@LINE-1]]:21: runtime error: division by zero + return x; +} + +uint32_t implicit_unsigned_integer_truncation() { + unsigned _BitInt(37) x = 2U; + x += float_divide_by_zero(); + x += integer_divide_by_zero(); + x = x + 0xFFFFFFFFFFFFFFFFULL; + // CHECK: {{.*}}bit-int.c:[[#@LINE-1]]:9: runtime error: unsigned integer overflow: + uint32_t r = x & 0xFFFFFFFF; + return r; +} + +uint32_t pointer_overflow() 
__attribute__((no_sanitize("address"))) { + _BitInt(37) *x = (_BitInt(37) *)1; + _BitInt(37) *y = x - 1; + // CHECK: {{.*}}bit-int.c:[[#@LINE-1]]:22: runtime error: pointer index expression with base + uint32_t r = *(_BitInt(37) *)&y; + // CHECK: {{.*}}bit-int.c:[[#@LINE-1]]:16: runtime error: implicit conversion from type + return r; +} + +uint32_t vla_bound(_BitInt(37) x) { + _BitInt(37) a[x - 1]; + // CHECK: {{.*}}bit-int.c:[[#@LINE-1]]:17: runtime error: variable length array bound evaluates to non-positive value + return 0; +} + +uint32_t unsigned_shift_base() { + unsigned _BitInt(37) x = ~0U << 1; + // CHECK: {{.*}}bit-int.c:[[#@LINE-1]]:32: runtime error: left shift of 4294967295 by 1 places cannot be represented in type + return x; +} + +uint32_t array_bounds() { + _BitInt(37) x[4]; + _BitInt(37) y = x[10]; + // CHECK: {{.*}}bit-int.c:[[#@LINE-1]]:19: runtime error: index 10 out of bounds for type + return (uint32_t)y; +} + +uint32_t float_cast_overflow() { + float a = 100000000.0f; + _BitInt(7) b = (_BitInt(7))a; + // CHECK: {{.*}}bit-int.c:[[#@LINE-1]]:18: runtime error: 1e+08 is outside the range of representable values of type + return b; +} + +uint32_t implicit_integer_sign_change(unsigned _BitInt(37) x) { + _BitInt(37) r = x; + // CHECK: {{.*}}bit-int.c:[[#@LINE-1]]:19: runtime error: implicit conversion from type '{{[^']+}}' of value + return r & 0xFFFFFFFF; +} + +_BitInt(13) implicit_signed_integer_truncation() { +#ifdef __SIZEOF_INT128__ + _BitInt(73) x = (_BitInt(73)) ~((~0UL) >> 1); +#else + uint32_t x = 0x7FFFFFFFUL; +#endif + return x; + // CHECK: {{.*}}bit-int.c:[[#@LINE-1]]:10: runtime error: implicit conversion from type +} + +_BitInt(37) shift_exponent() __attribute__((no_sanitize("memory"))) { + _BitInt(37) x = 1 << (-1); + // CHECK: {{.*}}bit-int.c:[[#@LINE-1]]:21: runtime error: shift exponent -1 is negative + return x; +} + +_BitInt(37) shift_base() __attribute__((no_sanitize("memory"))) { + _BitInt(37) x = (-1) << 1; + // CHECK: 
{{.*}}bit-int.c:[[#@LINE-1]]:24: runtime error: left shift of negative value -1 + return x; +} + +uint32_t negative_shift1(unsigned _BitInt(37) x) + __attribute__((no_sanitize("memory"))) { + _BitInt(9) c = -2; + return x >> c; + // CHECK: {{.*}}bit-int.c:[[#@LINE-1]]:12: runtime error: shift exponent -2 is negative +} + +uint32_t negative_shift2(unsigned _BitInt(37) x) + __attribute__((no_sanitize("memory"))) { + _BitInt(17) c = -2; + return x >> c; + // CHECK: {{.*}}bit-int.c:[[#@LINE-1]]:12: runtime error: shift exponent -2 is negative +} + +uint32_t negative_shift3(unsigned _BitInt(37) x) + __attribute__((no_sanitize("memory"))) { + _BitInt(34) c = -2; + return x >> c; + // CHECK: {{.*}}bit-int.c:[[#@LINE-1]]:12: runtime error: shift exponent -2 is negative +} + +uint32_t negative_shift4(unsigned _BitInt(37) x) + __attribute__((no_sanitize("memory"))) { + int64_t c = -2; + return x >> c; + // CHECK: {{.*}}bit-int.c:[[#@LINE-1]]:12: runtime error: shift exponent -2 is negative +} + +uint32_t negative_shift5(unsigned _BitInt(37) x) + __attribute__((no_sanitize("memory"))) { +#ifdef __SIZEOF_INT128__ + _BitInt(68) c = -2; +#else + // We cannot check BitInt values > 64 without int128_t support + _BitInt(48) c = -2; +#endif + return x >> c; + // CHECK: {{.*}}bit-int.c:[[#@LINE-1]]:12: runtime error: shift exponent -2 is negative +} + +uint32_t unsigned_integer_overflow() __attribute__((no_sanitize("memory"))) { + unsigned _BitInt(37) x = ~0U; + ++x; + return x; + // CHECK: {{.*}}bit-int.c:[[#@LINE-1]]:10: runtime error: implicit conversion from type +} + +// In this test no run-time overflow expected, so no diagnostics here, but should be a conversion error from the negative number on return. 
+uint32_t signed_integer_overflow() __attribute__((no_sanitize("memory"))) { + _BitInt(37) x = (_BitInt(37)) ~((0x8FFFFFFFFFFFFFFFULL) >> 1); + --x; + return x; + // CHECK: {{.*}}bit-int.c:[[#@LINE-1]]:10: runtime error: implicit conversion from type +} + +int main(int argc, char **argv) { + // clang-format off + uint64_t result = + 1ULL + + implicit_unsigned_integer_truncation() + + pointer_overflow() + + vla_bound(argc) + + unsigned_shift_base() + + (uint32_t)array_bounds() + + float_cast_overflow() + + implicit_integer_sign_change((unsigned _BitInt(37))(argc - 2)) + + (uint64_t)implicit_signed_integer_truncation() + + shift_exponent() + + (uint32_t)shift_base() + + negative_shift1(5) + + negative_shift2(5) + + negative_shift3(5) + + negative_shift4(5) + + negative_shift5(5) + + unsigned_integer_overflow() + + signed_integer_overflow(); + // clang-format on + printf("%u\n", (uint32_t)(result & 0xFFFFFFFF)); +} From 65ac12d3c9877ecf5b97552364e7eead887d94eb Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 15 Aug 2024 07:21:10 -0700 Subject: [PATCH 030/441] [SLP][NFC]Add a test with incorrect minbitwidth analysis for reduced operands --- .../X86/operand-is-reduced-val.ll | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll diff --git a/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll b/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll new file mode 100644 index 000000000000000..5fb93e27539d8e7 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux < %s -slp-threshold=-10 | FileCheck %s + +define i64 @src(i32 %a) { +; CHECK-LABEL: define i64 @src( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: 
[[TMP17:%.*]] = sext i32 [[A]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i32> [[TMP3]] to <4 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i32> [[TMP5]] to <4 x i64> +; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP6]]) +; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]]) +; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[TMP18]], [[TMP16]] +; CHECK-NEXT: [[OP_RDX1:%.*]] = add i64 [[TMP19]], 4294967297 +; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[OP_RDX1]], [[TMP17]] +; CHECK-NEXT: ret i64 [[TMP21]] +; +entry: + %0 = sext i32 %a to i64 + %1 = add nsw i64 %0, 4294967297 + %2 = sext i32 %a to i64 + %3 = add nsw i64 %2, 4294967297 + %4 = add i64 %3, %1 + %5 = and i64 %3, 1 + %6 = add i64 %4, %5 + %7 = sext i32 %a to i64 + %8 = add nsw i64 %7, 4294967297 + %9 = add i64 %8, %6 + %10 = and i64 %8, 1 + %11 = add i64 %9, %10 + %12 = sext i32 %a to i64 + %13 = add nsw i64 %12, 4294967297 + %14 = add i64 %13, %11 + %15 = and i64 %13, 1 + %16 = add i64 %14, %15 + %17 = sext i32 %a to i64 + %18 = add nsw i64 %17, 4294967297 + %19 = add i64 %18, %16 + %20 = and i64 %18, 1 + %21 = add i64 %19, %20 + ret i64 %21 +} From 13a6a7975256ebdbce85f1174ae2eec735fa0d7a Mon Sep 17 00:00:00 2001 From: Tobias Stadler Date: Thu, 15 Aug 2024 16:27:18 +0200 Subject: [PATCH 031/441] [GlobalISel] Combiner: Fix warning after #102163 Default case in covered switches is illegal. 
https://llvm.org/docs/CodingStandards.html#don-t-use-default-labels-in-fully-covered-switches-over-enumerations Pull Request: https://github.com/llvm/llvm-project/pull/104441 From 56140a8258a3498cfcd9f0f05c182457d43cbfd2 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 15 Aug 2024 07:57:37 -0700 Subject: [PATCH 032/441] [SLP]Fix PR104422: Wrong value truncation The minbitwidth restrictions can be skipped only for immediate reduced values, for other nodes still need to check if external users allow bitwidth reduction. Fixes https://github.com/llvm/llvm-project/issues/104422 --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 3 ++- .../SLPVectorizer/X86/operand-is-reduced-val.ll | 17 ++++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 81841a8f692870f..87b4ed599b5bb15 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -15663,7 +15663,8 @@ bool BoUpSLP::collectValuesToDemote( if (any_of(E.Scalars, [&](Value *V) { return !all_of(V->users(), [=](User *U) { return getTreeEntry(U) || - (UserIgnoreList && UserIgnoreList->contains(U)) || + (E.Idx == 0 && UserIgnoreList && + UserIgnoreList->contains(U)) || (!isa(U) && U->getType()->isSized() && !U->getType()->isScalableTy() && DL->getTypeSizeInBits(U->getType()) <= BitWidth); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll b/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll index 5fb93e27539d8e7..5fcac3fbf3bafea 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll @@ -8,15 +8,18 @@ define i64 @src(i32 %a) { ; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[A]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x 
i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i32> [[TMP3]] to <4 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i32> [[TMP3]], -; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i32> [[TMP5]] to <4 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i64> [[TMP3]], +; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i64> [[TMP4]], ; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP6]]) ; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]]) -; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[TMP18]], [[TMP16]] -; CHECK-NEXT: [[OP_RDX1:%.*]] = add i64 [[TMP19]], 4294967297 -; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[OP_RDX1]], [[TMP17]] +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP16]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP18]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> , i64 [[TMP17]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = add <2 x i64> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[TMP12]], [[TMP13]] ; CHECK-NEXT: ret i64 [[TMP21]] ; entry: From 95daf1aedfe521704c601a26ad8011c6e237c38a Mon Sep 17 00:00:00 2001 From: Snehasish Kumar Date: Thu, 15 Aug 2024 08:06:41 -0700 Subject: [PATCH 033/441] Allow optimization of __size_returning_new variants. (#102258) https://github.com/llvm/llvm-project/pull/101564 added support to TLI to detect variants of operator new which provide feedback on the actual size of memory allocated (http://wg21.link/P0901R5). This patch extends SimplifyLibCalls to handle hot cold hinting of these variants. 
--- .../llvm/Transforms/Utils/BuildLibCalls.h | 7 ++ .../InstCombine/InstCombineCalls.cpp | 2 +- .../Instrumentation/MemProfiler.cpp | 14 ++- llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 51 +++++++++ .../lib/Transforms/Utils/SimplifyLibCalls.cpp | 32 +++++- .../InstCombine/simplify-libcalls-new.ll | 100 ++++++++++++++++++ 6 files changed, 199 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h index 429d6a2e05236fd..1979c4af770b02b 100644 --- a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h +++ b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h @@ -267,6 +267,13 @@ namespace llvm { IRBuilderBase &B, const TargetLibraryInfo *TLI, LibFunc NewFunc, uint8_t HotCold); + Value *emitHotColdSizeReturningNew(Value *Num, IRBuilderBase &B, + const TargetLibraryInfo *TLI, + LibFunc NewFunc, uint8_t HotCold); + Value *emitHotColdSizeReturningNewAligned(Value *Num, Value *Align, + IRBuilderBase &B, + const TargetLibraryInfo *TLI, + LibFunc NewFunc, uint8_t HotCold); } #endif diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 4f9a5bd2c17f037..2c2e1bc4686a4ed 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3692,7 +3692,7 @@ Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) { // Skip optimizing notail and musttail calls so // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants. - // LibCallSimplifier::optimizeCall should try to preseve tail calls though. + // LibCallSimplifier::optimizeCall should try to preserve tail calls though. 
if (CI->isMustTailCall() || CI->isNoTailCall()) return nullptr; diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index 445bf0bb26cc497..4a43120c9a9e7fa 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryProfileInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DataLayout.h" @@ -753,8 +754,8 @@ stackFrameIncludesInlinedCallStack(ArrayRef ProfileCallStack, return InlCallStackIter == InlinedCallStack.end(); } -static bool isNewWithHotColdVariant(Function *Callee, - const TargetLibraryInfo &TLI) { +static bool isAllocationWithHotColdVariant(Function *Callee, + const TargetLibraryInfo &TLI) { if (!Callee) return false; LibFunc Func; @@ -769,6 +770,8 @@ static bool isNewWithHotColdVariant(Function *Callee, case LibFunc_ZnamRKSt9nothrow_t: case LibFunc_ZnamSt11align_val_t: case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t: + case LibFunc_size_returning_new: + case LibFunc_size_returning_new_aligned: return true; case LibFunc_Znwm12__hot_cold_t: case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t: @@ -778,6 +781,8 @@ static bool isNewWithHotColdVariant(Function *Callee, case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t: case LibFunc_ZnamSt11align_val_t12__hot_cold_t: case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t: + case LibFunc_size_returning_new_hot_cold: + case LibFunc_size_returning_new_aligned_hot_cold: return ClMemProfMatchHotColdNew; default: return false; @@ -945,9 +950,8 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, // instruction's locations match the prefix Frame locations on an // allocation context with the same leaf. 
if (AllocInfoIter != LocHashToAllocInfo.end()) { - // Only consider allocations via new, to reduce unnecessary metadata, - // since those are the only allocations that will be targeted initially. - if (!isNewWithHotColdVariant(CI->getCalledFunction(), TLI)) + // Only consider allocations which support hinting. + if (!isAllocationWithHotColdVariant(CI->getCalledFunction(), TLI)) continue; // We may match this instruction's location list to multiple MIB // contexts. Add them to a Trie specialized for trimming the contexts to diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 30a343b2c564e86..c23d53f7111d214 100644 --- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -19,6 +19,7 @@ #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" @@ -1963,6 +1964,56 @@ Value *llvm::emitCalloc(Value *Num, Value *Size, IRBuilderBase &B, return CI; } +Value *llvm::emitHotColdSizeReturningNew(Value *Num, IRBuilderBase &B, + const TargetLibraryInfo *TLI, + LibFunc SizeFeedbackNewFunc, + uint8_t HotCold) { + Module *M = B.GetInsertBlock()->getModule(); + if (!isLibFuncEmittable(M, TLI, SizeFeedbackNewFunc)) + return nullptr; + + StringRef Name = TLI->getName(SizeFeedbackNewFunc); + + // __sized_ptr_t struct return type { void*, size_t } + StructType *SizedPtrT = + StructType::get(M->getContext(), {B.getPtrTy(), Num->getType()}); + FunctionCallee Func = + M->getOrInsertFunction(Name, SizedPtrT, Num->getType(), B.getInt8Ty()); + inferNonMandatoryLibFuncAttrs(M, Name, *TLI); + CallInst *CI = B.CreateCall(Func, {Num, B.getInt8(HotCold)}, "sized_ptr"); + + if (const Function *F = dyn_cast(Func.getCallee())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +Value 
*llvm::emitHotColdSizeReturningNewAligned(Value *Num, Value *Align, + IRBuilderBase &B, + const TargetLibraryInfo *TLI, + LibFunc SizeFeedbackNewFunc, + uint8_t HotCold) { + Module *M = B.GetInsertBlock()->getModule(); + if (!isLibFuncEmittable(M, TLI, SizeFeedbackNewFunc)) + return nullptr; + + StringRef Name = TLI->getName(SizeFeedbackNewFunc); + + // __sized_ptr_t struct return type { void*, size_t } + StructType *SizedPtrT = + StructType::get(M->getContext(), {B.getPtrTy(), Num->getType()}); + FunctionCallee Func = M->getOrInsertFunction(Name, SizedPtrT, Num->getType(), + Align->getType(), B.getInt8Ty()); + inferNonMandatoryLibFuncAttrs(M, Name, *TLI); + CallInst *CI = + B.CreateCall(Func, {Num, Align, B.getInt8(HotCold)}, "sized_ptr"); + + if (const Function *F = dyn_cast(Func.getCallee())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + Value *llvm::emitHotColdNew(Value *Num, IRBuilderBase &B, const TargetLibraryInfo *TLI, LibFunc NewFunc, uint8_t HotCold) { diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 673cc1a6c08f76f..be4d4590cfd839d 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/AttributeMask.h" #include "llvm/IR/DataLayout.h" @@ -1745,7 +1746,7 @@ Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B, // if cold or hot, and leave as-is for default handling if "notcold" aka warm. 
// Note that in cases where we decide it is "notcold", it might be slightly // better to replace the hinted call with a non hinted call, to avoid the - // extra paramter and the if condition check of the hint value in the + // extra parameter and the if condition check of the hint value in the // allocator. This can be considered in the future. switch (Func) { case LibFunc_Znwm12__hot_cold_t: @@ -1844,6 +1845,30 @@ Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B, TLI, LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t, HotCold); break; + case LibFunc_size_returning_new: + if (HotCold != NotColdNewHintValue) + return emitHotColdSizeReturningNew(CI->getArgOperand(0), B, TLI, + LibFunc_size_returning_new_hot_cold, + HotCold); + break; + case LibFunc_size_returning_new_hot_cold: + if (OptimizeExistingHotColdNew) + return emitHotColdSizeReturningNew(CI->getArgOperand(0), B, TLI, + LibFunc_size_returning_new_hot_cold, + HotCold); + break; + case LibFunc_size_returning_new_aligned: + if (HotCold != NotColdNewHintValue) + return emitHotColdSizeReturningNewAligned( + CI->getArgOperand(0), CI->getArgOperand(1), B, TLI, + LibFunc_size_returning_new_aligned_hot_cold, HotCold); + break; + case LibFunc_size_returning_new_aligned_hot_cold: + if (OptimizeExistingHotColdNew) + return emitHotColdSizeReturningNewAligned( + CI->getArgOperand(0), CI->getArgOperand(1), B, TLI, + LibFunc_size_returning_new_aligned_hot_cold, HotCold); + break; default: return nullptr; } @@ -3759,6 +3784,7 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, Module *M = CI->getModule(); LibFunc Func; Function *Callee = CI->getCalledFunction(); + // Check for string/memory library functions. if (TLI->getLibFunc(*Callee, Func) && isLibFuncEmittable(M, TLI, Func)) { // Make sure we never change the calling convention. 
@@ -3851,6 +3877,10 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t: case LibFunc_ZnamSt11align_val_t12__hot_cold_t: case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t: + case LibFunc_size_returning_new: + case LibFunc_size_returning_new_hot_cold: + case LibFunc_size_returning_new_aligned: + case LibFunc_size_returning_new_aligned_hot_cold: return optimizeNew(CI, Builder, Func); default: break; diff --git a/llvm/test/Transforms/InstCombine/simplify-libcalls-new.ll b/llvm/test/Transforms/InstCombine/simplify-libcalls-new.ll index ecfafbc69797b15..83471f1d0f613fc 100644 --- a/llvm/test/Transforms/InstCombine/simplify-libcalls-new.ll +++ b/llvm/test/Transforms/InstCombine/simplify-libcalls-new.ll @@ -340,6 +340,94 @@ define void @array_new_align_nothrow_hot_cold() { ret void } +;; Check that operator __size_returning_new(unsigned long) converted to +;; __size_returning_new(unsigned long, __hot_cold_t) with a hot or cold attribute. +; HOTCOLD-LABEL: @size_returning_test() +define void @size_returning_test() { + ;; Attribute cold converted to __hot_cold_t cold value. + ; HOTCOLD: @__size_returning_new_hot_cold(i64 10, i8 [[COLD]]) + %call = call {ptr, i64} @__size_returning_new(i64 10) #3 + %p = extractvalue {ptr, i64} %call, 0 + call void @dummy(ptr %p) + ;; Attribute notcold has no effect. + ; HOTCOLD: @__size_returning_new(i64 10) + %call1 = call {ptr, i64} @__size_returning_new(i64 10) #4 + %p1 = extractvalue {ptr, i64} %call1, 0 + call void @dummy(ptr %p1) + ;; Attribute hot converted to __hot_cold_t hot value. 
+ ; HOTCOLD: @__size_returning_new_hot_cold(i64 10, i8 [[HOT]]) + %call2 = call {ptr, i64} @__size_returning_new(i64 10) #5 + %p2 = extractvalue {ptr, i64} %call2, 0 + call void @dummy(ptr %p2) + ret void +} + +;; Check that operator __size_returning_new_aligned(unsigned long, std::align_val_t) converted to +;; __size_returning_new_aligned(unsigned long, std::align_val_t, __hot_cold_t) with a hot or cold attribute. +; HOTCOLD-LABEL: @size_returning_aligned_test() +define void @size_returning_aligned_test() { + ;; Attribute cold converted to __hot_cold_t cold value. + ; HOTCOLD: @__size_returning_new_aligned_hot_cold(i64 10, i64 8, i8 [[COLD]]) + %call = call {ptr, i64} @__size_returning_new_aligned(i64 10, i64 8) #3 + %p = extractvalue {ptr, i64} %call, 0 + call void @dummy(ptr %p) + ;; Attribute notcold has no effect. + ; HOTCOLD: @__size_returning_new_aligned(i64 10, i64 8) + %call1 = call {ptr, i64} @__size_returning_new_aligned(i64 10, i64 8) #4 + %p1 = extractvalue {ptr, i64} %call1, 0 + call void @dummy(ptr %p1) + ;; Attribute hot converted to __hot_cold_t hot value. + ; HOTCOLD: @__size_returning_new_aligned_hot_cold(i64 10, i64 8, i8 [[HOT]]) + %call2 = call {ptr, i64} @__size_returning_new_aligned(i64 10, i64 8) #5 + %p2 = extractvalue {ptr, i64} %call2, 0 + call void @dummy(ptr %p2) + ret void +} + +;; Check that __size_returning_new_hot_cold(unsigned long, __hot_cold_t) +;; optionally has its hint updated. +; HOTCOLD-LABEL: @size_returning_update_test() +define void @size_returning_update_test() { + ;; Attribute cold converted to __hot_cold_t cold value. + ; HOTCOLD: @__size_returning_new_hot_cold(i64 10, i8 [[PREVHINTCOLD]]) + %call = call {ptr, i64} @__size_returning_new_hot_cold(i64 10, i8 7) #3 + %p = extractvalue {ptr, i64} %call, 0 + call void @dummy(ptr %p) + ;; Attribute notcold converted to __hot_cold_t notcold value. 
+ ; HOTCOLD: @__size_returning_new_hot_cold(i64 10, i8 [[PREVHINTNOTCOLD]]) + %call1 = call {ptr, i64} @__size_returning_new_hot_cold(i64 10, i8 7) #4 + %p1 = extractvalue {ptr, i64} %call1, 0 + call void @dummy(ptr %p1) + ;; Attribute hot converted to __hot_cold_t hot value. + ; HOTCOLD: @__size_returning_new_hot_cold(i64 10, i8 [[PREVHINTHOT]]) + %call2 = call {ptr, i64} @__size_returning_new_hot_cold(i64 10, i8 7) #5 + %p2 = extractvalue {ptr, i64} %call2, 0 + call void @dummy(ptr %p2) + ret void +} + +;; Check that __size_returning_new_aligned_hot_cold(unsigned long, std::align_val_t, __hot_cold_t) +;; optionally has its hint updated. +; HOTCOLD-LABEL: @size_returning_aligned_update_test() +define void @size_returning_aligned_update_test() { + ;; Attribute cold converted to __hot_cold_t cold value. + ; HOTCOLD: @__size_returning_new_aligned_hot_cold(i64 10, i64 8, i8 [[PREVHINTCOLD]]) + %call = call {ptr, i64} @__size_returning_new_aligned_hot_cold(i64 10, i64 8, i8 7) #3 + %p = extractvalue {ptr, i64} %call, 0 + call void @dummy(ptr %p) + ;; Attribute notcold converted to __hot_cold_t notcold value. + ; HOTCOLD: @__size_returning_new_aligned_hot_cold(i64 10, i64 8, i8 [[PREVHINTNOTCOLD]]) + %call1 = call {ptr, i64} @__size_returning_new_aligned_hot_cold(i64 10, i64 8, i8 7) #4 + %p1 = extractvalue {ptr, i64} %call1, 0 + call void @dummy(ptr %p1) + ;; Attribute hot converted to __hot_cold_t hot value. + ; HOTCOLD: @__size_returning_new_aligned_hot_cold(i64 10, i64 8, i8 [[PREVHINTHOT]]) + %call2 = call {ptr, i64} @__size_returning_new_aligned_hot_cold(i64 10, i64 8, i8 7) #5 + %p2 = extractvalue {ptr, i64} %call2, 0 + call void @dummy(ptr %p2) + ret void +} + ;; So that instcombine doesn't optimize out the call. 
declare void @dummy(ptr) @@ -360,6 +448,18 @@ declare ptr @_ZnamSt11align_val_t12__hot_cold_t(i64, i64, i8) declare ptr @_ZnamRKSt9nothrow_t12__hot_cold_t(i64, ptr, i8) declare ptr @_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t(i64, i64, ptr, i8) + +declare {ptr, i64} @__size_returning_new(i64) +declare {ptr, i64} @__size_returning_new_hot_cold(i64, i8) +declare {ptr, i64} @__size_returning_new_aligned(i64, i64) +declare {ptr, i64} @__size_returning_new_aligned_hot_cold(i64, i64, i8) + attributes #0 = { builtin allocsize(0) "memprof"="cold" } attributes #1 = { builtin allocsize(0) "memprof"="notcold" } attributes #2 = { builtin allocsize(0) "memprof"="hot" } + +;; Use separate attributes for __size_returning_new variants since they are not +;; treated as builtins. +attributes #3 = { "memprof" = "cold" } +attributes #4 = { "memprof" = "notcold" } +attributes #5 = { "memprof" = "hot" } From fcefe957ddfdc5a2fe9463757b597635e3436e01 Mon Sep 17 00:00:00 2001 From: Jeffrey Byrnes Date: Thu, 15 Aug 2024 08:07:05 -0700 Subject: [PATCH 034/441] [LegalizeTypes][AMDGPU]: Allow for scalarization of insert_subvector (#104236) Legalization for when the inserted subvector is to be scalarized. 
https://godbolt.org/z/vx3joWqoh --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 16 +++++++++++++ .../AMDGPU/scalarize-insert-subvector.ll | 24 +++++++++++++++++++ 3 files changed, 41 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/scalarize-insert-subvector.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 6de1e3eca7feda8..27dd4ae241bd100 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -865,6 +865,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecOp_UnaryOp(SDNode *N); SDValue ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N); SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); + SDValue ScalarizeVecOp_INSERT_SUBVECTOR(SDNode *N, unsigned OpNo); SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecOp_VSELECT(SDNode *N); SDValue ScalarizeVecOp_VSETCC(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index ddb7c8c54bbfe46..8c0c2616250c60f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -765,6 +765,9 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::CONCAT_VECTORS: Res = ScalarizeVecOp_CONCAT_VECTORS(N); break; + case ISD::INSERT_SUBVECTOR: + Res = ScalarizeVecOp_INSERT_SUBVECTOR(N, OpNo); + break; case ISD::EXTRACT_VECTOR_ELT: Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N); break; @@ -882,6 +885,19 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Ops); } +/// The inserted subvector is to be scalarized - use insert vector element +/// instead. 
+SDValue DAGTypeLegalizer::ScalarizeVecOp_INSERT_SUBVECTOR(SDNode *N, + unsigned OpNo) { + // We should not be attempting to scalarize the containing vector + assert(OpNo == 1); + SDValue Elt = GetScalarizedVector(N->getOperand(1)); + SDValue ContainingVec = N->getOperand(0); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), + ContainingVec.getValueType(), ContainingVec, Elt, + N->getOperand(2)); +} + /// If the input is a vector that needs to be scalarized, it must be <1 x ty>, /// so just return the element, ignoring the index. SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { diff --git a/llvm/test/CodeGen/AMDGPU/scalarize-insert-subvector.ll b/llvm/test/CodeGen/AMDGPU/scalarize-insert-subvector.ll new file mode 100644 index 000000000000000..f6f367cf85f7293 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/scalarize-insert-subvector.ll @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefix=GCN %s + +define void @scalarize_insert_subvector(ptr addrspace(3) %inptr, ptr addrspace(3) %inptr1, ptr addrspace(3) %outptr) { +; GCN-LABEL: scalarize_insert_subvector: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: ds_read_b64 v[4:5], v0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: ds_read_b32 v5, v1 offset:4 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: ds_write_b64 v2, v[4:5] +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] + %load0 = load <2 x i32>, ptr addrspace(3) %inptr, align 8 + %load1= load <2 x i32>, ptr addrspace(3) %inptr1, align 8 + %shuffle0 = shufflevector <2 x i32> %load1, <2 x i32> poison, <1 x i32> + %bitcast0 = bitcast <1 x i32> %shuffle0 to <2 x half> + %bitcast1 = bitcast <2 x i32> %load0 to <4 x half> + %shuffle1 = shufflevector <2 x half> %bitcast0, <2 x half> poison, <4 x i32> + %shuffle2 = shufflevector <4 x half> %bitcast1, 
<4 x half> %shuffle1, <4 x i32> + store <4 x half> %shuffle2, ptr addrspace(3) %outptr + ret void +} From c7df775440717ec5a3f47b6d485617d802cbd036 Mon Sep 17 00:00:00 2001 From: Mital Ashok Date: Thu, 15 Aug 2024 16:12:11 +0100 Subject: [PATCH 035/441] [Clang] Check explicit object parameter for defaulted operators properly (#100419) Previously, the type of explicit object parameters was not considered for relational operators. This was defined by CWG2586, . This fix also means CWG2547 is now fully implemented. Fixes #100329, fixes #104413. Now start rejecting invalid rvalue reference parameters, which weren't checked for, and start accepting non-reference explicit object parameters (like `bool operator==(this C, C) = default;`) which were previously rejected for the object param not being a reference. Also start rejecting non-reference explicit object parameters for defaulted copy/move assign operators (`A& operator=(this A, const A&) = default;` is invalid but was previously accepted). Fixes #104414. --- clang/docs/ReleaseNotes.rst | 3 + .../clang/Basic/DiagnosticSemaKinds.td | 2 +- clang/lib/Sema/SemaDeclCXX.cpp | 51 ++++++++-------- .../class.compare.default/p1.cpp | 2 + clang/test/CXX/drs/cwg25xx.cpp | 59 +++++++++++++++++++ clang/test/SemaCXX/cxx2b-deducing-this.cpp | 43 +++++++++++++- clang/www/cxx_dr_status.html | 4 +- 7 files changed, 136 insertions(+), 28 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index f5696d6ce15dc7c..b6b7dd5705637a3 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -254,6 +254,9 @@ Bug Fixes to C++ Support specialization of a conversion function template. - Correctly diagnose attempts to use a concept name in its own definition; A concept name is introduced to its scope sooner to match the C++ standard. 
(#GH55875) +- Properly reject defaulted relational operators with invalid types for explicit object parameters, + e.g., ``bool operator==(this int, const Foo&)`` (#GH100329), and rvalue reference parameters. +- Properly reject defaulted copy/move assignment operators that have a non-reference explicit object parameter. Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index da2f939067bfab8..461eeb19f65e4a3 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -9752,7 +9752,7 @@ def err_defaulted_special_member_quals : Error< "have 'const'%select{, 'constexpr'|}1 or 'volatile' qualifiers">; def err_defaulted_special_member_explicit_object_mismatch : Error< "the type of the explicit object parameter of an explicitly-defaulted " - "%select{copy|move}0 assignment operator should match the type of the class %1">; + "%select{copy|move}0 assignment operator should be reference to %1">; def err_defaulted_special_member_volatile_param : Error< "the parameter for an explicitly-defaulted %sub{select_special_member_kind}0 " "may not be volatile">; diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index c87edf41f35cee8..9ca91a2def39f5c 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -7644,9 +7644,13 @@ bool Sema::CheckExplicitlyDefaultedSpecialMember(CXXMethodDecl *MD, // parameter is of (possibly different) type “reference to C”, // in which case the type of F1 would differ from the type of F2 // in that the type of F1 has an additional parameter; - if (!Context.hasSameType( - ThisType.getNonReferenceType().getUnqualifiedType(), - Context.getRecordType(RD))) { + QualType ExplicitObjectParameter = MD->isExplicitObjectMemberFunction() + ? 
MD->getParamDecl(0)->getType() + : QualType(); + if (!ExplicitObjectParameter.isNull() && + (!ExplicitObjectParameter->isReferenceType() || + !Context.hasSameType(ExplicitObjectParameter.getNonReferenceType(), + Context.getRecordType(RD)))) { if (DeleteOnTypeMismatch) ShouldDeleteForTypeMismatch = true; else { @@ -8730,8 +8734,9 @@ bool Sema::CheckExplicitlyDefaultedComparison(Scope *S, FunctionDecl *FD, // If we're out-of-class, this is the class we're comparing. if (!RD) RD = MD->getParent(); - QualType T = MD->getFunctionObjectParameterType(); - if (!T.isConstQualified()) { + QualType T = MD->getFunctionObjectParameterReferenceType(); + if (!T.getNonReferenceType().isConstQualified() && + (MD->isImplicitObjectMemberFunction() || T->isLValueReferenceType())) { SourceLocation Loc, InsertLoc; if (MD->isExplicitObjectMemberFunction()) { Loc = MD->getParamDecl(0)->getBeginLoc(); @@ -8750,11 +8755,17 @@ bool Sema::CheckExplicitlyDefaultedComparison(Scope *S, FunctionDecl *FD, } // Add the 'const' to the type to recover. 
- const auto *FPT = MD->getType()->castAs(); - FunctionProtoType::ExtProtoInfo EPI = FPT->getExtProtoInfo(); - EPI.TypeQuals.addConst(); - MD->setType(Context.getFunctionType(FPT->getReturnType(), - FPT->getParamTypes(), EPI)); + if (MD->isExplicitObjectMemberFunction()) { + assert(T->isLValueReferenceType()); + MD->getParamDecl(0)->setType(Context.getLValueReferenceType( + T.getNonReferenceType().withConst())); + } else { + const auto *FPT = MD->getType()->castAs(); + FunctionProtoType::ExtProtoInfo EPI = FPT->getExtProtoInfo(); + EPI.TypeQuals.addConst(); + MD->setType(Context.getFunctionType(FPT->getReturnType(), + FPT->getParamTypes(), EPI)); + } } if (MD->isVolatile()) { @@ -8781,18 +8792,15 @@ bool Sema::CheckExplicitlyDefaultedComparison(Scope *S, FunctionDecl *FD, const ParmVarDecl *KnownParm = nullptr; for (const ParmVarDecl *Param : FD->parameters()) { - if (Param->isExplicitObjectParameter()) - continue; QualType ParmTy = Param->getType(); - if (!KnownParm) { auto CTy = ParmTy; // Is it `T const &`? - bool Ok = !IsMethod; + bool Ok = !IsMethod || FD->hasCXXExplicitFunctionObjectParameter(); QualType ExpectedTy; if (RD) ExpectedTy = Context.getRecordType(RD); - if (auto *Ref = CTy->getAs()) { + if (auto *Ref = CTy->getAs()) { CTy = Ref->getPointeeType(); if (RD) ExpectedTy.addConst(); @@ -8800,14 +8808,11 @@ bool Sema::CheckExplicitlyDefaultedComparison(Scope *S, FunctionDecl *FD, } // Is T a class? 
- if (!Ok) { - } else if (RD) { - if (!RD->isDependentType() && !Context.hasSameType(CTy, ExpectedTy)) - Ok = false; - } else if (auto *CRD = CTy->getAsRecordDecl()) { - RD = cast(CRD); + if (RD) { + Ok &= RD->isDependentType() || Context.hasSameType(CTy, ExpectedTy); } else { - Ok = false; + RD = CTy->getAsCXXRecordDecl(); + Ok &= RD != nullptr; } if (Ok) { @@ -8847,7 +8852,7 @@ bool Sema::CheckExplicitlyDefaultedComparison(Scope *S, FunctionDecl *FD, assert(FD->getFriendObjectKind() && "expected a friend declaration"); } else { // Out of class, require the defaulted comparison to be a friend (of a - // complete type). + // complete type, per CWG2547). if (RequireCompleteType(FD->getLocation(), Context.getRecordType(RD), diag::err_defaulted_comparison_not_friend, int(DCK), int(1))) diff --git a/clang/test/CXX/class/class.compare/class.compare.default/p1.cpp b/clang/test/CXX/class/class.compare/class.compare.default/p1.cpp index ddf82f432c2eab0..a195e0548152d6c 100644 --- a/clang/test/CXX/class/class.compare/class.compare.default/p1.cpp +++ b/clang/test/CXX/class/class.compare/class.compare.default/p1.cpp @@ -16,6 +16,8 @@ struct A { bool operator<(const A&) const; bool operator<=(const A&) const = default; bool operator==(const A&) const && = default; // expected-error {{ref-qualifier '&&' is not allowed on a defaulted comparison operator}} + bool operator<=(const A&&) const = default; // expected-error {{invalid parameter type for defaulted relational comparison operator; found 'const A &&', expected 'const A &'}} + bool operator<=(const int&) const = default; // expected-error {{invalid parameter type for defaulted relational comparison operator; found 'const int &', expected 'const A &'}} bool operator>=(const A&) const volatile = default; // expected-error {{defaulted comparison function must not be volatile}} bool operator<=>(const A&) = default; // expected-error {{defaulted member three-way comparison operator must be const-qualified}} bool operator>=(const 
B&) const = default; // expected-error-re {{invalid parameter type for defaulted relational comparison operator; found 'const B &', expected 'const A &'{{$}}}} diff --git a/clang/test/CXX/drs/cwg25xx.cpp b/clang/test/CXX/drs/cwg25xx.cpp index 0d9f5eac23866a2..1924008f15ba580 100644 --- a/clang/test/CXX/drs/cwg25xx.cpp +++ b/clang/test/CXX/drs/cwg25xx.cpp @@ -92,6 +92,30 @@ using ::cwg2521::operator""_div; #endif } // namespace cwg2521 +namespace cwg2547 { // cwg2547: 20 +#if __cplusplus >= 202302L +struct S; +// since-cxx23-note@-1 {{forward declaration of 'cwg2547::S'}} +// since-cxx23-note@-2 {{forward declaration of 'cwg2547::S'}} +// since-cxx23-note@-3 {{forward declaration of 'cwg2547::S'}} +bool operator==(S, S) = default; // error: S is not complete +// since-cxx23-error@-1 {{variable has incomplete type 'S'}} +// since-cxx23-error@-2 {{variable has incomplete type 'S'}} +// since-cxx23-error@-3 {{equality comparison operator is not a friend of incomplete class 'cwg2547::S'}} +struct S { + friend bool operator==(S, const S&) = default; // error: parameters of different types + // since-cxx23-error@-1 {{parameters for defaulted equality comparison operator must have the same type (found 'S' vs 'const S &')}} +}; +enum E { }; +bool operator==(E, E) = default; // error: not a member or friend of a class +// since-cxx23-error@-1 {{invalid parameter type for non-member defaulted equality comparison operator; found 'E', expected class or reference to a constant class}} + +struct S2 { + bool operator==(this int, S2) = default; + // since-cxx23-error@-1 {{invalid parameter type for defaulted equality comparison operator; found 'int', expected 'const cwg2547::S2 &'}} +}; +#endif +} // namespace cwg2547 #if __cplusplus >= 202302L namespace cwg2553 { // cwg2553: 18 review 2023-07-14 @@ -253,6 +277,41 @@ static_assert(__is_layout_compatible(U, V), ""); #endif } // namespace cwg2583 +namespace cwg2586 { // cwg2586: 20 +#if __cplusplus >= 202302L +struct X { + X& 
operator=(this X&, const X&) = default; + X& operator=(this X&, X&) = default; + X& operator=(this X&&, X&&) = default; + // FIXME: The notes could be clearer on *how* the type differs + // e.g., "if an explicit object parameter is used it must be of type reference to 'X'" + X& operator=(this int, const X&) = default; + // since-cxx23-warning@-1 {{explicitly defaulted copy assignment operator is implicitly deleted}} + // since-cxx23-note@-2 {{function is implicitly deleted because its declared type does not match the type of an implicit copy assignment operator}} + X& operator=(this X, const X&) = default; + // since-cxx23-warning@-1 {{explicitly defaulted copy assignment operator is implicitly deleted}} + // since-cxx23-note@-2 {{function is implicitly deleted because its declared type does not match the type of an implicit copy assignment operator}} +}; +struct Y { + void operator=(this int, const Y&); // This is a copy assignment operator; it suppresses the implicit declaration +}; +static_assert([]{ + return !requires(T t, const T& ct) { t = ct; }; +}()); + +struct Z { + bool operator==(this const Z&, const Z&) = default; + bool operator==(this Z, Z) = default; + bool operator==(this Z, const Z&) = default; + // since-cxx23-error@-1 {{parameters for defaulted equality comparison operator must have the same type (found 'Z' vs 'const Z &')}} + bool operator==(this const Z&, Z) = default; + // since-cxx23-error@-1 {{parameters for defaulted equality comparison operator must have the same type (found 'const Z &' vs 'Z')}} + bool operator==(this int, Z) = default; + // since-cxx23-error@-1 {{invalid parameter type for defaulted equality comparison operator; found 'int', expected 'const cwg2586::Z &'}} +}; +#endif +} // namespace cwg2586 + namespace cwg2598 { // cwg2598: 18 #if __cplusplus >= 201103L struct NonLiteral { diff --git a/clang/test/SemaCXX/cxx2b-deducing-this.cpp b/clang/test/SemaCXX/cxx2b-deducing-this.cpp index 45fee6514c12bc6..23fb383fb73cbbe 100644 --- 
a/clang/test/SemaCXX/cxx2b-deducing-this.cpp +++ b/clang/test/SemaCXX/cxx2b-deducing-this.cpp @@ -729,10 +729,10 @@ struct S2 { }; S2& S2::operator=(this int&& self, const S2&) = default; -// expected-error@-1 {{the type of the explicit object parameter of an explicitly-defaulted copy assignment operator should match the type of the class 'S2'}} +// expected-error@-1 {{the type of the explicit object parameter of an explicitly-defaulted copy assignment operator should be reference to 'S2'}} S2& S2::operator=(this int&& self, S2&&) = default; -// expected-error@-1 {{the type of the explicit object parameter of an explicitly-defaulted move assignment operator should match the type of the class 'S2'}} +// expected-error@-1 {{the type of the explicit object parameter of an explicitly-defaulted move assignment operator should be reference to 'S2'}} struct Move { Move& operator=(this int&, Move&&) = default; @@ -972,3 +972,42 @@ struct R { f(r_value_ref); // expected-error {{no matching member function for call to 'f'}} } }; + +namespace GH100329 { +struct A { + bool operator == (this const int&, const A&); +}; +bool A::operator == (this const int&, const A&) = default; +// expected-error@-1 {{invalid parameter type for defaulted equality comparison operator; found 'const int &', expected 'const GH100329::A &'}} +} // namespace GH100329 + +namespace defaulted_assign { +struct A { + A& operator=(this A, const A&) = default; + // expected-warning@-1 {{explicitly defaulted copy assignment operator is implicitly deleted}} + // expected-note@-2 {{function is implicitly deleted because its declared type does not match the type of an implicit copy assignment operator}} + A& operator=(this int, const A&) = default; + // expected-warning@-1 {{explicitly defaulted copy assignment operator is implicitly deleted}} + // expected-note@-2 {{function is implicitly deleted because its declared type does not match the type of an implicit copy assignment operator}} +}; +} // namespace 
defaulted_assign + +namespace defaulted_compare { +struct A { + bool operator==(this A&, const A&) = default; + // expected-error@-1 {{defaulted member equality comparison operator must be const-qualified}} + bool operator==(this const A, const A&) = default; + // expected-error@-1 {{invalid parameter type for defaulted equality comparison operator; found 'const A', expected 'const defaulted_compare::A &'}} + bool operator==(this A, A) = default; +}; +struct B { + int a; + bool operator==(this B, B) = default; +}; +static_assert(B{0} == B{0}); +static_assert(B{0} != B{1}); +template +struct X; +static_assert(__is_same(X, X)); +static_assert(!__is_same(X, X)); +} // namespace defaulted_compare diff --git a/clang/www/cxx_dr_status.html b/clang/www/cxx_dr_status.html index 6c283b68aa9656f..9b0de55483d2755 100755 --- a/clang/www/cxx_dr_status.html +++ b/clang/www/cxx_dr_status.html @@ -15097,7 +15097,7 @@

C++ defect report implementation status

2547 DRWP Defaulted comparison operator function for non-classes - Unknown + Clang 20 2548 @@ -15331,7 +15331,7 @@

C++ defect report implementation status

2586 CD6 Explicit object parameter for assignment and comparison - Unknown + Clang 20 2587 From 2d52eb6a434fe47e67086f5ec1c3789bf6e7a604 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 14 Aug 2024 12:17:19 -0700 Subject: [PATCH 036/441] [SLP][NFC]Remove unused using declarations, reduce mem usage in containers, NFC --- .../Transforms/Vectorize/SLPVectorizer.cpp | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 87b4ed599b5bb15..15cff2f10030109 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1244,12 +1244,10 @@ class BoUpSLP { }; using ValueList = SmallVector; - using InstrList = SmallVector; using ValueSet = SmallPtrSet; - using StoreList = SmallVector; using ExtraValueToDebugLocsMap = MapVector>; - using OrdersType = SmallVector; + using OrdersType = SmallVector; BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti, TargetLibraryInfo *TLi, AAResults *Aa, LoopInfo *Li, @@ -1471,7 +1469,7 @@ class BoUpSLP { /// \param TryRecursiveCheck used to check if long masked gather can be /// represented as a serie of loads/insert subvector, if profitable. LoadsState canVectorizeLoads(ArrayRef VL, const Value *VL0, - SmallVectorImpl &Order, + OrdersType &Order, SmallVectorImpl &PointerOps, bool TryRecursiveCheck = true) const; @@ -2840,7 +2838,7 @@ class BoUpSLP { /// \param ResizeAllowed indicates whether it is allowed to handle subvector /// extract order. bool canReuseExtract(ArrayRef VL, Value *OpValue, - SmallVectorImpl &CurrentOrder, + OrdersType &CurrentOrder, bool ResizeAllowed = false) const; /// Vectorize a single entry in the tree. @@ -3084,10 +3082,10 @@ class BoUpSLP { CombinedOpcode CombinedOp = NotCombinedOp; /// Does this sequence require some shuffling? 
- SmallVector ReuseShuffleIndices; + SmallVector ReuseShuffleIndices; /// Does this entry require reordering? - SmallVector ReorderIndices; + OrdersType ReorderIndices; /// Points back to the VectorizableTree. /// @@ -4300,12 +4298,12 @@ static void reorderReuses(SmallVectorImpl &Reuses, ArrayRef Mask) { /// the original order of the scalars. Procedure transforms the provided order /// in accordance with the given \p Mask. If the resulting \p Order is just an /// identity order, \p Order is cleared. -static void reorderOrder(SmallVectorImpl &Order, ArrayRef Mask, +static void reorderOrder(BoUpSLP::OrdersType &Order, ArrayRef Mask, bool BottomOrder = false) { assert(!Mask.empty() && "Expected non-empty mask."); unsigned Sz = Mask.size(); if (BottomOrder) { - SmallVector PrevOrder; + BoUpSLP::OrdersType PrevOrder; if (Order.empty()) { PrevOrder.resize(Sz); std::iota(PrevOrder.begin(), PrevOrder.end(), 0); @@ -4695,7 +4693,7 @@ getShuffleCost(const TargetTransformInfo &TTI, TTI::ShuffleKind Kind, } BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads( - ArrayRef VL, const Value *VL0, SmallVectorImpl &Order, + ArrayRef VL, const Value *VL0, OrdersType &Order, SmallVectorImpl &PointerOps, bool TryRecursiveCheck) const { // Check that a vectorized load would load the same memory as a scalar // load. For example, we don't want to vectorize loads that are smaller @@ -4823,7 +4821,7 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads( for (unsigned Cnt = 0, End = VL.size(); Cnt + VF <= End; Cnt += VF, ++VectorizedCnt) { ArrayRef Slice = VL.slice(Cnt, VF); - SmallVector Order; + OrdersType Order; SmallVector PointerOps; LoadsState LS = canVectorizeLoads(Slice, Slice.front(), Order, PointerOps, @@ -5397,7 +5395,7 @@ void BoUpSLP::reorderNodeWithReuses(TreeEntry &TE, ArrayRef Mask) const { TE.ReorderIndices.clear(); // Try to improve gathered nodes with clustered reuses, if possible. 
ArrayRef Slice = ArrayRef(NewMask).slice(0, Sz); - SmallVector NewOrder(Slice); + OrdersType NewOrder(Slice); inversePermutation(NewOrder, NewMask); reorderScalars(TE.Scalars, NewMask); // Fill the reuses mask with the identity submasks. @@ -7717,7 +7715,7 @@ unsigned BoUpSLP::canMapToVector(Type *T) const { } bool BoUpSLP::canReuseExtract(ArrayRef VL, Value *OpValue, - SmallVectorImpl &CurrentOrder, + OrdersType &CurrentOrder, bool ResizeAllowed) const { const auto *It = find_if(VL, IsaPred); assert(It != VL.end() && "Expected at least one extract instruction."); From db8ef6188cbbe2125e6d60bdef77a535105772df Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Thu, 15 Aug 2024 08:15:30 -0700 Subject: [PATCH 037/441] [NFC] Fix code line exceeding 80 columns (#104428) --- llvm/tools/llc/llc.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp index d3f7c2b7f819b68..80c84a977c26c64 100644 --- a/llvm/tools/llc/llc.cpp +++ b/llvm/tools/llc/llc.cpp @@ -130,8 +130,9 @@ static cl::opt SplitDwarfFile( static cl::opt NoVerify("disable-verify", cl::Hidden, cl::desc("Do not verify input module")); -static cl::opt DisableSimplifyLibCalls("disable-simplify-libcalls", - cl::desc("Disable simplify-libcalls")); +static cl::opt + DisableSimplifyLibCalls("disable-simplify-libcalls", + cl::desc("Disable simplify-libcalls")); static cl::opt ShowMCEncoding("show-mc-encoding", cl::Hidden, cl::desc("Show encoding in .s output")); From 11c2da8fb7dc4d5dede094fa61077827004a3997 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 15 Aug 2024 23:18:03 +0800 Subject: [PATCH 038/441] [RISCV] Narrow indices to e16 for LMUL > 1 when lowering vector_reverse (#104427) The vector_shuffle lowering already does this to reduce register pressure, so also do it here. 
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 11 ++- .../RISCV/rvv/named-vector-shuffle-reverse.ll | 84 +++++++++++-------- 2 files changed, 59 insertions(+), 36 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 911fa45d7173e88..e16dd132ed71bb0 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -10321,8 +10321,6 @@ SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op, // If this is SEW=8 and VLMAX is potentially more than 256, we need // to use vrgatherei16.vv. - // TODO: It's also possible to use vrgatherei16.vv for other types to - // decrease register width for the index calculation. if (MaxVLMAX > 256 && EltSize == 8) { // If this is LMUL=8, we have to split before can use vrgatherei16.vv. // Reverse each half, then reassemble them in reverse order. @@ -10348,6 +10346,15 @@ SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op, GatherOpc = RISCVISD::VRGATHEREI16_VV_VL; } + // At LMUL > 1, do the index computation in 16 bits to reduce register + // pressure. 
+ if (IntVT.getScalarType().bitsGT(MVT::i16) && + IntVT.bitsGT(getLMUL1VT(IntVT))) { + assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b + GatherOpc = RISCVISD::VRGATHEREI16_VV_VL; + IntVT = IntVT.changeVectorElementType(MVT::i16); + } + MVT XLenVT = Subtarget.getXLenVT(); auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); diff --git a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll index 96094eea631ba50..60a03f1d97e5380 100644 --- a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll @@ -1285,10 +1285,11 @@ define @reverse_nxv4i32( %a) { ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: vrsub.vx v12, v10, a0 -; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vrgatherei16.vv v10, v8, v12 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %res = call @llvm.vector.reverse.nxv4i32( %a) @@ -1300,10 +1301,11 @@ define @reverse_nxv8i32( %a) { ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: vrsub.vx v16, v12, a0 -; CHECK-NEXT: vrgather.vv v12, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vrgatherei16.vv v12, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret %res = call @llvm.vector.reverse.nxv8i32( %a) @@ -1316,10 +1318,11 @@ define @reverse_nxv16i32( %a) { ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; CHECK-NEXT: vid.v v16 ; 
CHECK-NEXT: vrsub.vx v24, v16, a0 -; CHECK-NEXT: vrgather.vv v16, v8, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vrgatherei16.vv v16, v8, v24 ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %res = call @llvm.vector.reverse.nxv16i32( %a) @@ -1348,10 +1351,11 @@ define @reverse_nxv2i64( %a) { ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: vrsub.vx v12, v10, a0 -; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vrgatherei16.vv v10, v8, v12 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %res = call @llvm.vector.reverse.nxv2i64( %a) @@ -1364,10 +1368,11 @@ define @reverse_nxv4i64( %a) { ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: vrsub.vx v16, v12, a0 -; CHECK-NEXT: vrgather.vv v12, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vrgatherei16.vv v12, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret %res = call @llvm.vector.reverse.nxv4i64( %a) @@ -1379,10 +1384,11 @@ define @reverse_nxv8i64( %a) { ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: vrsub.vx v24, v16, a0 -; CHECK-NEXT: vrgather.vv v16, v8, v24 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vrgatherei16.vv v16, v8, v24 ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %res = call @llvm.vector.reverse.nxv8i64( %a) @@ -1526,10 +1532,11 @@ define @reverse_nxv4f32( %a) { ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 -; 
CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: vrsub.vx v12, v10, a0 -; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vrgatherei16.vv v10, v8, v12 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %res = call @llvm.vector.reverse.nxv4f32( %a) @@ -1541,10 +1548,11 @@ define @reverse_nxv8f32( %a) { ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: vrsub.vx v16, v12, a0 -; CHECK-NEXT: vrgather.vv v12, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vrgatherei16.vv v12, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret %res = call @llvm.vector.reverse.nxv8f32( %a) @@ -1557,10 +1565,11 @@ define @reverse_nxv16f32( %a) { ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: vrsub.vx v24, v16, a0 -; CHECK-NEXT: vrgather.vv v16, v8, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vrgatherei16.vv v16, v8, v24 ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %res = call @llvm.vector.reverse.nxv16f32( %a) @@ -1589,10 +1598,11 @@ define @reverse_nxv2f64( %a) { ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: vrsub.vx v12, v10, a0 -; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vrgatherei16.vv v10, v8, v12 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %res = call @llvm.vector.reverse.nxv2f64( %a) @@ -1605,10 +1615,11 @@ define 
@reverse_nxv4f64( %a) { ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: vrsub.vx v16, v12, a0 -; CHECK-NEXT: vrgather.vv v12, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vrgatherei16.vv v12, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret %res = call @llvm.vector.reverse.nxv4f64( %a) @@ -1620,10 +1631,11 @@ define @reverse_nxv8f64( %a) { ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: vrsub.vx v24, v16, a0 -; CHECK-NEXT: vrgather.vv v16, v8, v24 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vrgatherei16.vv v16, v8, v24 ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %res = call @llvm.vector.reverse.nxv8f64( %a) @@ -1638,10 +1650,11 @@ define @reverse_nxv3i64( %a) { ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: vrsub.vx v12, v12, a0 -; CHECK-NEXT: vrgather.vv v16, v8, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vrgatherei16.vv v16, v8, v12 ; CHECK-NEXT: vmv1r.v v8, v17 ; CHECK-NEXT: vmv1r.v v9, v18 ; CHECK-NEXT: vmv1r.v v10, v19 @@ -1655,10 +1668,11 @@ define @reverse_nxv6i64( %a) { ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: vrsub.vx v16, v16, a0 -; CHECK-NEXT: vrgather.vv v24, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vrgatherei16.vv v24, v8, v16 ; CHECK-NEXT: vmv2r.v v8, v26 
; CHECK-NEXT: vmv2r.v v10, v28 ; CHECK-NEXT: vmv2r.v v12, v30 @@ -1684,12 +1698,13 @@ define @reverse_nxv12i64( %a) { ; RV32-NEXT: andi sp, sp, -64 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: addi a1, a0, -1 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma ; RV32-NEXT: vid.v v24 ; RV32-NEXT: vrsub.vx v24, v24, a1 -; RV32-NEXT: vrgather.vv v0, v16, v24 +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32-NEXT: vrgatherei16.vv v0, v16, v24 ; RV32-NEXT: vmv4r.v v16, v4 -; RV32-NEXT: vrgather.vv v0, v8, v24 +; RV32-NEXT: vrgatherei16.vv v0, v8, v24 ; RV32-NEXT: vmv4r.v v20, v0 ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: addi a1, sp, 64 @@ -1720,12 +1735,13 @@ define @reverse_nxv12i64( %a) { ; RV64-NEXT: andi sp, sp, -64 ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: addi a1, a0, -1 -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma ; RV64-NEXT: vid.v v24 ; RV64-NEXT: vrsub.vx v24, v24, a1 -; RV64-NEXT: vrgather.vv v0, v16, v24 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vrgatherei16.vv v0, v16, v24 ; RV64-NEXT: vmv4r.v v16, v4 -; RV64-NEXT: vrgather.vv v0, v8, v24 +; RV64-NEXT: vrgatherei16.vv v0, v8, v24 ; RV64-NEXT: vmv4r.v v20, v0 ; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: addi a1, sp, 64 From 57abd4e4abb705a453134051743542de5fd396bc Mon Sep 17 00:00:00 2001 From: Daniel Grumberg Date: Thu, 15 Aug 2024 16:23:31 +0100 Subject: [PATCH 039/441] [clang][ExtractAPI] Emit environment component of target triple in SGF (#103273) rdar://133533830 --- .../Serialization/SymbolGraphSerializer.cpp | 4 ++++ .../test/ExtractAPI/platform-serialization.c | 20 +++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 clang/test/ExtractAPI/platform-serialization.c diff --git a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp index 84ed5467dd2fb91..1f8029cbd39ad25 100644 --- 
a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp +++ b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp @@ -104,6 +104,10 @@ Object serializePlatform(const Triple &T) { Object Platform; Platform["architecture"] = T.getArchName(); Platform["vendor"] = T.getVendorName(); + + if (!T.getEnvironmentName().empty()) + Platform["environment"] = T.getEnvironmentName(); + Platform["operatingSystem"] = serializeOperatingSystem(T); return Platform; } diff --git a/clang/test/ExtractAPI/platform-serialization.c b/clang/test/ExtractAPI/platform-serialization.c new file mode 100644 index 000000000000000..6d6a13f085cd9d9 --- /dev/null +++ b/clang/test/ExtractAPI/platform-serialization.c @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -extract-api --pretty-sgf -triple arm64-apple-ios17.1-macabi \ +// RUN: -x c-header %s -verify -o - | FileCheck %s + +int a; + +// CHECK: "platform": { +// CHECK-NEXT: "architecture": "arm64", +// CHECK-NEXT: "environment": "macabi", +// CHECK-NEXT: "operatingSystem": { +// CHECK-NEXT: "minimumVersion": { +// CHECK-NEXT: "major": 14, +// CHECK-NEXT: "minor": 0, +// CHECK-NEXT: "patch": 0 +// CHECK-NEXT: }, +// CHECK-NEXT: "name": "ios" +// CHECK-NEXT: }, +// CHECK-NEXT: "vendor": "apple" +// CHECK-NEXT: } + +// expected-no-diagnostics From 598970904736f3535939f6a5525022219e4ae517 Mon Sep 17 00:00:00 2001 From: Hansang Bae Date: Thu, 15 Aug 2024 10:42:22 -0500 Subject: [PATCH 040/441] [OpenMP] Miscellaneous small code improvements (#95603) Removes a few uninitialized variables, possible resource leaks, and redundant code. 
--- openmp/runtime/src/kmp.h | 2 ++ openmp/runtime/src/kmp_affinity.cpp | 3 +-- openmp/runtime/src/kmp_affinity.h | 8 +++++--- openmp/runtime/src/kmp_barrier.cpp | 3 ++- openmp/runtime/src/kmp_csupport.cpp | 6 +++--- openmp/runtime/src/kmp_runtime.cpp | 12 ++++++------ openmp/runtime/src/kmp_tasking.cpp | 2 +- openmp/runtime/src/kmp_wait_release.h | 10 ++++++---- openmp/runtime/src/ompt-general.cpp | 6 ++++++ 9 files changed, 32 insertions(+), 20 deletions(-) diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index 916c1dc25700875..4f4fb3292d63eec 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -4731,6 +4731,8 @@ class kmp_safe_raii_file_t { : f(nullptr) { open(filename, mode, env_var); } + kmp_safe_raii_file_t(const kmp_safe_raii_file_t &other) = delete; + kmp_safe_raii_file_t &operator=(const kmp_safe_raii_file_t &other) = delete; ~kmp_safe_raii_file_t() { close(); } /// Open filename using mode. This is automatically closed in the destructor. diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp index dd48b1ea5c13f9f..cf5cad04eb57d56 100644 --- a/openmp/runtime/src/kmp_affinity.cpp +++ b/openmp/runtime/src/kmp_affinity.cpp @@ -1970,7 +1970,6 @@ static bool __kmp_affinity_create_hwloc_map(kmp_i18n_id_t *const msg_id) { hw_thread.ids[index + 1] = sub_id; index--; } - prev = memory; } prev = obj; } @@ -4989,7 +4988,7 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) { int depth = __kmp_topology->get_depth(); // Create the table of masks, indexed by thread Id. 
- unsigned numUnique; + unsigned numUnique = 0; int numAddrs = __kmp_topology->get_num_hw_threads(); // If OMP_PLACES=cores: specified, then attempt // to make OS Id mask table using those attributes diff --git a/openmp/runtime/src/kmp_affinity.h b/openmp/runtime/src/kmp_affinity.h index ed24b6faf2f7e8f..9ab2c0cc70d8c6c 100644 --- a/openmp/runtime/src/kmp_affinity.h +++ b/openmp/runtime/src/kmp_affinity.h @@ -29,6 +29,8 @@ class KMPHwlocAffinity : public KMPAffinity { mask = hwloc_bitmap_alloc(); this->zero(); } + Mask(const Mask &other) = delete; + Mask &operator=(const Mask &other) = delete; ~Mask() { hwloc_bitmap_free(mask); } void set(int i) override { hwloc_bitmap_set(mask, i); } bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); } @@ -1271,7 +1273,7 @@ class hierarchy_info { leaf. It corresponds to the number of entries in numPerLevel if we exclude all but one trailing 1. */ kmp_uint32 depth; - kmp_uint32 base_num_threads; + kmp_uint32 base_num_threads = 0; enum init_status { initialized = 0, not_initialized = 1, initializing = 2 }; volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, // 2=initialization in progress @@ -1281,8 +1283,8 @@ class hierarchy_info { the parent of a node at level i has. For example, if we have a machine with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. 
*/ - kmp_uint32 *numPerLevel; - kmp_uint32 *skipPerLevel; + kmp_uint32 *numPerLevel = nullptr; + kmp_uint32 *skipPerLevel = nullptr; void deriveLevels() { int hier_depth = __kmp_topology->get_depth(); diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp index 658cee594e48d52..d7ef57c608149e7 100644 --- a/openmp/runtime/src/kmp_barrier.cpp +++ b/openmp/runtime/src/kmp_barrier.cpp @@ -444,7 +444,8 @@ static void __kmp_dist_barrier_release( next_go = my_current_iter + distributedBarrier::MAX_ITERS; my_go_index = tid / b->threads_per_go; if (this_thr->th.th_used_in_team.load() == 3) { - KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 3, 1); + (void)KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 3, + 1); } // Check if go flag is set if (b->go[my_go_index].go.load() != next_go) { diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp index b33c16fa79a658f..fdbf9ff45e35495 100644 --- a/openmp/runtime/src/kmp_csupport.cpp +++ b/openmp/runtime/src/kmp_csupport.cpp @@ -1589,7 +1589,7 @@ void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint); if (*lk == 0) { if (KMP_IS_D_LOCK(lockseq)) { - KMP_COMPARE_AND_STORE_ACQ32( + (void)KMP_COMPARE_AND_STORE_ACQ32( (volatile kmp_int32 *)&((kmp_base_tas_lock_t *)crit)->poll, 0, KMP_GET_D_TAG(lockseq)); } else { @@ -3486,8 +3486,8 @@ __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, // Check if it is initialized. 
if (*lk == 0) { if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { - KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, - KMP_GET_D_TAG(__kmp_user_lock_seq)); + (void)KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, + KMP_GET_D_TAG(__kmp_user_lock_seq)); } else { __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(__kmp_user_lock_seq)); diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 5b4391aa125d417..06bc4939359e264 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -1983,8 +1983,8 @@ int __kmp_fork_call(ident_t *loc, int gtid, #if OMPT_SUPPORT ompt_data_t ompt_parallel_data = ompt_data_none; - ompt_data_t *parent_task_data; - ompt_frame_t *ompt_frame; + ompt_data_t *parent_task_data = NULL; + ompt_frame_t *ompt_frame = NULL; void *return_address = NULL; if (ompt_enabled.enabled) { @@ -5765,8 +5765,8 @@ void __kmp_free_team(kmp_root_t *root, for (f = 1; f < team->t.t_nproc; ++f) { KMP_DEBUG_ASSERT(team->t.t_threads[f]); if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) { - KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), - 1, 2); + (void)KMP_COMPARE_AND_STORE_ACQ32( + &(team->t.t_threads[f]->th.th_used_in_team), 1, 2); } __kmp_free_thread(team->t.t_threads[f]); } @@ -9220,8 +9220,8 @@ void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) { // to wake it up. 
for (int f = 1; f < new_nthreads; ++f) { KMP_DEBUG_ASSERT(team->t.t_threads[f]); - KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0, - 3); + (void)KMP_COMPARE_AND_STORE_ACQ32( + &(team->t.t_threads[f]->th.th_used_in_team), 0, 3); if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up sleeping threads __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid, (kmp_flag_32 *)NULL); diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index 03ce0dd752af156..7edaa8e127e52c0 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -5276,7 +5276,7 @@ static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, switch (sched) { case 0: // no schedule clause specified, we can choose the default // let's try to schedule (team_size*10) tasks - grainsize = thread->th.th_team_nproc * 10; + grainsize = thread->th.th_team_nproc * static_cast(10); KMP_FALLTHROUGH(); case 2: // num_tasks provided if (grainsize > tc) { diff --git a/openmp/runtime/src/kmp_wait_release.h b/openmp/runtime/src/kmp_wait_release.h index 97db68943da7027..9baf280228ee52f 100644 --- a/openmp/runtime/src/kmp_wait_release.h +++ b/openmp/runtime/src/kmp_wait_release.h @@ -104,7 +104,8 @@ template <> struct flag_traits { template class kmp_flag { protected: flag_properties t; /**< "Type" of the flag in loc */ - kmp_info_t *waiting_threads[1]; /**< Threads sleeping on this thread. */ + /**< Threads sleeping on this thread. */ + kmp_info_t *waiting_threads[1] = {nullptr}; kmp_uint32 num_waiting_threads; /**< Num threads sleeping on this thread. */ std::atomic *sleepLoc; @@ -140,7 +141,7 @@ template class kmp_flag_native : public kmp_flag { protected: volatile PtrType *loc; - PtrType checker; /**< When flag==checker, it has been released. 
*/ + PtrType checker = (PtrType)0; /**< When flag==checker, it has been released */ typedef flag_traits traits_type; public: @@ -234,7 +235,7 @@ template class kmp_flag_atomic : public kmp_flag { protected: std::atomic *loc; /**< Pointer to flag location to wait on */ - PtrType checker; /**< Flag == checker means it has been released. */ + PtrType checker = (PtrType)0; /**< Flag==checker means it has been released */ public: typedef flag_traits traits_type; typedef PtrType flag_t; @@ -935,7 +936,8 @@ class kmp_flag_oncore : public kmp_flag_native { kmp_uint32 offset; /**< Portion of flag of interest for an operation. */ bool flag_switch; /**< Indicates a switch in flag location. */ enum barrier_type bt; /**< Barrier type. */ - kmp_info_t *this_thr; /**< Thread to redirect to different flag location. */ + /**< Thread to redirect to different flag location. */ + kmp_info_t *this_thr = nullptr; #if USE_ITT_BUILD void *itt_sync_obj; /**< ITT object to pass to new flag location. */ #endif diff --git a/openmp/runtime/src/ompt-general.cpp b/openmp/runtime/src/ompt-general.cpp index e07c5ff4fcc47d8..923eea2a563a917 100644 --- a/openmp/runtime/src/ompt-general.cpp +++ b/openmp/runtime/src/ompt-general.cpp @@ -104,9 +104,11 @@ static ompt_start_tool_result_t *ompt_start_tool_result = NULL; #if KMP_OS_WINDOWS static HMODULE ompt_tool_module = NULL; +static HMODULE ompt_archer_module = NULL; #define OMPT_DLCLOSE(Lib) FreeLibrary(Lib) #else static void *ompt_tool_module = NULL; +static void *ompt_archer_module = NULL; #define OMPT_DLCLOSE(Lib) dlclose(Lib) #endif @@ -374,6 +376,7 @@ ompt_try_start_tool(unsigned int omp_version, const char *runtime_version) { "Tool was started and is using the OMPT interface.\n"); OMPT_VERBOSE_INIT_PRINT( "----- END LOGGING OF TOOL REGISTRATION -----\n"); + ompt_archer_module = h; return ret; } OMPT_VERBOSE_INIT_CONTINUED_PRINT( @@ -381,6 +384,7 @@ ompt_try_start_tool(unsigned int omp_version, const char *runtime_version) { } else { 
OMPT_VERBOSE_INIT_CONTINUED_PRINT("Failed: %s\n", dlerror()); } + OMPT_DLCLOSE(h); } } #endif @@ -521,6 +525,8 @@ void ompt_fini() { } } + if (ompt_archer_module) + OMPT_DLCLOSE(ompt_archer_module); if (ompt_tool_module) OMPT_DLCLOSE(ompt_tool_module); memset(&ompt_enabled, 0, sizeof(ompt_enabled)); From e63b7ba0eb497d10d643aa5e27461c6c1bf8e221 Mon Sep 17 00:00:00 2001 From: Kevin McAfee Date: Thu, 15 Aug 2024 08:49:55 -0700 Subject: [PATCH 041/441] [InstCombine][NFC] Add tests for shifts of constants by common factor (#103471) --- .../test/Transforms/InstCombine/shl-factor.ll | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/shl-factor.ll b/llvm/test/Transforms/InstCombine/shl-factor.ll index 38eecaeff8e41e3..996b15f27f6d345 100644 --- a/llvm/test/Transforms/InstCombine/shl-factor.ll +++ b/llvm/test/Transforms/InstCombine/shl-factor.ll @@ -265,3 +265,27 @@ define i6 @sub_shl_same_amount_partial_nuw2(i6 %x, i6 %y, i6 %z) { ret i6 %diff } +define i8 @add_shl_same_amount_constants(i8 %z) { +; CHECK-LABEL: @add_shl_same_amount_constants( +; CHECK-NEXT: [[SUM:%.*]] = shl i8 7, [[Z:%.*]] +; CHECK-NEXT: ret i8 [[SUM]] +; + %s1 = shl i8 4, %z + %s2 = shl i8 3, %z + %sum = add i8 %s1, %s2 + ret i8 %sum +} + +define i8 @add_shl_same_amount_constants_extra_use(i8 %z) { +; CHECK-LABEL: @add_shl_same_amount_constants_extra_use( +; CHECK-NEXT: [[S1:%.*]] = shl i8 4, [[Z:%.*]] +; CHECK-NEXT: [[SUM:%.*]] = shl i8 7, [[Z]] +; CHECK-NEXT: call void @use8(i8 [[S1]]) +; CHECK-NEXT: ret i8 [[SUM]] +; + %s1 = shl i8 4, %z + %s2 = shl i8 3, %z + %sum = add i8 %s1, %s2 + call void @use8(i8 %s1) + ret i8 %sum +} From 51328b78dc2b0be20e8d67f57f64445cec25162c Mon Sep 17 00:00:00 2001 From: sp Date: Thu, 15 Aug 2024 17:00:03 +0100 Subject: [PATCH 042/441] [NFC] Fix spelling of "definitely". (#104455) This corrects the misspelling of "definitely" as "definately". No functional changes. 
--- clang/lib/StaticAnalyzer/Checkers/StringChecker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/StringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StringChecker.cpp index 8f1c31763e212c3..f50b5cd3c9e497e 100644 --- a/clang/lib/StaticAnalyzer/Checkers/StringChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StringChecker.cpp @@ -48,7 +48,7 @@ bool StringChecker::isCharToStringCtor(const CallEvent &Call, // Verify that the parameters have the expected types: // - arg 1: `const CharT *` - // - arg 2: some allocator - which is definately not `size_t`. + // - arg 2: some allocator - which is definitely not `size_t`. const QualType Arg1Ty = Call.getArgExpr(0)->getType().getCanonicalType(); const QualType Arg2Ty = Call.getArgExpr(1)->getType().getCanonicalType(); From 64c856055aed97603510410fd2feab273e1d5b8a Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 15 Aug 2024 09:01:06 -0700 Subject: [PATCH 043/441] Revert "[Object][x86-64] Add support for `R_X86_64_GLOB_DAT` relocations. (#103029)" (#103497) This reverts commit 5ae9faa538d100ab38f6f4f99c924de0e4270272. RelocationResolver is only supposed to handle static relocation types. Introducing GLOB_DAT could negatively impact other RelocationResolver users who solely handle static relocations and want to report errors for dynamic relocations. If GLOB_DAT is the sole required relocation, explicitly checking for it in the caller would be more reliable. Additionally, the caller should handle GLOB_DAT on other architectures. 
--- llvm/lib/Object/RelocationResolver.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/llvm/lib/Object/RelocationResolver.cpp b/llvm/lib/Object/RelocationResolver.cpp index 7f90834c3d21997..d9bb8f175ef83ac 100644 --- a/llvm/lib/Object/RelocationResolver.cpp +++ b/llvm/lib/Object/RelocationResolver.cpp @@ -47,7 +47,6 @@ static bool supportsX86_64(uint64_t Type) { case ELF::R_X86_64_PC64: case ELF::R_X86_64_32: case ELF::R_X86_64_32S: - case ELF::R_X86_64_GLOB_DAT: return true; default: return false; @@ -69,8 +68,6 @@ static uint64_t resolveX86_64(uint64_t Type, uint64_t Offset, uint64_t S, case ELF::R_X86_64_32: case ELF::R_X86_64_32S: return (S + Addend) & 0xFFFFFFFF; - case ELF::R_X86_64_GLOB_DAT: - return S; default: llvm_unreachable("Invalid relocation type"); } From 46fb225f3ac602970ebb8973a5376cd9216ba38f Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 15 Aug 2024 18:07:18 +0200 Subject: [PATCH 044/441] [InstSimplify] Add tests for f16 to i128 range (NFC) --- .../Transforms/InstSimplify/fptoi-range.ll | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/llvm/test/Transforms/InstSimplify/fptoi-range.ll b/llvm/test/Transforms/InstSimplify/fptoi-range.ll index cc9605259aa811e..e0689c26c719031 100644 --- a/llvm/test/Transforms/InstSimplify/fptoi-range.ll +++ b/llvm/test/Transforms/InstSimplify/fptoi-range.ll @@ -32,6 +32,17 @@ define i1 @f16_si16_max2(half %f) { ret i1 %c } +define i1 @f16_si128_max2(half %f) { +; CHECK-LABEL: @f16_si128_max2( +; CHECK-NEXT: [[I:%.*]] = fptosi half [[F:%.*]] to i128 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i128 [[I]], 65504 +; CHECK-NEXT: ret i1 [[C]] +; + %i = fptosi half %f to i128 + %c = icmp sgt i128 %i, 65504 + ret i1 %c +} + define i1 @f16_si_min1(half %f) { ; CHECK-LABEL: @f16_si_min1( ; CHECK-NEXT: ret i1 true @@ -41,6 +52,17 @@ define i1 @f16_si_min1(half %f) { ret i1 %c } +define i1 @f16_si128_min1(half %f) { +; CHECK-LABEL: @f16_si128_min1( +; CHECK-NEXT: [[I:%.*]] = fptosi half [[F:%.*]] to 
i128 +; CHECK-NEXT: [[C:%.*]] = icmp sge i128 [[I]], -65504 +; CHECK-NEXT: ret i1 [[C]] +; + %i = fptosi half %f to i128 + %c = icmp sge i128 %i, -65504 + ret i1 %c +} + define i1 @f16_si16_min1(half %f) { ; CHECK-LABEL: @f16_si16_min1( ; CHECK-NEXT: [[I:%.*]] = fptosi half [[F:%.*]] to i16 From afa0f53f96b5563a80fbdf8c41c8153bf8cd2685 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 15 Aug 2024 18:08:27 +0200 Subject: [PATCH 045/441] [ValueTracking] Fix f16 fptosi range for large integers We were missing the signed flag on the negative value, so the range was incorrectly interpreted for integers larger than 64-bit. Split out from https://github.com/llvm/llvm-project/pull/80309. --- llvm/lib/Analysis/ValueTracking.cpp | 2 +- .../CodeGen/Thumb2/mve-fpclamptosat_vec.ll | 194 ++++-------------- .../Transforms/InstSimplify/fptoi-range.ll | 8 +- 3 files changed, 43 insertions(+), 161 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index b6414a3e7990b0a..014a703a0005240 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -9671,7 +9671,7 @@ static void setLimitForFPToI(const Instruction *I, APInt &Lower, APInt &Upper) { if (!I->getOperand(0)->getType()->getScalarType()->isHalfTy()) return; if (isa(I) && BitWidth >= 17) { - Lower = APInt(BitWidth, -65504); + Lower = APInt(BitWidth, -65504, true); Upper = APInt(BitWidth, 65505); } diff --git a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll index 94210d795867a0f..f2ac5268921800c 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll @@ -1012,60 +1012,21 @@ entry: define arm_aapcs_vfpcc <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-LABEL: stest_f16i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: .save 
{r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vmov.u16 r0, q0[1] ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: subs.w r7, r0, #-1 -; CHECK-NEXT: mvn r9, #-2147483648 -; CHECK-NEXT: sbcs.w r7, r1, r9 -; CHECK-NEXT: mov.w r10, #-2147483648 -; CHECK-NEXT: sbcs r7, r2, #0 -; CHECK-NEXT: sbcs r7, r3, #0 -; CHECK-NEXT: cset r7, lt -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r3, r3, r7, ne -; CHECK-NEXT: csel r2, r2, r7, ne -; CHECK-NEXT: mov.w r7, #-1 -; CHECK-NEXT: csel r1, r1, r9, ne -; CHECK-NEXT: csel r4, r0, r7, ne -; CHECK-NEXT: rsbs r0, r4, #0 -; CHECK-NEXT: sbcs.w r0, r10, r1 -; CHECK-NEXT: sbcs.w r0, r7, r2 -; CHECK-NEXT: sbcs.w r0, r7, r3 -; CHECK-NEXT: cset r5, lt +; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: vmov.u16 r0, q4[0] -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r8, r1, r10, ne +; CHECK-NEXT: mov r5, r1 ; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: subs.w r6, r0, #-1 -; CHECK-NEXT: sbcs.w r6, r1, r9 -; CHECK-NEXT: sbcs r6, r2, #0 -; CHECK-NEXT: sbcs r6, r3, #0 -; CHECK-NEXT: cset r6, lt -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r0, r0, r7, ne -; CHECK-NEXT: csel r1, r1, r9, ne -; CHECK-NEXT: csel r3, r3, r6, ne -; CHECK-NEXT: csel r2, r2, r6, ne -; CHECK-NEXT: rsbs r6, r0, #0 -; CHECK-NEXT: sbcs.w r6, r10, r1 -; CHECK-NEXT: sbcs.w r2, r7, r2 -; CHECK-NEXT: sbcs.w r2, r7, r3 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r1, r1, r10, ne -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r3, r4, r5, ne -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r3 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r8 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r4 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r5 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEXT: pop {r4, r5, r7, pc} entry: %conv = fptosi <2 x half> %x to <2 x i128> %0 = icmp slt <2 x i128> %conv, @@ -1105,46 
+1066,28 @@ entry: define arm_aapcs_vfpcc <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-LABEL: ustest_f16i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vmov.u16 r0, q0[1] ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: subs r5, r2, #1 -; CHECK-NEXT: mov.w r8, #1 -; CHECK-NEXT: sbcs r5, r3, #0 -; CHECK-NEXT: mov.w r7, #0 -; CHECK-NEXT: cset r5, lt -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r0, r0, r5, ne -; CHECK-NEXT: csel r3, r3, r5, ne -; CHECK-NEXT: csel r2, r2, r8, ne -; CHECK-NEXT: csel r4, r1, r5, ne +; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: rsbs r1, r0, #0 -; CHECK-NEXT: sbcs.w r1, r7, r4 -; CHECK-NEXT: sbcs.w r1, r7, r2 -; CHECK-NEXT: sbcs.w r1, r7, r3 +; CHECK-NEXT: mov.w r5, #0 +; CHECK-NEXT: sbcs.w r1, r5, r4 +; CHECK-NEXT: sbcs.w r1, r5, r2 +; CHECK-NEXT: sbcs.w r1, r5, r3 ; CHECK-NEXT: cset r6, lt ; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r9, r0, r6, ne +; CHECK-NEXT: csel r8, r0, r6, ne ; CHECK-NEXT: vmov.u16 r0, q4[0] ; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: subs r5, r2, #1 -; CHECK-NEXT: sbcs r5, r3, #0 -; CHECK-NEXT: cset r5, lt -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r0, r0, r5, ne -; CHECK-NEXT: csel r2, r2, r8, ne -; CHECK-NEXT: csel r3, r3, r5, ne -; CHECK-NEXT: csel r1, r1, r5, ne -; CHECK-NEXT: rsbs r5, r0, #0 -; CHECK-NEXT: sbcs.w r5, r7, r1 -; CHECK-NEXT: sbcs.w r2, r7, r2 -; CHECK-NEXT: sbcs.w r2, r7, r3 +; CHECK-NEXT: rsbs r7, r0, #0 +; CHECK-NEXT: sbcs.w r7, r5, r1 +; CHECK-NEXT: sbcs.w r2, r5, r2 +; CHECK-NEXT: sbcs.w r2, r5, r3 ; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r0, r0, r2, ne @@ -1152,11 +1095,10 @@ define arm_aapcs_vfpcc <2 x i64> @ustest_f16i64(<2 x half> %x) { ; 
CHECK-NEXT: csel r3, r4, r6, ne ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r1, r1, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r9 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r8 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r3 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} entry: %conv = fptosi <2 x half> %x to <2 x i128> %0 = icmp slt <2 x i128> %conv, @@ -2119,60 +2061,21 @@ entry: define arm_aapcs_vfpcc <2 x i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-LABEL: stest_f16i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vmov.u16 r0, q0[1] ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: subs.w r7, r0, #-1 -; CHECK-NEXT: mvn r9, #-2147483648 -; CHECK-NEXT: sbcs.w r7, r1, r9 -; CHECK-NEXT: mov.w r10, #-2147483648 -; CHECK-NEXT: sbcs r7, r2, #0 -; CHECK-NEXT: sbcs r7, r3, #0 -; CHECK-NEXT: cset r7, lt -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r3, r3, r7, ne -; CHECK-NEXT: csel r2, r2, r7, ne -; CHECK-NEXT: mov.w r7, #-1 -; CHECK-NEXT: csel r1, r1, r9, ne -; CHECK-NEXT: csel r4, r0, r7, ne -; CHECK-NEXT: rsbs r0, r4, #0 -; CHECK-NEXT: sbcs.w r0, r10, r1 -; CHECK-NEXT: sbcs.w r0, r7, r2 -; CHECK-NEXT: sbcs.w r0, r7, r3 -; CHECK-NEXT: cset r5, lt +; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: vmov.u16 r0, q4[0] -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r8, r1, r10, ne +; CHECK-NEXT: mov r5, r1 ; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: subs.w r6, r0, #-1 -; CHECK-NEXT: sbcs.w r6, r1, r9 -; CHECK-NEXT: sbcs r6, r2, #0 -; CHECK-NEXT: sbcs r6, r3, #0 -; CHECK-NEXT: cset r6, lt -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r0, r0, r7, ne -; CHECK-NEXT: csel r1, r1, r9, ne -; CHECK-NEXT: csel r3, r3, r6, ne -; CHECK-NEXT: csel 
r2, r2, r6, ne -; CHECK-NEXT: rsbs r6, r0, #0 -; CHECK-NEXT: sbcs.w r6, r10, r1 -; CHECK-NEXT: sbcs.w r2, r7, r2 -; CHECK-NEXT: sbcs.w r2, r7, r3 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r1, r1, r10, ne -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r3, r4, r5, ne -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r3 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r8 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r4 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r5 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEXT: pop {r4, r5, r7, pc} entry: %conv = fptosi <2 x half> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -2209,51 +2112,34 @@ entry: define arm_aapcs_vfpcc <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-LABEL: ustest_f16i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vmov.u16 r0, q0[1] ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: subs r1, r2, #1 -; CHECK-NEXT: sbcs r1, r3, #0 -; CHECK-NEXT: cset r6, lt -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r5, r0, r6, ne -; CHECK-NEXT: csel r7, r3, r6, ne +; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: vmov.u16 r0, q4[0] -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: it mi -; CHECK-NEXT: movmi r5, #0 +; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: mov r6, r3 ; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: subs r2, #1 -; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r3, r3, r2, ne -; CHECK-NEXT: csel r0, r0, r2, ne +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: it mi +; CHECK-NEXT: movmi r4, #0 ; CHECK-NEXT: cmp r3, #0 ; 
CHECK-NEXT: it mi ; CHECK-NEXT: movmi r0, #0 ; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 -; CHECK-NEXT: csel r6, r4, r6, ne -; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r4 ; CHECK-NEXT: it mi -; CHECK-NEXT: movmi r6, #0 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r1, r1, r2, ne +; CHECK-NEXT: movmi r5, #0 ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it mi ; CHECK-NEXT: movmi r1, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r6 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r5 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-NEXT: pop {r4, r5, r6, pc} entry: %conv = fptosi <2 x half> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) diff --git a/llvm/test/Transforms/InstSimplify/fptoi-range.ll b/llvm/test/Transforms/InstSimplify/fptoi-range.ll index e0689c26c719031..95f2a9d50793c64 100644 --- a/llvm/test/Transforms/InstSimplify/fptoi-range.ll +++ b/llvm/test/Transforms/InstSimplify/fptoi-range.ll @@ -34,9 +34,7 @@ define i1 @f16_si16_max2(half %f) { define i1 @f16_si128_max2(half %f) { ; CHECK-LABEL: @f16_si128_max2( -; CHECK-NEXT: [[I:%.*]] = fptosi half [[F:%.*]] to i128 -; CHECK-NEXT: [[C:%.*]] = icmp sgt i128 [[I]], 65504 -; CHECK-NEXT: ret i1 [[C]] +; CHECK-NEXT: ret i1 false ; %i = fptosi half %f to i128 %c = icmp sgt i128 %i, 65504 @@ -54,9 +52,7 @@ define i1 @f16_si_min1(half %f) { define i1 @f16_si128_min1(half %f) { ; CHECK-LABEL: @f16_si128_min1( -; CHECK-NEXT: [[I:%.*]] = fptosi half [[F:%.*]] to i128 -; CHECK-NEXT: [[C:%.*]] = icmp sge i128 [[I]], -65504 -; CHECK-NEXT: ret i1 [[C]] +; CHECK-NEXT: ret i1 true ; %i = fptosi half %f to i128 %c = icmp sge i128 %i, -65504 From b6bb208662b980b3c29194f63f22e3af8f772a57 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 15 Aug 2024 09:18:33 -0700 Subject: [PATCH 046/441] Revert "[SLP][NFC]Remove unused using declarations, reduce mem usage in containers, NFC" This reverts commit 
2d52eb6a434fe47e67086f5ec1c3789bf6e7a604 to fix compile time regression found in https://llvm-compile-time-tracker.com/compare.php?from=fcefe957ddfdc5a2fe9463757b597635e3436e01&to=2d52eb6a434fe47e67086f5ec1c3789bf6e7a604&stat=instructions%3Au. --- .../Transforms/Vectorize/SLPVectorizer.cpp | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 15cff2f10030109..87b4ed599b5bb15 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1244,10 +1244,12 @@ class BoUpSLP { }; using ValueList = SmallVector; + using InstrList = SmallVector; using ValueSet = SmallPtrSet; + using StoreList = SmallVector; using ExtraValueToDebugLocsMap = MapVector>; - using OrdersType = SmallVector; + using OrdersType = SmallVector; BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti, TargetLibraryInfo *TLi, AAResults *Aa, LoopInfo *Li, @@ -1469,7 +1471,7 @@ class BoUpSLP { /// \param TryRecursiveCheck used to check if long masked gather can be /// represented as a serie of loads/insert subvector, if profitable. LoadsState canVectorizeLoads(ArrayRef VL, const Value *VL0, - OrdersType &Order, + SmallVectorImpl &Order, SmallVectorImpl &PointerOps, bool TryRecursiveCheck = true) const; @@ -2838,7 +2840,7 @@ class BoUpSLP { /// \param ResizeAllowed indicates whether it is allowed to handle subvector /// extract order. bool canReuseExtract(ArrayRef VL, Value *OpValue, - OrdersType &CurrentOrder, + SmallVectorImpl &CurrentOrder, bool ResizeAllowed = false) const; /// Vectorize a single entry in the tree. @@ -3082,10 +3084,10 @@ class BoUpSLP { CombinedOpcode CombinedOp = NotCombinedOp; /// Does this sequence require some shuffling? - SmallVector ReuseShuffleIndices; + SmallVector ReuseShuffleIndices; /// Does this entry require reordering? 
- OrdersType ReorderIndices; + SmallVector ReorderIndices; /// Points back to the VectorizableTree. /// @@ -4298,12 +4300,12 @@ static void reorderReuses(SmallVectorImpl &Reuses, ArrayRef Mask) { /// the original order of the scalars. Procedure transforms the provided order /// in accordance with the given \p Mask. If the resulting \p Order is just an /// identity order, \p Order is cleared. -static void reorderOrder(BoUpSLP::OrdersType &Order, ArrayRef Mask, +static void reorderOrder(SmallVectorImpl &Order, ArrayRef Mask, bool BottomOrder = false) { assert(!Mask.empty() && "Expected non-empty mask."); unsigned Sz = Mask.size(); if (BottomOrder) { - BoUpSLP::OrdersType PrevOrder; + SmallVector PrevOrder; if (Order.empty()) { PrevOrder.resize(Sz); std::iota(PrevOrder.begin(), PrevOrder.end(), 0); @@ -4693,7 +4695,7 @@ getShuffleCost(const TargetTransformInfo &TTI, TTI::ShuffleKind Kind, } BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads( - ArrayRef VL, const Value *VL0, OrdersType &Order, + ArrayRef VL, const Value *VL0, SmallVectorImpl &Order, SmallVectorImpl &PointerOps, bool TryRecursiveCheck) const { // Check that a vectorized load would load the same memory as a scalar // load. For example, we don't want to vectorize loads that are smaller @@ -4821,7 +4823,7 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads( for (unsigned Cnt = 0, End = VL.size(); Cnt + VF <= End; Cnt += VF, ++VectorizedCnt) { ArrayRef Slice = VL.slice(Cnt, VF); - OrdersType Order; + SmallVector Order; SmallVector PointerOps; LoadsState LS = canVectorizeLoads(Slice, Slice.front(), Order, PointerOps, @@ -5395,7 +5397,7 @@ void BoUpSLP::reorderNodeWithReuses(TreeEntry &TE, ArrayRef Mask) const { TE.ReorderIndices.clear(); // Try to improve gathered nodes with clustered reuses, if possible. 
ArrayRef Slice = ArrayRef(NewMask).slice(0, Sz); - OrdersType NewOrder(Slice); + SmallVector NewOrder(Slice); inversePermutation(NewOrder, NewMask); reorderScalars(TE.Scalars, NewMask); // Fill the reuses mask with the identity submasks. @@ -7715,7 +7717,7 @@ unsigned BoUpSLP::canMapToVector(Type *T) const { } bool BoUpSLP::canReuseExtract(ArrayRef VL, Value *OpValue, - OrdersType &CurrentOrder, + SmallVectorImpl &CurrentOrder, bool ResizeAllowed) const { const auto *It = find_if(VL, IsaPred); assert(It != VL.end() && "Expected at least one extract instruction."); From 9e0ee0e104a2f10b04144837d6a138b04a0193f6 Mon Sep 17 00:00:00 2001 From: Hansang Bae Date: Thu, 15 Aug 2024 11:44:50 -0500 Subject: [PATCH 047/441] [OpenMP] Add support for pause with omp_pause_stop_tool (#97100) This patch adds support for pause resource with a new enumerator omp_pause_stop_tool. The expected behavior of this enumerator is * omp_pause_resource: not allowed * omp_pause_resource_all: equivalent to omp_pause_hard --- openmp/runtime/src/include/omp.h.var | 3 +- openmp/runtime/src/include/omp_lib.F90.var | 1 + openmp/runtime/src/include/omp_lib.h.var | 2 + openmp/runtime/src/kmp.h | 3 +- openmp/runtime/src/kmp_ftn_entry.h | 2 + openmp/runtime/src/kmp_runtime.cpp | 3 +- .../runtime/test/ompt/misc/pause_stop_tool.c | 44 +++++++++++++++++++ 7 files changed, 55 insertions(+), 3 deletions(-) create mode 100644 openmp/runtime/test/ompt/misc/pause_stop_tool.c diff --git a/openmp/runtime/src/include/omp.h.var b/openmp/runtime/src/include/omp.h.var index eb3ab7778606a35..dee46e51d6b340c 100644 --- a/openmp/runtime/src/include/omp.h.var +++ b/openmp/runtime/src/include/omp.h.var @@ -476,7 +476,8 @@ typedef enum omp_pause_resource_t { omp_pause_resume = 0, omp_pause_soft = 1, - omp_pause_hard = 2 + omp_pause_hard = 2, + omp_pause_stop_tool = 3 } omp_pause_resource_t; extern int __KAI_KMPC_CONVENTION omp_pause_resource(omp_pause_resource_t, int); extern int __KAI_KMPC_CONVENTION 
omp_pause_resource_all(omp_pause_resource_t); diff --git a/openmp/runtime/src/include/omp_lib.F90.var b/openmp/runtime/src/include/omp_lib.F90.var index 63a3c93b8d92965..5133915c7d8cbd1 100644 --- a/openmp/runtime/src/include/omp_lib.F90.var +++ b/openmp/runtime/src/include/omp_lib.F90.var @@ -192,6 +192,7 @@ integer (kind=omp_pause_resource_kind), parameter, public :: omp_pause_resume = 0 integer (kind=omp_pause_resource_kind), parameter, public :: omp_pause_soft = 1 integer (kind=omp_pause_resource_kind), parameter, public :: omp_pause_hard = 2 + integer (kind=omp_pause_resource_kind), parameter, public :: omp_pause_stop_tool = 3 integer (kind=omp_interop_fr_kind), parameter, public :: omp_ifr_cuda = 1 integer (kind=omp_interop_fr_kind), parameter, public :: omp_ifr_cuda_driver = 2 diff --git a/openmp/runtime/src/include/omp_lib.h.var b/openmp/runtime/src/include/omp_lib.h.var index a709a2f298f8c8c..db1dc889d129989 100644 --- a/openmp/runtime/src/include/omp_lib.h.var +++ b/openmp/runtime/src/include/omp_lib.h.var @@ -248,6 +248,8 @@ parameter(omp_pause_soft=1) integer(kind=omp_pause_resource_kind)omp_pause_hard parameter(omp_pause_hard=2) + integer(kind=omp_pause_resource_kind)omp_pause_stop_tool + parameter(omp_pause_stop_tool=3) integer(kind=omp_interop_fr_kind)omp_ifr_cuda parameter(omp_ifr_cuda=1) diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index 4f4fb3292d63eec..04bf6c3b34dace2 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -4552,7 +4552,8 @@ extern int __kmpc_get_target_offload(); typedef enum kmp_pause_status_t { kmp_not_paused = 0, // status is not paused, or, requesting resume kmp_soft_paused = 1, // status is soft-paused, or, requesting soft pause - kmp_hard_paused = 2 // status is hard-paused, or, requesting hard pause + kmp_hard_paused = 2, // status is hard-paused, or, requesting hard pause + kmp_stop_tool_paused = 3 // requesting stop_tool pause } kmp_pause_status_t; // This stores the pause state of 
the runtime diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h index 713561734c481ab..9bd3ac973b3528b 100644 --- a/openmp/runtime/src/kmp_ftn_entry.h +++ b/openmp/runtime/src/kmp_ftn_entry.h @@ -1427,6 +1427,8 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_PAUSE_RESOURCE)(kmp_pause_status_t kind, #ifdef KMP_STUB return 1; // just fail #else + if (kind == kmp_stop_tool_paused) + return 1; // stop_tool must not be specified if (device_num == KMP_EXPAND_NAME(FTN_GET_INITIAL_DEVICE)()) return __kmpc_pause_resource(kind); else { diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 06bc4939359e264..2c8d9304c46bc25 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -9132,7 +9132,8 @@ int __kmp_pause_resource(kmp_pause_status_t level) { __kmp_soft_pause(); return 0; } - } else if (level == kmp_hard_paused) { // requesting hard pause + } else if (level == kmp_hard_paused || level == kmp_stop_tool_paused) { + // requesting hard pause or stop_tool pause if (__kmp_pause_status != kmp_not_paused) { // error message about already being paused return 1; diff --git a/openmp/runtime/test/ompt/misc/pause_stop_tool.c b/openmp/runtime/test/ompt/misc/pause_stop_tool.c new file mode 100644 index 000000000000000..c93913793244582 --- /dev/null +++ b/openmp/runtime/test/ompt/misc/pause_stop_tool.c @@ -0,0 +1,44 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include +int main() { + int x = 0; + int ret = 0; +#pragma omp parallel +#pragma omp single + x++; + // Expected to fail; omp_pause_stop_tool must not be specified + ret = omp_pause_resource(omp_pause_stop_tool, omp_get_initial_device()); + printf("omp_pause_resource %s\n", ret ? "failed" : "succeeded"); +#pragma omp parallel +#pragma omp single + x++; + // Expected to succeed + ret = omp_pause_resource_all(omp_pause_stop_tool); + printf("omp_pause_resource_all %s\n", ret ? 
"failed" : "succeeded"); +#pragma omp parallel +#pragma omp single + x++; + printf("x = %d\n", x); + return 0; + + // Check if + // -- omp_pause_resource/resource_all returns expected code + // -- OMPT interface is shut down as expected + + // CHECK-NOT: {{^}}0: Could not register callback + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: ompt_event_parallel_begin + // CHECK: ompt_event_parallel_end + + // CHECK: omp_pause_resource failed + + // CHECK: ompt_event_parallel_begin + // CHECK: ompt_event_parallel_end + + // CHECK: omp_pause_resource_all succeeded + + // CHECK-NOT: ompt_event +} From 6c270a8b9f1e1b80a6016aafb438db7dd89bcb99 Mon Sep 17 00:00:00 2001 From: Koakuma Date: Thu, 15 Aug 2024 23:46:09 +0700 Subject: [PATCH 048/441] [SPARC][Driver] Add -m(no-)v8plus flags handling (#98713) Implement handling for `-m(no-)v8plus` flags to allow the user to switch between V8 and V8+ mode with 32-bit code. Currently it only toggles the V8+ feature bit, ABI and codegen changes will be done in future patches. --- clang/include/clang/Driver/Options.td | 4 ++++ clang/lib/Driver/ToolChains/Arch/Sparc.cpp | 5 +++++ clang/test/Driver/sparc-target-features.c | 3 +++ 3 files changed, 12 insertions(+) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index acc1f2fde53979a..73e19b65dededb7 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -6149,6 +6149,10 @@ def mvis3 : Flag<["-"], "mvis3">, Group; def mno_vis3 : Flag<["-"], "mno-vis3">, Group; def mhard_quad_float : Flag<["-"], "mhard-quad-float">, Group; def msoft_quad_float : Flag<["-"], "msoft-quad-float">, Group; +def mv8plus : Flag<["-"], "mv8plus">, Group, + HelpText<"Enable V8+ mode, allowing use of 64-bit V9 instructions in 32-bit code">; +def mno_v8plus : Flag<["-"], "mno-v8plus">, Group, + HelpText<"Disable V8+ mode">; foreach i = 1 ... 
7 in def ffixed_g#i : Flag<["-"], "ffixed-g"#i>, Group, HelpText<"Reserve the G"#i#" register (SPARC only)">; diff --git a/clang/lib/Driver/ToolChains/Arch/Sparc.cpp b/clang/lib/Driver/ToolChains/Arch/Sparc.cpp index ae1a4ba7882627f..5a1fedbec06adfa 100644 --- a/clang/lib/Driver/ToolChains/Arch/Sparc.cpp +++ b/clang/lib/Driver/ToolChains/Arch/Sparc.cpp @@ -179,6 +179,11 @@ void sparc::getSparcTargetFeatures(const Driver &D, const ArgList &Args, Features.push_back("-hard-quad-float"); } + if (Arg *A = Args.getLastArg(options::OPT_mv8plus, options::OPT_mno_v8plus)) { + if (A->getOption().matches(options::OPT_mv8plus)) + Features.push_back("+v8plus"); + } + if (Args.hasArg(options::OPT_ffixed_g1)) Features.push_back("+reserve-g1"); diff --git a/clang/test/Driver/sparc-target-features.c b/clang/test/Driver/sparc-target-features.c index b36f63e7660e830..a839604ff1bc022 100644 --- a/clang/test/Driver/sparc-target-features.c +++ b/clang/test/Driver/sparc-target-features.c @@ -32,3 +32,6 @@ // RUN: %clang --target=sparc -msoft-quad-float %s -### 2>&1 | FileCheck -check-prefix=SOFT-QUAD-FLOAT %s // HARD-QUAD-FLOAT: "-target-feature" "+hard-quad-float" // SOFT-QUAD-FLOAT: "-target-feature" "-hard-quad-float" + +// RUN: %clang --target=sparc -mv8plus %s -### 2>&1 | FileCheck -check-prefix=V8PLUS %s +// V8PLUS: "-target-feature" "+v8plus" From e61776a0edce86ef01efaa708f43476c58173cae Mon Sep 17 00:00:00 2001 From: Koakuma Date: Thu, 15 Aug 2024 23:49:01 +0700 Subject: [PATCH 049/441] [SPARC][Utilities] Add names for SPARC ELF flags in LLVM binary utilities (#102843) This allows us to use and print readable names in LLVM binary utilities. 
--- llvm/include/llvm/BinaryFormat/ELF.h | 8 ++ llvm/lib/ObjectYAML/ELFYAML.cpp | 14 ++++ .../llvm-readobj/ELF/Sparc/elf-headers.test | 78 +++++++++++++++++++ llvm/tools/llvm-readobj/ELFDumper.cpp | 16 ++++ 4 files changed, 116 insertions(+) create mode 100644 llvm/test/tools/llvm-readobj/ELF/Sparc/elf-headers.test diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index 6827b5d95abfe01..2c9f70872962732 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -699,11 +699,19 @@ enum { // SPARC Specific e_flags enum : unsigned { + // ELF extension mask. + // All values are available for EM_SPARC32PLUS & EM_SPARCV9 objects, except + // EF_SPARC_32PLUS which is a EM_SPARC32PLUS-only flag. + // + // Note that those features are not mutually exclusive (one can set more than + // one flag in this group). EF_SPARC_EXT_MASK = 0xffff00, EF_SPARC_32PLUS = 0x000100, EF_SPARC_SUN_US1 = 0x000200, EF_SPARC_HAL_R1 = 0x000400, EF_SPARC_SUN_US3 = 0x000800, + + // Memory model selection mask for EM_SPARCV9 objects. 
EF_SPARCV9_MM = 0x3, EF_SPARCV9_TSO = 0x0, EF_SPARCV9_PSO = 0x1, diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index 1a192f72ae4d606..89ffc383a4a6ece 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -547,6 +547,20 @@ void ScalarBitSetTraits::bitset(IO &IO, BCase(EF_RISCV_RVE); BCase(EF_RISCV_TSO); break; + case ELF::EM_SPARC32PLUS: + BCase(EF_SPARC_32PLUS); + BCase(EF_SPARC_SUN_US1); + BCase(EF_SPARC_SUN_US3); + BCase(EF_SPARC_HAL_R1); + break; + case ELF::EM_SPARCV9: + BCase(EF_SPARC_SUN_US1); + BCase(EF_SPARC_SUN_US3); + BCase(EF_SPARC_HAL_R1); + BCaseMask(EF_SPARCV9_RMO, EF_SPARCV9_MM); + BCaseMask(EF_SPARCV9_PSO, EF_SPARCV9_MM); + BCaseMask(EF_SPARCV9_TSO, EF_SPARCV9_MM); + break; case ELF::EM_XTENSA: BCase(EF_XTENSA_XT_INSN); BCaseMask(EF_XTENSA_MACH_NONE, EF_XTENSA_MACH); diff --git a/llvm/test/tools/llvm-readobj/ELF/Sparc/elf-headers.test b/llvm/test/tools/llvm-readobj/ELF/Sparc/elf-headers.test new file mode 100644 index 000000000000000..c05cd76640ed3eb --- /dev/null +++ b/llvm/test/tools/llvm-readobj/ELF/Sparc/elf-headers.test @@ -0,0 +1,78 @@ +## Test various combinations of ELF flag values for SPARC. 
+ +# RUN: yaml2obj %s -o %t -DCLASS_NAME="ELFCLASS32" -DMACHINE_NAME="EM_SPARC" -DFLAG_NAME="" +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=LLVM-COMMON -DFLAG_VALUE=0x0 +# RUN: llvm-readelf -h %t | FileCheck %s --match-full-lines --check-prefix=GNU \ +# RUN: -DFLAG_VALUE=0x0 -DGNU_FLAG_NAME="" + +# RUN: yaml2obj %s -o %t -DCLASS_NAME="ELFCLASS32" -DMACHINE_NAME="EM_SPARC32PLUS" -DFLAG_NAME="EF_SPARC_32PLUS" +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=LLVM-COMMON,LLVM-FLAG0 \ +# RUN: -DFLAG0_NAME=EF_SPARC_32PLUS -DFLAG_VALUE=0x100 +# RUN: llvm-readelf -h %t | FileCheck %s --match-full-lines --check-prefix=GNU \ +# RUN: -DFLAG_VALUE=0x100 -DGNU_FLAG_NAME=", V8+ ABI" + +# RUN: yaml2obj %s -o %t -DCLASS_NAME="ELFCLASS32" -DMACHINE_NAME="EM_SPARC32PLUS" \ +# RUN: -DFLAG_NAME="EF_SPARC_32PLUS, EF_SPARC_SUN_US1" +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=LLVM-COMMON,LLVM-FLAG0,LLVM-FLAG1\ +# RUN: -DFLAG_VALUE=0x300 \ +# RUN: -DFLAG0_NAME=EF_SPARC_32PLUS -DFLAG1_NAME=EF_SPARC_SUN_US1 +# RUN: llvm-readelf -h %t | FileCheck %s --match-full-lines --check-prefix=GNU \ +# RUN: -DFLAG_VALUE=0x300 \ +# RUN: -DGNU_FLAG_NAME=", V8+ ABI, Sun UltraSPARC I extensions" + +# RUN: yaml2obj %s -o %t -DCLASS_NAME="ELFCLASS32" -DMACHINE_NAME="EM_SPARC32PLUS" \ +# RUN: -DFLAG_NAME="EF_SPARC_32PLUS, EF_SPARC_SUN_US1, EF_SPARC_HAL_R1, EF_SPARC_SUN_US3" +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=LLVM-COMMON,LLVM-FLAG0,LLVM-FLAG1,LLVM-FLAG2,LLVM-FLAG3 \ +# RUN: -DFLAG_VALUE=0xF00 \ +# RUN: -DFLAG0_NAME=EF_SPARC_32PLUS -DFLAG1_NAME=EF_SPARC_HAL_R1 \ +# RUN: -DFLAG2_NAME=EF_SPARC_SUN_US1 -DFLAG3_NAME=EF_SPARC_SUN_US3 +# RUN: llvm-readelf -h %t | FileCheck %s --match-full-lines --check-prefix=GNU \ +# RUN: -DFLAG_VALUE=0xF00 \ +# RUN: -DGNU_FLAG_NAME=", V8+ ABI, Sun UltraSPARC I extensions, HAL/Fujitsu R1 extensions, Sun UltraSPARC III extensions" + +# RUN: yaml2obj %s -o %t -DCLASS_NAME="ELFCLASS64" -DMACHINE_NAME="EM_SPARCV9" 
-DFLAG_NAME="" +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=LLVM-COMMON -DFLAG_VALUE=0x0 +# RUN: llvm-readelf -h %t | FileCheck %s --match-full-lines --check-prefix=GNU \ +# RUN: -DFLAG_VALUE=0x0 -DGNU_FLAG_NAME="" + +# RUN: yaml2obj %s -o %t -DCLASS_NAME="ELFCLASS64" -DMACHINE_NAME="EM_SPARCV9" -DFLAG_NAME="EF_SPARC_SUN_US1" +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=LLVM-COMMON,LLVM-FLAG0 \ +# RUN: -DFLAG_VALUE=0x200 -DFLAG0_NAME=EF_SPARC_SUN_US1 +# RUN: llvm-readelf -h %t | FileCheck %s --match-full-lines --check-prefix=GNU \ +# RUN: -DFLAG_VALUE=0x200 -DGNU_FLAG_NAME=", Sun UltraSPARC I extensions" + +# RUN: yaml2obj %s -o %t -DCLASS_NAME="ELFCLASS64" -DMACHINE_NAME="EM_SPARCV9" \ +# RUN: -DFLAG_NAME="EF_SPARC_SUN_US1, EF_SPARCV9_RMO" +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=LLVM-COMMON,LLVM-FLAG0,LLVM-FLAG1 \ +# RUN: -DFLAG_VALUE=0x202 \ +# RUN: -DFLAG0_NAME=EF_SPARCV9_RMO -DFLAG1_NAME=EF_SPARC_SUN_US1 +# RUN: llvm-readelf -h %t | FileCheck %s --match-full-lines --check-prefix=GNU \ +# RUN: -DFLAG_VALUE=0x202 \ +# RUN: -DGNU_FLAG_NAME=", Sun UltraSPARC I extensions, Relaxed Memory Ordering" + +# RUN: yaml2obj %s -o %t -DCLASS_NAME="ELFCLASS64" -DMACHINE_NAME="EM_SPARCV9" \ +# RUN: -DFLAG_NAME="EF_SPARC_SUN_US1, EF_SPARC_HAL_R1, EF_SPARC_SUN_US3, EF_SPARCV9_PSO" +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=LLVM-COMMON,LLVM-FLAG0,LLVM-FLAG1,LLVM-FLAG2,LLVM-FLAG3 \ +# RUN: -DFLAG_VALUE=0xE01 \ +# RUN: -DFLAG0_NAME=EF_SPARCV9_PSO -DFLAG1_NAME=EF_SPARC_HAL_R1 \ +# RUN: -DFLAG2_NAME=EF_SPARC_SUN_US1 -DFLAG3_NAME=EF_SPARC_SUN_US3 +# RUN: llvm-readelf -h %t | FileCheck %s --match-full-lines --check-prefix=GNU \ +# RUN: -DFLAG_VALUE=0xE01\ +# RUN: -DGNU_FLAG_NAME=", Sun UltraSPARC I extensions, HAL/Fujitsu R1 extensions, Sun UltraSPARC III extensions, Partial Store Ordering" + +--- !ELF +FileHeader: + Class: [[CLASS_NAME]] + Data: ELFDATA2MSB + Type: ET_REL + Machine: [[MACHINE_NAME]] + Flags: [ 
[[FLAG_NAME]] ] + +# LLVM-COMMON: Flags [ ([[FLAG_VALUE]]) +# LLVM-FLAG0-NEXT: [[FLAG0_NAME]] +# LLVM-FLAG1-NEXT: [[FLAG1_NAME]] +# LLVM-FLAG2-NEXT: [[FLAG2_NAME]] +# LLVM-FLAG3-NEXT: [[FLAG3_NAME]] +# LLVM-COMMON-NEXT: ] + +# GNU: Flags: [[FLAG_VALUE]][[GNU_FLAG_NAME]] diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 2b5e2f4a9d347e3..b4804246875c606 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -1684,6 +1684,16 @@ const EnumEntry ElfHeaderRISCVFlags[] = { ENUM_ENT(EF_RISCV_TSO, "TSO"), }; +const EnumEntry ElfHeaderSPARCFlags[] = { + ENUM_ENT(EF_SPARC_32PLUS, "V8+ ABI"), + ENUM_ENT(EF_SPARC_SUN_US1, "Sun UltraSPARC I extensions"), + ENUM_ENT(EF_SPARC_HAL_R1, "HAL/Fujitsu R1 extensions"), + ENUM_ENT(EF_SPARC_SUN_US3, "Sun UltraSPARC III extensions"), + ENUM_ENT(EF_SPARCV9_TSO, "Total Store Ordering"), + ENUM_ENT(EF_SPARCV9_PSO, "Partial Store Ordering"), + ENUM_ENT(EF_SPARCV9_RMO, "Relaxed Memory Ordering"), +}; + const EnumEntry ElfHeaderAVRFlags[] = { LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVR1), LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVR2), @@ -3625,6 +3635,9 @@ template void GNUELFDumper::printFileHeaders() { unsigned(ELF::EF_MIPS_ABI), unsigned(ELF::EF_MIPS_MACH)); else if (e.e_machine == EM_RISCV) ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderRISCVFlags)); + else if (e.e_machine == EM_SPARC32PLUS || e.e_machine == EM_SPARCV9) + ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderSPARCFlags), + unsigned(ELF::EF_SPARCV9_MM)); else if (e.e_machine == EM_AVR) ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderAVRFlags), unsigned(ELF::EF_AVR_ARCH_MASK)); @@ -7118,6 +7131,9 @@ template void LLVMELFDumper::printFileHeaders() { } } else if (E.e_machine == EM_RISCV) W.printFlags("Flags", E.e_flags, ArrayRef(ElfHeaderRISCVFlags)); + else if (E.e_machine == EM_SPARC32PLUS || E.e_machine == EM_SPARCV9) + W.printFlags("Flags", E.e_flags, ArrayRef(ElfHeaderSPARCFlags), + 
unsigned(ELF::EF_SPARCV9_MM)); else if (E.e_machine == EM_AVR) W.printFlags("Flags", E.e_flags, ArrayRef(ElfHeaderAVRFlags), unsigned(ELF::EF_AVR_ARCH_MASK)); From 54154f9f06e08b7ab3c7294352601ca4c6e5e160 Mon Sep 17 00:00:00 2001 From: jeffreytan81 Date: Thu, 15 Aug 2024 09:57:01 -0700 Subject: [PATCH 050/441] Fix single thread stepping timeout race condition (#104195) This PR fixes a potential race condition in https://github.com/llvm/llvm-project/pull/90930. This race can happen because the original code set `m_info->m_isAlive = true` **after** the timer thread is created. So if a context switch happens and the timer thread checks `m_info->m_isAlive` before the main thread gets a chance to run `m_info->m_isAlive = true`, the timer thread may treat `ThreadPlanSingleThreadTimeout` as not alive and simply exit, resulting in the async interrupt never being sent to resume all threads (deadlock). The PR fixes the race by initializing all state **before** the worker timer thread is created. Co-authored-by: jeffreytan81 --- lldb/source/Target/ThreadPlanSingleThreadTimeout.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lldb/source/Target/ThreadPlanSingleThreadTimeout.cpp b/lldb/source/Target/ThreadPlanSingleThreadTimeout.cpp index 0a939d55f4ce493..806ba95c508b7ca 100644 --- a/lldb/source/Target/ThreadPlanSingleThreadTimeout.cpp +++ b/lldb/source/Target/ThreadPlanSingleThreadTimeout.cpp @@ -29,10 +29,10 @@ ThreadPlanSingleThreadTimeout::ThreadPlanSingleThreadTimeout( : ThreadPlan(ThreadPlan::eKindSingleThreadTimeout, "Single thread timeout", thread, eVoteNo, eVoteNoOpinion), m_info(info), m_state(State::WaitTimeout) { - // TODO: reuse m_timer_thread without recreation. - m_timer_thread = std::thread(TimeoutThreadFunc, this); m_info->m_isAlive = true; m_state = m_info->m_last_state; + // TODO: reuse m_timer_thread without recreation. 
+ m_timer_thread = std::thread(TimeoutThreadFunc, this); } ThreadPlanSingleThreadTimeout::~ThreadPlanSingleThreadTimeout() { From dd3f1313ae27a76cfce68e926fd90ac7408b3a21 Mon Sep 17 00:00:00 2001 From: Keith Smiley Date: Thu, 15 Aug 2024 09:59:29 -0700 Subject: [PATCH 051/441] [bazel] Enable more lit self tests (#104285) I assume the intent of the initial `*/*.py` was to also collect things in `*.py`, but that's not what bazel does unless you use `**/*.py` which is what we're doing now. A few of these tests fail so I explicitly disabled them until someone has time to debug. --- .../llvm/utils/lit/tests/BUILD.bazel | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/utils/bazel/llvm-project-overlay/llvm/utils/lit/tests/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/utils/lit/tests/BUILD.bazel index 13f6f815d399504..d89626a6ee9e67c 100644 --- a/utils/bazel/llvm-project-overlay/llvm/utils/lit/tests/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/utils/lit/tests/BUILD.bazel @@ -34,7 +34,14 @@ expand_template( ] + glob(["Inputs/**"]), ) for src in glob( - ["*/*.py"], - exclude = ["Inputs/**"], + ["**/*.py"], + exclude = [ + "Inputs/**", + "discovery.py", # TODO: debug and re-enable + "max-time.py", + "selecting.py", + "shtest-recursive-substitution.py", + "use-llvm-tool.py", + ], ) ] From d68d2172f9f1f0659b8b4bdbbeb1ccd290a614b5 Mon Sep 17 00:00:00 2001 From: Volodymyr Vasylkun Date: Thu, 15 Aug 2024 18:08:23 +0100 Subject: [PATCH 052/441] [InstCombine] Fold `ucmp/scmp(x, y) >> N` to `zext/sext(x < y)` when N is one less than the width of the result of `ucmp/scmp` (#104009) Proof: https://alive2.llvm.org/ce/z/4diUqN --------- Co-authored-by: Nikita Popov --- .../InstCombine/InstCombineShifts.cpp | 15 +++ .../InstCombine/lshr-ashr-of-uscmp.ll | 107 ++++++++++++++++++ 2 files changed, 122 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/lshr-ashr-of-uscmp.ll diff --git 
a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 38f8a41214b6825..794b384d126eb66 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -511,6 +511,21 @@ Instruction *InstCombinerImpl::commonShiftTransforms(BinaryOperator &I) { if (match(Op1, m_Or(m_Value(), m_SpecificInt(BitWidth - 1)))) return replaceOperand(I, 1, ConstantInt::get(Ty, BitWidth - 1)); + Instruction *CmpIntr; + if ((I.getOpcode() == Instruction::LShr || + I.getOpcode() == Instruction::AShr) && + match(Op0, m_OneUse(m_Instruction(CmpIntr))) && + isa(CmpIntr) && + match(Op1, m_SpecificInt(Ty->getScalarSizeInBits() - 1))) { + Value *Cmp = + Builder.CreateICmp(cast(CmpIntr)->getLTPredicate(), + CmpIntr->getOperand(0), CmpIntr->getOperand(1)); + return CastInst::Create(I.getOpcode() == Instruction::LShr + ? Instruction::ZExt + : Instruction::SExt, + Cmp, Ty); + } + return nullptr; } diff --git a/llvm/test/Transforms/InstCombine/lshr-ashr-of-uscmp.ll b/llvm/test/Transforms/InstCombine/lshr-ashr-of-uscmp.ll new file mode 100644 index 000000000000000..93082de93f97a4c --- /dev/null +++ b/llvm/test/Transforms/InstCombine/lshr-ashr-of-uscmp.ll @@ -0,0 +1,107 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=instcombine -S | FileCheck %s + +declare void @use(i8 %val) + +; ucmp/scmp(x, y) >> N folds to either zext(x < y) or sext(x < y) +; if N is one less than the width of result of ucmp/scmp +define i8 @ucmp_to_zext(i32 %x, i32 %y) { +; CHECK-LABEL: define i8 @ucmp_to_zext( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X]], [[Y]] +; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i8 +; CHECK-NEXT: ret i8 [[TMP2]] +; + %1 = call i8 @llvm.ucmp(i32 %x, i32 %y) + %2 = lshr i8 %1, 7 + ret i8 %2 +} + +define i8 @ucmp_to_sext(i32 %x, i32 %y) { +; 
CHECK-LABEL: define i8 @ucmp_to_sext( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X]], [[Y]] +; CHECK-NEXT: [[TMP2:%.*]] = sext i1 [[TMP1]] to i8 +; CHECK-NEXT: ret i8 [[TMP2]] +; + %1 = call i8 @llvm.ucmp(i32 %x, i32 %y) + %2 = ashr i8 %1, 7 + ret i8 %2 +} + +define i8 @scmp_to_zext(i32 %x, i32 %y) { +; CHECK-LABEL: define i8 @scmp_to_zext( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X]], [[Y]] +; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i8 +; CHECK-NEXT: ret i8 [[TMP2]] +; + %1 = call i8 @llvm.scmp(i32 %x, i32 %y) + %2 = lshr i8 %1, 7 + ret i8 %2 +} + +define i8 @scmp_to_sext(i32 %x, i32 %y) { +; CHECK-LABEL: define i8 @scmp_to_sext( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X]], [[Y]] +; CHECK-NEXT: [[TMP2:%.*]] = sext i1 [[TMP1]] to i8 +; CHECK-NEXT: ret i8 [[TMP2]] +; + %1 = call i8 @llvm.scmp(i32 %x, i32 %y) + %2 = ashr i8 %1, 7 + ret i8 %2 +} + +define <4 x i8> @scmp_to_sext_vec(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: define <4 x i8> @scmp_to_sext_vec( +; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X]], [[Y]] +; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i8> +; CHECK-NEXT: ret <4 x i8> [[TMP2]] +; + %1 = call <4 x i8> @llvm.scmp(<4 x i32> %x, <4 x i32> %y) + %2 = ashr <4 x i8> %1, + ret <4 x i8> %2 +} + +; Negative test: incorrect shift amount +define i8 @ucmp_to_zext_neg1(i32 %x, i32 %y) { +; CHECK-LABEL: define i8 @ucmp_to_zext_neg1( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[X]], i32 [[Y]]) +; CHECK-NEXT: [[TMP2:%.*]] = lshr i8 [[TMP1]], 5 +; CHECK-NEXT: ret i8 [[TMP2]] +; + %1 = call i8 @llvm.ucmp(i32 %x, i32 %y) + %2 = lshr i8 %1, 5 + ret i8 %2 +} + +; Negative test: shift amount is not a constant +define i8 @ucmp_to_zext_neg2(i32 %x, i32 %y, i8 %s) { +; 
CHECK-LABEL: define i8 @ucmp_to_zext_neg2( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]], i8 [[S:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[X]], i32 [[Y]]) +; CHECK-NEXT: [[TMP2:%.*]] = lshr i8 [[TMP1]], [[S]] +; CHECK-NEXT: ret i8 [[TMP2]] +; + %1 = call i8 @llvm.ucmp(i32 %x, i32 %y) + %2 = lshr i8 %1, %s + ret i8 %2 +} + +; Negative test: the result of ucmp/scmp is used more than once +define i8 @ucmp_to_zext_neg3(i32 %x, i32 %y) { +; CHECK-LABEL: define i8 @ucmp_to_zext_neg3( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[X]], i32 [[Y]]) +; CHECK-NEXT: call void @use(i8 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = lshr i8 [[TMP1]], 7 +; CHECK-NEXT: ret i8 [[TMP2]] +; + %1 = call i8 @llvm.ucmp(i32 %x, i32 %y) + call void @use(i8 %1) + %2 = lshr i8 %1, 7 + ret i8 %2 +} From 062e69a647c7ea0bc3441223648f9989490abb7a Mon Sep 17 00:00:00 2001 From: Leandro Lupori Date: Thu, 15 Aug 2024 14:14:18 -0300 Subject: [PATCH 053/441] [flang][OpenMP] Fix 2 more regressions after #101009 (#101538) PR #101009 exposed a semantic check issue with OPTIONAL dummy arguments. Another issue occurred when using %{re,im,len,kind}, as these also need to be skipped when handling variables with implicitly defined DSAs. These issues were found by Fujitsu testsuite. 
--- flang/lib/Semantics/check-call.cpp | 3 ++- flang/lib/Semantics/resolve-directives.cpp | 15 ++++++++------- flang/test/Semantics/OpenMP/complex.f90 | 13 +++++++++++++ flang/test/Semantics/OpenMP/present.f90 | 9 +++++++++ 4 files changed, 32 insertions(+), 8 deletions(-) create mode 100644 flang/test/Semantics/OpenMP/complex.f90 create mode 100644 flang/test/Semantics/OpenMP/present.f90 diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp index 9fad1aa3dd0bf97..4708d51d3af4dd9 100644 --- a/flang/lib/Semantics/check-call.cpp +++ b/flang/lib/Semantics/check-call.cpp @@ -1645,7 +1645,8 @@ static void CheckPresent(evaluate::ActualArguments &arguments, } else { symbol = arg->GetAssumedTypeDummy(); } - if (!symbol || !symbol->attrs().test(semantics::Attr::OPTIONAL)) { + if (!symbol || + !symbol->GetUltimate().attrs().test(semantics::Attr::OPTIONAL)) { messages.Say(arg ? arg->sourceLocation() : messages.at(), "Argument of PRESENT() must be the name of a whole OPTIONAL dummy argument"_err_en_US); } diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index d635a7b8b7874fa..cc9f1cc7ed2691d 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -2036,20 +2036,21 @@ void OmpAttributeVisitor::Post(const parser::OpenMPAllocatorsConstruct &x) { void OmpAttributeVisitor::Post(const parser::Name &name) { auto *symbol{name.symbol}; auto IsPrivatizable = [](const Symbol *sym) { + auto *misc{sym->detailsIf()}; return !IsProcedure(*sym) && !IsNamedConstant(*sym) && !sym->owner().IsDerivedType() && sym->owner().kind() != Scope::Kind::ImpliedDos && !sym->detailsIf() && - !sym->detailsIf(); + !sym->detailsIf() && + (!misc || + (misc->kind() != MiscDetails::Kind::ComplexPartRe && + misc->kind() != MiscDetails::Kind::ComplexPartIm && + misc->kind() != MiscDetails::Kind::KindParamInquiry && + misc->kind() != MiscDetails::Kind::LenParamInquiry && + 
misc->kind() != MiscDetails::Kind::ConstructName)); }; if (symbol && !dirContext_.empty() && GetContext().withinConstruct) { - // Exclude construct-names - if (auto *details{symbol->detailsIf()}) { - if (details->kind() == semantics::MiscDetails::Kind::ConstructName) { - return; - } - } if (IsPrivatizable(symbol) && !IsObjectWithDSA(*symbol)) { // TODO: create a separate function to go through the rules for // predetermined, explicitly determined, and implicitly diff --git a/flang/test/Semantics/OpenMP/complex.f90 b/flang/test/Semantics/OpenMP/complex.f90 new file mode 100644 index 000000000000000..62336c7e6b31abd --- /dev/null +++ b/flang/test/Semantics/OpenMP/complex.f90 @@ -0,0 +1,13 @@ +! RUN: %flang_fc1 -fopenmp -fsyntax-only %s + +! Check that using %re/%im inside 'parallel' doesn't cause syntax errors. +subroutine test_complex_re_im + complex :: cc(4) = (1,2) + integer :: i + + !$omp parallel do private(cc) + do i = 1, 4 + print *, cc(i)%re, cc(i)%im + end do + !$omp end parallel do +end subroutine diff --git a/flang/test/Semantics/OpenMP/present.f90 b/flang/test/Semantics/OpenMP/present.f90 new file mode 100644 index 000000000000000..31bdcc7a8c654ee --- /dev/null +++ b/flang/test/Semantics/OpenMP/present.f90 @@ -0,0 +1,9 @@ +! RUN: %flang_fc1 -fopenmp -fsyntax-only %s + +! Check that using 'present' inside 'parallel' doesn't cause syntax errors. +subroutine test_present(opt) + integer, optional :: opt + !$omp parallel + if (present(opt)) print *, "present" + !$omp end parallel +end subroutine From e398da2b37fcc2696e1f5c661e5372844f4e1550 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Thu, 15 Aug 2024 10:11:29 -0700 Subject: [PATCH 054/441] Revert "[Clang] Overflow Pattern Exclusions (#100272)" This reverts commit 9a666deecb9ff6ca3a6b12e6c2877e19b74b54da. 
Reason: broke buildbots e.g., fork-ubsan.test started failing at https://lab.llvm.org/buildbot/#/builders/66/builds/2819/steps/9/logs/stdio Clang :: CodeGen/compound-assign-overflow.c Clang :: CodeGen/sanitize-atomic-int-overflow.c started failing with https://lab.llvm.org/buildbot/#/builders/52/builds/1570 --- clang/docs/ReleaseNotes.rst | 30 ------- clang/docs/UndefinedBehaviorSanitizer.rst | 42 ---------- clang/include/clang/AST/Expr.h | 9 -- clang/include/clang/AST/Stmt.h | 5 -- clang/include/clang/Basic/LangOptions.def | 2 - clang/include/clang/Basic/LangOptions.h | 28 ------- clang/include/clang/Driver/Options.td | 5 -- clang/include/clang/Driver/SanitizerArgs.h | 1 - clang/lib/AST/Expr.cpp | 54 ------------ clang/lib/CodeGen/CGExprScalar.cpp | 41 +-------- clang/lib/Driver/SanitizerArgs.cpp | 37 --------- clang/lib/Driver/ToolChains/Clang.cpp | 3 - clang/lib/Frontend/CompilerInvocation.cpp | 13 --- clang/lib/Serialization/ASTReaderStmt.cpp | 1 - clang/lib/Serialization/ASTWriterStmt.cpp | 1 - .../CodeGen/overflow-idiom-exclusion-fp.c | 83 ------------------- 16 files changed, 2 insertions(+), 353 deletions(-) delete mode 100644 clang/test/CodeGen/overflow-idiom-exclusion-fp.c diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index b6b7dd5705637a3..4872dbb7a556ad2 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -395,36 +395,6 @@ Moved checkers Sanitizers ---------- -- Added the ``-fsanitize-overflow-pattern-exclusion=`` flag which can be used - to disable specific overflow-dependent code patterns. The supported patterns - are: ``add-overflow-test``, ``negated-unsigned-const``, and - ``post-decr-while``. The sanitizer instrumentation can be toggled off for all - available patterns by specifying ``all``. Conversely, you can disable all - exclusions with ``none``. - - .. 
code-block:: c++ - - /// specified with ``-fsanitize-overflow-pattern-exclusion=add-overflow-test`` - int common_overflow_check_pattern(unsigned base, unsigned offset) { - if (base + offset < base) { /* ... */ } // The pattern of `a + b < a`, and other re-orderings, won't be instrumented - } - - /// specified with ``-fsanitize-overflow-pattern-exclusion=negated-unsigned-const`` - void negation_overflow() { - unsigned long foo = -1UL; // No longer causes a negation overflow warning - unsigned long bar = -2UL; // and so on... - } - - /// specified with ``-fsanitize-overflow-pattern-exclusion=post-decr-while`` - void while_post_decrement() { - unsigned char count = 16; - while (count--) { /* ... */} // No longer causes unsigned-integer-overflow sanitizer to trip - } - - Many existing projects have a large amount of these code patterns present. - This new flag should allow those projects to enable integer sanitizers with - less noise. - Python Binding Changes ---------------------- - Fixed an issue that led to crashes when calling ``Type.get_exception_specification_kind``. diff --git a/clang/docs/UndefinedBehaviorSanitizer.rst b/clang/docs/UndefinedBehaviorSanitizer.rst index 9f3d980eefbea77..531d56e313826c7 100644 --- a/clang/docs/UndefinedBehaviorSanitizer.rst +++ b/clang/docs/UndefinedBehaviorSanitizer.rst @@ -293,48 +293,6 @@ To silence reports from unsigned integer overflow, you can set ``-fsanitize-recover=unsigned-integer-overflow``, is particularly useful for providing fuzzing signal without blowing up logs. -Disabling instrumentation for common overflow patterns ------------------------------------------------------- - -There are certain overflow-dependent or overflow-prone code patterns which -produce a lot of noise for integer overflow/truncation sanitizers. Negated -unsigned constants, post-decrements in a while loop condition and simple -overflow checks are accepted and pervasive code patterns. 
However, the signal -received from sanitizers instrumenting these code patterns may be too noisy for -some projects. To disable instrumentation for these common patterns one should -use ``-fsanitize-overflow-pattern-exclusion=``. - -Currently, this option supports three overflow-dependent code idioms: - -``negated-unsigned-const`` - -.. code-block:: c++ - - /// -fsanitize-overflow-pattern-exclusion=negated-unsigned-const - unsigned long foo = -1UL; // No longer causes a negation overflow warning - unsigned long bar = -2UL; // and so on... - -``post-decr-while`` - -.. code-block:: c++ - - /// -fsanitize-overflow-pattern-exclusion=post-decr-while - unsigned char count = 16; - while (count--) { /* ... */ } // No longer causes unsigned-integer-overflow sanitizer to trip - -``add-overflow-test`` - -.. code-block:: c++ - - /// -fsanitize-overflow-pattern-exclusion=add-overflow-test - if (base + offset < base) { /* ... */ } // The pattern of `a + b < a`, and other re-orderings, - // won't be instrumented (same for signed types) - -You can enable all exclusions with -``-fsanitize-overflow-pattern-exclusion=all`` or disable all exclusions with -``-fsanitize-overflow-pattern-exclusion=none``. Specifying ``none`` has -precedence over other values. 
- Issue Suppression ================= diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index f5863524723a2ec..5b813bfc2faf908 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -4043,15 +4043,6 @@ class BinaryOperator : public Expr { void setHasStoredFPFeatures(bool B) { BinaryOperatorBits.HasFPFeatures = B; } bool hasStoredFPFeatures() const { return BinaryOperatorBits.HasFPFeatures; } - /// Set and get the bit that informs arithmetic overflow sanitizers whether - /// or not they should exclude certain BinaryOperators from instrumentation - void setExcludedOverflowPattern(bool B) { - BinaryOperatorBits.ExcludedOverflowPattern = B; - } - bool hasExcludedOverflowPattern() const { - return BinaryOperatorBits.ExcludedOverflowPattern; - } - /// Get FPFeatures from trailing storage FPOptionsOverride getStoredFPFeatures() const { assert(hasStoredFPFeatures()); diff --git a/clang/include/clang/AST/Stmt.h b/clang/include/clang/AST/Stmt.h index f1a2aac0a8b2f8f..bbd7634bcc3bfb3 100644 --- a/clang/include/clang/AST/Stmt.h +++ b/clang/include/clang/AST/Stmt.h @@ -650,11 +650,6 @@ class alignas(void *) Stmt { LLVM_PREFERRED_TYPE(bool) unsigned HasFPFeatures : 1; - /// Whether or not this BinaryOperator should be excluded from integer - /// overflow sanitization. - LLVM_PREFERRED_TYPE(bool) - unsigned ExcludedOverflowPattern : 1; - SourceLocation OpLoc; }; diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 2e9f2c552aad8a2..d454a7ff2f8cf49 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -406,8 +406,6 @@ VALUE_LANGOPT(TrivialAutoVarInitMaxSize, 32, 0, "stop trivial automatic variable initialization if var size exceeds the specified size (in bytes). 
Must be greater than 0.") ENUM_LANGOPT(SignedOverflowBehavior, SignedOverflowBehaviorTy, 2, SOB_Undefined, "signed integer overflow handling") -LANGOPT(IgnoreNegationOverflow, 1, 0, "ignore overflow caused by negation") -LANGOPT(SanitizeOverflowIdioms, 1, 1, "enable instrumentation for common overflow idioms") ENUM_LANGOPT(ThreadModel , ThreadModelKind, 2, ThreadModelKind::POSIX, "Thread Model") BENIGN_LANGOPT(ArrowDepth, 32, 256, diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index eb4cb4b5a7e93fa..91f1c2f2e6239ee 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -367,21 +367,6 @@ class LangOptionsBase { PerThread, }; - /// Exclude certain code patterns from being instrumented by arithmetic - /// overflow sanitizers - enum OverflowPatternExclusionKind { - /// Don't exclude any overflow patterns from sanitizers - None = 1 << 0, - /// Exclude all overflow patterns (below) - All = 1 << 1, - /// if (a + b < a) - AddOverflowTest = 1 << 2, - /// -1UL - NegUnsignedConst = 1 << 3, - /// while (count--) - PostDecrInWhile = 1 << 4, - }; - enum class DefaultVisiblityExportMapping { None, /// map only explicit default visibilities to exported @@ -570,11 +555,6 @@ class LangOptions : public LangOptionsBase { /// The default stream kind used for HIP kernel launching. GPUDefaultStreamKind GPUDefaultStream; - /// Which overflow patterns should be excluded from sanitizer instrumentation - unsigned OverflowPatternExclusionMask = 0; - - std::vector OverflowPatternExclusionValues; - /// The seed used by the randomize structure layout feature. 
std::string RandstructSeed; @@ -650,14 +630,6 @@ class LangOptions : public LangOptionsBase { return MSCompatibilityVersion >= MajorVersion * 100000U; } - bool isOverflowPatternExcluded(OverflowPatternExclusionKind Kind) const { - if (OverflowPatternExclusionMask & OverflowPatternExclusionKind::None) - return false; - if (OverflowPatternExclusionMask & OverflowPatternExclusionKind::All) - return true; - return OverflowPatternExclusionMask & Kind; - } - /// Reset all of the options that are not considered when building a /// module. void resetNonModularOptions(); diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 73e19b65dededb7..40df91dc3fe0e3d 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2565,11 +2565,6 @@ defm sanitize_stats : BoolOption<"f", "sanitize-stats", "Disable">, BothFlags<[], [ClangOption], " sanitizer statistics gathering.">>, Group; -def fsanitize_overflow_pattern_exclusion_EQ : CommaJoined<["-"], "fsanitize-overflow-pattern-exclusion=">, - HelpText<"Specify the overflow patterns to exclude from artihmetic sanitizer instrumentation">, - Visibility<[ClangOption, CC1Option]>, - Values<"none,all,add-overflow-test,negated-unsigned-const,post-decr-while">, - MarshallingInfoStringVector>; def fsanitize_thread_memory_access : Flag<["-"], "fsanitize-thread-memory-access">, Group, HelpText<"Enable memory access instrumentation in ThreadSanitizer (default)">; diff --git a/clang/include/clang/Driver/SanitizerArgs.h b/clang/include/clang/Driver/SanitizerArgs.h index e64ec463ca89070..47ef175302679f8 100644 --- a/clang/include/clang/Driver/SanitizerArgs.h +++ b/clang/include/clang/Driver/SanitizerArgs.h @@ -33,7 +33,6 @@ class SanitizerArgs { std::vector BinaryMetadataIgnorelistFiles; int CoverageFeatures = 0; int BinaryMetadataFeatures = 0; - int OverflowPatternExclusions = 0; int MsanTrackOrigins = 0; bool MsanUseAfterDtor = true; bool MsanParamRetval = true; diff 
--git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 57475c66a94e356..9d5b8167d0ee628 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -4759,53 +4759,6 @@ ParenListExpr *ParenListExpr::CreateEmpty(const ASTContext &Ctx, return new (Mem) ParenListExpr(EmptyShell(), NumExprs); } -/// Certain overflow-dependent code patterns can have their integer overflow -/// sanitization disabled. Check for the common pattern `if (a + b < a)` and -/// return the resulting BinaryOperator responsible for the addition so we can -/// elide overflow checks during codegen. -static std::optional -getOverflowPatternBinOp(const BinaryOperator *E) { - Expr *Addition, *ComparedTo; - if (E->getOpcode() == BO_LT) { - Addition = E->getLHS(); - ComparedTo = E->getRHS(); - } else if (E->getOpcode() == BO_GT) { - Addition = E->getRHS(); - ComparedTo = E->getLHS(); - } else { - return {}; - } - - const Expr *AddLHS = nullptr, *AddRHS = nullptr; - BinaryOperator *BO = dyn_cast(Addition); - - if (BO && BO->getOpcode() == clang::BO_Add) { - // now store addends for lookup on other side of '>' - AddLHS = BO->getLHS(); - AddRHS = BO->getRHS(); - } - - if (!AddLHS || !AddRHS) - return {}; - - const Decl *LHSDecl, *RHSDecl, *OtherDecl; - - LHSDecl = AddLHS->IgnoreParenImpCasts()->getReferencedDeclOfCallee(); - RHSDecl = AddRHS->IgnoreParenImpCasts()->getReferencedDeclOfCallee(); - OtherDecl = ComparedTo->IgnoreParenImpCasts()->getReferencedDeclOfCallee(); - - if (!OtherDecl) - return {}; - - if (!LHSDecl && !RHSDecl) - return {}; - - if ((LHSDecl && LHSDecl == OtherDecl && LHSDecl != RHSDecl) || - (RHSDecl && RHSDecl == OtherDecl && RHSDecl != LHSDecl)) - return BO; - return {}; -} - BinaryOperator::BinaryOperator(const ASTContext &Ctx, Expr *lhs, Expr *rhs, Opcode opc, QualType ResTy, ExprValueKind VK, ExprObjectKind OK, SourceLocation opLoc, @@ -4815,15 +4768,8 @@ BinaryOperator::BinaryOperator(const ASTContext &Ctx, Expr *lhs, Expr *rhs, assert(!isCompoundAssignmentOp() && 
"Use CompoundAssignOperator for compound assignments"); BinaryOperatorBits.OpLoc = opLoc; - BinaryOperatorBits.ExcludedOverflowPattern = 0; SubExprs[LHS] = lhs; SubExprs[RHS] = rhs; - if (Ctx.getLangOpts().isOverflowPatternExcluded( - LangOptions::OverflowPatternExclusionKind::AddOverflowTest)) { - std::optional Result = getOverflowPatternBinOp(this); - if (Result.has_value()) - Result.value()->BinaryOperatorBits.ExcludedOverflowPattern = 1; - } BinaryOperatorBits.HasFPFeatures = FPFeatures.requiresTrailingStorage(); if (hasStoredFPFeatures()) setStoredFPFeatures(FPFeatures); diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 6eac2b4c54e1ba3..84392745ea6144d 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -24,7 +24,6 @@ #include "clang/AST/Attr.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/Expr.h" -#include "clang/AST/ParentMapContext.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtVisitor.h" #include "clang/Basic/CodeGenOptions.h" @@ -196,24 +195,13 @@ static bool CanElideOverflowCheck(const ASTContext &Ctx, const BinOpInfo &Op) { if (!Op.mayHaveIntegerOverflow()) return true; - const UnaryOperator *UO = dyn_cast(Op.E); - - if (UO && UO->getOpcode() == UO_Minus && - Ctx.getLangOpts().isOverflowPatternExcluded( - LangOptions::OverflowPatternExclusionKind::NegUnsignedConst) && - UO->isIntegerConstantExpr(Ctx)) - return true; - // If a unary op has a widened operand, the op cannot overflow. - if (UO) + if (const auto *UO = dyn_cast(Op.E)) return !UO->canOverflow(); // We usually don't need overflow checks for binops with widened operands. // Multiplication with promoted unsigned operands is a special case. 
const auto *BO = cast(Op.E); - if (BO->hasExcludedOverflowPattern()) - return true; - auto OptionalLHSTy = getUnwidenedIntegerType(Ctx, BO->getLHS()); if (!OptionalLHSTy) return false; @@ -2778,26 +2766,6 @@ llvm::Value *ScalarExprEmitter::EmitIncDecConsiderOverflowBehavior( llvm_unreachable("Unknown SignedOverflowBehaviorTy"); } -/// For the purposes of overflow pattern exclusion, does this match the -/// "while(i--)" pattern? -static bool matchesPostDecrInWhile(const UnaryOperator *UO, bool isInc, - bool isPre, ASTContext &Ctx) { - if (isInc || isPre) - return false; - - // -fsanitize-overflow-pattern-exclusion=post-decr-while - if (!Ctx.getLangOpts().isOverflowPatternExcluded( - LangOptions::OverflowPatternExclusionKind::PostDecrInWhile)) - return false; - - // all Parents (usually just one) must be a WhileStmt - for (const auto &Parent : Ctx.getParentMapContext().getParents(*UO)) - if (!Parent.get()) - return false; - - return true; -} - namespace { /// Handles check and update for lastprivate conditional variables. 
class OMPLastprivateConditionalUpdateRAII { @@ -2909,10 +2877,6 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, } else if (type->isIntegerType()) { QualType promotedType; bool canPerformLossyDemotionCheck = false; - - bool excludeOverflowPattern = - matchesPostDecrInWhile(E, isInc, isPre, CGF.getContext()); - if (CGF.getContext().isPromotableIntegerType(type)) { promotedType = CGF.getContext().getPromotedIntegerType(type); assert(promotedType != type && "Shouldn't promote to the same type."); @@ -2972,8 +2936,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, } else if (E->canOverflow() && type->isSignedIntegerOrEnumerationType()) { value = EmitIncDecConsiderOverflowBehavior(E, value, isInc); } else if (E->canOverflow() && type->isUnsignedIntegerType() && - CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) && - !excludeOverflowPattern) { + CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow)) { value = EmitOverflowCheckedBinOp(createBinOpInfoFromIncDec( E, value, isInc, E->getFPFeaturesInEffect(CGF.getLangOpts()))); } else { diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index a63ee944fd1bb41..1fd870b72286e55 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -119,10 +119,6 @@ static SanitizerMask parseArgValues(const Driver &D, const llvm::opt::Arg *A, static int parseCoverageFeatures(const Driver &D, const llvm::opt::Arg *A, bool DiagnoseErrors); -static int parseOverflowPatternExclusionValues(const Driver &D, - const llvm::opt::Arg *A, - bool DiagnoseErrors); - /// Parse -f(no-)?sanitize-metadata= flag values, diagnosing any invalid /// components. Returns OR of members of \c BinaryMetadataFeature enumeration. 
static int parseBinaryMetadataFeatures(const Driver &D, const llvm::opt::Arg *A, @@ -792,13 +788,6 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC, << "fsanitize-trap=cfi"; } - for (const auto *Arg : - Args.filtered(options::OPT_fsanitize_overflow_pattern_exclusion_EQ)) { - Arg->claim(); - OverflowPatternExclusions |= - parseOverflowPatternExclusionValues(D, Arg, DiagnoseErrors); - } - // Parse -f(no-)?sanitize-coverage flags if coverage is supported by the // enabled sanitizers. for (const auto *Arg : Args) { @@ -1252,10 +1241,6 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args, addSpecialCaseListOpt(Args, CmdArgs, "-fsanitize-system-ignorelist=", SystemIgnorelistFiles); - if (OverflowPatternExclusions) - Args.AddAllArgs(CmdArgs, - options::OPT_fsanitize_overflow_pattern_exclusion_EQ); - if (MsanTrackOrigins) CmdArgs.push_back(Args.MakeArgString("-fsanitize-memory-track-origins=" + Twine(MsanTrackOrigins))); @@ -1441,28 +1426,6 @@ SanitizerMask parseArgValues(const Driver &D, const llvm::opt::Arg *A, return Kinds; } -static int parseOverflowPatternExclusionValues(const Driver &D, - const llvm::opt::Arg *A, - bool DiagnoseErrors) { - int Exclusions = 0; - for (int i = 0, n = A->getNumValues(); i != n; ++i) { - const char *Value = A->getValue(i); - int E = - llvm::StringSwitch(Value) - .Case("none", LangOptionsBase::None) - .Case("all", LangOptionsBase::All) - .Case("add-overflow-test", LangOptionsBase::AddOverflowTest) - .Case("negated-unsigned-const", LangOptionsBase::NegUnsignedConst) - .Case("post-decr-while", LangOptionsBase::PostDecrInWhile) - .Default(0); - if (E == 0) - D.Diag(clang::diag::err_drv_unsupported_option_argument) - << A->getSpelling() << Value; - Exclusions |= E; - } - return Exclusions; -} - int parseCoverageFeatures(const Driver &D, const llvm::opt::Arg *A, bool DiagnoseErrors) { assert(A->getOption().matches(options::OPT_fsanitize_coverage) || diff --git a/clang/lib/Driver/ToolChains/Clang.cpp 
b/clang/lib/Driver/ToolChains/Clang.cpp index f2bc11839edd4d0..96aa930ea286129 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7769,9 +7769,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddLastArg(CmdArgs, options::OPT_fgpu_default_stream_EQ); } - Args.AddAllArgs(CmdArgs, - options::OPT_fsanitize_overflow_pattern_exclusion_EQ); - Args.AddLastArg(CmdArgs, options::OPT_foffload_uniform_block, options::OPT_fno_offload_uniform_block); diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 5a5f5cb79a12f2b..e3911c281985b72 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -4267,19 +4267,6 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Val; } - if (auto *A = Args.getLastArg(OPT_fsanitize_overflow_pattern_exclusion_EQ)) { - for (int i = 0, n = A->getNumValues(); i != n; ++i) { - Opts.OverflowPatternExclusionMask |= - llvm::StringSwitch(A->getValue(i)) - .Case("none", LangOptionsBase::None) - .Case("all", LangOptionsBase::All) - .Case("add-overflow-test", LangOptionsBase::AddOverflowTest) - .Case("negated-unsigned-const", LangOptionsBase::NegUnsignedConst) - .Case("post-decr-while", LangOptionsBase::PostDecrInWhile) - .Default(0); - } - } - // Parse -fsanitize= arguments. 
parseSanitizerKinds("-fsanitize=", Args.getAllArgValues(OPT_fsanitize_EQ), Diags, Opts.Sanitize); diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 8ae07907a04abaa..a33f2a41a654974 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -1128,7 +1128,6 @@ void ASTStmtReader::VisitBinaryOperator(BinaryOperator *E) { (BinaryOperator::Opcode)CurrentUnpackingBits->getNextBits(/*Width=*/6)); bool hasFP_Features = CurrentUnpackingBits->getNextBit(); E->setHasStoredFPFeatures(hasFP_Features); - E->setExcludedOverflowPattern(CurrentUnpackingBits->getNextBit()); E->setLHS(Record.readSubExpr()); E->setRHS(Record.readSubExpr()); E->setOperatorLoc(readSourceLocation()); diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index c292d0a789c7cd9..038616a675b7271 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -1063,7 +1063,6 @@ void ASTStmtWriter::VisitBinaryOperator(BinaryOperator *E) { CurrentPackingBits.addBits(E->getOpcode(), /*Width=*/6); bool HasFPFeatures = E->hasStoredFPFeatures(); CurrentPackingBits.addBit(HasFPFeatures); - CurrentPackingBits.addBit(E->hasExcludedOverflowPattern()); Record.AddStmt(E->getLHS()); Record.AddStmt(E->getRHS()); Record.AddSourceLocation(E->getOperatorLoc()); diff --git a/clang/test/CodeGen/overflow-idiom-exclusion-fp.c b/clang/test/CodeGen/overflow-idiom-exclusion-fp.c deleted file mode 100644 index 511a88cc7a28368..000000000000000 --- a/clang/test/CodeGen/overflow-idiom-exclusion-fp.c +++ /dev/null @@ -1,83 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow 
-fsanitize-overflow-pattern-exclusion=all -fwrapv -emit-llvm -o - %s | FileCheck %s - -// Check for potential false positives from patterns that _almost_ match classic overflow-dependent or overflow-prone code patterns -extern unsigned a, b, c; -extern int u, v, w; - -extern unsigned some(void); - -// Make sure all these still have handler paths, we shouldn't be excluding -// instrumentation of any "near" patterns. -// CHECK-LABEL: close_but_not_quite -void close_but_not_quite(void) { - // CHECK: br i1{{.*}}handler. - if (a + b > a) - c = 9; - - // CHECK: br i1{{.*}}handler. - if (a - b < a) - c = 9; - - // CHECK: br i1{{.*}}handler. - if (a + b < a) - c = 9; - - // CHECK: br i1{{.*}}handler. - if (a + b + 1 < a) - c = 9; - - // CHECK: br i1{{.*}}handler. - // CHECK: br i1{{.*}}handler. - if (a + b < a + 1) - c = 9; - - // CHECK: br i1{{.*}}handler. - if (b >= a + b) - c = 9; - - // CHECK: br i1{{.*}}handler. - if (a + a < a) - c = 9; - - // CHECK: br i1{{.*}}handler. - if (a + b == a) - c = 9; - - // CHECK: br i1{{.*}}handler - // Although this can never actually overflow we are still checking that the - // sanitizer instruments it. - while (--a) - some(); -} - -// cvise'd kernel code that caused problems during development -typedef unsigned _size_t; -typedef enum { FSE_repeat_none } FSE_repeat; -typedef enum { ZSTD_defaultAllowed } ZSTD_defaultPolicy_e; -FSE_repeat ZSTD_selectEncodingType_repeatMode; -ZSTD_defaultPolicy_e ZSTD_selectEncodingType_isDefaultAllowed; -_size_t ZSTD_NCountCost(void); - -// CHECK-LABEL: ZSTD_selectEncodingType -// CHECK: br i1{{.*}}handler -void ZSTD_selectEncodingType(void) { - _size_t basicCost = - ZSTD_selectEncodingType_isDefaultAllowed ? 
ZSTD_NCountCost() : 0, - compressedCost = 3 + ZSTD_NCountCost(); - if (basicCost <= compressedCost) - ZSTD_selectEncodingType_repeatMode = FSE_repeat_none; -} - -// CHECK-LABEL: function_calls -void function_calls(void) { - // CHECK: br i1{{.*}}handler - if (some() + b < some()) - c = 9; -} - -// CHECK-LABEL: not_quite_a_negated_unsigned_const -void not_quite_a_negated_unsigned_const(void) { - // CHECK: br i1{{.*}}handler - a = -b; -} From 7332713b8eea9bb84d8481376f62b8de7c0ddb3a Mon Sep 17 00:00:00 2001 From: "Oleksandr T." Date: Thu, 15 Aug 2024 20:47:14 +0300 Subject: [PATCH 055/441] [Clang] prevent null explicit object argument from being deduced (#104328) Fixes #102025 --- clang/docs/ReleaseNotes.rst | 1 + clang/lib/Sema/SemaTemplateDeduction.cpp | 3 +++ clang/test/SemaCXX/cxx2b-deducing-this.cpp | 19 +++++++++++++++++++ 3 files changed, 23 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 4872dbb7a556ad2..5ba9fcb040e3a61 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -257,6 +257,7 @@ Bug Fixes to C++ Support - Properly reject defaulted relational operators with invalid types for explicit object parameters, e.g., ``bool operator==(this int, const Foo&)`` (#GH100329), and rvalue reference parameters. - Properly reject defaulted copy/move assignment operators that have a non-reference explicit object parameter. +- Fixed an assertion failure by preventing null explicit object arguments from being deduced. (#GH102025). 
Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index ec951d5ac06dbc6..fc883e423f47105 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -4462,6 +4462,9 @@ TemplateDeductionResult Sema::DeduceTemplateArguments( ParamTypesForArgChecking.push_back(ParamType); if (ParamIdx == 0 && HasExplicitObject) { + if (ObjectType.isNull()) + return TemplateDeductionResult::InvalidExplicitArguments; + if (auto Result = DeduceCallArgument(ParamType, 0, /*ExplicitObjectArgument=*/true); Result != TemplateDeductionResult::Success) diff --git a/clang/test/SemaCXX/cxx2b-deducing-this.cpp b/clang/test/SemaCXX/cxx2b-deducing-this.cpp index 23fb383fb73cbbe..bfcc5991cee5aca 100644 --- a/clang/test/SemaCXX/cxx2b-deducing-this.cpp +++ b/clang/test/SemaCXX/cxx2b-deducing-this.cpp @@ -1011,3 +1011,22 @@ struct X; static_assert(__is_same(X, X)); static_assert(!__is_same(X, X)); } // namespace defaulted_compare + +namespace GH102025 { +struct Foo { + template + constexpr auto operator[](this T &&self, auto... i) // expected-note {{candidate template ignored: substitution failure [with T = Foo &, i:auto = <>]: member '_evaluate' used before its declaration}} + -> decltype(_evaluate(self, i...)) { + return self._evaluate(i...); + } + +private: + template + constexpr auto _evaluate(this T &&self, auto... i) -> decltype((i + ...)); +}; + +int main() { + Foo foo; + return foo[]; // expected-error {{no viable overloaded operator[] for type 'Foo'}} +} +} From 29b0a251c14b248848f2bbad1618b66a0c173336 Mon Sep 17 00:00:00 2001 From: zhijian lin Date: Thu, 15 Aug 2024 13:50:54 -0400 Subject: [PATCH 056/441] [llvm-objdump] Print out xcoff file header for xcoff object file with option private-headers (#96350) Print out the XCOFF file header and load section header for the XCOFF object file using llvm-objdump with the --private-headers option. 
--- .../llvm-objdump/XCOFF/private-headers.test | 52 ++++++++++++ llvm/tools/llvm-objdump/XCOFFDump.cpp | 82 ++++++++++++++++++- 2 files changed, 132 insertions(+), 2 deletions(-) create mode 100644 llvm/test/tools/llvm-objdump/XCOFF/private-headers.test diff --git a/llvm/test/tools/llvm-objdump/XCOFF/private-headers.test b/llvm/test/tools/llvm-objdump/XCOFF/private-headers.test new file mode 100644 index 000000000000000..9f1e60705bedf3c --- /dev/null +++ b/llvm/test/tools/llvm-objdump/XCOFF/private-headers.test @@ -0,0 +1,52 @@ +## Test the --private-headers option for XCOFF object files. + +# RUN: yaml2obj -DMAGIC=0x1DF --docnum=1 %s -o %t_xcoff32.o +# RUN: yaml2obj -DMAGIC=0x1F7 --docnum=1 %s -o %t_xcoff64.o +# RUN: llvm-objdump --private-headers %t_xcoff32.o | \ +# RUN: FileCheck %s --check-prefixes=CHECK32 --match-full-lines --strict-whitespace +# RUN: llvm-objdump --private-headers %t_xcoff64.o | \ +# RUN: FileCheck %s --check-prefixes=CHECK64 --match-full-lines --strict-whitespace + +--- !XCOFF +FileHeader: + MagicNumber: [[MAGIC]] + CreationTime: 1234 +Sections: + - Name: .text + Flags: [ STYP_TEXT ] + SectionData: "9061FFF880820000" + - Name: .data + Flags: [ STYP_DATA ] + SectionData: "0000000000000FC0" + +# CHECK32:---File Header: +# CHECK32-NEXT:Magic: 0x1df +# CHECK32-NEXT:NumberOfSections: 2 +# CHECK32-NEXT:Timestamp: 1970-01-01 00:20:34 (1234) +# CHECK32-NEXT:SymbolTableOffset: 0x0 +# CHECK32-NEXT:SymbolTableEntries: 0 +# CHECK32-NEXT:OptionalHeaderSize: 0x0 +# CHECK32-NEXT:Flags: 0x0 + +# CHECK64:---File Header: +# CHECK64-NEXT:Magic: 0x1f7 +# CHECK64-NEXT:NumberOfSections: 2 +# CHECK64-NEXT:Timestamp: 1970-01-01 00:20:34 (1234) +# CHECK64-NEXT:SymbolTableOffset: 0x0 +# CHECK64-NEXT:SymbolTableEntries: 0 +# CHECK64-NEXT:OptionalHeaderSize: 0x0 +# CHECK64-NEXT:Flags: 0x0 + +## Test if the creation time of XCOFF is zero and the number of symbols is negative. 
+# RUN: yaml2obj -DMAGIC=0x1DF --docnum=2 %s -o %t_xcoff_timestamp.o +# RUN: llvm-objdump --private-headers %t_xcoff_timestamp.o | \ +# RUN: FileCheck %s --match-full-lines + +--- !XCOFF +FileHeader: + MagicNumber: 0x1DF + CreationTime: 0 + EntriesInSymbolTable: -1 + +# CHECK: Timestamp: None (0) +# CHECK: SymbolTableEntries: Reserved Value (-1) diff --git a/llvm/tools/llvm-objdump/XCOFFDump.cpp b/llvm/tools/llvm-objdump/XCOFFDump.cpp index d9c00c096209831..5617313cdbf721b 100644 --- a/llvm/tools/llvm-objdump/XCOFFDump.cpp +++ b/llvm/tools/llvm-objdump/XCOFFDump.cpp @@ -20,7 +20,9 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/raw_ostream.h" #include using namespace llvm; @@ -30,10 +32,86 @@ using namespace llvm::support; namespace { class XCOFFDumper : public objdump::Dumper { + const XCOFFObjectFile &Obj; + unsigned Width; + public: - XCOFFDumper(const object::XCOFFObjectFile &O) : Dumper(O) {} - void printPrivateHeaders() override {} + XCOFFDumper(const object::XCOFFObjectFile &O) : Dumper(O), Obj(O) {} + +private: + void printPrivateHeaders() override; + void printFileHeader(); + FormattedString formatName(StringRef Name); + void printHex(StringRef Name, uint64_t Value); + void printNumber(StringRef Name, uint64_t Value); + void printStrHex(StringRef Name, StringRef Str, uint64_t Value); + void setWidth(unsigned W) { Width = W; }; }; + +void XCOFFDumper::printPrivateHeaders() { printFileHeader(); } + +FormattedString XCOFFDumper::formatName(StringRef Name) { + return FormattedString(Name, Width, FormattedString::JustifyLeft); +} + +void XCOFFDumper::printHex(StringRef Name, uint64_t Value) { + outs() << formatName(Name) << format_hex(Value, 0) << "\n"; +} + +void XCOFFDumper::printNumber(StringRef Name, uint64_t Value) { + outs() << formatName(Name) << format_decimal(Value, 0) << "\n"; +} + +void 
XCOFFDumper::printStrHex(StringRef Name, StringRef Str, uint64_t Value) { + outs() << formatName(Name) << Str << " (" << format_decimal(Value, 0) + << ")\n"; +} + +void XCOFFDumper::printFileHeader() { + setWidth(20); + outs() << "\n---File Header:\n"; + printHex("Magic:", Obj.getMagic()); + printNumber("NumberOfSections:", Obj.getNumberOfSections()); + + int32_t Timestamp = Obj.getTimeStamp(); + if (Timestamp > 0) { + // This handling of the timestamp assumes that the host system's time_t is + // compatible with AIX time_t. If a platform is not compatible, the lit + // tests will let us know. + time_t TimeDate = Timestamp; + + char FormattedTime[20] = {}; + + size_t BytesFormatted = std::strftime(FormattedTime, sizeof(FormattedTime), + "%F %T", std::gmtime(&TimeDate)); + assert(BytesFormatted && "The size of the buffer FormattedTime is less " + "than the size of the date/time string."); + printStrHex("Timestamp:", FormattedTime, Timestamp); + } else { + // Negative timestamp values are reserved for future use. + printStrHex("Timestamp:", Timestamp == 0 ? "None" : "Reserved Value", + Timestamp); + } + + // The number of symbol table entries is an unsigned value in 64-bit objects + // and a signed value (with negative values being 'reserved') in 32-bit + // objects. 
+ if (Obj.is64Bit()) { + printHex("SymbolTableOffset:", Obj.getSymbolTableOffset64()); + printNumber("SymbolTableEntries:", Obj.getNumberOfSymbolTableEntries64()); + } else { + printHex("SymbolTableOffset:", Obj.getSymbolTableOffset32()); + int32_t SymTabEntries = Obj.getRawNumberOfSymbolTableEntries32(); + if (SymTabEntries >= 0) + printNumber("SymbolTableEntries:", SymTabEntries); + else + printStrHex("SymbolTableEntries:", "Reserved Value", SymTabEntries); + } + + printHex("OptionalHeaderSize:", Obj.getOptionalHeaderSize()); + printHex("Flags:", Obj.getFlags()); +} + } // namespace std::unique_ptr From 75ea8e803a6560b5e16644bb25d6340926156503 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Thu, 15 Aug 2024 19:59:55 +0200 Subject: [PATCH 057/441] [clang][Interp] Call move function for certain primitive types (#104437) --- clang/lib/AST/Interp/Descriptor.cpp | 8 ++++++++ clang/test/AST/Interp/lifetimes.cpp | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/clang/lib/AST/Interp/Descriptor.cpp b/clang/lib/AST/Interp/Descriptor.cpp index 1b7d9f03f8ef0d0..47b8885c9ae3796 100644 --- a/clang/lib/AST/Interp/Descriptor.cpp +++ b/clang/lib/AST/Interp/Descriptor.cpp @@ -284,6 +284,14 @@ static BlockDtorFn getDtorPrim(PrimType Type) { } static BlockMoveFn getMovePrim(PrimType Type) { + if (Type == PT_Float) + return moveTy::T>; + if (Type == PT_IntAP) + return moveTy::T>; + if (Type == PT_IntAPS) + return moveTy::T>; + if (Type == PT_MemberPtr) + return moveTy::T>; COMPOSITE_TYPE_SWITCH(Type, return moveTy, return nullptr); } diff --git a/clang/test/AST/Interp/lifetimes.cpp b/clang/test/AST/Interp/lifetimes.cpp index 9fca54fe11120aa..9a99485c4a40bff 100644 --- a/clang/test/AST/Interp/lifetimes.cpp +++ b/clang/test/AST/Interp/lifetimes.cpp @@ -60,3 +60,11 @@ namespace MoveFnWorks { } static_assert(dtor_calls_dtor(), ""); } + +namespace PrimitiveMoveFn { + /// This used to crash. 
+ void test() { + const float y = 100; + const float &x = y; + } +} From 7493ea22f8027dc163ca521a71150d264891853c Mon Sep 17 00:00:00 2001 From: earnol Date: Thu, 15 Aug 2024 14:11:27 -0400 Subject: [PATCH 058/441] [test]Fix test error due to CRT dependency (#104462) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove CRT dependency in headers stdint.h, stdio.h inside test clang/test/CodeGen/bit-int-ubsan.c --------- Co-authored-by: Eänolituri Lómitaurë Co-authored-by: Aaron Ballman Co-authored-by: Paul Kirth --- clang/test/CodeGen/bit-int-ubsan.c | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/clang/test/CodeGen/bit-int-ubsan.c b/clang/test/CodeGen/bit-int-ubsan.c index 35f96963c181d15..10c0d8a23f8f971 100644 --- a/clang/test/CodeGen/bit-int-ubsan.c +++ b/clang/test/CodeGen/bit-int-ubsan.c @@ -1,11 +1,9 @@ // REQUIRES: x86-registered-target -// RUN: %clang -Wno-constant-conversion -Wno-array-bounds -Wno-division-by-zero -Wno-shift-negative-value -Wno-shift-count-negative -Wno-int-to-pointer-cast -fsanitize=array-bounds,enum,float-cast-overflow,integer-divide-by-zero,implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change,unsigned-integer-overflow,signed-integer-overflow,shift-base,shift-exponent -O0 -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -Wno-constant-conversion -Wno-array-bounds -Wno-division-by-zero -Wno-shift-negative-value -Wno-shift-count-negative -Wno-int-to-pointer-cast -fsanitize=array-bounds,enum,float-cast-overflow,integer-divide-by-zero,implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change,unsigned-integer-overflow,signed-integer-overflow,shift-base,shift-exponent -O0 -emit-llvm -o - %s | FileCheck %s // The runtime test checking the _BitInt ubsan feature is located in compiler-rt/test/ubsan/TestCases/Integer/bit-int.c -#include -#include - +typedef unsigned 
int uint32_t; uint32_t float_divide_by_zero() { float f = 1.0f / 0.0f; // CHECK: constant { i16, i16, [8 x i8] } { i16 1, i16 32, [8 x i8] c"'float'\00" } @@ -78,19 +76,3 @@ uint32_t negative_shift5(unsigned _BitInt(37) x) return x >> c; // CHECK: constant { i16, i16, [20 x i8] } { i16 2, i16 {{([[:xdigit:]]{2})}}, [20 x i8] c"'_BitInt(68)'\00D\00\00\00\00\00" } } - -int main(int argc, char **argv) { - // clang-format off - uint64_t result = - 1ULL + - implicit_unsigned_integer_truncation() + - (uint32_t)array_bounds() + - float_cast_overflow() + - (uint64_t)implicit_signed_integer_truncation() + - negative_shift1(5) + - negative_shift2(5) + - negative_shift3(5) + - negative_shift5(5); - // clang-format on - printf("%u\n", (uint32_t)(result & 0xFFFFFFFF)); -} From 52337d5f9d108f04b2ed06069b21a255c232dc1f Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Thu, 15 Aug 2024 19:19:30 +0100 Subject: [PATCH 059/441] llvm-objdump: ensure a MachO symbol isn't STAB before looking up section (#86667) The section field has been repurposed for some STAB symbol types, and if we blindly look it up we'll produce an error and terminate. Logic already existed Existing stabs test had a section that was in range. Unfortunately I don't know of an easy way to produce stabs entries in LLVM (I thought they died in the 90s until this came up) so I just binary-edited it to cause a failure on existing llvm-objdump. 
--- .../llvm-objdump/macho-stabs-in-syms.yaml | 263 ++++++++++++++++++ llvm/tools/llvm-objdump/llvm-objdump.cpp | 20 +- 2 files changed, 273 insertions(+), 10 deletions(-) create mode 100644 llvm/test/tools/llvm-objdump/macho-stabs-in-syms.yaml diff --git a/llvm/test/tools/llvm-objdump/macho-stabs-in-syms.yaml b/llvm/test/tools/llvm-objdump/macho-stabs-in-syms.yaml new file mode 100644 index 000000000000000..3850473b219cead --- /dev/null +++ b/llvm/test/tools/llvm-objdump/macho-stabs-in-syms.yaml @@ -0,0 +1,263 @@ +# Check we don't error out on MachO files with stabs symbol entries that +# repurpose the section field so it's out of range for normal symbols. + +# RUN: yaml2obj %s -o %t +# RUN: llvm-objdump --syms %t | FileCheck %s + +# CHECK: 0000000000000000 d *UND* +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x100000C + cpusubtype: 0x0 + filetype: 0x2 + ncmds: 16 + sizeofcmds: 744 + flags: 0x200085 + reserved: 0x0 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __PAGEZERO + vmaddr: 0 + vmsize: 4294967296 + fileoff: 0 + filesize: 0 + maxprot: 0 + initprot: 0 + nsects: 0 + flags: 0 + - cmd: LC_SEGMENT_64 + cmdsize: 232 + segname: __TEXT + vmaddr: 4294967296 + vmsize: 16384 + fileoff: 0 + filesize: 16384 + maxprot: 5 + initprot: 5 + nsects: 2 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x100003FA0 + size: 8 + offset: 0x3FA0 + align: 2 + reloff: 0x0 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 00008052C0035FD6 + - sectname: __unwind_info + segname: __TEXT + addr: 0x100003FA8 + size: 88 + offset: 0x3FA8 + align: 2 + reloff: 0x0 + nreloc: 0 + flags: 0x0 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 010000001C000000000000001C000000000000001C00000002000000A03F00004000000040000000A83F00000000000040000000000000000000000000000000030000000C00010010000100000000000000000200000000 + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 4294983680 + 
vmsize: 16384 + fileoff: 16384 + filesize: 688 + maxprot: 1 + initprot: 1 + nsects: 0 + flags: 0 + - cmd: LC_DYLD_CHAINED_FIXUPS + cmdsize: 16 + dataoff: 16384 + datasize: 56 + - cmd: LC_DYLD_EXPORTS_TRIE + cmdsize: 16 + dataoff: 16440 + datasize: 48 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 16496 + nsyms: 11 + stroff: 16672 + strsize: 120 + - cmd: LC_DYSYMTAB + cmdsize: 80 + ilocalsym: 0 + nlocalsym: 9 + iextdefsym: 9 + nextdefsym: 2 + iundefsym: 11 + nundefsym: 0 + tocoff: 0 + ntoc: 0 + modtaboff: 0 + nmodtab: 0 + extrefsymoff: 0 + nextrefsyms: 0 + indirectsymoff: 0 + nindirectsyms: 0 + extreloff: 0 + nextrel: 0 + locreloff: 0 + nlocrel: 0 + - cmd: LC_LOAD_DYLINKER + cmdsize: 32 + name: 12 + Content: '/usr/lib/dyld' + ZeroPadBytes: 7 + - cmd: LC_UUID + cmdsize: 24 + uuid: 73BD5072-2F45-39FD-8013-8102F002C240 + - cmd: LC_BUILD_VERSION + cmdsize: 32 + platform: 1 + minos: 917504 + sdk: 918528 + ntools: 1 + Tools: + - tool: 3 + version: 72942592 + - cmd: LC_SOURCE_VERSION + cmdsize: 16 + version: 0 + - cmd: LC_MAIN + cmdsize: 24 + entryoff: 16288 + stacksize: 0 + - cmd: LC_LOAD_DYLIB + cmdsize: 56 + dylib: + name: 24 + timestamp: 2 + current_version: 88171522 + compatibility_version: 65536 + Content: '/usr/lib/libSystem.B.dylib' + ZeroPadBytes: 6 + - cmd: LC_FUNCTION_STARTS + cmdsize: 16 + dataoff: 16488 + datasize: 8 + - cmd: LC_DATA_IN_CODE + cmdsize: 16 + dataoff: 16496 + datasize: 0 + - cmd: LC_CODE_SIGNATURE + cmdsize: 16 + dataoff: 16800 + datasize: 272 +LinkEditData: + ExportTrie: + TerminalSize: 0 + NodeOffset: 0 + Name: '' + Flags: 0x0 + Address: 0x0 + Other: 0x0 + ImportName: '' + Children: + - TerminalSize: 0 + NodeOffset: 18 + Name: _ + Flags: 0x0 + Address: 0x0 + Other: 0x0 + ImportName: '' + Children: + - TerminalSize: 2 + NodeOffset: 9 + Name: _mh_execute_header + Flags: 0x0 + Address: 0x0 + Other: 0x0 + ImportName: '' + - TerminalSize: 3 + NodeOffset: 13 + Name: main + Flags: 0x0 + Address: 0x3FA0 + Other: 0x0 + ImportName: '' + NameList: + - 
n_strx: 1 + n_type: 0x64 + # This line has been changed from a real object file to move the section + # out of valid range for normal symbols. + n_sect: 42 + n_desc: 0 + n_value: 0 + - n_strx: 28 + n_type: 0x64 + n_sect: 0 + n_desc: 0 + n_value: 0 + - n_strx: 68 + n_type: 0x64 + n_sect: 0 + n_desc: 0 + n_value: 0 + - n_strx: 74 + n_type: 0x66 + n_sect: 0 + n_desc: 1 + n_value: 1713522175 + - n_strx: 1 + n_type: 0x2E + n_sect: 1 + n_desc: 0 + n_value: 4294983584 + - n_strx: 22 + n_type: 0x24 + n_sect: 1 + n_desc: 0 + n_value: 4294983584 + - n_strx: 1 + n_type: 0x24 + n_sect: 0 + n_desc: 0 + n_value: 8 + - n_strx: 1 + n_type: 0x4E + n_sect: 1 + n_desc: 0 + n_value: 4294983584 + - n_strx: 1 + n_type: 0x64 + n_sect: 1 + n_desc: 0 + n_value: 0 + - n_strx: 2 + n_type: 0xF + n_sect: 1 + n_desc: 16 + n_value: 4294967296 + - n_strx: 22 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294983584 + StringTable: + - ' ' + - __mh_execute_header + - _main + - '/Users/tim/llvm/llvm-project/build.rel/' + - tmp.c + - '/Users/tim/llvm/llvm-project/build.rel/tmp.o' + - '' + FunctionStarts: [ 0x3FA0 ] + ChainedFixups: [ 0x0, 0x0, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x30, 0x0, + 0x0, 0x0, 0x30, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 ] +... 
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 768a976cd920275..b69d14b4e7609aa 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -2885,16 +2885,6 @@ void Dumper::printSymbol(const SymbolRef &Symbol, reportUniqueWarning(AddrOrErr.takeError()); return; } - uint64_t Address = *AddrOrErr; - section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName); - if (SecI != O.section_end() && shouldAdjustVA(*SecI)) - Address += AdjustVMA; - if ((Address < StartAddress) || (Address > StopAddress)) - return; - SymbolRef::Type Type = - unwrapOrError(Symbol.getType(), FileName, ArchiveName, ArchitectureName); - uint32_t Flags = - unwrapOrError(Symbol.getFlags(), FileName, ArchiveName, ArchitectureName); // Don't ask a Mach-O STAB symbol for its section unless you know that // STAB symbol's section field refers to a valid section index. Otherwise @@ -2913,6 +2903,16 @@ void Dumper::printSymbol(const SymbolRef &Symbol, : unwrapOrError(Symbol.getSection(), FileName, ArchiveName, ArchitectureName); + uint64_t Address = *AddrOrErr; + if (Section != O.section_end() && shouldAdjustVA(*Section)) + Address += AdjustVMA; + if ((Address < StartAddress) || (Address > StopAddress)) + return; + SymbolRef::Type Type = + unwrapOrError(Symbol.getType(), FileName, ArchiveName, ArchitectureName); + uint32_t Flags = + unwrapOrError(Symbol.getFlags(), FileName, ArchiveName, ArchitectureName); + StringRef Name; if (Type == SymbolRef::ST_Debug && Section != O.section_end()) { if (Expected NameOrErr = Section->getName()) From 47721d46187f89c12a13d07b5857496301cf5d6e Mon Sep 17 00:00:00 2001 From: royitaqi Date: Thu, 15 Aug 2024 11:26:24 -0700 Subject: [PATCH 060/441] [lldb] Realpath symlinks for breakpoints (#102223) Improve the chance of resolving file/line breakpoints by realpath'ing the support files before doing a second match attempt, with some conditions applied. 
A working [hello-world example](https://github.com/royitaqi/lldb_demos/blob/main/realpath/README.md). See [patch](https://github.com/llvm/llvm-project/pull/102223) for more info about problem/motivation, details of the feature, new settings, telemetries and tests. --- lldb/include/lldb/Symbol/CompileUnit.h | 9 +- lldb/include/lldb/Target/Statistics.h | 5 + lldb/include/lldb/Target/Target.h | 3 + lldb/include/lldb/Utility/FileSpecList.h | 8 +- lldb/include/lldb/Utility/RealpathPrefixes.h | 77 +++++ lldb/include/lldb/lldb-forward.h | 1 + .../Breakpoint/BreakpointResolverFileLine.cpp | 12 +- lldb/source/Symbol/CompileUnit.cpp | 14 +- lldb/source/Target/Statistics.cpp | 12 + lldb/source/Target/Target.cpp | 8 + lldb/source/Target/TargetProperties.td | 3 + lldb/source/Utility/CMakeLists.txt | 1 + lldb/source/Utility/FileSpecList.cpp | 108 ++++-- lldb/source/Utility/RealpathPrefixes.cpp | 70 ++++ .../Makefile | 3 + .../TestBreakpoint.py | 158 +++++++++ .../main.c | 10 + .../real/bar.h | 3 + .../real/foo.h | 3 + .../real/qux.h | 3 + .../symlink1/foo.h | 1 + .../symlink2 | 1 + .../to-be-mapped/README.md | 1 + lldb/unittests/Core/CMakeLists.txt | 1 - lldb/unittests/Core/FileSpecListTest.cpp | 125 ------- lldb/unittests/Utility/CMakeLists.txt | 3 + lldb/unittests/Utility/FileSpecListTest.cpp | 323 ++++++++++++++++++ .../unittests/Utility/MockSymlinkFileSystem.h | 66 ++++ .../Utility/RealpathPrefixesTest.cpp | 140 ++++++++ 29 files changed, 1002 insertions(+), 170 deletions(-) create mode 100644 lldb/include/lldb/Utility/RealpathPrefixes.h create mode 100644 lldb/source/Utility/RealpathPrefixes.cpp create mode 100644 lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/Makefile create mode 100644 lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/TestBreakpoint.py create mode 100644 lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/main.c create mode 100644 
lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/real/bar.h create mode 100644 lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/real/foo.h create mode 100644 lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/real/qux.h create mode 120000 lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/symlink1/foo.h create mode 120000 lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/symlink2 create mode 100644 lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/to-be-mapped/README.md delete mode 100644 lldb/unittests/Core/FileSpecListTest.cpp create mode 100644 lldb/unittests/Utility/FileSpecListTest.cpp create mode 100644 lldb/unittests/Utility/MockSymlinkFileSystem.h create mode 100644 lldb/unittests/Utility/RealpathPrefixesTest.cpp diff --git a/lldb/include/lldb/Symbol/CompileUnit.h b/lldb/include/lldb/Symbol/CompileUnit.h index c20a37e32830755..c5bb080d2118490 100644 --- a/lldb/include/lldb/Symbol/CompileUnit.h +++ b/lldb/include/lldb/Symbol/CompileUnit.h @@ -19,11 +19,13 @@ #include "lldb/Utility/Stream.h" #include "lldb/Utility/UserID.h" #include "lldb/lldb-enumerations.h" +#include "lldb/lldb-forward.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" namespace lldb_private { + /// \class CompileUnit CompileUnit.h "lldb/Symbol/CompileUnit.h" /// A class that describes a compilation unit. /// @@ -389,10 +391,15 @@ class CompileUnit : public std::enable_shared_from_this, /// A SymbolContext list class that will get any matching /// entries appended to. /// + /// \param[in] realpath_prefixes + /// Paths that start with one of the prefixes in this list will be + /// realpath'ed to resolve any symlinks. 
+ /// /// \see enum SymbolContext::Scope void ResolveSymbolContext(const SourceLocationSpec &src_location_spec, lldb::SymbolContextItem resolve_scope, - SymbolContextList &sc_list); + SymbolContextList &sc_list, + RealpathPrefixes *realpath_prefixes = nullptr); /// Get whether compiler optimizations were enabled for this compile unit /// diff --git a/lldb/include/lldb/Target/Statistics.h b/lldb/include/lldb/Target/Statistics.h index 35bd7f8a66e055e..5193d099a5494d8 100644 --- a/lldb/include/lldb/Target/Statistics.h +++ b/lldb/include/lldb/Target/Statistics.h @@ -10,6 +10,7 @@ #define LLDB_TARGET_STATISTICS_H #include "lldb/Utility/ConstString.h" +#include "lldb/Utility/RealpathPrefixes.h" #include "lldb/Utility/Stream.h" #include "lldb/lldb-forward.h" #include "llvm/ADT/StringMap.h" @@ -184,6 +185,8 @@ class TargetStats { void SetFirstPrivateStopTime(); void SetFirstPublicStopTime(); void IncreaseSourceMapDeduceCount(); + void IncreaseSourceRealpathAttemptCount(uint32_t count); + void IncreaseSourceRealpathCompatibleCount(uint32_t count); StatsDuration &GetCreateTime() { return m_create_time; } StatsSuccessFail &GetExpressionStats() { return m_expr_eval; } @@ -198,6 +201,8 @@ class TargetStats { StatsSuccessFail m_frame_var{"frameVariable"}; std::vector m_module_identifiers; uint32_t m_source_map_deduce_count = 0; + uint32_t m_source_realpath_attempt_count = 0; + uint32_t m_source_realpath_compatible_count = 0; void CollectStats(Target &target); }; diff --git a/lldb/include/lldb/Target/Target.h b/lldb/include/lldb/Target/Target.h index 119dff4d498199f..7f4d607f5427dff 100644 --- a/lldb/include/lldb/Target/Target.h +++ b/lldb/include/lldb/Target/Target.h @@ -34,6 +34,7 @@ #include "lldb/Utility/ArchSpec.h" #include "lldb/Utility/Broadcaster.h" #include "lldb/Utility/LLDBAssert.h" +#include "lldb/Utility/RealpathPrefixes.h" #include "lldb/Utility/Timeout.h" #include "lldb/lldb-public.h" @@ -117,6 +118,8 @@ class TargetProperties : public Properties { InlineStrategy 
GetInlineStrategy() const; + RealpathPrefixes GetSourceRealpathPrefixes() const; + llvm::StringRef GetArg0() const; void SetArg0(llvm::StringRef arg); diff --git a/lldb/include/lldb/Utility/FileSpecList.h b/lldb/include/lldb/Utility/FileSpecList.h index 6eb3bb9971f13ad..d091a9246e08277 100644 --- a/lldb/include/lldb/Utility/FileSpecList.h +++ b/lldb/include/lldb/Utility/FileSpecList.h @@ -64,10 +64,16 @@ class SupportFileList { /// \param[in] file /// The file specification to search for. /// + /// \param[in] realpath_prefixes + /// Paths that start with one of the prefixes in this list will be + /// realpath'ed to resolve any symlinks. + /// /// \return /// The index of the file that matches \a file if it is found, /// else UINT32_MAX is returned. - size_t FindCompatibleIndex(size_t idx, const FileSpec &file) const; + size_t + FindCompatibleIndex(size_t idx, const FileSpec &file, + RealpathPrefixes *realpath_prefixes = nullptr) const; template void EmplaceBack(Args &&...args) { m_files.push_back( diff --git a/lldb/include/lldb/Utility/RealpathPrefixes.h b/lldb/include/lldb/Utility/RealpathPrefixes.h new file mode 100644 index 000000000000000..daa2a479d6a2469 --- /dev/null +++ b/lldb/include/lldb/Utility/RealpathPrefixes.h @@ -0,0 +1,77 @@ +//===-- RealpathPrefixes.h --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_UTILITY_REALPATHPREFIXES_H +#define LLDB_UTILITY_REALPATHPREFIXES_H + +#include "lldb/lldb-forward.h" +#include "llvm/ADT/IntrusiveRefCntPtr.h" +#include "llvm/Support/VirtualFileSystem.h" + +#include +#include +#include + +namespace lldb_private { + +class RealpathPrefixes { +public: + /// \param[in] file_spec_list + /// Prefixes are obtained from FileSpecList, through FileSpec::GetPath(), + /// which ensures that the paths are normalized. For example: + /// "./foo/.." -> "" + /// "./foo/../bar" -> "bar" + /// + /// \param[in] fs + /// An optional filesystem to use for realpath'ing. If not set, the real + /// filesystem will be used. + explicit RealpathPrefixes(const FileSpecList &file_spec_list, + llvm::IntrusiveRefCntPtr fs = + llvm::vfs::getRealFileSystem()); + + std::optional ResolveSymlinks(const FileSpec &file_spec); + + // If/when Statistics.h/cpp is moved into Utility, we can remove these + // methods, hold a (weak) pointer to `TargetStats` and directly increment + // on that object. + void IncreaseSourceRealpathAttemptCount() { + ++m_source_realpath_attempt_count; + } + uint32_t GetSourceRealpathAttemptCount() const { + return m_source_realpath_attempt_count; + } + void IncreaseSourceRealpathCompatibleCount() { + ++m_source_realpath_compatible_count; + } + uint32_t GetSourceRealpathCompatibleCount() const { + return m_source_realpath_compatible_count; + } + +private: + // Paths that start with one of the prefixes in this list will be realpath'ed + // to resolve any symlinks. + // + // Wildcard prefixes: + // - "" (empty string) will match all paths. + // - "/" will match all absolute paths. + std::vector m_prefixes; + + // The filesystem to use for realpath'ing. + llvm::IntrusiveRefCntPtr m_fs; + + // The optional Target instance to gather statistics. 
+ lldb::TargetWP m_target; + + // Statistics that we temporarily hold here, to be gathered into TargetStats + uint32_t m_source_realpath_attempt_count = 0; + uint32_t m_source_realpath_compatible_count = 0; +}; + +} // namespace lldb_private + +#endif // LLDB_UTILITY_REALPATHPREFIXES_H diff --git a/lldb/include/lldb/lldb-forward.h b/lldb/include/lldb/lldb-forward.h index 1024501e05bcac7..337eff696fcf3f5 100644 --- a/lldb/include/lldb/lldb-forward.h +++ b/lldb/include/lldb/lldb-forward.h @@ -175,6 +175,7 @@ class Queue; class QueueImpl; class QueueItem; class REPL; +class RealpathPrefixes; class RecognizedStackFrame; class RegisterCheckpoint; class RegisterContext; diff --git a/lldb/source/Breakpoint/BreakpointResolverFileLine.cpp b/lldb/source/Breakpoint/BreakpointResolverFileLine.cpp index 16c4ee1b88d1620..508754082cae8a5 100644 --- a/lldb/source/Breakpoint/BreakpointResolverFileLine.cpp +++ b/lldb/source/Breakpoint/BreakpointResolverFileLine.cpp @@ -15,6 +15,7 @@ #include "lldb/Target/Target.h" #include "lldb/Utility/LLDBLog.h" #include "lldb/Utility/Log.h" +#include "lldb/Utility/RealpathPrefixes.h" #include "lldb/Utility/StreamString.h" #include @@ -290,16 +291,25 @@ Searcher::CallbackReturn BreakpointResolverFileLine::SearchCallback( const uint32_t line = m_location_spec.GetLine().value_or(0); const std::optional column = m_location_spec.GetColumn(); + Target &target = GetBreakpoint()->GetTarget(); + RealpathPrefixes realpath_prefixes = target.GetSourceRealpathPrefixes(); + const size_t num_comp_units = context.module_sp->GetNumCompileUnits(); for (size_t i = 0; i < num_comp_units; i++) { CompUnitSP cu_sp(context.module_sp->GetCompileUnitAtIndex(i)); if (cu_sp) { if (filter.CompUnitPasses(*cu_sp)) cu_sp->ResolveSymbolContext(m_location_spec, eSymbolContextEverything, - sc_list); + sc_list, &realpath_prefixes); } } + // Gather stats into the Target + target.GetStatistics().IncreaseSourceRealpathAttemptCount( + realpath_prefixes.GetSourceRealpathAttemptCount()); 
+ target.GetStatistics().IncreaseSourceRealpathCompatibleCount( + realpath_prefixes.GetSourceRealpathCompatibleCount()); + FilterContexts(sc_list); DeduceSourceMapping(sc_list); diff --git a/lldb/source/Symbol/CompileUnit.cpp b/lldb/source/Symbol/CompileUnit.cpp index ddeacf18e855ee3..db8f8ce6bcbc923 100644 --- a/lldb/source/Symbol/CompileUnit.cpp +++ b/lldb/source/Symbol/CompileUnit.cpp @@ -213,11 +213,12 @@ VariableListSP CompileUnit::GetVariableList(bool can_create) { return m_variables; } -std::vector FindFileIndexes(const SupportFileList &files, - const FileSpec &file) { +std::vector +FindFileIndexes(const SupportFileList &files, const FileSpec &file, + RealpathPrefixes *realpath_prefixes = nullptr) { std::vector result; uint32_t idx = -1; - while ((idx = files.FindCompatibleIndex(idx + 1, file)) != + while ((idx = files.FindCompatibleIndex(idx + 1, file, realpath_prefixes)) != UINT32_MAX) result.push_back(idx); return result; @@ -247,7 +248,8 @@ uint32_t CompileUnit::FindLineEntry(uint32_t start_idx, uint32_t line, void CompileUnit::ResolveSymbolContext( const SourceLocationSpec &src_location_spec, - SymbolContextItem resolve_scope, SymbolContextList &sc_list) { + SymbolContextItem resolve_scope, SymbolContextList &sc_list, + RealpathPrefixes *realpath_prefixes) { const FileSpec file_spec = src_location_spec.GetFileSpec(); const uint32_t line = src_location_spec.GetLine().value_or(0); const bool check_inlines = src_location_spec.GetCheckInlines(); @@ -275,8 +277,8 @@ void CompileUnit::ResolveSymbolContext( return; } - std::vector file_indexes = FindFileIndexes(GetSupportFiles(), - file_spec); + std::vector file_indexes = + FindFileIndexes(GetSupportFiles(), file_spec, realpath_prefixes); const size_t num_file_indexes = file_indexes.size(); if (num_file_indexes == 0) return; diff --git a/lldb/source/Target/Statistics.cpp b/lldb/source/Target/Statistics.cpp index 583d1524881fc38..390e04cebf6be6c 100644 --- a/lldb/source/Target/Statistics.cpp +++ 
b/lldb/source/Target/Statistics.cpp @@ -192,6 +192,10 @@ TargetStats::ToJSON(Target &target, } target_metrics_json.try_emplace("sourceMapDeduceCount", m_source_map_deduce_count); + target_metrics_json.try_emplace("sourceRealpathAttemptCount", + m_source_realpath_attempt_count); + target_metrics_json.try_emplace("sourceRealpathCompatibleCount", + m_source_realpath_compatible_count); return target_metrics_json; } @@ -220,6 +224,14 @@ void TargetStats::IncreaseSourceMapDeduceCount() { ++m_source_map_deduce_count; } +void TargetStats::IncreaseSourceRealpathAttemptCount(uint32_t count) { + m_source_realpath_attempt_count += count; +} + +void TargetStats::IncreaseSourceRealpathCompatibleCount(uint32_t count) { + m_source_realpath_compatible_count += count; +} + bool DebuggerStats::g_collecting_stats = false; llvm::json::Value DebuggerStats::ReportStatistics( diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index 129683c43f0c1a0..5a5d689e03fbc04 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -60,6 +60,7 @@ #include "lldb/Utility/LLDBAssert.h" #include "lldb/Utility/LLDBLog.h" #include "lldb/Utility/Log.h" +#include "lldb/Utility/RealpathPrefixes.h" #include "lldb/Utility/State.h" #include "lldb/Utility/StreamString.h" #include "lldb/Utility/Timer.h" @@ -4354,6 +4355,13 @@ InlineStrategy TargetProperties::GetInlineStrategy() const { static_cast(g_target_properties[idx].default_uint_value)); } +// Returning RealpathPrefixes, but the setting's type is FileSpecList. We do +// this because we want the FileSpecList to normalize the file paths for us. 
+RealpathPrefixes TargetProperties::GetSourceRealpathPrefixes() const { + const uint32_t idx = ePropertySourceRealpathPrefixes; + return RealpathPrefixes(GetPropertyAtIndexAs(idx, {})); +} + llvm::StringRef TargetProperties::GetArg0() const { const uint32_t idx = ePropertyArg0; return GetPropertyAtIndexAs( diff --git a/lldb/source/Target/TargetProperties.td b/lldb/source/Target/TargetProperties.td index ef538678670fea0..421252aa4aea26b 100644 --- a/lldb/source/Target/TargetProperties.td +++ b/lldb/source/Target/TargetProperties.td @@ -150,6 +150,9 @@ let Definition = "target" in { DefaultEnumValue<"eInlineBreakpointsAlways">, EnumValues<"OptionEnumValues(g_inline_breakpoint_enums)">, Desc<"The strategy to use when settings breakpoints by file and line. Breakpoint locations can end up being inlined by the compiler, so that a compile unit 'a.c' might contain an inlined function from another source file. Usually this is limited to breakpoint locations from inlined functions from header or other include files, or more accurately non-implementation source files. Sometimes code might #include implementation files and cause inlined breakpoint locations in inlined implementation files. Always checking for inlined breakpoint locations can be expensive (memory and time), so if you have a project with many headers and find that setting breakpoints is slow, then you can change this setting to headers. This setting allows you to control exactly which strategy is used when setting file and line breakpoints.">; + def SourceRealpathPrefixes: Property<"source-realpath-prefixes", "FileSpecList">, + DefaultStringValue<"">, + Desc<"Realpath any source paths that start with one of these prefixes. If the debug info contains symlinks which match the original source file's basename but don't match its location that the user will use to set breakpoints, then this setting can help resolve breakpoints correctly. This handles both symlinked files and directories. 
Wild card prefixes: An empty string matches all paths. A forward slash matches absolute paths.">; def DisassemblyFlavor: Property<"x86-disassembly-flavor", "Enum">, DefaultEnumValue<"eX86DisFlavorDefault">, EnumValues<"OptionEnumValues(g_x86_dis_flavor_value_types)">, diff --git a/lldb/source/Utility/CMakeLists.txt b/lldb/source/Utility/CMakeLists.txt index e9954d66cd1a528..397db0e89760234 100644 --- a/lldb/source/Utility/CMakeLists.txt +++ b/lldb/source/Utility/CMakeLists.txt @@ -51,6 +51,7 @@ add_lldb_library(lldbUtility NO_INTERNAL_DEPENDENCIES Log.cpp NameMatches.cpp ProcessInfo.cpp + RealpathPrefixes.cpp RegisterValue.cpp RegularExpression.cpp Instrumentation.cpp diff --git a/lldb/source/Utility/FileSpecList.cpp b/lldb/source/Utility/FileSpecList.cpp index 7647e04a8204516..5852367f77827f6 100644 --- a/lldb/source/Utility/FileSpecList.cpp +++ b/lldb/source/Utility/FileSpecList.cpp @@ -7,7 +7,12 @@ //===----------------------------------------------------------------------===// #include "lldb/Utility/FileSpecList.h" +#include "lldb/Target/Statistics.h" +#include "lldb/Target/Target.h" #include "lldb/Utility/ConstString.h" +#include "lldb/Utility/LLDBLog.h" +#include "lldb/Utility/Log.h" +#include "lldb/Utility/RealpathPrefixes.h" #include "lldb/Utility/Stream.h" #include @@ -108,52 +113,85 @@ size_t SupportFileList::FindFileIndex(size_t start_idx, }); } -size_t SupportFileList::FindCompatibleIndex(size_t start_idx, - const FileSpec &file_spec) const { - const size_t num_files = m_files.size(); - if (start_idx >= num_files) - return UINT32_MAX; +enum IsCompatibleResult { + kNoMatch = 0, + kOnlyFileMatch = 1, + kBothDirectoryAndFileMatch = 2, +}; +IsCompatibleResult IsCompatible(const FileSpec &curr_file, + const FileSpec &file_spec) { const bool file_spec_relative = file_spec.IsRelative(); const bool file_spec_case_sensitive = file_spec.IsCaseSensitive(); // When looking for files, we will compare only the filename if the directory // argument is empty in 
file_spec const bool full = !file_spec.GetDirectory().IsEmpty(); + // Always start by matching the filename first + if (!curr_file.FileEquals(file_spec)) + return IsCompatibleResult::kNoMatch; + + // Only compare the full name if we were asked to and if the current + // file entry has a directory. If it doesn't have a directory then we only + // compare the filename. + if (FileSpec::Equal(curr_file, file_spec, full)) { + return IsCompatibleResult::kBothDirectoryAndFileMatch; + } else if (curr_file.IsRelative() || file_spec_relative) { + llvm::StringRef curr_file_dir = curr_file.GetDirectory().GetStringRef(); + if (curr_file_dir.empty()) + // Basename match only for this file in the list + return IsCompatibleResult::kBothDirectoryAndFileMatch; + + // Check if we have a relative path in our file list, or if "file_spec" is + // relative, if so, check if either ends with the other. + llvm::StringRef file_spec_dir = file_spec.GetDirectory().GetStringRef(); + // We have a relative path in our file list, it matches if the + // specified path ends with this path, but we must ensure the full + // component matches (we don't want "foo/bar.cpp" to match "oo/bar.cpp"). + auto is_suffix = [](llvm::StringRef a, llvm::StringRef b, + bool case_sensitive) -> bool { + if (case_sensitive ? 
a.consume_back(b) : a.consume_back_insensitive(b)) + return a.empty() || a.ends_with("/"); + return false; + }; + const bool case_sensitive = + file_spec_case_sensitive || curr_file.IsCaseSensitive(); + if (is_suffix(curr_file_dir, file_spec_dir, case_sensitive) || + is_suffix(file_spec_dir, curr_file_dir, case_sensitive)) + return IsCompatibleResult::kBothDirectoryAndFileMatch; + } + return IsCompatibleResult::kOnlyFileMatch; +} + +size_t SupportFileList::FindCompatibleIndex( + size_t start_idx, const FileSpec &file_spec, + RealpathPrefixes *realpath_prefixes) const { + const size_t num_files = m_files.size(); + if (start_idx >= num_files) + return UINT32_MAX; + for (size_t idx = start_idx; idx < num_files; ++idx) { const FileSpec &curr_file = m_files[idx]->GetSpecOnly(); - // Always start by matching the filename first - if (!curr_file.FileEquals(file_spec)) - continue; - - // Only compare the full name if the we were asked to and if the current - // file entry has the a directory. If it doesn't have a directory then we - // only compare the filename. - if (FileSpec::Equal(curr_file, file_spec, full)) { + IsCompatibleResult result = IsCompatible(curr_file, file_spec); + if (result == IsCompatibleResult::kBothDirectoryAndFileMatch) return idx; - } else if (curr_file.IsRelative() || file_spec_relative) { - llvm::StringRef curr_file_dir = curr_file.GetDirectory().GetStringRef(); - if (curr_file_dir.empty()) - return idx; // Basename match only for this file in the list - - // Check if we have a relative path in our file list, or if "file_spec" is - // relative, if so, check if either ends with the other. - llvm::StringRef file_spec_dir = file_spec.GetDirectory().GetStringRef(); - // We have a relative path in our file list, it matches if the - // specified path ends with this path, but we must ensure the full - // component matches (we don't want "foo/bar.cpp" to match "oo/bar.cpp"). 
- auto is_suffix = [](llvm::StringRef a, llvm::StringRef b, - bool case_sensitive) -> bool { - if (case_sensitive ? a.consume_back(b) : a.consume_back_insensitive(b)) - return a.empty() || a.ends_with("/"); - return false; - }; - const bool case_sensitive = - file_spec_case_sensitive || curr_file.IsCaseSensitive(); - if (is_suffix(curr_file_dir, file_spec_dir, case_sensitive) || - is_suffix(file_spec_dir, curr_file_dir, case_sensitive)) - return idx; + + if (realpath_prefixes && result == IsCompatibleResult::kOnlyFileMatch) { + if (std::optional resolved_curr_file = + realpath_prefixes->ResolveSymlinks(curr_file)) { + if (IsCompatible(*resolved_curr_file, file_spec) == + IsCompatibleResult::kBothDirectoryAndFileMatch) { + // Stats and logging. + realpath_prefixes->IncreaseSourceRealpathCompatibleCount(); + Log *log = GetLog(LLDBLog::Source); + LLDB_LOGF(log, + "Realpath'ed support file %s is compatible to input file", + resolved_curr_file->GetPath().c_str()); + // We found a match + return idx; + } + } } } diff --git a/lldb/source/Utility/RealpathPrefixes.cpp b/lldb/source/Utility/RealpathPrefixes.cpp new file mode 100644 index 000000000000000..14c81ee6a1f5711 --- /dev/null +++ b/lldb/source/Utility/RealpathPrefixes.cpp @@ -0,0 +1,70 @@ +//===-- RealpathPrefixes.cpp ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/Utility/RealpathPrefixes.h" + +#include "lldb/Utility/FileSpec.h" +#include "lldb/Utility/FileSpecList.h" +#include "lldb/Utility/LLDBLog.h" +#include "lldb/Utility/Log.h" + +using namespace lldb_private; + +RealpathPrefixes::RealpathPrefixes( + const FileSpecList &file_spec_list, + llvm::IntrusiveRefCntPtr fs) + : m_fs(fs) { + m_prefixes.reserve(file_spec_list.GetSize()); + for (const FileSpec &file_spec : file_spec_list) { + m_prefixes.emplace_back(file_spec.GetPath()); + } +} + +std::optional +RealpathPrefixes::ResolveSymlinks(const FileSpec &file_spec) { + if (m_prefixes.empty()) + return std::nullopt; + + // Test if `b` is a *path* prefix of `a` (not just *string* prefix). + // E.g. "/foo/bar" is a path prefix of "/foo/bar/baz" but not "/foo/barbaz". + auto is_path_prefix = [](llvm::StringRef a, llvm::StringRef b, + bool case_sensitive, + llvm::sys::path::Style style) -> bool { + if (case_sensitive ? a.consume_front(b) : a.consume_front_insensitive(b)) + // If `b` isn't "/", then it won't end with "/" because it comes from + // `FileSpec`. After `a` consumes `b`, `a` should either be empty (i.e. + // `a` == `b`) or start with "/" (the remainder of `a` is a subdirectory). + return b == "/" || a.empty() || + llvm::sys::path::is_separator(a[0], style); + return false; + }; + std::string file_spec_path = file_spec.GetPath(); + for (const std::string &prefix : m_prefixes) { + if (is_path_prefix(file_spec_path, prefix, file_spec.IsCaseSensitive(), + file_spec.GetPathStyle())) { + // Stats and logging. + IncreaseSourceRealpathAttemptCount(); + Log *log = GetLog(LLDBLog::Source); + LLDB_LOGF(log, "Realpath'ing support file %s", file_spec_path.c_str()); + + // One prefix matched. Try to realpath. 
+ llvm::SmallString buff; + std::error_code ec = m_fs->getRealPath(file_spec_path, buff); + if (ec) + return std::nullopt; + FileSpec realpath(buff, file_spec.GetPathStyle()); + + // Only return realpath if it is different from the original file_spec. + if (realpath != file_spec) + return realpath; + return std::nullopt; + } + } + // No prefix matched + return std::nullopt; +} diff --git a/lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/Makefile b/lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/Makefile new file mode 100644 index 000000000000000..10495940055b63d --- /dev/null +++ b/lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/Makefile @@ -0,0 +1,3 @@ +C_SOURCES := main.c + +include Makefile.rules diff --git a/lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/TestBreakpoint.py b/lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/TestBreakpoint.py new file mode 100644 index 000000000000000..5dc2af73f3647d4 --- /dev/null +++ b/lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/TestBreakpoint.py @@ -0,0 +1,158 @@ +""" +Test lldb breakpoint with symlinks/realpath and source-map. +""" + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil, lldbplatformutil + + +class BreakpointTestCase(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + def setUp(self): + # Call super's setUp(). + TestBase.setUp(self) + # Find the line number to break inside main(). 
+ self.line_in_main = line_number("main.c", "// Set break point at this line.") + self.line_in_foo = line_number("real/foo.h", "// Set break point at this line.") + self.line_in_bar = line_number("real/bar.h", "// Set break point at this line.") + self.line_in_qux = line_number("real/qux.h", "// Set break point at this line.") + # disable "There is a running process, kill it and restart?" prompt + self.runCmd("settings set auto-confirm true") + self.addTearDownHook(lambda: self.runCmd("settings clear auto-confirm")) + + def buildAndCreateTarget(self): + self.build() + exe = self.getBuildArtifact("a.out") + + # Create a target by the debugger. + target = self.dbg.CreateTarget(exe) + self.assertTrue(target, VALID_TARGET) + + @skipIf(oslist=["windows"]) + def test_file_line_breakpoint_realpath_and_source_map(self): + """Test file/line breakpoint with realpathing and source-mapping.""" + self.buildAndCreateTarget() + cwd = os.getcwd() + + ###################################################################### + # Baseline + # -------------------------------------------------------------------- + # Breakpoints should be resolved with paths which are in the line-table. 
+ lldbutil.run_break_set_by_file_and_line( + self, "main.c", self.line_in_main, num_expected_locations=1, loc_exact=True + ) + lldbutil.run_break_set_by_file_and_line( + self, + "symlink1/foo.h", + self.line_in_foo, + num_expected_locations=1, + loc_exact=True, + ) + lldbutil.run_break_set_by_file_and_line( + self, + "symlink2/bar.h", + self.line_in_bar, + num_expected_locations=1, + loc_exact=True, + ) + lldbutil.run_break_set_by_file_and_line( + self, + "symlink2/qux.h", + self.line_in_qux, + num_expected_locations=1, + loc_exact=True, + ) + + ###################################################################### + # Symlinked file + # -------------------------------------------------------------------- + # - `symlink1/foo.h` is a symlink file, pointing at `real/foo.h` + # - main.c includes `symlink1/foo.h`. + # - As a result, the line-table contains a support file `(test_base_dir)/symlink1/foo.h` + # - Setting a breakpoint for `real/foo.h` won't be resolved, because it doesn't match the above path. + # - Setting a realpath prefix to the current working directory will cause the above support file to be realpath'ed to `(test_base_dir)/real/foo.h` + # - Now setting a breakpoint for `real/foo.h` will be resolved. + lldbutil.run_break_set_by_file_and_line( + self, + "real/foo.h", + self.line_in_foo, + num_expected_locations=0, + loc_exact=True, + ) + self.runCmd(f'settings set target.source-realpath-prefixes "{cwd}"') + lldbutil.run_break_set_by_file_and_line( + self, + "real/foo.h", + self.line_in_foo, + num_expected_locations=1, + loc_exact=True, + ) + # Clear settings so that the test below won't be affected. + self.runCmd("settings clear target.source-realpath-prefixes") + + ###################################################################### + # Symlinked directory + # -------------------------------------------------------------------- + # - `symlink2` is a symlink directory, pointing at `real`. + # - So, `symlink2/bar.h` will be realpath'ed to `real/bar.h`. 
+ # - main.c includes `symlink2/bar.h`. + # - As a result, the line-table contains a support file `(test_base_dir)/symlink2/bar.h` + # - Setting a breakpoint for `real/bar.h` won't be resolved, because it doesn't match the above path. + # - Setting a realpath prefix to the current working directory will cause the above support file to be realpath'ed to `(test_base_dir)/real/bar.h` + # - Now setting a breakpoint for `real/bar.h` will be resolved. + lldbutil.run_break_set_by_file_and_line( + self, + "real/bar.h", + self.line_in_foo, + num_expected_locations=0, + loc_exact=True, + ) + self.runCmd(f'settings set target.source-realpath-prefixes "{cwd}"') + lldbutil.run_break_set_by_file_and_line( + self, + "real/bar.h", + self.line_in_foo, + num_expected_locations=1, + loc_exact=True, + ) + # Clear settings so that the test below won't be affected. + self.runCmd("settings clear target.source-realpath-prefixes") + + ###################################################################### + # Symlink + source-map + # -------------------------------------------------------------------- + # - `symlink2` is a symlink directory, pointing at `real`. + # - So, `symlink2/qux.h` will be realpath'ed to `real/qux.h`. + # - main.c includes `symlink2/qux.h`. + # - As a result, the line-table contains a support file `(test_base_dir)/symlink2/qux.h` + # - Setting a realpath prefix to the current working directory will cause the above support file to be realpath'ed to `(test_base_dir)/real/qux.h` + # - Setting a breakpoint for `to-be-mapped/qux.h` won't be resolved, because it doesn't match the above path. + # - After setting a source-map, setting the same breakpoint will be resolved, because the input path `to-be-mapped/qux.h` is reverse-mapped to `real/qux.h`, which matches the realpath'ed support file. 
+ lldbutil.run_break_set_by_file_and_line( + self, + "real/qux.h", + self.line_in_foo, + num_expected_locations=0, + loc_exact=True, + ) + self.runCmd(f'settings set target.source-realpath-prefixes "{cwd}"') + lldbutil.run_break_set_by_file_and_line( + self, + "to-be-mapped/qux.h", + self.line_in_foo, + num_expected_locations=0, + loc_exact=True, + ) + self.runCmd('settings set target.source-map "real" "to-be-mapped"') + lldbutil.run_break_set_by_file_and_line( + self, + "to-be-mapped/qux.h", + self.line_in_foo, + num_expected_locations=1, + loc_exact=True, + ) + # Clear settings so that the test below won't be affected. + self.runCmd("settings clear target.source-realpath-prefixes") diff --git a/lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/main.c b/lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/main.c new file mode 100644 index 000000000000000..716e7a91715f9b2 --- /dev/null +++ b/lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/main.c @@ -0,0 +1,10 @@ +#include "symlink1/foo.h" +#include "symlink2/bar.h" +#include "symlink2/qux.h" + +int main(int argc, char const *argv[]) { + int a = foo(); // 1 + int b = bar(); // 2 + int c = qux(); // 3 + return a + b - c; // Set break point at this line. +} diff --git a/lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/real/bar.h b/lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/real/bar.h new file mode 100644 index 000000000000000..40155eb2075ab65 --- /dev/null +++ b/lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/real/bar.h @@ -0,0 +1,3 @@ +int bar() { + return 2; // Set break point at this line. 
+} diff --git a/lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/real/foo.h b/lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/real/foo.h new file mode 100644 index 000000000000000..a4ceefecf012389 --- /dev/null +++ b/lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/real/foo.h @@ -0,0 +1,3 @@ +int foo() { + return 1; // Set break point at this line. +} diff --git a/lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/real/qux.h b/lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/real/qux.h new file mode 100644 index 000000000000000..a90e0feba30aa55 --- /dev/null +++ b/lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/real/qux.h @@ -0,0 +1,3 @@ +int qux() { + return 3; // Set break point at this line. +} diff --git a/lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/symlink1/foo.h b/lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/symlink1/foo.h new file mode 120000 index 000000000000000..c9001b4753bba67 --- /dev/null +++ b/lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/symlink1/foo.h @@ -0,0 +1 @@ +../real/foo.h \ No newline at end of file diff --git a/lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/symlink2 b/lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/symlink2 new file mode 120000 index 000000000000000..ac558a3e1bf4442 --- /dev/null +++ b/lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/symlink2 @@ -0,0 +1 @@ +real \ No newline at end of file diff --git a/lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/to-be-mapped/README.md b/lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/to-be-mapped/README.md new file mode 100644 
index 000000000000000..5067af8eed047aa --- /dev/null +++ b/lldb/test/API/functionalities/breakpoint/breakpoint_with_realpath_and_source_map/to-be-mapped/README.md @@ -0,0 +1 @@ +This is an empty folder just so that `settings set target.source-map "real" "to-be-mapped"` can be run successfully - it requires that the latter path is valid. diff --git a/lldb/unittests/Core/CMakeLists.txt b/lldb/unittests/Core/CMakeLists.txt index d40c357e3f463be..949963fd4034639 100644 --- a/lldb/unittests/Core/CMakeLists.txt +++ b/lldb/unittests/Core/CMakeLists.txt @@ -3,7 +3,6 @@ add_lldb_unittest(LLDBCoreTests DiagnosticEventTest.cpp DumpDataExtractorTest.cpp DumpRegisterInfoTest.cpp - FileSpecListTest.cpp FormatEntityTest.cpp MangledTest.cpp ModuleSpecTest.cpp diff --git a/lldb/unittests/Core/FileSpecListTest.cpp b/lldb/unittests/Core/FileSpecListTest.cpp deleted file mode 100644 index e63f4a00bc3a944..000000000000000 --- a/lldb/unittests/Core/FileSpecListTest.cpp +++ /dev/null @@ -1,125 +0,0 @@ -//===-- FileSpecListTest.cpp ----------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "gtest/gtest.h" - -#include "lldb/Utility/FileSpecList.h" - -using namespace lldb_private; - -static FileSpec PosixSpec(llvm::StringRef path) { - return FileSpec(path, FileSpec::Style::posix); -} - -static FileSpec WindowsSpec(llvm::StringRef path) { - return FileSpec(path, FileSpec::Style::windows); -} - -TEST(SupportFileListTest, RelativePathMatchesPosix) { - - const FileSpec fullpath = PosixSpec("/build/src/main.cpp"); - const FileSpec relative = PosixSpec("./src/main.cpp"); - const FileSpec basename = PosixSpec("./main.cpp"); - const FileSpec full_wrong = PosixSpec("/other/wrong/main.cpp"); - const FileSpec rel_wrong = PosixSpec("./wrong/main.cpp"); - // Make sure these don't match "src/main.cpp" as we want to match full - // directories only - const FileSpec rel2_wrong = PosixSpec("asrc/main.cpp"); - const FileSpec rel3_wrong = PosixSpec("rc/main.cpp"); - - SupportFileList files; - files.Append(fullpath); - files.Append(relative); - files.Append(basename); - files.Append(full_wrong); - files.Append(rel_wrong); - files.Append(rel2_wrong); - files.Append(rel3_wrong); - - // Make sure the full path only matches the first entry - EXPECT_EQ((size_t)0, files.FindCompatibleIndex(0, fullpath)); - EXPECT_EQ((size_t)1, files.FindCompatibleIndex(1, fullpath)); - EXPECT_EQ((size_t)2, files.FindCompatibleIndex(2, fullpath)); - EXPECT_EQ((size_t)UINT32_MAX, files.FindCompatibleIndex(3, fullpath)); - // Make sure the relative path matches the all of the entries that contain - // the relative path - EXPECT_EQ((size_t)0, files.FindCompatibleIndex(0, relative)); - EXPECT_EQ((size_t)1, files.FindCompatibleIndex(1, relative)); - EXPECT_EQ((size_t)2, files.FindCompatibleIndex(2, relative)); - EXPECT_EQ((size_t)UINT32_MAX, files.FindCompatibleIndex(3, relative)); - - // Make sure looking file a file using the basename matches 
all entries - EXPECT_EQ((size_t)0, files.FindCompatibleIndex(0, basename)); - EXPECT_EQ((size_t)1, files.FindCompatibleIndex(1, basename)); - EXPECT_EQ((size_t)2, files.FindCompatibleIndex(2, basename)); - EXPECT_EQ((size_t)3, files.FindCompatibleIndex(3, basename)); - EXPECT_EQ((size_t)4, files.FindCompatibleIndex(4, basename)); - EXPECT_EQ((size_t)5, files.FindCompatibleIndex(5, basename)); - EXPECT_EQ((size_t)6, files.FindCompatibleIndex(6, basename)); - - // Make sure that paths that have a common suffix don't return values that - // don't match on directory delimiters. - EXPECT_EQ((size_t)2, files.FindCompatibleIndex(0, rel2_wrong)); - EXPECT_EQ((size_t)5, files.FindCompatibleIndex(3, rel2_wrong)); - EXPECT_EQ((size_t)UINT32_MAX, files.FindCompatibleIndex(6, rel2_wrong)); - - EXPECT_EQ((size_t)2, files.FindCompatibleIndex(0, rel3_wrong)); - EXPECT_EQ((size_t)6, files.FindCompatibleIndex(3, rel3_wrong)); -} - -TEST(SupportFileListTest, RelativePathMatchesWindows) { - - const FileSpec fullpath = WindowsSpec(R"(C:\build\src\main.cpp)"); - const FileSpec relative = WindowsSpec(R"(.\src\main.cpp)"); - const FileSpec basename = WindowsSpec(R"(.\main.cpp)"); - const FileSpec full_wrong = WindowsSpec(R"(\other\wrong\main.cpp)"); - const FileSpec rel_wrong = WindowsSpec(R"(.\wrong\main.cpp)"); - // Make sure these don't match "src\main.cpp" as we want to match full - // directories only - const FileSpec rel2_wrong = WindowsSpec(R"(asrc\main.cpp)"); - const FileSpec rel3_wrong = WindowsSpec(R"("rc\main.cpp)"); - - SupportFileList files; - files.Append(fullpath); - files.Append(relative); - files.Append(basename); - files.Append(full_wrong); - files.Append(rel_wrong); - files.Append(rel2_wrong); - files.Append(rel3_wrong); - - // Make sure the full path only matches the first entry - EXPECT_EQ((size_t)0, files.FindCompatibleIndex(0, fullpath)); - EXPECT_EQ((size_t)1, files.FindCompatibleIndex(1, fullpath)); - EXPECT_EQ((size_t)2, files.FindCompatibleIndex(2, fullpath)); 
- EXPECT_EQ((size_t)UINT32_MAX, files.FindCompatibleIndex(3, fullpath)); - // Make sure the relative path matches the all of the entries that contain - // the relative path - EXPECT_EQ((size_t)0, files.FindCompatibleIndex(0, relative)); - EXPECT_EQ((size_t)1, files.FindCompatibleIndex(1, relative)); - EXPECT_EQ((size_t)2, files.FindCompatibleIndex(2, relative)); - EXPECT_EQ((size_t)UINT32_MAX, files.FindCompatibleIndex(3, relative)); - - // Make sure looking file a file using the basename matches all entries - EXPECT_EQ((size_t)0, files.FindCompatibleIndex(0, basename)); - EXPECT_EQ((size_t)1, files.FindCompatibleIndex(1, basename)); - EXPECT_EQ((size_t)2, files.FindCompatibleIndex(2, basename)); - EXPECT_EQ((size_t)3, files.FindCompatibleIndex(3, basename)); - EXPECT_EQ((size_t)4, files.FindCompatibleIndex(4, basename)); - EXPECT_EQ((size_t)5, files.FindCompatibleIndex(5, basename)); - EXPECT_EQ((size_t)6, files.FindCompatibleIndex(6, basename)); - - // Make sure that paths that have a common suffix don't return values that - // don't match on directory delimiters. 
- EXPECT_EQ((size_t)2, files.FindCompatibleIndex(0, rel2_wrong)); - EXPECT_EQ((size_t)5, files.FindCompatibleIndex(3, rel2_wrong)); - EXPECT_EQ((size_t)UINT32_MAX, files.FindCompatibleIndex(6, rel2_wrong)); - - EXPECT_EQ((size_t)2, files.FindCompatibleIndex(0, rel3_wrong)); - EXPECT_EQ((size_t)6, files.FindCompatibleIndex(3, rel3_wrong)); -} diff --git a/lldb/unittests/Utility/CMakeLists.txt b/lldb/unittests/Utility/CMakeLists.txt index 8e12815d51541c9..40e0959fc01d142 100644 --- a/lldb/unittests/Utility/CMakeLists.txt +++ b/lldb/unittests/Utility/CMakeLists.txt @@ -12,6 +12,7 @@ add_lldb_unittest(UtilityTests DataExtractorTest.cpp EnvironmentTest.cpp EventTest.cpp + FileSpecListTest.cpp FileSpecTest.cpp FlagsTest.cpp ListenerTest.cpp @@ -22,6 +23,7 @@ add_lldb_unittest(UtilityTests ProcessInstanceInfoTest.cpp RangeMapTest.cpp RangeTest.cpp + RealpathPrefixesTest.cpp RegisterValueTest.cpp RegularExpressionTest.cpp ScalarTest.cpp @@ -49,6 +51,7 @@ add_lldb_unittest(UtilityTests XcodeSDKTest.cpp LINK_LIBS + lldbTarget lldbUtility lldbUtilityHelpers LLVMTestingSupport diff --git a/lldb/unittests/Utility/FileSpecListTest.cpp b/lldb/unittests/Utility/FileSpecListTest.cpp new file mode 100644 index 000000000000000..4c04f434261301a --- /dev/null +++ b/lldb/unittests/Utility/FileSpecListTest.cpp @@ -0,0 +1,323 @@ +//===-- FileSpecListTest.cpp ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "gtest/gtest.h" + +#include "MockSymlinkFileSystem.h" +#include "lldb/Utility/FileSpecList.h" +#include "lldb/Utility/RealpathPrefixes.h" + +using namespace lldb_private; + +static FileSpec PosixSpec(llvm::StringRef path) { + return FileSpec(path, FileSpec::Style::posix); +} + +static FileSpec WindowsSpec(llvm::StringRef path) { + return FileSpec(path, FileSpec::Style::windows); +} + +TEST(SupportFileListTest, RelativePathMatchesPosix) { + + const FileSpec fullpath = PosixSpec("/build/src/main.cpp"); + const FileSpec relative = PosixSpec("./src/main.cpp"); + const FileSpec basename = PosixSpec("./main.cpp"); + const FileSpec full_wrong = PosixSpec("/other/wrong/main.cpp"); + const FileSpec rel_wrong = PosixSpec("./wrong/main.cpp"); + // Make sure these don't match "src/main.cpp" as we want to match full + // directories only + const FileSpec rel2_wrong = PosixSpec("asrc/main.cpp"); + const FileSpec rel3_wrong = PosixSpec("rc/main.cpp"); + + SupportFileList files; + files.Append(fullpath); + files.Append(relative); + files.Append(basename); + files.Append(full_wrong); + files.Append(rel_wrong); + files.Append(rel2_wrong); + files.Append(rel3_wrong); + + // Make sure the full path only matches the first entry + EXPECT_EQ((size_t)0, files.FindCompatibleIndex(0, fullpath)); + EXPECT_EQ((size_t)1, files.FindCompatibleIndex(1, fullpath)); + EXPECT_EQ((size_t)2, files.FindCompatibleIndex(2, fullpath)); + EXPECT_EQ((size_t)UINT32_MAX, files.FindCompatibleIndex(3, fullpath)); + // Make sure the relative path matches the all of the entries that contain + // the relative path + EXPECT_EQ((size_t)0, files.FindCompatibleIndex(0, relative)); + EXPECT_EQ((size_t)1, files.FindCompatibleIndex(1, relative)); + EXPECT_EQ((size_t)2, files.FindCompatibleIndex(2, relative)); + EXPECT_EQ((size_t)UINT32_MAX, 
files.FindCompatibleIndex(3, relative)); + + // Make sure looking for a file using the basename matches all entries + EXPECT_EQ((size_t)0, files.FindCompatibleIndex(0, basename)); + EXPECT_EQ((size_t)1, files.FindCompatibleIndex(1, basename)); + EXPECT_EQ((size_t)2, files.FindCompatibleIndex(2, basename)); + EXPECT_EQ((size_t)3, files.FindCompatibleIndex(3, basename)); + EXPECT_EQ((size_t)4, files.FindCompatibleIndex(4, basename)); + EXPECT_EQ((size_t)5, files.FindCompatibleIndex(5, basename)); + EXPECT_EQ((size_t)6, files.FindCompatibleIndex(6, basename)); + + // Make sure that paths that have a common suffix don't return values that + // don't match on directory delimiters. + EXPECT_EQ((size_t)2, files.FindCompatibleIndex(0, rel2_wrong)); + EXPECT_EQ((size_t)5, files.FindCompatibleIndex(3, rel2_wrong)); + EXPECT_EQ((size_t)UINT32_MAX, files.FindCompatibleIndex(6, rel2_wrong)); + + EXPECT_EQ((size_t)2, files.FindCompatibleIndex(0, rel3_wrong)); + EXPECT_EQ((size_t)6, files.FindCompatibleIndex(3, rel3_wrong)); +} + +TEST(SupportFileListTest, RelativePathMatchesWindows) { + + const FileSpec fullpath = WindowsSpec(R"(C:\build\src\main.cpp)"); + const FileSpec relative = WindowsSpec(R"(.\src\main.cpp)"); + const FileSpec basename = WindowsSpec(R"(.\main.cpp)"); + const FileSpec full_wrong = WindowsSpec(R"(\other\wrong\main.cpp)"); + const FileSpec rel_wrong = WindowsSpec(R"(.\wrong\main.cpp)"); + // Make sure these don't match "src\main.cpp" as we want to match full + // directories only + const FileSpec rel2_wrong = WindowsSpec(R"(asrc\main.cpp)"); + const FileSpec rel3_wrong = WindowsSpec(R"("rc\main.cpp)"); + + SupportFileList files; + files.Append(fullpath); + files.Append(relative); + files.Append(basename); + files.Append(full_wrong); + files.Append(rel_wrong); + files.Append(rel2_wrong); + files.Append(rel3_wrong); + + // Make sure the full path only matches the first entry + EXPECT_EQ((size_t)0, files.FindCompatibleIndex(0, fullpath)); + EXPECT_EQ((size_t)1, 
files.FindCompatibleIndex(1, fullpath)); + EXPECT_EQ((size_t)2, files.FindCompatibleIndex(2, fullpath)); + EXPECT_EQ((size_t)UINT32_MAX, files.FindCompatibleIndex(3, fullpath)); + // Make sure the relative path matches all of the entries that contain + // the relative path + EXPECT_EQ((size_t)0, files.FindCompatibleIndex(0, relative)); + EXPECT_EQ((size_t)1, files.FindCompatibleIndex(1, relative)); + EXPECT_EQ((size_t)2, files.FindCompatibleIndex(2, relative)); + EXPECT_EQ((size_t)UINT32_MAX, files.FindCompatibleIndex(3, relative)); + + // Make sure looking for a file using the basename matches all entries + EXPECT_EQ((size_t)0, files.FindCompatibleIndex(0, basename)); + EXPECT_EQ((size_t)1, files.FindCompatibleIndex(1, basename)); + EXPECT_EQ((size_t)2, files.FindCompatibleIndex(2, basename)); + EXPECT_EQ((size_t)3, files.FindCompatibleIndex(3, basename)); + EXPECT_EQ((size_t)4, files.FindCompatibleIndex(4, basename)); + EXPECT_EQ((size_t)5, files.FindCompatibleIndex(5, basename)); + EXPECT_EQ((size_t)6, files.FindCompatibleIndex(6, basename)); + + // Make sure that paths that have a common suffix don't return values that + // don't match on directory delimiters. + EXPECT_EQ((size_t)2, files.FindCompatibleIndex(0, rel2_wrong)); + EXPECT_EQ((size_t)5, files.FindCompatibleIndex(3, rel2_wrong)); + EXPECT_EQ((size_t)UINT32_MAX, files.FindCompatibleIndex(6, rel2_wrong)); + + EXPECT_EQ((size_t)2, files.FindCompatibleIndex(0, rel3_wrong)); + EXPECT_EQ((size_t)6, files.FindCompatibleIndex(3, rel3_wrong)); +} + +// Support file is a symlink to the breakpoint file. +// Absolute paths are used. +// A matching prefix is set. +// Should find it compatible. 
+TEST(SupportFileListTest, SymlinkedAbsolutePaths) { + // Prepare FS + llvm::IntrusiveRefCntPtr fs(new MockSymlinkFileSystem( + FileSpec("/symlink_dir/foo.h"), FileSpec("/real_dir/foo.h"))); + + // Prepare RealpathPrefixes + FileSpecList file_spec_list; + file_spec_list.EmplaceBack("/symlink_dir"); + RealpathPrefixes prefixes(file_spec_list, fs); + + // Prepare support file list + SupportFileList support_file_list; + support_file_list.EmplaceBack(FileSpec("/symlink_dir/foo.h")); + + // Test + size_t ret = support_file_list.FindCompatibleIndex( + 0, FileSpec("/real_dir/foo.h"), &prefixes); + EXPECT_EQ(ret, (size_t)0); +} + +// Support file is a symlink to the breakpoint file. +// Absolute paths are used. +// A matching prefix is set, which is the root directory. +// Should find it compatible. +TEST(SupportFileListTest, RootDirectory) { + // Prepare FS + llvm::IntrusiveRefCntPtr fs(new MockSymlinkFileSystem( + FileSpec("/symlink_dir/foo.h"), FileSpec("/real_dir/foo.h"))); + + // Prepare RealpathPrefixes + FileSpecList file_spec_list; + file_spec_list.EmplaceBack("/"); + RealpathPrefixes prefixes(file_spec_list, fs); + + // Prepare support file list + SupportFileList support_file_list; + support_file_list.EmplaceBack(FileSpec("/symlink_dir/foo.h")); + + // Test + size_t ret = support_file_list.FindCompatibleIndex( + 0, FileSpec("/real_dir/foo.h"), &prefixes); + EXPECT_EQ(ret, (size_t)0); +} + +// Support file is a symlink to the breakpoint file. +// Relative paths are used. +// A matching prefix is set. +// Should find it compatible. 
+TEST(SupportFileListTest, SymlinkedRelativePaths) { + // Prepare FS + llvm::IntrusiveRefCntPtr fs(new MockSymlinkFileSystem( + FileSpec("symlink_dir/foo.h"), FileSpec("real_dir/foo.h"))); + + // Prepare RealpathPrefixes + FileSpecList file_spec_list; + file_spec_list.EmplaceBack("symlink_dir"); + RealpathPrefixes prefixes(file_spec_list, fs); + + // Prepare support file list + SupportFileList support_file_list; + support_file_list.EmplaceBack(FileSpec("symlink_dir/foo.h")); + + // Test + size_t ret = support_file_list.FindCompatibleIndex( + 0, FileSpec("real_dir/foo.h"), &prefixes); + EXPECT_EQ(ret, (size_t)0); +} + +// Support file is a symlink to the breakpoint file. +// A matching prefix is set. +// Input file only match basename and not directory. +// Should find it incompatible. +TEST(SupportFileListTest, RealpathOnlyMatchFileName) { + // Prepare FS + llvm::IntrusiveRefCntPtr fs(new MockSymlinkFileSystem( + FileSpec("symlink_dir/foo.h"), FileSpec("real_dir/foo.h"))); + + // Prepare RealpathPrefixes + FileSpecList file_spec_list; + file_spec_list.EmplaceBack("symlink_dir"); + RealpathPrefixes prefixes(file_spec_list, fs); + + // Prepare support file list + SupportFileList support_file_list; + support_file_list.EmplaceBack(FileSpec("symlink_dir/foo.h")); + + // Test + size_t ret = support_file_list.FindCompatibleIndex( + 0, FileSpec("some_other_dir/foo.h"), &prefixes); + EXPECT_EQ(ret, UINT32_MAX); +} + +// Support file is a symlink to the breakpoint file. +// A prefix is set, which is a matching string prefix, but not a path prefix. +// Should find it incompatible. +TEST(SupportFileListTest, DirectoryMatchStringPrefixButNotWholeDirectoryName) { + // Prepare FS + llvm::IntrusiveRefCntPtr fs(new MockSymlinkFileSystem( + FileSpec("symlink_dir/foo.h"), FileSpec("real_dir/foo.h"))); + + // Prepare RealpathPrefixes + FileSpecList file_spec_list; + file_spec_list.EmplaceBack("symlink"); // This is a string prefix of the + // symlink but not a path prefix. 
+ RealpathPrefixes prefixes(file_spec_list, fs); + + // Prepare support file list + SupportFileList support_file_list; + support_file_list.EmplaceBack(FileSpec("symlink_dir/foo.h")); + + // Test + size_t ret = support_file_list.FindCompatibleIndex( + 0, FileSpec("real_dir/foo.h"), &prefixes); + EXPECT_EQ(ret, UINT32_MAX); +} + +// Support file is a symlink to the breakpoint file. +// A matching prefix is set. +// However, the breakpoint is set with a partial path. +// Should find it compatible. +TEST(SupportFileListTest, PartialBreakpointPath) { + // Prepare FS + llvm::IntrusiveRefCntPtr fs(new MockSymlinkFileSystem( + FileSpec("symlink_dir/foo.h"), FileSpec("/real_dir/foo.h"))); + + // Prepare RealpathPrefixes + FileSpecList file_spec_list; + file_spec_list.EmplaceBack("symlink_dir"); + RealpathPrefixes prefixes(file_spec_list, fs); + + // Prepare support file list + SupportFileList support_file_list; + support_file_list.EmplaceBack(FileSpec("symlink_dir/foo.h")); + + // Test + size_t ret = support_file_list.FindCompatibleIndex( + 0, FileSpec("real_dir/foo.h"), &prefixes); + EXPECT_EQ(ret, (size_t)0); +} + +// Support file is a symlink to the breakpoint file. +// A matching prefix is set. +// However, the basename is different between the symlink and its target. +// Should find it incompatible. +TEST(SupportFileListTest, DifferentBasename) { + // Prepare FS + llvm::IntrusiveRefCntPtr fs(new MockSymlinkFileSystem( + FileSpec("/symlink_dir/foo.h"), FileSpec("/real_dir/bar.h"))); + + // Prepare RealpathPrefixes + FileSpecList file_spec_list; + file_spec_list.EmplaceBack("/symlink_dir"); + RealpathPrefixes prefixes(file_spec_list, fs); + + // Prepare support file list + SupportFileList support_file_list; + support_file_list.EmplaceBack(FileSpec("/symlink_dir/foo.h")); + + // Test + size_t ret = support_file_list.FindCompatibleIndex( + 0, FileSpec("real_dir/bar.h"), &prefixes); + EXPECT_EQ(ret, UINT32_MAX); +} + +// No prefixes are configured. 
+// The support file and the breakpoint file are different. +// Should find it incompatible. +TEST(SupportFileListTest, NoPrefixes) { + // Prepare support file list + SupportFileList support_file_list; + support_file_list.EmplaceBack(FileSpec("/real_dir/bar.h")); + + // Test + size_t ret = support_file_list.FindCompatibleIndex( + 0, FileSpec("/real_dir/foo.h"), nullptr); + EXPECT_EQ(ret, UINT32_MAX); +} + +// No prefixes are configured. +// The support file and the breakpoint file are the same. +// Should find it compatible. +TEST(SupportFileListTest, SameFile) { + // Prepare support file list + SupportFileList support_file_list; + support_file_list.EmplaceBack(FileSpec("/real_dir/foo.h")); + + // Test + size_t ret = support_file_list.FindCompatibleIndex( + 0, FileSpec("/real_dir/foo.h"), nullptr); + EXPECT_EQ(ret, (size_t)0); +} diff --git a/lldb/unittests/Utility/MockSymlinkFileSystem.h b/lldb/unittests/Utility/MockSymlinkFileSystem.h new file mode 100644 index 000000000000000..7fa1f93bfa38a9a --- /dev/null +++ b/lldb/unittests/Utility/MockSymlinkFileSystem.h @@ -0,0 +1,66 @@ +//===-- MockSymlinkFileSystem.h +//--------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/Utility/FileSpec.h" +#include "llvm/Support/VirtualFileSystem.h" + +namespace lldb_private { + +// A mock file system that realpath's a given symlink to a given realpath. +class MockSymlinkFileSystem : public llvm::vfs::FileSystem { +public: + // Treat all files as non-symlinks. + MockSymlinkFileSystem() = default; + + /// Treat \a symlink as a symlink to \a realpath. Treat all other files as + /// non-symlinks. 
+ MockSymlinkFileSystem(FileSpec &&symlink, FileSpec &&realpath, + FileSpec::Style style = FileSpec::Style::native) + : m_symlink(std::move(symlink)), m_realpath(std::move(realpath)), + m_style(style) {} + + /// If \a Path matches the symlink given in the ctor, put the realpath given + /// in the ctor into \a Output. + std::error_code getRealPath(const llvm::Twine &Path, + llvm::SmallVectorImpl &Output) override { + if (FileSpec(Path.str(), m_style) == m_symlink) { + std::string path = m_realpath.GetPath(); + Output.assign(path.begin(), path.end()); + } else { + Path.toVector(Output); + } + return {}; + } + + // Implement the rest of the interface + llvm::ErrorOr status(const llvm::Twine &Path) override { + return llvm::errc::operation_not_permitted; + } + llvm::ErrorOr> + openFileForRead(const llvm::Twine &Path) override { + return llvm::errc::operation_not_permitted; + } + llvm::vfs::directory_iterator dir_begin(const llvm::Twine &Dir, + std::error_code &EC) override { + return llvm::vfs::directory_iterator(); + } + std::error_code setCurrentWorkingDirectory(const llvm::Twine &Path) override { + return llvm::errc::operation_not_permitted; + } + llvm::ErrorOr getCurrentWorkingDirectory() const override { + return llvm::errc::operation_not_permitted; + } + +private: + FileSpec m_symlink; + FileSpec m_realpath; + FileSpec::Style m_style; +}; + +} // namespace lldb_private diff --git a/lldb/unittests/Utility/RealpathPrefixesTest.cpp b/lldb/unittests/Utility/RealpathPrefixesTest.cpp new file mode 100644 index 000000000000000..872ddf1fd223aa9 --- /dev/null +++ b/lldb/unittests/Utility/RealpathPrefixesTest.cpp @@ -0,0 +1,140 @@ +//===-- RealpathPrefixesTest.cpp +//--------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "gtest/gtest.h" + +#include "MockSymlinkFileSystem.h" +#include "lldb/Utility/FileSpecList.h" +#include "lldb/Utility/RealpathPrefixes.h" + +using namespace lldb_private; + +// Should resolve a symlink which match an absolute prefix +TEST(RealpathPrefixesTest, MatchingAbsolutePrefix) { + // Prepare FS + llvm::IntrusiveRefCntPtr fs(new MockSymlinkFileSystem( + FileSpec("/dir1/link.h"), FileSpec("/dir2/real.h"))); + + // Prepare RealpathPrefixes + FileSpecList file_spec_list; + file_spec_list.EmplaceBack("/dir1"); + RealpathPrefixes prefixes(file_spec_list, fs); + + // Test + std::optional ret = + prefixes.ResolveSymlinks(FileSpec("/dir1/link.h")); + EXPECT_EQ(ret, FileSpec("/dir2/real.h")); +} + +// Should resolve a symlink which match a relative prefix +TEST(RealpathPrefixesTest, MatchingRelativePrefix) { + // Prepare FS + llvm::IntrusiveRefCntPtr fs(new MockSymlinkFileSystem( + FileSpec("dir1/link.h"), FileSpec("dir2/real.h"))); + + // Prepare RealpathPrefixes + FileSpecList file_spec_list; + file_spec_list.EmplaceBack("dir1"); + RealpathPrefixes prefixes(file_spec_list, fs); + + // Test + std::optional ret = + prefixes.ResolveSymlinks(FileSpec("dir1/link.h")); + EXPECT_EQ(ret, FileSpec("dir2/real.h")); +} + +// Should resolve in Windows and/or with a case-insensitive support file +TEST(RealpathPrefixesTest, WindowsAndCaseInsensitive) { + // Prepare FS + llvm::IntrusiveRefCntPtr fs(new MockSymlinkFileSystem( + FileSpec("f:\\dir1\\link.h", FileSpec::Style::windows), + FileSpec("f:\\dir2\\real.h", FileSpec::Style::windows), + FileSpec::Style::windows)); + + // Prepare RealpathPrefixes + FileSpecList file_spec_list; + file_spec_list.EmplaceBack(FileSpec("f:\\dir1", FileSpec::Style::windows)); + RealpathPrefixes prefixes(file_spec_list, fs); + + // Test + std::optional ret = prefixes.ResolveSymlinks( + 
FileSpec("F:\\DIR1\\LINK.H", FileSpec::Style::windows)); + EXPECT_EQ(ret, FileSpec("f:\\dir2\\real.h", FileSpec::Style::windows)); +} + +// Should resolve a symlink when there is a mixture of matching and mismatching +// prefixes +TEST(RealpathPrefixesTest, MatchingAndMismatchingPrefix) { + // Prepare FS + llvm::IntrusiveRefCntPtr fs(new MockSymlinkFileSystem( + FileSpec("/dir1/link.h"), FileSpec("/dir2/real.h"))); + + // Prepare RealpathPrefixes + FileSpecList file_spec_list; + file_spec_list.EmplaceBack("/fake/path1"); + file_spec_list.EmplaceBack("/dir1"); // Matching prefix + file_spec_list.EmplaceBack("/fake/path2"); + RealpathPrefixes prefixes(file_spec_list, fs); + + // Test + std::optional ret = + prefixes.ResolveSymlinks(FileSpec("/dir1/link.h")); + EXPECT_EQ(ret, FileSpec("/dir2/real.h")); +} + +// Should resolve a symlink when the prefix matches after normalization +TEST(RealpathPrefixesTest, ComplexPrefixes) { + // Prepare FS + llvm::IntrusiveRefCntPtr fs(new MockSymlinkFileSystem( + FileSpec("dir1/link.h"), FileSpec("dir2/real.h"))); + + // Prepare RealpathPrefixes + FileSpecList file_spec_list; + file_spec_list.EmplaceBack("./dir1/foo/../bar/.."); // Equivalent to "dir1" + RealpathPrefixes prefixes(file_spec_list, fs); + + // Test + std::optional ret = + prefixes.ResolveSymlinks(FileSpec("dir1/link.h")); + EXPECT_EQ(ret, FileSpec("dir2/real.h")); +} + +// Should not resolve a symlink which doesn't match any prefixes +TEST(RealpathPrefixesTest, MismatchingPrefixes) { + // Prepare FS + llvm::IntrusiveRefCntPtr fs(new MockSymlinkFileSystem( + FileSpec("/dir1/link.h"), FileSpec("/dir2/real.h"))); + + // Prepare RealpathPrefixes + FileSpecList file_spec_list; + file_spec_list.EmplaceBack("/dir3"); + RealpathPrefixes prefixes(file_spec_list, fs); + + // Test + std::optional ret = + prefixes.ResolveSymlinks(FileSpec("/dir1/link.h")); + EXPECT_EQ(ret, std::nullopt); +} + +// Should not resolve a realpath +TEST(RealpathPrefixesTest, Realpath) { + // Prepare 
FS + llvm::IntrusiveRefCntPtr fs( + new MockSymlinkFileSystem()); + + // Prepare RealpathPrefixes + FileSpecList file_spec_list; + file_spec_list.EmplaceBack("/symlink_dir"); + RealpathPrefixes prefixes(file_spec_list, fs); + + // Test + std::optional ret = + prefixes.ResolveSymlinks(FileSpec("/dir/real.h")); + EXPECT_EQ(ret, std::nullopt); +} From 7ab2d504a1b30ce7a1338d23b0f443c8b24f94c4 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 15 Aug 2024 18:26:52 +0000 Subject: [PATCH 061/441] [gn build] Port 47721d46187f --- llvm/utils/gn/secondary/lldb/source/Utility/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/lldb/source/Utility/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Utility/BUILD.gn index 07b0a7ac66823ca..5faa365bb7bdbcd 100644 --- a/llvm/utils/gn/secondary/lldb/source/Utility/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Utility/BUILD.gn @@ -35,6 +35,7 @@ static_library("Utility") { "Log.cpp", "NameMatches.cpp", "ProcessInfo.cpp", + "RealpathPrefixes.cpp", "RegisterValue.cpp", "RegularExpression.cpp", "Scalar.cpp", From 6cbd96e24c6a60cbc3dbb849d2ed7afc39c77a80 Mon Sep 17 00:00:00 2001 From: "Oleksandr T." Date: Thu, 15 Aug 2024 21:42:39 +0300 Subject: [PATCH 062/441] [Clang] handle both gnu and cpp11 attributes to ensure correct parsing inside extern block (#102864) Fixes #101990 --- clang/docs/ReleaseNotes.rst | 1 + clang/lib/Parse/ParseDeclCXX.cpp | 5 ++++- clang/test/Parser/attr-order.cpp | 13 +++++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 5ba9fcb040e3a61..68cf79928bda021 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -257,6 +257,7 @@ Bug Fixes to C++ Support - Properly reject defaulted relational operators with invalid types for explicit object parameters, e.g., ``bool operator==(this int, const Foo&)`` (#GH100329), and rvalue reference parameters. 
- Properly reject defaulted copy/move assignment operators that have a non-reference explicit object parameter. +- Clang now properly handles the order of attributes in `extern` blocks. (#GH101990). - Fixed an assertion failure by preventing null explicit object arguments from being deduced. (#GH102025). Bug Fixes to AST Handling diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index aac89d910bbc839..d45a738fe4c5969 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -425,7 +425,10 @@ Decl *Parser::ParseLinkage(ParsingDeclSpec &DS, DeclaratorContext Context) { [[fallthrough]]; default: ParsedAttributes DeclAttrs(AttrFactory); - MaybeParseCXX11Attributes(DeclAttrs); + ParsedAttributes DeclSpecAttrs(AttrFactory); + while (MaybeParseCXX11Attributes(DeclAttrs) || + MaybeParseGNUAttributes(DeclSpecAttrs)) + ; ParseExternalDeclaration(DeclAttrs, DeclSpecAttrs); continue; } diff --git a/clang/test/Parser/attr-order.cpp b/clang/test/Parser/attr-order.cpp index 10bad38cac64470..21e30e5b6f9df1a 100644 --- a/clang/test/Parser/attr-order.cpp +++ b/clang/test/Parser/attr-order.cpp @@ -31,3 +31,16 @@ template template [[noreturn]] __declspec(dllexport) __attribute__((cdecl)) void k(); // ok + +extern "C" { + __attribute__ ((__warn_unused_result__)) [[__maybe_unused__]] int l(int); // ok + [[__maybe_unused__]] __attribute__ ((__warn_unused_result__)) int m(int); // ok +} + +extern "C" { + __attribute__ ((__warn_unused_result__)) [[__maybe_unused__]] int n (int); // ok + __attribute__ ((__warn_unused_result__)) [[__maybe_unused__]] static int o (int x) { return x; }; // ok +} + +extern "C" __attribute__ ((__warn_unused_result__)) [[__maybe_unused__]] int p(int); // ok +extern "C" [[__maybe_unused__]] __attribute__ ((__warn_unused_result__)) int q(int); // ok From 85da39debd8ee8c3186d88b462a924ea57b812ec Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Thu, 15 Aug 2024 20:45:49 +0200 Subject: [PATCH 063/441] 
[libc++] Remove the allocator extension (#102655) LLVM 19 removed the extension but kept an opt-in macro to re-enable it. This finally removes that option too and removes a few `const_cast`s where I know that they exist only to support this extension. --- libcxx/docs/ReleaseNotes/20.rst | 2 +- libcxx/include/__memory/allocator.h | 95 ------------------- .../__memory/uninitialized_algorithms.h | 16 ++-- 3 files changed, 7 insertions(+), 106 deletions(-) diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst index b319067394099a2..fe9f4c1973cdb43 100644 --- a/libcxx/docs/ReleaseNotes/20.rst +++ b/libcxx/docs/ReleaseNotes/20.rst @@ -62,7 +62,7 @@ Deprecations and Removals ``_LIBCPP_ENABLE_REMOVED_WEEKDAY_RELATIONAL_OPERATORS`` macro that was used to re-enable this extension will be ignored in LLVM 20. -- TODO: The ``_LIBCPP_ENABLE_REMOVED_ALLOCATOR_CONST`` macro will no longer have an effect. +- The ``_LIBCPP_ENABLE_REMOVED_ALLOCATOR_CONST`` macro no longer has any effect. ``std::allocator<const T>`` is not supported as an extension anymore, please migrate any code that uses e.g. ``std::vector<const T>`` to be standards conforming. 
Upcoming Deprecations and Removals ---------------------------------- diff --git a/libcxx/include/__memory/allocator.h b/libcxx/include/__memory/allocator.h index ae1f549626ee49e..0dbdc41d3c3d148 100644 --- a/libcxx/include/__memory/allocator.h +++ b/libcxx/include/__memory/allocator.h @@ -47,22 +47,6 @@ class _LIBCPP_TEMPLATE_VIS allocator { typedef allocator<_Up> other; }; }; - -// TODO(LLVM 20): Remove the escape hatch -# ifdef _LIBCPP_ENABLE_REMOVED_ALLOCATOR_CONST -template <> -class _LIBCPP_TEMPLATE_VIS allocator { -public: - _LIBCPP_DEPRECATED_IN_CXX17 typedef const void* pointer; - _LIBCPP_DEPRECATED_IN_CXX17 typedef const void* const_pointer; - _LIBCPP_DEPRECATED_IN_CXX17 typedef const void value_type; - - template - struct _LIBCPP_DEPRECATED_IN_CXX17 rebind { - typedef allocator<_Up> other; - }; -}; -# endif // _LIBCPP_ENABLE_REMOVED_ALLOCATOR_CONST #endif // _LIBCPP_STD_VER <= 17 // This class provides a non-trivial default constructor to the class that derives from it @@ -171,85 +155,6 @@ class _LIBCPP_TEMPLATE_VIS allocator : private __non_trivial_if::v #endif }; -// TODO(LLVM 20): Remove the escape hatch -#ifdef _LIBCPP_ENABLE_REMOVED_ALLOCATOR_CONST -template -class _LIBCPP_TEMPLATE_VIS allocator - : private __non_trivial_if::value, allocator > { - static_assert(!is_volatile<_Tp>::value, "std::allocator does not support volatile types"); - -public: - typedef size_t size_type; - typedef ptrdiff_t difference_type; - typedef const _Tp value_type; - typedef true_type propagate_on_container_move_assignment; -# if _LIBCPP_STD_VER <= 23 || defined(_LIBCPP_ENABLE_CXX26_REMOVED_ALLOCATOR_MEMBERS) - _LIBCPP_DEPRECATED_IN_CXX23 typedef true_type is_always_equal; -# endif - - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 allocator() _NOEXCEPT = default; - - template - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 allocator(const allocator<_Up>&) _NOEXCEPT {} - - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const _Tp* 
allocate(size_t __n) { - if (__n > allocator_traits::max_size(*this)) - __throw_bad_array_new_length(); - if (__libcpp_is_constant_evaluated()) { - return static_cast(::operator new(__n * sizeof(_Tp))); - } else { - return static_cast(std::__libcpp_allocate(__n * sizeof(_Tp), _LIBCPP_ALIGNOF(_Tp))); - } - } - -# if _LIBCPP_STD_VER >= 23 - [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr allocation_result allocate_at_least(size_t __n) { - return {allocate(__n), __n}; - } -# endif - - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void deallocate(const _Tp* __p, size_t __n) { - if (__libcpp_is_constant_evaluated()) { - ::operator delete(const_cast<_Tp*>(__p)); - } else { - std::__libcpp_deallocate((void*)const_cast<_Tp*>(__p), __n * sizeof(_Tp), _LIBCPP_ALIGNOF(_Tp)); - } - } - - // C++20 Removed members -# if _LIBCPP_STD_VER <= 17 - _LIBCPP_DEPRECATED_IN_CXX17 typedef const _Tp* pointer; - _LIBCPP_DEPRECATED_IN_CXX17 typedef const _Tp* const_pointer; - _LIBCPP_DEPRECATED_IN_CXX17 typedef const _Tp& reference; - _LIBCPP_DEPRECATED_IN_CXX17 typedef const _Tp& const_reference; - - template - struct _LIBCPP_DEPRECATED_IN_CXX17 rebind { - typedef allocator<_Up> other; - }; - - _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_HIDE_FROM_ABI const_pointer address(const_reference __x) const _NOEXCEPT { - return std::addressof(__x); - } - - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_IN_CXX17 const _Tp* allocate(size_t __n, const void*) { - return allocate(__n); - } - - _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { - return size_type(~0) / sizeof(_Tp); - } - - template - _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_HIDE_FROM_ABI void construct(_Up* __p, _Args&&... 
__args) { - ::new ((void*)__p) _Up(std::forward<_Args>(__args)...); - } - - _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_HIDE_FROM_ABI void destroy(pointer __p) { __p->~_Tp(); } -# endif -}; -#endif // _LIBCPP_ENABLE_REMOVED_ALLOCATOR_CONST - template inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool operator==(const allocator<_Tp>&, const allocator<_Up>&) _NOEXCEPT { diff --git a/libcxx/include/__memory/uninitialized_algorithms.h b/libcxx/include/__memory/uninitialized_algorithms.h index 7475ef5cf85def4..72db3a266fdd4d9 100644 --- a/libcxx/include/__memory/uninitialized_algorithms.h +++ b/libcxx/include/__memory/uninitialized_algorithms.h @@ -562,17 +562,13 @@ struct __allocator_has_trivial_copy_construct, _Type> : true_ty template , class _Out, - __enable_if_t< - // using _RawTypeIn because of the allocator extension - is_trivially_copy_constructible<_RawTypeIn>::value && is_trivially_copy_assignable<_RawTypeIn>::value && - is_same<__remove_const_t<_In>, __remove_const_t<_Out> >::value && - __allocator_has_trivial_copy_construct<_Alloc, _RawTypeIn>::value, - int> = 0> + __enable_if_t::value && is_trivially_copy_assignable<_In>::value && + is_same<__remove_const_t<_In>, __remove_const_t<_Out> >::value && + __allocator_has_trivial_copy_construct<_Alloc, _In>::value, + int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Out* __uninitialized_allocator_copy_impl(_Alloc&, _In* __first1, _In* __last1, _Out* __first2) { - // TODO: Remove the const_cast once we drop support for std::allocator if (__libcpp_is_constant_evaluated()) { while (__first1 != __last1) { std::__construct_at(std::__to_address(__first2), *__first1); @@ -581,7 +577,7 @@ __uninitialized_allocator_copy_impl(_Alloc&, _In* __first1, _In* __last1, _Out* } return __first2; } else { - return std::copy(__first1, __last1, const_cast<_RawTypeIn*>(__first2)); + return std::copy(__first1, __last1, __first2); } } @@ -642,7 +638,7 @@ __uninitialized_allocator_relocate(_Alloc& __alloc, _Tp* __first, 
_Tp* __last, _ __guard.__complete(); std::__allocator_destroy(__alloc, __first, __last); } else { - __builtin_memcpy(const_cast<__remove_const_t<_Tp>*>(__result), __first, sizeof(_Tp) * (__last - __first)); + __builtin_memcpy(__result, __first, sizeof(_Tp) * (__last - __first)); } } From 8ffdc8765661b2f6e6fed32de9fd95c76ff7dc9f Mon Sep 17 00:00:00 2001 From: Keith Smiley Date: Thu, 15 Aug 2024 11:50:16 -0700 Subject: [PATCH 064/441] [bazel] Port 47721d46187f89c12a13d07b5857496301cf5d6e (#104481) Made more difficult by many header circular dependencies --- .../bazel/llvm-project-overlay/lldb/BUILD.bazel | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel b/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel index b4889bbd46addd8..7841045afa6d219 100644 --- a/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel @@ -564,7 +564,7 @@ cc_library( strip_include_prefix = "include", deps = [ ":Headers", - ":Utility", + ":UtilityHeaders", "//llvm:Object", "//llvm:Support", "//llvm:TargetParser", @@ -617,7 +617,6 @@ cc_library( ":InterpreterHeaders", ":SymbolHeaders", ":TargetHeaders", - ":Utility", "//clang:driver", "//llvm:Demangle", "//llvm:Support", @@ -679,7 +678,10 @@ cc_library( name = "TargetHeaders", hdrs = glob(["include/lldb/Target/**/*.h"]), strip_include_prefix = "include", - deps = [":AppleArm64ExceptionClass"], + deps = [ + ":AppleArm64ExceptionClass", + ":BreakpointHeaders", + ], ) cc_library( @@ -721,13 +723,21 @@ cc_library( deps = [":Headers"], ) +cc_library( + name = "UtilityHeaders", + hdrs = glob(["include/lldb/Utility/**/*.h"]), + strip_include_prefix = "include", +) + cc_library( name = "Utility", srcs = glob(["source/Utility/**/*.cpp"]), hdrs = glob(["include/lldb/Utility/**/*.h"]), strip_include_prefix = "include", deps = [ + ":CoreHeaders", ":Headers", + ":TargetHeaders", ":UtilityPrivateHeaders", 
"//llvm:BinaryFormat", "//llvm:Support", From 6bbbd301473a14a52d7ea1c5dae38ee20f97f1f2 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 15 Aug 2024 11:56:21 -0700 Subject: [PATCH 065/441] [llvm-objdump] Fix a warning This patch fixes: llvm/tools/llvm-objdump/XCOFFDump.cpp:85:12: error: unused variable 'BytesFormatted' [-Werror,-Wunused-variable] --- llvm/tools/llvm-objdump/XCOFFDump.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/tools/llvm-objdump/XCOFFDump.cpp b/llvm/tools/llvm-objdump/XCOFFDump.cpp index 5617313cdbf721b..524bf07c372a9bc 100644 --- a/llvm/tools/llvm-objdump/XCOFFDump.cpp +++ b/llvm/tools/llvm-objdump/XCOFFDump.cpp @@ -86,6 +86,7 @@ void XCOFFDumper::printFileHeader() { "%F %T", std::gmtime(&TimeDate)); assert(BytesFormatted && "The size of the buffer FormattedTime is less " "than the size of the date/time string."); + (void)BytesFormatted; printStrHex("Timestamp:", FormattedTime, Timestamp); } else { // Negative timestamp values are reserved for future use. From b5e63cc533b5e752eb475ac657f09b9bb5eb2373 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 15 Aug 2024 22:59:24 +0400 Subject: [PATCH 066/441] clang/AMDGPU: Emit atomicrmw for __builtin_amdgcn_global_atomic_fadd_{f32|f64} (#96872) Need to emit syncscope and new metadata to get the native instruction, most of the time. 
--- clang/lib/CodeGen/CGBuiltin.cpp | 41 +++++++++++++------ .../CodeGenOpenCL/builtins-amdgcn-gfx11.cl | 2 +- .../builtins-fp-atomics-gfx12.cl | 8 ++-- .../CodeGenOpenCL/builtins-fp-atomics-gfx8.cl | 4 +- .../builtins-fp-atomics-gfx90a.cl | 8 ++-- .../builtins-fp-atomics-gfx940.cl | 8 ++-- 6 files changed, 43 insertions(+), 28 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 1c0baeaee036321..f424ddaa175400c 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -58,6 +58,7 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/MatrixBuilder.h" #include "llvm/IR/MemoryModelRelaxationAnnotations.h" +#include "llvm/Support/AMDGPUAddrSpace.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/ScopedPrinter.h" @@ -18919,8 +18920,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); return Builder.CreateCall(F, { Src0, Builder.getFalse() }); } - case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: - case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: @@ -18932,18 +18931,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Intrinsic::ID IID; llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); switch (BuiltinID) { - case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: - ArgTy = llvm::Type::getFloatTy(getLLVMContext()); - IID = Intrinsic::amdgcn_global_atomic_fadd; - break; case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: ArgTy = llvm::FixedVectorType::get( llvm::Type::getHalfTy(getLLVMContext()), 2); IID = Intrinsic::amdgcn_global_atomic_fadd; break; - case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: - IID = Intrinsic::amdgcn_global_atomic_fadd; - 
break; case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: IID = Intrinsic::amdgcn_global_atomic_fmin; break; @@ -19366,7 +19358,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16: case AMDGPU::BI__builtin_amdgcn_ds_faddf: case AMDGPU::BI__builtin_amdgcn_ds_fminf: - case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: { + case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: @@ -19382,6 +19376,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: BinOp = llvm::AtomicRMWInst::FAdd; break; case AMDGPU::BI__builtin_amdgcn_ds_fminf: @@ -19416,9 +19412,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)), AO, SSID); } else { - // The ds_atomic_fadd_* builtins do not have syncscope/order arguments. - SSID = llvm::SyncScope::System; - AO = AtomicOrdering::SequentiallyConsistent; + // Most of the builtins do not have syncscope/order arguments. For DS + // atomics the scope doesn't really matter, as they implicitly operate at + // workgroup scope. + // + // The global/flat cases need to use agent scope to consistently produce + // the native instruction instead of a cmpxchg expansion. + SSID = getLLVMContext().getOrInsertSyncScopeID("agent"); + AO = AtomicOrdering::Monotonic; // The v2bf16 builtin uses i16 instead of a natural bfloat type. 
if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16) { @@ -19432,6 +19433,20 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID); if (Volatile) RMW->setVolatile(true); + + unsigned AddrSpace = Ptr.getType()->getAddressSpace(); + if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) { + // Most targets require "amdgpu.no.fine.grained.memory" to emit the native + // instruction for flat and global operations. + llvm::MDTuple *EmptyMD = MDNode::get(getLLVMContext(), {}); + RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD); + + // Most targets require "amdgpu.ignore.denormal.mode" to emit the native + // instruction, but this only matters for float fadd. + if (BinOp == llvm::AtomicRMWInst::FAdd && Val->getType()->isFloatTy()) + RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD); + } + return Builder.CreateBitCast(RMW, OrigTy); } case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn: diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl index f44465746356878..138616ccca7182a 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl @@ -49,7 +49,7 @@ void test_s_wait_event_export_ready() { } // CHECK-LABEL: @test_global_add_f32 -// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %{{.*}}, float %{{.*}}) +// CHECK: = atomicrmw fadd ptr addrspace(1) %addr, float %x syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}} void test_global_add_f32(float *rtn, global float *addr, float x) { *rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x); } diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl index 63381942eaba574..6b8a6d14575db88 100644 --- 
a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl @@ -11,7 +11,7 @@ typedef short __attribute__((ext_vector_type(2))) short2; // CHECK-LABEL: test_local_add_2bf16 // CHECK: [[BC0:%.+]] = bitcast <2 x i16> {{.+}} to <2 x bfloat> -// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] seq_cst, align 4 +// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") monotonic, align 4 // CHECK-NEXT: bitcast <2 x bfloat> [[RMW]] to <2 x i16> // GFX12-LABEL: test_local_add_2bf16 @@ -22,7 +22,7 @@ short2 test_local_add_2bf16(__local short2 *addr, short2 x) { // CHECK-LABEL: test_local_add_2bf16_noret // CHECK: [[BC0:%.+]] = bitcast <2 x i16> {{.+}} to <2 x bfloat> -// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] seq_cst, align 4 +// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") monotonic, align 4 // CHECK-NEXT: bitcast <2 x bfloat> [[RMW]] to <2 x i16> // GFX12-LABEL: test_local_add_2bf16_noret @@ -32,7 +32,7 @@ void test_local_add_2bf16_noret(__local short2 *addr, short2 x) { } // CHECK-LABEL: test_local_add_2f16 -// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x half> %{{.+}} seq_cst, align 4 +// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x half> %{{.+}} monotonic, align 4 // GFX12-LABEL: test_local_add_2f16 // GFX12: ds_pk_add_rtn_f16 half2 test_local_add_2f16(__local half2 *addr, half2 x) { @@ -40,7 +40,7 @@ half2 test_local_add_2f16(__local half2 *addr, half2 x) { } // CHECK-LABEL: test_local_add_2f16_noret -// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x half> %{{.+}} seq_cst, align 4 +// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x half> %{{.+}} monotonic, align 4 // GFX12-LABEL: test_local_add_2f16_noret // GFX12: ds_pk_add_f16 void test_local_add_2f16_noret(__local half2 *addr, half2 x) { diff 
--git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx8.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx8.cl index ad4d0b7af3d4be7..2f00977ec6014e1 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx8.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx8.cl @@ -6,7 +6,7 @@ // REQUIRES: amdgpu-registered-target // CHECK-LABEL: test_fadd_local -// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, float %{{.+}} seq_cst, align 4 +// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, float %{{.+}} monotonic, align 4 // GFX8-LABEL: test_fadd_local$local: // GFX8: ds_add_rtn_f32 v2, v0, v1 // GFX8: s_endpgm @@ -16,7 +16,7 @@ kernel void test_fadd_local(__local float *ptr, float val){ } // CHECK-LABEL: test_fadd_local_volatile -// CHECK: = atomicrmw volatile fadd ptr addrspace(3) %{{.+}}, float %{{.+}} seq_cst, align 4 +// CHECK: = atomicrmw volatile fadd ptr addrspace(3) %{{.+}}, float %{{.+}} monotonic, align 4 kernel void test_fadd_local_volatile(volatile __local float *ptr, float val){ volatile float *res; *res = __builtin_amdgcn_ds_atomic_fadd_f32(ptr, val); diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl index e2117f11858f7f8..c525c250c937cad 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl @@ -9,7 +9,7 @@ typedef half __attribute__((ext_vector_type(2))) half2; // CHECK-LABEL: test_global_add_f64 -// CHECK: call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}}) +// CHECK: = atomicrmw fadd ptr addrspace(1) %{{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} // GFX90A-LABEL: test_global_add_f64$local: // GFX90A: global_atomic_add_f64 void test_global_add_f64(__global double *addr, double x) { @@ -99,7 +99,7 @@ void test_flat_global_max_f64(__global double *addr, double x){ } // CHECK-LABEL: 
test_ds_add_local_f64 -// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, double %{{.+}} seq_cst, align 8 +// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, double %{{.+}} monotonic, align 8 // GFX90A: test_ds_add_local_f64$local // GFX90A: ds_add_rtn_f64 void test_ds_add_local_f64(__local double *addr, double x){ @@ -108,7 +108,7 @@ void test_ds_add_local_f64(__local double *addr, double x){ } // CHECK-LABEL: test_ds_addf_local_f32 -// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, float %{{.+}} seq_cst, align 4 +// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, float %{{.+}} monotonic, align 4 // GFX90A-LABEL: test_ds_addf_local_f32$local // GFX90A: ds_add_rtn_f32 void test_ds_addf_local_f32(__local float *addr, float x){ @@ -117,7 +117,7 @@ void test_ds_addf_local_f32(__local float *addr, float x){ } // CHECK-LABEL: @test_global_add_f32 -// CHECK: call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %{{.*}}, float %{{.*}}) +// CHECK: = atomicrmw fadd ptr addrspace(1) %{{.+}}, float %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}} void test_global_add_f32(float *rtn, global float *addr, float x) { *rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x); } diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl index 92a33ceac2290f5..5481138b9fee43a 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl @@ -44,7 +44,7 @@ short2 test_global_add_2bf16(__global short2 *addr, short2 x) { // CHECK-LABEL: test_local_add_2bf16 // CHECK: [[BC0:%.+]] = bitcast <2 x i16> {{.+}} to <2 x bfloat> -// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] seq_cst, align 4 +// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") monotonic, align 4{{$}} 
// CHECK-NEXT: bitcast <2 x bfloat> [[RMW]] to <2 x i16> // GFX940-LABEL: test_local_add_2bf16 @@ -54,7 +54,7 @@ short2 test_local_add_2bf16(__local short2 *addr, short2 x) { } // CHECK-LABEL: test_local_add_2f16 -// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x half> %{{.+}} seq_cst, align 4 +// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x half> %{{.+}} monotonic, align 4 // GFX940-LABEL: test_local_add_2f16 // GFX940: ds_pk_add_rtn_f16 half2 test_local_add_2f16(__local half2 *addr, half2 x) { @@ -62,7 +62,7 @@ half2 test_local_add_2f16(__local half2 *addr, half2 x) { } // CHECK-LABEL: test_local_add_2f16_noret -// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x half> %{{.+}} seq_cst, align 4 +// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x half> %{{.+}} monotonic, align 4 // GFX940-LABEL: test_local_add_2f16_noret // GFX940: ds_pk_add_f16 void test_local_add_2f16_noret(__local half2 *addr, half2 x) { @@ -70,7 +70,7 @@ void test_local_add_2f16_noret(__local half2 *addr, half2 x) { } // CHECK-LABEL: @test_global_add_f32 -// CHECK: call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %{{.*}}, float %{{.*}}) +// CHECK: = atomicrmw fadd ptr addrspace(1) %{{.+}}, float %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}} void test_global_add_f32(float *rtn, global float *addr, float x) { *rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x); } From 2b0a8fcf702fb63fca8ec6e11dca35baf70f058d Mon Sep 17 00:00:00 2001 From: Sirraide Date: Thu, 15 Aug 2024 21:16:30 +0200 Subject: [PATCH 067/441] =?UTF-8?q?[Clang]=20Implement=20C++26=E2=80=99s?= =?UTF-8?q?=20P2893R3=20=E2=80=98Variadic=20friends=E2=80=99=20(#101448)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement P2893R3 ‘Variadic friends’ for C++26. This closes #98587. 
Co-authored-by: Younan Zhang --- clang/docs/LanguageExtensions.rst | 1 + clang/docs/ReleaseNotes.rst | 2 + clang/include/clang/AST/DeclFriend.h | 39 +- .../clang/Basic/DiagnosticParseKinds.td | 6 + .../clang/Basic/DiagnosticSemaKinds.td | 4 + clang/include/clang/Sema/Sema.h | 7 +- clang/lib/AST/ASTImporter.cpp | 7 +- clang/lib/AST/DeclFriend.cpp | 14 +- clang/lib/AST/DeclPrinter.cpp | 5 +- clang/lib/AST/JSONNodeDumper.cpp | 1 + clang/lib/AST/ODRHash.cpp | 1 + clang/lib/AST/TextNodeDumper.cpp | 2 + clang/lib/Frontend/InitPreprocessor.cpp | 1 + clang/lib/Parse/ParseDeclCXX.cpp | 98 +++- clang/lib/Sema/SemaDecl.cpp | 8 +- clang/lib/Sema/SemaDeclCXX.cpp | 53 ++- .../lib/Sema/SemaTemplateInstantiateDecl.cpp | 43 +- clang/lib/Serialization/ASTReaderDecl.cpp | 1 + clang/lib/Serialization/ASTWriterDecl.cpp | 1 + clang/test/AST/ast-dump-funcs-json.cpp | 441 +++++++++++++----- clang/test/AST/cxx2c-variadic-friends.cpp | 81 ++++ clang/test/CXX/drs/cwg29xx.cpp | 25 + clang/test/Lexer/cxx-features.cpp | 4 + .../cxx2c-variadic-friends-ext-diags.cpp | 16 + clang/test/Parser/cxx2c-variadic-friends.cpp | 91 ++++ clang/test/SemaCXX/cxx2c-variadic-friends.cpp | 156 +++++++ clang/www/cxx_dr_status.html | 2 +- clang/www/cxx_status.html | 2 +- 28 files changed, 942 insertions(+), 170 deletions(-) create mode 100644 clang/test/AST/cxx2c-variadic-friends.cpp create mode 100644 clang/test/CXX/drs/cwg29xx.cpp create mode 100644 clang/test/Parser/cxx2c-variadic-friends-ext-diags.cpp create mode 100644 clang/test/Parser/cxx2c-variadic-friends.cpp create mode 100644 clang/test/SemaCXX/cxx2c-variadic-friends.cpp diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 4679dbb68b25e10..114e742f3561b75 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -1505,6 +1505,7 @@ Attributes on Lambda-Expressions C+ Attributes on Structured Bindings __cpp_structured_bindings C++26 C++03 Pack Indexing __cpp_pack_indexing C++26 C++03 ``= 
delete ("should have a reason");`` __cpp_deleted_function C++26 C++03 +Variadic Friends __cpp_variadic_friend C++26 C++03 -------------------------------------------- -------------------------------- ------------- ------------- Designated initializers (N494) C99 C89 Array & element qualification (N2607) C23 C89 diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 68cf79928bda021..14f1eecc5748ed0 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -126,6 +126,8 @@ C++2c Feature Support - Add ``__builtin_is_virtual_base_of`` intrinsic, which supports `P2985R0 A type trait for detecting virtual base classes `_ +- Implemented `P2893R3 Variadic Friends `_ + Resolutions to C++ Defect Reports ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/AST/DeclFriend.h b/clang/include/clang/AST/DeclFriend.h index 9789282f351a55c..095f14a81fd5747 100644 --- a/clang/include/clang/AST/DeclFriend.h +++ b/clang/include/clang/AST/DeclFriend.h @@ -70,6 +70,9 @@ class FriendDecl final // Location of the 'friend' specifier. SourceLocation FriendLoc; + // Location of the '...', if present. + SourceLocation EllipsisLoc; + /// True if this 'friend' declaration is unsupported. Eventually we /// will support every possible friend declaration, but for now we /// silently ignore some and set this flag to authorize all access. 
@@ -82,10 +85,11 @@ class FriendDecl final unsigned NumTPLists : 31; FriendDecl(DeclContext *DC, SourceLocation L, FriendUnion Friend, - SourceLocation FriendL, + SourceLocation FriendL, SourceLocation EllipsisLoc, ArrayRef FriendTypeTPLists) : Decl(Decl::Friend, DC, L), Friend(Friend), FriendLoc(FriendL), - UnsupportedFriend(false), NumTPLists(FriendTypeTPLists.size()) { + EllipsisLoc(EllipsisLoc), UnsupportedFriend(false), + NumTPLists(FriendTypeTPLists.size()) { for (unsigned i = 0; i < NumTPLists; ++i) getTrailingObjects()[i] = FriendTypeTPLists[i]; } @@ -110,7 +114,7 @@ class FriendDecl final static FriendDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation L, FriendUnion Friend_, - SourceLocation FriendL, + SourceLocation FriendL, SourceLocation EllipsisLoc = {}, ArrayRef FriendTypeTPLists = std::nullopt); static FriendDecl *CreateDeserialized(ASTContext &C, GlobalDeclID ID, unsigned FriendTypeNumTPLists); @@ -143,8 +147,24 @@ class FriendDecl final return FriendLoc; } + /// Retrieves the location of the '...', if present. + SourceLocation getEllipsisLoc() const { return EllipsisLoc; } + /// Retrieves the source range for the friend declaration. SourceRange getSourceRange() const override LLVM_READONLY { + if (TypeSourceInfo *TInfo = getFriendType()) { + SourceLocation StartL = + (NumTPLists == 0) ? getFriendLoc() + : getTrailingObjects()[0] + ->getTemplateLoc(); + SourceLocation EndL = isPackExpansion() ? getEllipsisLoc() + : TInfo->getTypeLoc().getEndLoc(); + return SourceRange(StartL, EndL); + } + + if (isPackExpansion()) + return SourceRange(getFriendLoc(), getEllipsisLoc()); + if (NamedDecl *ND = getFriendDecl()) { if (const auto *FD = dyn_cast(ND)) return FD->getSourceRange(); @@ -158,15 +178,8 @@ class FriendDecl final } return SourceRange(getFriendLoc(), ND->getEndLoc()); } - else if (TypeSourceInfo *TInfo = getFriendType()) { - SourceLocation StartL = - (NumTPLists == 0) ? 
getFriendLoc() - : getTrailingObjects()[0] - ->getTemplateLoc(); - return SourceRange(StartL, TInfo->getTypeLoc().getEndLoc()); - } - else - return SourceRange(getFriendLoc(), getLocation()); + + return SourceRange(getFriendLoc(), getLocation()); } /// Determines if this friend kind is unsupported. @@ -177,6 +190,8 @@ class FriendDecl final UnsupportedFriend = Unsupported; } + bool isPackExpansion() const { return EllipsisLoc.isValid(); } + // Implement isa/cast/dyncast/etc. static bool classof(const Decl *D) { return classofKind(D->getKind()); } static bool classofKind(Kind K) { return K == Decl::Friend; } diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index 12aab09f285567f..62a97b36737e72e 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -965,6 +965,12 @@ def warn_cxx23_delete_with_message : Warning< "'= delete' with a message is incompatible with C++ standards before C++2c">, DefaultIgnore, InGroup; +def ext_variadic_friends : ExtWarn< + "variadic 'friend' declarations are a C++2c extension">, InGroup; +def warn_cxx23_variadic_friends : Warning< + "variadic 'friend' declarations are incompatible with C++ standards before C++2c">, + DefaultIgnore, InGroup; + // C++11 default member initialization def ext_nonstatic_member_init : ExtWarn< "default member initializer for non-static data member is a C++11 " diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 461eeb19f65e4a3..8a92973236ddbdb 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -1741,6 +1741,10 @@ def ext_friend_tag_redecl_outside_namespace : ExtWarn< "enclosing namespace is a Microsoft extension; add a nested name specifier">, InGroup; def err_pure_friend : Error<"friend declaration cannot have a pure-specifier">; +def 
err_friend_template_decl_multiple_specifiers: Error< + "a friend declaration that befriends a template must contain exactly one type-specifier">; +def friend_template_decl_malformed_pack_expansion : Error< + "friend declaration expands pack %0 that is declared it its own template parameter list">; def err_invalid_base_in_interface : Error< "interface type cannot inherit from " diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index a025ff6fc13f360..88e82dca007b8bd 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -3800,7 +3800,8 @@ class Sema final : public SemaBase { const ParsedAttributesView &DeclAttrs, MultiTemplateParamsArg TemplateParams, bool IsExplicitInstantiation, - RecordDecl *&AnonRecord); + RecordDecl *&AnonRecord, + SourceLocation EllipsisLoc = {}); /// BuildAnonymousStructOrUnion - Handle the declaration of an /// anonymous structure or union. Anonymous unions are a C++ feature @@ -5538,7 +5539,8 @@ class Sema final : public SemaBase { /// parameters present at all, require proper matching, i.e. 
/// template <> template \ friend class A::B; Decl *ActOnFriendTypeDecl(Scope *S, const DeclSpec &DS, - MultiTemplateParamsArg TemplateParams); + MultiTemplateParamsArg TemplateParams, + SourceLocation EllipsisLoc); NamedDecl *ActOnFriendFunctionDecl(Scope *S, Declarator &D, MultiTemplateParamsArg TemplateParams); @@ -5852,6 +5854,7 @@ class Sema final : public SemaBase { unsigned TagSpec, SourceLocation TagLoc, CXXScopeSpec &SS, IdentifierInfo *Name, SourceLocation NameLoc, + SourceLocation EllipsisLoc, const ParsedAttributesView &Attr, MultiTemplateParamsArg TempParamLists); diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 198bc34a9f031bb..3bc0a647ebf94ff 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -4429,11 +4429,14 @@ ExpectedDecl ASTNodeImporter::VisitFriendDecl(FriendDecl *D) { auto FriendLocOrErr = import(D->getFriendLoc()); if (!FriendLocOrErr) return FriendLocOrErr.takeError(); + auto EllipsisLocOrErr = import(D->getEllipsisLoc()); + if (!EllipsisLocOrErr) + return EllipsisLocOrErr.takeError(); FriendDecl *FrD; if (GetImportedOrCreateDecl(FrD, D, Importer.getToContext(), DC, - *LocationOrErr, ToFU, - *FriendLocOrErr, ToTPLists)) + *LocationOrErr, ToFU, *FriendLocOrErr, + *EllipsisLocOrErr, ToTPLists)) return FrD; FrD->setAccess(D->getAccess()); diff --git a/clang/lib/AST/DeclFriend.cpp b/clang/lib/AST/DeclFriend.cpp index 04b9b93699f36ce..8b285bfce8d5221 100644 --- a/clang/lib/AST/DeclFriend.cpp +++ b/clang/lib/AST/DeclFriend.cpp @@ -31,11 +31,11 @@ FriendDecl *FriendDecl::getNextFriendSlowCase() { NextFriend.get(getASTContext().getExternalSource())); } -FriendDecl *FriendDecl::Create(ASTContext &C, DeclContext *DC, - SourceLocation L, - FriendUnion Friend, - SourceLocation FriendL, - ArrayRef FriendTypeTPLists) { +FriendDecl * +FriendDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L, + FriendUnion Friend, SourceLocation FriendL, + SourceLocation EllipsisLoc, + ArrayRef 
FriendTypeTPLists) { #ifndef NDEBUG if (Friend.is()) { const auto *D = Friend.get(); @@ -56,8 +56,8 @@ FriendDecl *FriendDecl::Create(ASTContext &C, DeclContext *DC, std::size_t Extra = FriendDecl::additionalSizeToAlloc( FriendTypeTPLists.size()); - auto *FD = new (C, DC, Extra) FriendDecl(DC, L, Friend, FriendL, - FriendTypeTPLists); + auto *FD = new (C, DC, Extra) + FriendDecl(DC, L, Friend, FriendL, EllipsisLoc, FriendTypeTPLists); cast(DC)->pushFriendDecl(FD); return FD; } diff --git a/clang/lib/AST/DeclPrinter.cpp b/clang/lib/AST/DeclPrinter.cpp index 26773a69ab9acff..07be813abd8adca 100644 --- a/clang/lib/AST/DeclPrinter.cpp +++ b/clang/lib/AST/DeclPrinter.cpp @@ -868,7 +868,7 @@ void DeclPrinter::VisitFriendDecl(FriendDecl *D) { for (unsigned i = 0; i < NumTPLists; ++i) printTemplateParameters(D->getFriendTypeTemplateParameterList(i)); Out << "friend "; - Out << " " << TSI->getType().getAsString(Policy); + Out << TSI->getType().getAsString(Policy); } else if (FunctionDecl *FD = dyn_cast(D->getFriendDecl())) { @@ -885,6 +885,9 @@ void DeclPrinter::VisitFriendDecl(FriendDecl *D) { Out << "friend "; VisitRedeclarableTemplateDecl(CTD); } + + if (D->isPackExpansion()) + Out << "..."; } void DeclPrinter::VisitFieldDecl(FieldDecl *D) { diff --git a/clang/lib/AST/JSONNodeDumper.cpp b/clang/lib/AST/JSONNodeDumper.cpp index f8f80c8c2515755..565f1e05710c879 100644 --- a/clang/lib/AST/JSONNodeDumper.cpp +++ b/clang/lib/AST/JSONNodeDumper.cpp @@ -1090,6 +1090,7 @@ void JSONNodeDumper::VisitAccessSpecDecl(const AccessSpecDecl *ASD) { void JSONNodeDumper::VisitFriendDecl(const FriendDecl *FD) { if (const TypeSourceInfo *T = FD->getFriendType()) JOS.attribute("type", createQualType(T->getType())); + attributeOnlyIfTrue("isPackExpansion", FD->isPackExpansion()); } void JSONNodeDumper::VisitObjCIvarDecl(const ObjCIvarDecl *D) { diff --git a/clang/lib/AST/ODRHash.cpp b/clang/lib/AST/ODRHash.cpp index fbfe92318dc5eee..b748093831e3f50 100644 --- a/clang/lib/AST/ODRHash.cpp +++ 
b/clang/lib/AST/ODRHash.cpp @@ -461,6 +461,7 @@ class ODRDeclVisitor : public ConstDeclVisitor { } else { AddDecl(D->getFriendDecl()); } + Hash.AddBoolean(D->isPackExpansion()); } void VisitTemplateTypeParmDecl(const TemplateTypeParmDecl *D) { diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp index d50d4c7028c6972..2c962253c8bea42 100644 --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -2697,6 +2697,8 @@ void TextNodeDumper::VisitAccessSpecDecl(const AccessSpecDecl *D) { void TextNodeDumper::VisitFriendDecl(const FriendDecl *D) { if (TypeSourceInfo *T = D->getFriendType()) dumpType(T->getType()); + if (D->isPackExpansion()) + OS << "..."; } void TextNodeDumper::VisitObjCIvarDecl(const ObjCIvarDecl *D) { diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 8e62461d8a1818b..4f2856dd2247f8f 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -765,6 +765,7 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts, // C++26 features supported in earlier language modes. 
Builder.defineMacro("__cpp_pack_indexing", "202311L"); Builder.defineMacro("__cpp_deleted_function", "202403L"); + Builder.defineMacro("__cpp_variadic_friend", "202403L"); if (LangOpts.Char8) Builder.defineMacro("__cpp_char8_t", "202207L"); diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index d45a738fe4c5969..18c5fe6056b4726 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -452,7 +452,7 @@ Decl *Parser::ParseLinkage(ParsingDeclSpec &DS, DeclaratorContext Context) { /// /// export-function-declaration: /// 'export' function-declaration -/// +/// /// export-declaration-group: /// 'export' '{' function-declaration-seq[opt] '}' /// @@ -2007,9 +2007,16 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind, const PrintingPolicy &Policy = Actions.getASTContext().getPrintingPolicy(); TagUseKind TUK; - if (isDefiningTypeSpecifierContext(DSC, getLangOpts().CPlusPlus) == - AllowDefiningTypeSpec::No || - (getLangOpts().OpenMP && OpenMPDirectiveParsing)) + + // C++26 [class.mem.general]p10: If a name-declaration matches the + // syntactic requirements of friend-type-declaration, it is a + // friend-type-declaration. + if (getLangOpts().CPlusPlus && DS.isFriendSpecifiedFirst() && + Tok.isOneOf(tok::comma, tok::ellipsis)) + TUK = TagUseKind::Friend; + else if (isDefiningTypeSpecifierContext(DSC, getLangOpts().CPlusPlus) == + AllowDefiningTypeSpec::No || + (getLangOpts().OpenMP && OpenMPDirectiveParsing)) TUK = TagUseKind::Reference; else if (Tok.is(tok::l_brace) || (DSC != DeclSpecContext::DSC_association && @@ -2241,9 +2248,28 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind, diag::err_keyword_not_allowed, /*DiagnoseEmptyAttrs=*/true); + // Consume '...' first so we error on the ',' after it if there is one. 
+ SourceLocation EllipsisLoc; + TryConsumeToken(tok::ellipsis, EllipsisLoc); + + // CWG 2917: In a template-declaration whose declaration is a + // friend-type-declaration, the friend-type-specifier-list shall + // consist of exactly one friend-type-specifier. + // + // Essentially, the following is obviously nonsense, so disallow it: + // + // template + // friend class S, int; + // + if (Tok.is(tok::comma)) { + Diag(Tok.getLocation(), + diag::err_friend_template_decl_multiple_specifiers); + SkipUntil(tok::semi, StopBeforeMatch); + } + TagOrTempResult = Actions.ActOnTemplatedFriendTag( getCurScope(), DS.getFriendSpecLoc(), TagType, StartLoc, SS, Name, - NameLoc, attrs, + NameLoc, EllipsisLoc, attrs, MultiTemplateParamsArg(TemplateParams ? &(*TemplateParams)[0] : nullptr, TemplateParams ? TemplateParams->size() : 0)); } else { @@ -2818,6 +2844,7 @@ void Parser::MaybeParseAndDiagnoseDeclSpecAfterCXX11VirtSpecifierSeq( /// member-declaration: /// decl-specifier-seq[opt] member-declarator-list[opt] ';' /// function-definition ';'[opt] +/// [C++26] friend-type-declaration /// ::[opt] nested-name-specifier template[opt] unqualified-id ';'[TODO] /// using-declaration [TODO] /// [C++0x] static_assert-declaration @@ -2850,6 +2877,18 @@ void Parser::MaybeParseAndDiagnoseDeclSpecAfterCXX11VirtSpecifierSeq( /// constant-initializer: /// '=' constant-expression /// +/// friend-type-declaration: +/// 'friend' friend-type-specifier-list ; +/// +/// friend-type-specifier-list: +/// friend-type-specifier ...[opt] +/// friend-type-specifier-list , friend-type-specifier ...[opt] +/// +/// friend-type-specifier: +/// simple-type-specifier +/// elaborated-type-specifier +/// typename-specifier +/// Parser::DeclGroupPtrTy Parser::ParseCXXClassMemberDeclaration( AccessSpecifier AS, ParsedAttributes &AccessAttrs, ParsedTemplateInfo &TemplateInfo, ParsingDeclRAIIObject *TemplateDiags) { @@ -3051,6 +3090,55 @@ Parser::DeclGroupPtrTy Parser::ParseCXXClassMemberDeclaration( if 
(DS.hasTagDefinition()) Actions.ActOnDefinedDeclarationSpecifier(DS.getRepAsDecl()); + // Handle C++26's variadic friend declarations. These don't even have + // declarators, so we get them out of the way early here. + if (DS.isFriendSpecifiedFirst() && Tok.isOneOf(tok::comma, tok::ellipsis)) { + Diag(Tok.getLocation(), getLangOpts().CPlusPlus26 + ? diag::warn_cxx23_variadic_friends + : diag::ext_variadic_friends); + + SourceLocation FriendLoc = DS.getFriendSpecLoc(); + SmallVector Decls; + + // Handles a single friend-type-specifier. + auto ParsedFriendDecl = [&](ParsingDeclSpec &DeclSpec) { + SourceLocation VariadicLoc; + TryConsumeToken(tok::ellipsis, VariadicLoc); + + RecordDecl *AnonRecord = nullptr; + Decl *D = Actions.ParsedFreeStandingDeclSpec( + getCurScope(), AS, DeclSpec, DeclAttrs, TemplateParams, false, + AnonRecord, VariadicLoc); + DeclSpec.complete(D); + if (!D) { + SkipUntil(tok::semi, tok::r_brace); + return true; + } + + Decls.push_back(D); + return false; + }; + + if (ParsedFriendDecl(DS)) + return nullptr; + + while (TryConsumeToken(tok::comma)) { + ParsingDeclSpec DeclSpec(*this, TemplateDiags); + const char *PrevSpec = nullptr; + unsigned DiagId = 0; + DeclSpec.SetFriendSpec(FriendLoc, PrevSpec, DiagId); + ParseDeclarationSpecifiers(DeclSpec, TemplateInfo, AS, + DeclSpecContext::DSC_class, nullptr); + if (ParsedFriendDecl(DeclSpec)) + return nullptr; + } + + ExpectAndConsume(tok::semi, diag::err_expected_semi_after_stmt, + "friend declaration"); + + return Actions.BuildDeclaratorGroup(Decls); + } + ParsingDeclarator DeclaratorInfo(*this, DS, DeclAttrs, DeclaratorContext::Member); if (TemplateInfo.TemplateParams) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 641b180527da558..83a652691e2e034 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -5000,7 +5000,8 @@ Decl *Sema::ParsedFreeStandingDeclSpec(Scope *S, AccessSpecifier AS, const ParsedAttributesView &DeclAttrs, MultiTemplateParamsArg 
TemplateParams, bool IsExplicitInstantiation, - RecordDecl *&AnonRecord) { + RecordDecl *&AnonRecord, + SourceLocation EllipsisLoc) { Decl *TagD = nullptr; TagDecl *Tag = nullptr; if (DS.getTypeSpecType() == DeclSpec::TST_class || @@ -5067,9 +5068,12 @@ Decl *Sema::ParsedFreeStandingDeclSpec(Scope *S, AccessSpecifier AS, // whatever routines created it handled the friendship aspect. if (TagD && !Tag) return nullptr; - return ActOnFriendTypeDecl(S, DS, TemplateParams); + return ActOnFriendTypeDecl(S, DS, TemplateParams, EllipsisLoc); } + assert(EllipsisLoc.isInvalid() && + "Friend ellipsis but not friend-specified?"); + // Track whether this decl-specifier declares anything. bool DeclaresAnything = true; diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 9ca91a2def39f5c..e05595e565d54a4 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -17386,7 +17386,8 @@ Decl *Sema::BuildStaticAssertDeclaration(SourceLocation StaticAssertLoc, DeclResult Sema::ActOnTemplatedFriendTag( Scope *S, SourceLocation FriendLoc, unsigned TagSpec, SourceLocation TagLoc, CXXScopeSpec &SS, IdentifierInfo *Name, SourceLocation NameLoc, - const ParsedAttributesView &Attr, MultiTemplateParamsArg TempParamLists) { + SourceLocation EllipsisLoc, const ParsedAttributesView &Attr, + MultiTemplateParamsArg TempParamLists) { TagTypeKind Kind = TypeWithKeyword::getTagTypeKindForTypeSpec(TagSpec); bool IsMemberSpecialization = false; @@ -17430,6 +17431,7 @@ DeclResult Sema::ActOnTemplatedFriendTag( // If it's explicit specializations all the way down, just forget // about the template header and build an appropriate non-templated // friend. TODO: for source fidelity, remember the headers. 
+ NestedNameSpecifierLoc QualifierLoc = SS.getWithLocInContext(Context); if (isAllExplicitSpecializations) { if (SS.isEmpty()) { bool Owned = false; @@ -17445,7 +17447,6 @@ DeclResult Sema::ActOnTemplatedFriendTag( /*IsTemplateParamOrArg=*/false, /*OOK=*/OOK_Outside); } - NestedNameSpecifierLoc QualifierLoc = SS.getWithLocInContext(Context); ElaboratedTypeKeyword Keyword = TypeWithKeyword::getKeywordForTagTypeKind(Kind); QualType T = CheckTypenameType(Keyword, TagLoc, QualifierLoc, @@ -17467,8 +17468,9 @@ DeclResult Sema::ActOnTemplatedFriendTag( TL.getNamedTypeLoc().castAs().setNameLoc(NameLoc); } - FriendDecl *Friend = FriendDecl::Create(Context, CurContext, NameLoc, - TSI, FriendLoc, TempParamLists); + FriendDecl *Friend = + FriendDecl::Create(Context, CurContext, NameLoc, TSI, FriendLoc, + EllipsisLoc, TempParamLists); Friend->setAccess(AS_public); CurContext->addDecl(Friend); return Friend; @@ -17476,7 +17478,22 @@ DeclResult Sema::ActOnTemplatedFriendTag( assert(SS.isNotEmpty() && "valid templated tag with no SS and no direct?"); - + // CWG 2917: if it (= the friend-type-specifier) is a pack expansion + // (13.7.4 [temp.variadic]), any packs expanded by that pack expansion + // shall not have been introduced by the template-declaration. + SmallVector Unexpanded; + collectUnexpandedParameterPacks(QualifierLoc, Unexpanded); + unsigned FriendDeclDepth = TempParamLists.front()->getDepth(); + for (UnexpandedParameterPack &U : Unexpanded) { + if (getDepthAndIndex(U).first >= FriendDeclDepth) { + auto *ND = U.first.dyn_cast(); + if (!ND) + ND = U.first.get()->getDecl(); + Diag(U.second, diag::friend_template_decl_malformed_pack_expansion) + << ND->getDeclName() << SourceRange(SS.getBeginLoc(), EllipsisLoc); + return true; + } + } // Handle the case of a templated-scope friend class. e.g. 
// template class A::B; @@ -17491,8 +17508,9 @@ DeclResult Sema::ActOnTemplatedFriendTag( TL.setQualifierLoc(SS.getWithLocInContext(Context)); TL.setNameLoc(NameLoc); - FriendDecl *Friend = FriendDecl::Create(Context, CurContext, NameLoc, - TSI, FriendLoc, TempParamLists); + FriendDecl *Friend = + FriendDecl::Create(Context, CurContext, NameLoc, TSI, FriendLoc, + EllipsisLoc, TempParamLists); Friend->setAccess(AS_public); Friend->setUnsupportedFriend(true); CurContext->addDecl(Friend); @@ -17500,7 +17518,8 @@ DeclResult Sema::ActOnTemplatedFriendTag( } Decl *Sema::ActOnFriendTypeDecl(Scope *S, const DeclSpec &DS, - MultiTemplateParamsArg TempParams) { + MultiTemplateParamsArg TempParams, + SourceLocation EllipsisLoc) { SourceLocation Loc = DS.getBeginLoc(); SourceLocation FriendLoc = DS.getFriendSpecLoc(); @@ -17541,8 +17560,18 @@ Decl *Sema::ActOnFriendTypeDecl(Scope *S, const DeclSpec &DS, if (TheDeclarator.isInvalidType()) return nullptr; - if (DiagnoseUnexpandedParameterPack(Loc, TSI, UPPC_FriendDeclaration)) + // If '...' is present, the type must contain an unexpanded parameter + // pack, and vice versa. + bool Invalid = false; + if (EllipsisLoc.isInvalid() && + DiagnoseUnexpandedParameterPack(Loc, TSI, UPPC_FriendDeclaration)) return nullptr; + if (EllipsisLoc.isValid() && + !TSI->getType()->containsUnexpandedParameterPack()) { + Diag(EllipsisLoc, diag::err_pack_expansion_without_parameter_packs) + << TSI->getTypeLoc().getSourceRange(); + Invalid = true; + } if (!T->isElaboratedTypeSpecifier()) { if (TempParams.size()) { @@ -17588,11 +17617,12 @@ Decl *Sema::ActOnFriendTypeDecl(Scope *S, const DeclSpec &DS, Decl *D; if (!TempParams.empty()) + // TODO: Support variadic friend template decls? 
D = FriendTemplateDecl::Create(Context, CurContext, Loc, TempParams, TSI, FriendLoc); else D = FriendDecl::Create(Context, CurContext, TSI->getTypeLoc().getBeginLoc(), - TSI, FriendLoc); + TSI, FriendLoc, EllipsisLoc); if (!D) return nullptr; @@ -17600,6 +17630,9 @@ Decl *Sema::ActOnFriendTypeDecl(Scope *S, const DeclSpec &DS, D->setAccess(AS_public); CurContext->addDecl(D); + if (Invalid) + D->setInvalidDecl(); + return D; } diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index f93cd113988ae42..14ca29f1bc3f1aa 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -1442,8 +1442,47 @@ Decl *TemplateDeclInstantiator::VisitFriendDecl(FriendDecl *D) { if (D->isUnsupportedFriend()) { InstTy = Ty; } else { - InstTy = SemaRef.SubstType(Ty, TemplateArgs, - D->getLocation(), DeclarationName()); + if (D->isPackExpansion()) { + SmallVector Unexpanded; + SemaRef.collectUnexpandedParameterPacks(Ty->getTypeLoc(), Unexpanded); + assert(!Unexpanded.empty() && "Pack expansion without packs"); + + bool ShouldExpand = true; + bool RetainExpansion = false; + std::optional NumExpansions; + if (SemaRef.CheckParameterPacksForExpansion( + D->getEllipsisLoc(), D->getSourceRange(), Unexpanded, + TemplateArgs, ShouldExpand, RetainExpansion, NumExpansions)) + return nullptr; + + assert(!RetainExpansion && + "should never retain an expansion for a variadic friend decl"); + + if (ShouldExpand) { + SmallVector Decls; + for (unsigned I = 0; I != *NumExpansions; I++) { + Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(SemaRef, I); + TypeSourceInfo *TSI = SemaRef.SubstType( + Ty, TemplateArgs, D->getEllipsisLoc(), DeclarationName()); + if (!TSI) + return nullptr; + + auto FD = + FriendDecl::Create(SemaRef.Context, Owner, D->getLocation(), + TSI, D->getFriendLoc()); + + FD->setAccess(AS_public); + Owner->addDecl(FD); + Decls.push_back(FD); + } + + // Just drop this node; we 
have no use for it anymore. + return nullptr; + } + } + + InstTy = SemaRef.SubstType(Ty, TemplateArgs, D->getLocation(), + DeclarationName()); } if (!InstTy) return nullptr; diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index a9199f7e50f5dc8..ef160228933c598 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -2361,6 +2361,7 @@ void ASTDeclReader::VisitFriendDecl(FriendDecl *D) { D->NextFriend = readDeclID().getRawValue(); D->UnsupportedFriend = (Record.readInt() != 0); D->FriendLoc = readSourceLocation(); + D->EllipsisLoc = readSourceLocation(); } void ASTDeclReader::VisitFriendTemplateDecl(FriendTemplateDecl *D) { diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index 8a4ca54349e38f1..555f6325da646bf 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -1660,6 +1660,7 @@ void ASTDeclWriter::VisitFriendDecl(FriendDecl *D) { Record.AddDeclRef(D->getNextFriend()); Record.push_back(D->UnsupportedFriend); Record.AddSourceLocation(D->FriendLoc); + Record.AddSourceLocation(D->EllipsisLoc); Code = serialization::DECL_FRIEND; } diff --git a/clang/test/AST/ast-dump-funcs-json.cpp b/clang/test/AST/ast-dump-funcs-json.cpp index 041d98f2713d32e..957df5cea6ec534 100644 --- a/clang/test/AST/ast-dump-funcs-json.cpp +++ b/clang/test/AST/ast-dump-funcs-json.cpp @@ -41,13 +41,18 @@ int main() { Test1(); // Causes this to be marked 'used' } +template +struct TestFriends { + friend Ts...; +}; + // NOTE: CHECK lines have been autogenerated by gen_ast_dump_json_test.py // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 124, // CHECK-NEXT: "file": "{{.*}}", // CHECK-NEXT: "line": 4, // CHECK-NEXT: "col": 8, @@ -55,12 +60,12 @@ int main() { // CHECK-NEXT: }, // CHECK-NEXT: "range": { 
// CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 119, // CHECK-NEXT: "col": 3, // CHECK-NEXT: "tokLen": 4 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 130, // CHECK-NEXT: "col": 14, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: } @@ -76,7 +81,7 @@ int main() { // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 140, // CHECK-NEXT: "file": "{{.*}}", // CHECK-NEXT: "line": 5, // CHECK-NEXT: "col": 8, @@ -84,12 +89,12 @@ int main() { // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 135, // CHECK-NEXT: "col": 3, // CHECK-NEXT: "tokLen": 4 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 148, // CHECK-NEXT: "col": 16, // CHECK-NEXT: "tokLen": 5 // CHECK-NEXT: } @@ -105,7 +110,7 @@ int main() { // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 162, // CHECK-NEXT: "file": "{{.*}}", // CHECK-NEXT: "line": 6, // CHECK-NEXT: "col": 8, @@ -113,12 +118,12 @@ int main() { // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 157, // CHECK-NEXT: "col": 3, // CHECK-NEXT: "tokLen": 4 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 170, // CHECK-NEXT: "col": 16, // CHECK-NEXT: "tokLen": 8 // CHECK-NEXT: } @@ -134,7 +139,7 @@ int main() { // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 187, // CHECK-NEXT: "file": "{{.*}}", // CHECK-NEXT: "line": 7, // CHECK-NEXT: "col": 8, @@ -142,12 +147,12 @@ int main() { // 
CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 182, // CHECK-NEXT: "col": 3, // CHECK-NEXT: "tokLen": 4 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 195, // CHECK-NEXT: "col": 16, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: } @@ -163,7 +168,7 @@ int main() { // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 205, // CHECK-NEXT: "file": "{{.*}}", // CHECK-NEXT: "line": 8, // CHECK-NEXT: "col": 8, @@ -171,12 +176,12 @@ int main() { // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 200, // CHECK-NEXT: "col": 3, // CHECK-NEXT: "tokLen": 4 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 213, // CHECK-NEXT: "col": 16, // CHECK-NEXT: "tokLen": 2 // CHECK-NEXT: } @@ -192,7 +197,7 @@ int main() { // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 232, // CHECK-NEXT: "file": "{{.*}}", // CHECK-NEXT: "line": 9, // CHECK-NEXT: "col": 16, @@ -200,12 +205,12 @@ int main() { // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 219, // CHECK-NEXT: "col": 3, // CHECK-NEXT: "tokLen": 7 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 253, // CHECK-NEXT: "col": 37, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: } @@ -221,18 +226,18 @@ int main() { // CHECK-NEXT: "id": "0x{{.*}}", // CHECK-NEXT: "kind": "ParmVarDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 243, // CHECK-NEXT: "col": 27, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: }, 
// CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 238, // CHECK-NEXT: "col": 22, // CHECK-NEXT: "tokLen": 5 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 238, // CHECK-NEXT: "col": 22, // CHECK-NEXT: "tokLen": 5 // CHECK-NEXT: } @@ -245,18 +250,18 @@ int main() { // CHECK-NEXT: "id": "0x{{.*}}", // CHECK-NEXT: "kind": "ParmVarDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 249, // CHECK-NEXT: "col": 33, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 245, // CHECK-NEXT: "col": 29, // CHECK-NEXT: "tokLen": 3 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 251, // CHECK-NEXT: "col": 35, // CHECK-NEXT: "tokLen": 2 // CHECK-NEXT: } @@ -271,12 +276,12 @@ int main() { // CHECK-NEXT: "kind": "IntegerLiteral", // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 251, // CHECK-NEXT: "col": 35, // CHECK-NEXT: "tokLen": 2 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 251, // CHECK-NEXT: "col": 35, // CHECK-NEXT: "tokLen": 2 // CHECK-NEXT: } @@ -296,7 +301,7 @@ int main() { // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 271, // CHECK-NEXT: "file": "{{.*}}", // CHECK-NEXT: "line": 10, // CHECK-NEXT: "col": 16, @@ -304,12 +309,12 @@ int main() { // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 258, // CHECK-NEXT: "col": 3, // CHECK-NEXT: "tokLen": 7 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// 
CHECK-NEXT: "offset": 281, // CHECK-NEXT: "col": 26, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: } @@ -327,7 +332,7 @@ int main() { // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 343, // CHECK-NEXT: "file": "{{.*}}", // CHECK-NEXT: "line": 14, // CHECK-NEXT: "col": 8, @@ -335,12 +340,12 @@ int main() { // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 338, // CHECK-NEXT: "col": 3, // CHECK-NEXT: "tokLen": 4 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 367, // CHECK-NEXT: "col": 32, // CHECK-NEXT: "tokLen": 8 // CHECK-NEXT: } @@ -355,18 +360,18 @@ int main() { // CHECK-NEXT: "id": "0x{{.*}}", // CHECK-NEXT: "kind": "ParmVarDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 354, // CHECK-NEXT: "col": 19, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 349, // CHECK-NEXT: "col": 14, // CHECK-NEXT: "tokLen": 5 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 349, // CHECK-NEXT: "col": 14, // CHECK-NEXT: "tokLen": 5 // CHECK-NEXT: } @@ -379,18 +384,18 @@ int main() { // CHECK-NEXT: "id": "0x{{.*}}", // CHECK-NEXT: "kind": "ParmVarDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 360, // CHECK-NEXT: "col": 25, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 356, // CHECK-NEXT: "col": 21, // CHECK-NEXT: "tokLen": 3 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 362, // CHECK-NEXT: "col": 27, // CHECK-NEXT: 
"tokLen": 3 // CHECK-NEXT: } @@ -405,12 +410,12 @@ int main() { // CHECK-NEXT: "kind": "IntegerLiteral", // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 362, // CHECK-NEXT: "col": 27, // CHECK-NEXT: "tokLen": 3 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 362, // CHECK-NEXT: "col": 27, // CHECK-NEXT: "tokLen": 3 // CHECK-NEXT: } @@ -428,12 +433,12 @@ int main() { // CHECK-NEXT: "kind": "OverrideAttr", // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 367, // CHECK-NEXT: "col": 32, // CHECK-NEXT: "tokLen": 8 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 367, // CHECK-NEXT: "col": 32, // CHECK-NEXT: "tokLen": 8 // CHECK-NEXT: } @@ -446,7 +451,7 @@ int main() { // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 399, // CHECK-NEXT: "file": "{{.*}}", // CHECK-NEXT: "line": 18, // CHECK-NEXT: "col": 8, @@ -454,12 +459,12 @@ int main() { // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 394, // CHECK-NEXT: "col": 3, // CHECK-NEXT: "tokLen": 4 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 405, // CHECK-NEXT: "col": 14, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: } @@ -475,7 +480,7 @@ int main() { // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 419, // CHECK-NEXT: "file": "{{.*}}", // CHECK-NEXT: "line": 20, // CHECK-NEXT: "col": 9, @@ -483,12 +488,12 @@ int main() { // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// 
CHECK-NEXT: "offset": 411, // CHECK-NEXT: "col": 1, // CHECK-NEXT: "tokLen": 4 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 428, // CHECK-NEXT: "col": 18, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: } @@ -506,12 +511,12 @@ int main() { // CHECK-NEXT: "kind": "CompoundStmt", // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 427, // CHECK-NEXT: "col": 17, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 428, // CHECK-NEXT: "col": 18, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: } @@ -524,7 +529,7 @@ int main() { // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 446, // CHECK-NEXT: "file": "{{.*}}", // CHECK-NEXT: "line": 22, // CHECK-NEXT: "col": 6, @@ -532,12 +537,12 @@ int main() { // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 441, // CHECK-NEXT: "col": 1, // CHECK-NEXT: "tokLen": 4 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 452, // CHECK-NEXT: "col": 12, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: } @@ -554,7 +559,7 @@ int main() { // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 460, // CHECK-NEXT: "file": "{{.*}}", // CHECK-NEXT: "line": 23, // CHECK-NEXT: "col": 6, @@ -562,12 +567,12 @@ int main() { // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 455, // CHECK-NEXT: "col": 1, // CHECK-NEXT: "tokLen": 4 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 470, // CHECK-NEXT: "col": 
16, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: } @@ -583,7 +588,7 @@ int main() { // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 478, // CHECK-NEXT: "file": "{{.*}}", // CHECK-NEXT: "line": 24, // CHECK-NEXT: "col": 6, @@ -591,12 +596,12 @@ int main() { // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 473, // CHECK-NEXT: "col": 1, // CHECK-NEXT: "tokLen": 4 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 496, // CHECK-NEXT: "col": 24, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: } @@ -611,18 +616,18 @@ int main() { // CHECK-NEXT: "id": "0x{{.*}}", // CHECK-NEXT: "kind": "ParmVarDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 488, // CHECK-NEXT: "col": 16, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 484, // CHECK-NEXT: "col": 12, // CHECK-NEXT: "tokLen": 3 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 488, // CHECK-NEXT: "col": 16, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: } @@ -636,18 +641,18 @@ int main() { // CHECK-NEXT: "id": "0x{{.*}}", // CHECK-NEXT: "kind": "ParmVarDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 495, // CHECK-NEXT: "col": 23, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 491, // CHECK-NEXT: "col": 19, // CHECK-NEXT: "tokLen": 3 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 495, // CHECK-NEXT: "col": 23, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: } @@ -664,7 +669,7 @@ 
int main() { // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 504, // CHECK-NEXT: "file": "{{.*}}", // CHECK-NEXT: "line": 25, // CHECK-NEXT: "col": 6, @@ -672,12 +677,12 @@ int main() { // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 499, // CHECK-NEXT: "col": 1, // CHECK-NEXT: "tokLen": 4 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 527, // CHECK-NEXT: "col": 29, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: } @@ -692,18 +697,18 @@ int main() { // CHECK-NEXT: "id": "0x{{.*}}", // CHECK-NEXT: "kind": "ParmVarDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 514, // CHECK-NEXT: "col": 16, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 510, // CHECK-NEXT: "col": 12, // CHECK-NEXT: "tokLen": 3 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 514, // CHECK-NEXT: "col": 16, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: } @@ -717,18 +722,18 @@ int main() { // CHECK-NEXT: "id": "0x{{.*}}", // CHECK-NEXT: "kind": "ParmVarDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 521, // CHECK-NEXT: "col": 23, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 517, // CHECK-NEXT: "col": 19, // CHECK-NEXT: "tokLen": 3 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 525, // CHECK-NEXT: "col": 27, // CHECK-NEXT: "tokLen": 2 // CHECK-NEXT: } @@ -744,12 +749,12 @@ int main() { // CHECK-NEXT: "kind": "IntegerLiteral", // CHECK-NEXT: 
"range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 525, // CHECK-NEXT: "col": 27, // CHECK-NEXT: "tokLen": 2 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 525, // CHECK-NEXT: "col": 27, // CHECK-NEXT: "tokLen": 2 // CHECK-NEXT: } @@ -769,7 +774,7 @@ int main() { // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 545, // CHECK-NEXT: "file": "{{.*}}", // CHECK-NEXT: "line": 26, // CHECK-NEXT: "col": 16, @@ -777,12 +782,12 @@ int main() { // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 530, // CHECK-NEXT: "col": 1, // CHECK-NEXT: "tokLen": 9 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 555, // CHECK-NEXT: "col": 26, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: } @@ -799,7 +804,7 @@ int main() { // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 570, // CHECK-NEXT: "file": "{{.*}}", // CHECK-NEXT: "line": 27, // CHECK-NEXT: "col": 13, @@ -807,12 +812,12 @@ int main() { // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 558, // CHECK-NEXT: "col": 1, // CHECK-NEXT: "tokLen": 6 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 580, // CHECK-NEXT: "col": 23, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: } @@ -829,7 +834,7 @@ int main() { // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 595, // CHECK-NEXT: "file": "{{.*}}", // CHECK-NEXT: "line": 28, // CHECK-NEXT: "col": 13, @@ -837,12 +842,12 @@ int 
main() { // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 583, // CHECK-NEXT: "col": 1, // CHECK-NEXT: "tokLen": 6 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 605, // CHECK-NEXT: "col": 23, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: } @@ -859,7 +864,7 @@ int main() { // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 620, // CHECK-NEXT: "file": "{{.*}}", // CHECK-NEXT: "line": 29, // CHECK-NEXT: "col": 13, @@ -867,12 +872,12 @@ int main() { // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 608, // CHECK-NEXT: "col": 1, // CHECK-NEXT: "tokLen": 6 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 630, // CHECK-NEXT: "col": 23, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: } @@ -889,7 +894,7 @@ int main() { // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 638, // CHECK-NEXT: "file": "{{.*}}", // CHECK-NEXT: "line": 30, // CHECK-NEXT: "col": 6, @@ -897,12 +902,12 @@ int main() { // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 633, // CHECK-NEXT: "col": 1, // CHECK-NEXT: "tokLen": 4 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 650, // CHECK-NEXT: "col": 18, // CHECK-NEXT: "tokLen": 8 // CHECK-NEXT: } @@ -918,7 +923,7 @@ int main() { // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 665, // CHECK-NEXT: "file": "{{.*}}", // CHECK-NEXT: "line": 31, // 
CHECK-NEXT: "col": 6, @@ -926,12 +931,12 @@ int main() { // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 660, // CHECK-NEXT: "col": 1, // CHECK-NEXT: "tokLen": 4 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 692, // CHECK-NEXT: "col": 33, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: } @@ -947,7 +952,7 @@ int main() { // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 700, // CHECK-NEXT: "file": "{{.*}}", // CHECK-NEXT: "line": 32, // CHECK-NEXT: "col": 6, @@ -955,12 +960,12 @@ int main() { // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 695, // CHECK-NEXT: "col": 1, // CHECK-NEXT: "tokLen": 4 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 723, // CHECK-NEXT: "col": 29, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: } @@ -976,7 +981,7 @@ int main() { // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionTemplateDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 751, // CHECK-NEXT: "file": "{{.*}}", // CHECK-NEXT: "line": 35, // CHECK-NEXT: "col": 3, @@ -984,13 +989,13 @@ int main() { // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 727, // CHECK-NEXT: "line": 34, // CHECK-NEXT: "col": 1, // CHECK-NEXT: "tokLen": 8 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 760, // CHECK-NEXT: "line": 35, // CHECK-NEXT: "col": 12, // CHECK-NEXT: "tokLen": 1 @@ -1002,19 +1007,19 @@ int main() { // CHECK-NEXT: "id": "0x{{.*}}", // CHECK-NEXT: "kind": "TemplateTypeParmDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": 
{{[0-9]+}}, +// CHECK-NEXT: "offset": 746, // CHECK-NEXT: "line": 34, // CHECK-NEXT: "col": 20, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 737, // CHECK-NEXT: "col": 11, // CHECK-NEXT: "tokLen": 8 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 746, // CHECK-NEXT: "col": 20, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: } @@ -1029,19 +1034,19 @@ int main() { // CHECK-NEXT: "id": "0x{{.*}}", // CHECK-NEXT: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 751, // CHECK-NEXT: "line": 35, // CHECK-NEXT: "col": 3, // CHECK-NEXT: "tokLen": 6 // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 749, // CHECK-NEXT: "col": 1, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 760, // CHECK-NEXT: "col": 12, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: } @@ -1055,18 +1060,18 @@ int main() { // CHECK-NEXT: "id": "0x{{.*}}", // CHECK-NEXT: "kind": "ParmVarDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 760, // CHECK-NEXT: "col": 12, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 758, // CHECK-NEXT: "col": 10, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 759, // CHECK-NEXT: "col": 11, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: } @@ -1084,7 +1089,7 @@ int main() { // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 769, // CHECK-NEXT: "file": "{{.*}}", 
// CHECK-NEXT: "line": 37, // CHECK-NEXT: "col": 6, @@ -1092,12 +1097,12 @@ int main() { // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 764, // CHECK-NEXT: "col": 1, // CHECK-NEXT: "tokLen": 4 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 782, // CHECK-NEXT: "col": 19, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: } @@ -1112,18 +1117,18 @@ int main() { // CHECK-NEXT: "id": "0x{{.*}}", // CHECK-NEXT: "kind": "ParmVarDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 779, // CHECK-NEXT: "col": 16, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 776, // CHECK-NEXT: "col": 13, // CHECK-NEXT: "tokLen": 3 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 776, // CHECK-NEXT: "col": 13, // CHECK-NEXT: "tokLen": 3 // CHECK-NEXT: } @@ -1137,12 +1142,12 @@ int main() { // CHECK-NEXT: "kind": "CompoundStmt", // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 781, // CHECK-NEXT: "col": 18, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 782, // CHECK-NEXT: "col": 19, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: } @@ -1155,7 +1160,7 @@ int main() { // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 789, // CHECK-NEXT: "file": "{{.*}}", // CHECK-NEXT: "line": 38, // CHECK-NEXT: "col": 6, @@ -1163,12 +1168,12 @@ int main() { // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 784, // CHECK-NEXT: "col": 1, // 
CHECK-NEXT: "tokLen": 4 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 807, // CHECK-NEXT: "col": 24, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: } @@ -1184,18 +1189,18 @@ int main() { // CHECK-NEXT: "id": "0x{{.*}}", // CHECK-NEXT: "kind": "ParmVarDecl", // CHECK-NEXT: "loc": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 799, // CHECK-NEXT: "col": 16, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 796, // CHECK-NEXT: "col": 13, // CHECK-NEXT: "tokLen": 3 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 796, // CHECK-NEXT: "col": 13, // CHECK-NEXT: "tokLen": 3 // CHECK-NEXT: } @@ -1209,12 +1214,12 @@ int main() { // CHECK-NEXT: "kind": "CompoundStmt", // CHECK-NEXT: "range": { // CHECK-NEXT: "begin": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 806, // CHECK-NEXT: "col": 23, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: }, // CHECK-NEXT: "end": { -// CHECK-NEXT: "offset": {{[0-9]+}}, +// CHECK-NEXT: "offset": 807, // CHECK-NEXT: "col": 24, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: } @@ -1222,3 +1227,189 @@ int main() { // CHECK-NEXT: } // CHECK-NEXT: ] // CHECK-NEXT: } + + +// CHECK-NOT: {{^}}Dumping +// CHECK: "kind": "ClassTemplateDecl", +// CHECK-NEXT: "loc": { +// CHECK-NEXT: "offset": 905, +// CHECK-NEXT: "file": "{{.*}}", +// CHECK-NEXT: "line": 45, +// CHECK-NEXT: "col": 8, +// CHECK-NEXT: "tokLen": 11 +// CHECK-NEXT: }, +// CHECK-NEXT: "range": { +// CHECK-NEXT: "begin": { +// CHECK-NEXT: "offset": 872, +// CHECK-NEXT: "line": 44, +// CHECK-NEXT: "col": 1, +// CHECK-NEXT: "tokLen": 8 +// CHECK-NEXT: }, +// CHECK-NEXT: "end": { +// CHECK-NEXT: "offset": 937, +// CHECK-NEXT: "line": 47, +// CHECK-NEXT: "col": 1, +// CHECK-NEXT: "tokLen": 1 +// CHECK-NEXT: } +// CHECK-NEXT: }, +// CHECK-NEXT: "name": 
"TestFriends", +// CHECK-NEXT: "inner": [ +// CHECK-NEXT: { +// CHECK-NEXT: "id": "0x{{.*}}", +// CHECK-NEXT: "kind": "TemplateTypeParmDecl", +// CHECK-NEXT: "loc": { +// CHECK-NEXT: "offset": 894, +// CHECK-NEXT: "line": 44, +// CHECK-NEXT: "col": 23, +// CHECK-NEXT: "tokLen": 2 +// CHECK-NEXT: }, +// CHECK-NEXT: "range": { +// CHECK-NEXT: "begin": { +// CHECK-NEXT: "offset": 882, +// CHECK-NEXT: "col": 11, +// CHECK-NEXT: "tokLen": 8 +// CHECK-NEXT: }, +// CHECK-NEXT: "end": { +// CHECK-NEXT: "offset": 894, +// CHECK-NEXT: "col": 23, +// CHECK-NEXT: "tokLen": 2 +// CHECK-NEXT: } +// CHECK-NEXT: }, +// CHECK-NEXT: "isReferenced": true, +// CHECK-NEXT: "name": "Ts", +// CHECK-NEXT: "tagUsed": "typename", +// CHECK-NEXT: "depth": 0, +// CHECK-NEXT: "index": 0, +// CHECK-NEXT: "isParameterPack": true +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "id": "0x{{.*}}", +// CHECK-NEXT: "kind": "CXXRecordDecl", +// CHECK-NEXT: "loc": { +// CHECK-NEXT: "offset": 905, +// CHECK-NEXT: "line": 45, +// CHECK-NEXT: "col": 8, +// CHECK-NEXT: "tokLen": 11 +// CHECK-NEXT: }, +// CHECK-NEXT: "range": { +// CHECK-NEXT: "begin": { +// CHECK-NEXT: "offset": 898, +// CHECK-NEXT: "col": 1, +// CHECK-NEXT: "tokLen": 6 +// CHECK-NEXT: }, +// CHECK-NEXT: "end": { +// CHECK-NEXT: "offset": 937, +// CHECK-NEXT: "line": 47, +// CHECK-NEXT: "col": 1, +// CHECK-NEXT: "tokLen": 1 +// CHECK-NEXT: } +// CHECK-NEXT: }, +// CHECK-NEXT: "name": "TestFriends", +// CHECK-NEXT: "tagUsed": "struct", +// CHECK-NEXT: "completeDefinition": true, +// CHECK-NEXT: "definitionData": { +// CHECK-NEXT: "canConstDefaultInit": true, +// CHECK-NEXT: "copyAssign": { +// CHECK-NEXT: "hasConstParam": true, +// CHECK-NEXT: "implicitHasConstParam": true, +// CHECK-NEXT: "needsImplicit": true, +// CHECK-NEXT: "simple": true, +// CHECK-NEXT: "trivial": true +// CHECK-NEXT: }, +// CHECK-NEXT: "copyCtor": { +// CHECK-NEXT: "hasConstParam": true, +// CHECK-NEXT: "implicitHasConstParam": true, +// CHECK-NEXT: 
"needsImplicit": true, +// CHECK-NEXT: "simple": true, +// CHECK-NEXT: "trivial": true +// CHECK-NEXT: }, +// CHECK-NEXT: "defaultCtor": { +// CHECK-NEXT: "defaultedIsConstexpr": true, +// CHECK-NEXT: "exists": true, +// CHECK-NEXT: "isConstexpr": true, +// CHECK-NEXT: "needsImplicit": true, +// CHECK-NEXT: "trivial": true +// CHECK-NEXT: }, +// CHECK-NEXT: "dtor": { +// CHECK-NEXT: "irrelevant": true, +// CHECK-NEXT: "needsImplicit": true, +// CHECK-NEXT: "simple": true, +// CHECK-NEXT: "trivial": true +// CHECK-NEXT: }, +// CHECK-NEXT: "hasConstexprNonCopyMoveConstructor": true, +// CHECK-NEXT: "isAggregate": true, +// CHECK-NEXT: "isEmpty": true, +// CHECK-NEXT: "isLiteral": true, +// CHECK-NEXT: "isPOD": true, +// CHECK-NEXT: "isStandardLayout": true, +// CHECK-NEXT: "isTrivial": true, +// CHECK-NEXT: "isTriviallyCopyable": true, +// CHECK-NEXT: "moveAssign": { +// CHECK-NEXT: "exists": true, +// CHECK-NEXT: "needsImplicit": true, +// CHECK-NEXT: "simple": true, +// CHECK-NEXT: "trivial": true +// CHECK-NEXT: }, +// CHECK-NEXT: "moveCtor": { +// CHECK-NEXT: "exists": true, +// CHECK-NEXT: "needsImplicit": true, +// CHECK-NEXT: "simple": true, +// CHECK-NEXT: "trivial": true +// CHECK-NEXT: } +// CHECK-NEXT: }, +// CHECK-NEXT: "inner": [ +// CHECK-NEXT: { +// CHECK-NEXT: "id": "0x{{.*}}", +// CHECK-NEXT: "kind": "CXXRecordDecl", +// CHECK-NEXT: "loc": { +// CHECK-NEXT: "offset": 905, +// CHECK-NEXT: "line": 45, +// CHECK-NEXT: "col": 8, +// CHECK-NEXT: "tokLen": 11 +// CHECK-NEXT: }, +// CHECK-NEXT: "range": { +// CHECK-NEXT: "begin": { +// CHECK-NEXT: "offset": 898, +// CHECK-NEXT: "col": 1, +// CHECK-NEXT: "tokLen": 6 +// CHECK-NEXT: }, +// CHECK-NEXT: "end": { +// CHECK-NEXT: "offset": 905, +// CHECK-NEXT: "col": 8, +// CHECK-NEXT: "tokLen": 11 +// CHECK-NEXT: } +// CHECK-NEXT: }, +// CHECK-NEXT: "isImplicit": true, +// CHECK-NEXT: "name": "TestFriends", +// CHECK-NEXT: "tagUsed": "struct" +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "id": "0x{{.*}}", 
+// CHECK-NEXT: "kind": "FriendDecl", +// CHECK-NEXT: "loc": { +// CHECK-NEXT: "offset": 930, +// CHECK-NEXT: "line": 46, +// CHECK-NEXT: "col": 12, +// CHECK-NEXT: "tokLen": 2 +// CHECK-NEXT: }, +// CHECK-NEXT: "range": { +// CHECK-NEXT: "begin": { +// CHECK-NEXT: "offset": 923, +// CHECK-NEXT: "col": 5, +// CHECK-NEXT: "tokLen": 6 +// CHECK-NEXT: }, +// CHECK-NEXT: "end": { +// CHECK-NEXT: "offset": 932, +// CHECK-NEXT: "col": 14, +// CHECK-NEXT: "tokLen": 3 +// CHECK-NEXT: } +// CHECK-NEXT: }, +// CHECK-NEXT: "type": { +// CHECK-NEXT: "qualType": "Ts" +// CHECK-NEXT: }, +// CHECK-NEXT: "isPackExpansion": true +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: } diff --git a/clang/test/AST/cxx2c-variadic-friends.cpp b/clang/test/AST/cxx2c-variadic-friends.cpp new file mode 100644 index 000000000000000..fc84e7346fe0364 --- /dev/null +++ b/clang/test/AST/cxx2c-variadic-friends.cpp @@ -0,0 +1,81 @@ +// RUN: %clang_cc1 -fsyntax-only -ast-dump -std=c++2c %s | FileCheck %s +// RUN: %clang_cc1 -ast-print -std=c++2c %s | FileCheck %s --check-prefix=PRINT +// RUN: %clang_cc1 -emit-pch -std=c++2c -o %t %s +// RUN: %clang_cc1 -x c++ -std=c++2c -include-pch %t -ast-dump-all /dev/null + +struct S; +template struct TS; // #template + +// CHECK-LABEL: CXXRecordDecl {{.*}} struct Friends +// PRINT-LABEL: struct Friends { +struct Friends { + // CHECK: FriendDecl {{.*}} 'int' + // CHECK-NEXT: FriendDecl {{.*}} 'long' + // PRINT-NEXT: friend int; + // PRINT-NEXT: friend long; + friend int, long; + + // CHECK-NEXT: FriendDecl {{.*}} 'int' + // CHECK-NEXT: FriendDecl {{.*}} 'long' + // CHECK-NEXT: FriendDecl {{.*}} 'char' + // PRINT-NEXT: friend int; + // PRINT-NEXT: friend long; + // PRINT-NEXT: friend char; + friend int, long, char; + + // CHECK-NEXT: FriendDecl {{.*}} 'S' + // PRINT-NEXT: friend S; + friend S; + + // CHECK-NEXT: FriendDecl {{.*}} 'S' + // CHECK-NEXT: FriendDecl {{.*}} 'S' + // CHECK-NEXT: FriendDecl {{.*}} 'S' + // PRINT-NEXT: 
friend S; + // PRINT-NEXT: friend S; + // PRINT-NEXT: friend S; + friend S, S, S; + + // CHECK-NEXT: FriendDecl + // CHECK-NEXT: ClassTemplateDecl {{.*}} friend TS + // PRINT-NEXT: friend template struct TS; + template friend struct TS; +}; + +namespace specialisations { +template +struct C { + template struct Nested; +}; + +struct N { + template class C; +}; + +// CHECK-LABEL: ClassTemplateDecl {{.*}} Variadic +// PRINT-LABEL: template struct Variadic { +template struct Variadic { + // CHECK: FriendDecl {{.*}} 'Pack'... + // CHECK-NEXT: FriendDecl {{.*}} 'long' + // CHECK-NEXT: FriendDecl {{.*}} 'Pack'... + // PRINT-NEXT: friend Pack...; + // PRINT-NEXT: friend long; + // PRINT-NEXT: friend Pack...; + friend Pack..., long, Pack...; + + // CHECK-NEXT: FriendDecl {{.*}} 'TS'... + // PRINT-NEXT: friend TS...; + friend TS...; +}; + +// CHECK-LABEL: ClassTemplateDecl {{.*}} S2 +// PRINT-LABEL: template struct S2 { +template struct S2 { + // CHECK: FriendDecl {{.*}} 'class C':'C'... + // PRINT-NEXT: friend class C...; + friend class C...; + + // CHECK-NEXT: FriendDecl {{.*}} 'class N::C':'C'... + // PRINT-NEXT: friend class N::C... 
+ friend class N::C...; +}; +} diff --git a/clang/test/CXX/drs/cwg29xx.cpp b/clang/test/CXX/drs/cwg29xx.cpp new file mode 100644 index 000000000000000..8cac9f283980b6b --- /dev/null +++ b/clang/test/CXX/drs/cwg29xx.cpp @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -std=c++98 -pedantic-errors -verify=expected,cxx98 %s +// RUN: %clang_cc1 -std=c++11 -pedantic-errors -verify=expected %s +// RUN: %clang_cc1 -std=c++14 -pedantic-errors -verify=expected %s +// RUN: %clang_cc1 -std=c++17 -pedantic-errors -verify=expected %s +// RUN: %clang_cc1 -std=c++20 -pedantic-errors -verify=expected %s +// RUN: %clang_cc1 -std=c++23 -pedantic-errors -verify=expected %s +// RUN: %clang_cc1 -std=c++2c -pedantic-errors -verify=expected %s + +namespace cwg2917 { // cwg2917: 20 open 2024-07-30 +template +class Foo; + +template // cxx98-error {{variadic templates are a C++11 extension}} +struct C { + struct Nested { }; +}; + +struct S { + template + friend class Foo, int; // expected-error {{a friend declaration that befriends a template must contain exactly one type-specifier}} + + template // cxx98-error {{variadic templates are a C++11 extension}} + friend class C::Nested...; // expected-error {{friend declaration expands pack 'Ts' that is declared it its own template parameter list}} +}; +} // namespace cwg2917 diff --git a/clang/test/Lexer/cxx-features.cpp b/clang/test/Lexer/cxx-features.cpp index 08b732132228bac..1c51013ca06f774 100644 --- a/clang/test/Lexer/cxx-features.cpp +++ b/clang/test/Lexer/cxx-features.cpp @@ -34,6 +34,10 @@ // --- C++26 features --- +#if check(variadic_friend, 202403, 202403, 202403, 202403, 202403, 202403, 202403) +#error "wrong value for __cpp_variadic_friend" +#endif + #if check(deleted_function, 202403, 202403, 202403, 202403, 202403, 202403, 202403) #error "wrong value for __cpp_deleted_function" #endif diff --git a/clang/test/Parser/cxx2c-variadic-friends-ext-diags.cpp b/clang/test/Parser/cxx2c-variadic-friends-ext-diags.cpp new file mode 100644 index 
000000000000000..ffcc97ffd635298 --- /dev/null +++ b/clang/test/Parser/cxx2c-variadic-friends-ext-diags.cpp @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -std=c++2c -verify=compat -fsyntax-only -Wpre-c++26-compat %s +// RUN: %clang_cc1 -std=c++11 -verify=pre2c -fsyntax-only -Wc++26-extensions %s + +struct S { + friend int, long, char; // compat-warning {{variadic 'friend' declarations are incompatible with C++ standards before C++2c}} \ + // pre2c-warning {{variadic 'friend' declarations are a C++2c extension}} +}; + +template +struct TS { + friend Types...; // compat-warning {{variadic 'friend' declarations are incompatible with C++ standards before C++2c}} \ + // pre2c-warning {{variadic 'friend' declarations are a C++2c extension}} + + friend int, Types..., Types...; // compat-warning {{variadic 'friend' declarations are incompatible with C++ standards before C++2c}} \ + // pre2c-warning {{variadic 'friend' declarations are a C++2c extension}} +}; diff --git a/clang/test/Parser/cxx2c-variadic-friends.cpp b/clang/test/Parser/cxx2c-variadic-friends.cpp new file mode 100644 index 000000000000000..b7da3e611048129 --- /dev/null +++ b/clang/test/Parser/cxx2c-variadic-friends.cpp @@ -0,0 +1,91 @@ +// RUN: %clang_cc1 -fsyntax-only -verify -std=c++2c %s + +template struct TS; // #template + +struct Errors { + friend int, int; + friend int, long, char; + + // We simply diagnose and ignore the '...' here. 
+ friend float...; // expected-error {{pack expansion does not contain any unexpanded parameter packs}} + + friend short..., unsigned, unsigned short...; // expected-error 2 {{pack expansion does not contain any unexpanded parameter packs}} + + template + friend struct TS, int; // expected-error {{a friend declaration that befriends a template must contain exactly one type-specifier}} + + double friend; // expected-error {{'friend' must appear first in a non-function declaration}} + double friend, double; // expected-error {{expected member name or ';' after declaration specifiers}} +}; + +template +struct C { template class Nested; }; + +template +struct D { template class Nested; }; + +template +struct E { template class Nested; }; + +template // expected-note {{template parameter is declared here}} +struct VS { + friend Ts...; + + friend class Ts...; // expected-error {{declaration of 'Ts' shadows template parameter}} + // expected-error@-1 {{pack expansion does not contain any unexpanded parameter packs}} + + // TODO: Fix-it hint to insert '...'. + friend Ts; // expected-error {{friend declaration contains unexpanded parameter pack}} + + template + friend Us...; // expected-error {{friend type templates must use an elaborated type}} + + template // expected-note {{is declared here}} + friend class Us...; // expected-error {{declaration of 'Us' shadows template parameter}} + + template + friend class C::template Nested...; // expected-error {{cannot specialize a dependent template}} + + template + friend class C::template Nested...; // expected-error {{cannot specialize a dependent template}} + + // Nonsense (see CWG 2917). 
+ template + friend class C::Nested...; // expected-error {{friend declaration expands pack 'Us' that is declared it its own template parameter list}} + + template + friend class E::Nested...; // expected-error {{friend declaration expands pack 'Bs' that is declared it its own template parameter list}} + + // FIXME: Both of these should be valid, but we can't handle these at + // the moment because the NNS is dependent. + template + friend class TS::Nested...; // expected-warning {{dependent nested name specifier 'TS::' for friend template declaration is not supported; ignoring this friend declaration}} + + template + friend class D::Nested...; // expected-warning {{dependent nested name specifier 'D::' for friend class declaration is not supported; turning off access control for 'VS'}} +}; + +namespace length_mismatch { +struct A { + template + struct Nested { + struct Foo{}; + }; +}; +template +struct S { + template + struct T { + // expected-error@+2 {{pack expansion contains parameter packs 'Ts' and 'Us' that have different lengths (1 vs. 2)}} + // expected-error@+1 {{pack expansion contains parameter packs 'Ts' and 'Us' that have different lengths (2 vs. 
1)}} + friend class Ts::template Nested::Foo...; + }; +}; + +void f() { + S::T s; + S::T s2; + S::T s3; // expected-note {{in instantiation of}} + S::T s4; // expected-note {{in instantiation of}} +} +} diff --git a/clang/test/SemaCXX/cxx2c-variadic-friends.cpp b/clang/test/SemaCXX/cxx2c-variadic-friends.cpp new file mode 100644 index 000000000000000..a4d7c8078338d2a --- /dev/null +++ b/clang/test/SemaCXX/cxx2c-variadic-friends.cpp @@ -0,0 +1,156 @@ +// RUN: %clang_cc1 -fsyntax-only -verify -std=c++2c %s + +struct A; +struct B; +struct C; + +struct S {}; +template struct TS {}; + +template +class X { + friend Pack...; + static void f() { } // expected-note {{declared private here}} +}; + +class Y { + friend A, B, C; + static void g() { } // expected-note {{declared private here}} +}; + +struct A { + A() { + X::f(); + Y::g(); + }; +}; + +struct B { + B() { + X::f(); + Y::g(); + }; +}; + +struct C { + C() { + X::f(); + Y::g(); + }; +}; + +struct D { + D() { + X::f(); // expected-error {{'f' is a private member of 'X'}} + Y::g(); // expected-error {{'g' is a private member of 'Y'}} + }; +}; + +void f1() { + A a; + B b; + C c; + D d; +} + +template +struct Z { + template