diff --git a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp index 964a47346592fc..331a61e7c3c2cd 100644 --- a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp +++ b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp @@ -27,6 +27,21 @@ using namespace bolt; namespace opts { +static cl::opt + AltInstHasPadLen("alt-inst-has-padlen", + cl::desc("specify that .altinstructions has padlen field"), + cl::init(false), cl::Hidden, cl::cat(BoltCategory)); + +static cl::opt + AltInstFeatureSize("alt-inst-feature-size", + cl::desc("size of feature field in .altinstructions"), + cl::init(2), cl::Hidden, cl::cat(BoltCategory)); + +static cl::opt + DumpAltInstructions("dump-alt-instructions", + cl::desc("dump Linux alternative instructions info"), + cl::init(false), cl::Hidden, cl::cat(BoltCategory)); + static cl::opt DumpExceptions("dump-linux-exceptions", cl::desc("dump Linux kernel exception table"), @@ -157,6 +172,9 @@ class LinuxKernelRewriter final : public MetadataRewriter { /// Alignment of paravirtual patch structures. static constexpr size_t PARA_PATCH_ALIGN = 8; + /// .altinstructions section. + ErrorOr AltInstrSection = std::errc::bad_address; + /// Section containing Linux bug table. ErrorOr BugTableSection = std::errc::bad_address; @@ -205,6 +223,9 @@ class LinuxKernelRewriter final : public MetadataRewriter { Error readBugTable(); + /// Read alternative instruction info from .altinstructions. + Error readAltInstructions(); + /// Mark instructions referenced by kernel metadata. Error markInstructions(); @@ -232,6 +253,9 @@ class LinuxKernelRewriter final : public MetadataRewriter { if (Error E = readBugTable()) return E; + if (Error E = readAltInstructions()) + return E; + return Error::success(); } @@ -476,7 +500,8 @@ Error LinuxKernelRewriter::readORCTables() { // Consume the status of the cursor. 
if (!IPCursor) return createStringError(errc::executable_format_error, - "out of bounds while reading ORC IP table"); + "out of bounds while reading ORC IP table: %s", + toString(IPCursor.takeError()).c_str()); if (IP < PrevIP && opts::Verbosity) BC.errs() << "BOLT-WARNING: out of order IP 0x" << Twine::utohexstr(IP) @@ -498,7 +523,8 @@ Error LinuxKernelRewriter::readORCTables() { // Consume the status of the cursor. if (!ORCCursor) return createStringError(errc::executable_format_error, - "out of bounds while reading ORC"); + "out of bounds while reading ORC: %s", + toString(ORCCursor.takeError()).c_str()); if (Entry.ORC == NullORC) continue; @@ -819,7 +845,8 @@ Error LinuxKernelRewriter::readStaticCalls() { // Consume the status of the cursor. if (!Cursor) return createStringError(errc::executable_format_error, - "out of bounds while reading static calls"); + "out of bounds while reading static calls: %s", + toString(Cursor.takeError()).c_str()); ++EntryID; @@ -930,8 +957,10 @@ Error LinuxKernelRewriter::readExceptionTable() { // Consume the status of the cursor. 
if (!Cursor) - return createStringError(errc::executable_format_error, - "out of bounds while reading exception table"); + return createStringError( + errc::executable_format_error, + "out of bounds while reading exception table: %s", + toString(Cursor.takeError()).c_str()); ++EntryID; @@ -1037,8 +1066,10 @@ Error LinuxKernelRewriter::readParaInstructions() { const uint8_t Len = DE.getU8(Cursor); if (!Cursor) - return createStringError(errc::executable_format_error, - "out of bounds while reading .parainstructions"); + return createStringError( + errc::executable_format_error, + "out of bounds while reading .parainstructions: %s", + toString(Cursor.takeError()).c_str()); ++EntryID; @@ -1105,7 +1136,8 @@ Error LinuxKernelRewriter::readBugTable() { if (!Cursor) return createStringError(errc::executable_format_error, - "out of bounds while reading __bug_table"); + "out of bounds while reading __bug_table: %s", + toString(Cursor.takeError()).c_str()); ++EntryID; @@ -1132,6 +1164,125 @@ Error LinuxKernelRewriter::readBugTable() { return Error::success(); } +/// The kernel can replace certain instruction sequences depending on hardware +/// it is running on and features specified during boot time. The information +/// about alternative instruction sequences is stored in .altinstructions +/// section. The format of entries in this section is defined in +/// arch/x86/include/asm/alternative.h: +/// +/// struct alt_instr { +/// s32 instr_offset; +/// s32 repl_offset; +/// uXX feature; +/// u8 instrlen; +/// u8 replacementlen; +/// u8 padlen; // present in older kernels +/// } __packed; +/// +/// Note the structures is packed. 
+Error LinuxKernelRewriter::readAltInstructions() { + AltInstrSection = BC.getUniqueSectionByName(".altinstructions"); + if (!AltInstrSection) + return Error::success(); + + const uint64_t Address = AltInstrSection->getAddress(); + DataExtractor DE = DataExtractor(AltInstrSection->getContents(), + BC.AsmInfo->isLittleEndian(), + BC.AsmInfo->getCodePointerSize()); + uint64_t EntryID = 0; + DataExtractor::Cursor Cursor(0); + while (Cursor && !DE.eof(Cursor)) { + const uint64_t OrgInstAddress = + Address + Cursor.tell() + (int32_t)DE.getU32(Cursor); + const uint64_t AltInstAddress = + Address + Cursor.tell() + (int32_t)DE.getU32(Cursor); + const uint64_t Feature = DE.getUnsigned(Cursor, opts::AltInstFeatureSize); + const uint8_t OrgSize = DE.getU8(Cursor); + const uint8_t AltSize = DE.getU8(Cursor); + + // Older kernels may have the padlen field. + const uint8_t PadLen = opts::AltInstHasPadLen ? DE.getU8(Cursor) : 0; + + if (!Cursor) + return createStringError( + errc::executable_format_error, + "out of bounds while reading .altinstructions: %s", + toString(Cursor.takeError()).c_str()); + + ++EntryID; + + if (opts::DumpAltInstructions) { + BC.outs() << "Alternative instruction entry: " << EntryID + << "\n\tOrg: 0x" << Twine::utohexstr(OrgInstAddress) + << "\n\tAlt: 0x" << Twine::utohexstr(AltInstAddress) + << "\n\tFeature: 0x" << Twine::utohexstr(Feature) + << "\n\tOrgSize: " << (int)OrgSize + << "\n\tAltSize: " << (int)AltSize << '\n'; + if (opts::AltInstHasPadLen) + BC.outs() << "\tPadLen: " << (int)PadLen << '\n'; + } + + if (AltSize > OrgSize) + return createStringError(errc::executable_format_error, + "error reading .altinstructions"); + + BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(OrgInstAddress); + if (!BF && opts::Verbosity) { + BC.outs() << "BOLT-INFO: no function matches address 0x" + << Twine::utohexstr(OrgInstAddress) + << " of instruction from .altinstructions\n"; + } + + BinaryFunction *AltBF = + 
BC.getBinaryFunctionContainingAddress(AltInstAddress); + if (AltBF && BC.shouldEmit(*AltBF)) { + BC.errs() + << "BOLT-WARNING: alternative instruction sequence found in function " + << *AltBF << '\n'; + AltBF->setIgnored(); + } + + if (!BF || !BC.shouldEmit(*BF)) + continue; + + if (OrgInstAddress + OrgSize > BF->getAddress() + BF->getSize()) + return createStringError(errc::executable_format_error, + "error reading .altinstructions"); + + MCInst *Inst = + BF->getInstructionAtOffset(OrgInstAddress - BF->getAddress()); + if (!Inst) + return createStringError(errc::executable_format_error, + "no instruction at address 0x%" PRIx64 + " referenced by .altinstructions entry %d", + OrgInstAddress, EntryID); + + // There could be more than one alternative instruction sequences for the + // same original instruction. Annotate each alternative separately. + std::string AnnotationName = "AltInst"; + unsigned N = 2; + while (BC.MIB->hasAnnotation(*Inst, AnnotationName)) + AnnotationName = "AltInst" + std::to_string(N++); + + BC.MIB->addAnnotation(*Inst, AnnotationName, EntryID); + + // Annotate all instructions from the original sequence. Note that it's not + // the most efficient way to look for instructions in the address range, + // but since alternative instructions are uncommon, it will do for now. 
+ for (uint32_t Offset = 1; Offset < OrgSize; ++Offset) { + Inst = BF->getInstructionAtOffset(OrgInstAddress + Offset - + BF->getAddress()); + if (Inst) + BC.MIB->addAnnotation(*Inst, AnnotationName, EntryID); + } + } + + BC.outs() << "BOLT-INFO: parsed " << EntryID + << " alternative instruction entries\n"; + + return Error::success(); +} + } // namespace std::unique_ptr diff --git a/bolt/test/X86/linux-alt-instruction.s b/bolt/test/X86/linux-alt-instruction.s new file mode 100644 index 00000000000000..5dcc6fe3ab0c81 --- /dev/null +++ b/bolt/test/X86/linux-alt-instruction.s @@ -0,0 +1,97 @@ +# REQUIRES: system-linux + +## Check that BOLT correctly parses the Linux kernel .altinstructions section +## and annotates alternative instructions. + +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o +# RUN: %clang %cflags -nostdlib %t.o -o %t.exe \ +# RUN: -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr,--no-pie +# RUN: llvm-bolt %t.exe --print-normalized --keep-nops -o %t.out \ +# RUN: --alt-inst-feature-size=2 | FileCheck %s + +## Older kernels used to have padlen field in alt_instr. Check compatibility. + +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --defsym PADLEN=1 \ +# RUN: %s -o %t.o +# RUN: %clang %cflags -nostdlib %t.o -o %t.exe \ +# RUN: -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr,--no-pie +# RUN: llvm-bolt %t.exe --print-normalized --keep-nops --alt-inst-has-padlen \ +# RUN: -o %t.out | FileCheck %s + +## Check with a larger size of "feature" field in alt_instr. 
+ +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \ +# RUN: --defsym FEATURE_SIZE_4=1 %s -o %t.o +# RUN: %clang %cflags -nostdlib %t.o -o %t.exe \ +# RUN: -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr,--no-pie +# RUN: llvm-bolt %t.exe --print-normalized --keep-nops \ +# RUN: --alt-inst-feature-size=4 -o %t.out | FileCheck %s + +## Check that out-of-bounds read is handled properly. + +# RUN: not llvm-bolt %t.exe --print-normalized --keep-nops \ +# RUN: --alt-inst-feature-size=2 -o %t.out + +# CHECK: BOLT-INFO: Linux kernel binary detected +# CHECK: BOLT-INFO: parsed 2 alternative instruction entries + + .text + .globl _start + .type _start, %function +_start: +# CHECK: Binary Function "_start" +.L0: + rdtsc +# CHECK: rdtsc +# CHECK-SAME: AltInst: 1 +# CHECK-SAME: AltInst2: 2 + nop +# CHECK-NEXT: nop +# CHECK-SAME: AltInst: 1 +# CHECK-SAME: AltInst2: 2 + nop + nop +.L1: + ret + .size _start, .-_start + + .section .altinstr_replacement,"ax",@progbits +.A0: + lfence + rdtsc +.A1: + rdtscp +.Ae: + +## Alternative instruction info. + .section .altinstructions,"a",@progbits + + .long .L0 - . # org instruction + .long .A0 - . # alt instruction +.ifdef FEATURE_SIZE_4 + .long 0x72 # feature flags +.else + .word 0x72 # feature flags +.endif + .byte .L1 - .L0 # org size + .byte .A1 - .A0 # alt size +.ifdef PADLEN + .byte 0 +.endif + + .long .L0 - . # org instruction + .long .A1 - . # alt instruction +.ifdef FEATURE_SIZE_4 + .long 0x3b # feature flags +.else + .word 0x3b # feature flags +.endif + .byte .L1 - .L0 # org size + .byte .Ae - .A1 # alt size +.ifdef PADLEN + .byte 0 +.endif + +## Fake Linux Kernel sections. 
+ .section __ksymtab,"a",@progbits + .section __ksymtab_gpl,"a",@progbits diff --git a/clang-tools-extra/clang-tidy/utils/DeclRefExprUtils.cpp b/clang-tools-extra/clang-tidy/utils/DeclRefExprUtils.cpp index f0ffa517047b27..a48e45e1356813 100644 --- a/clang-tools-extra/clang-tidy/utils/DeclRefExprUtils.cpp +++ b/clang-tools-extra/clang-tidy/utils/DeclRefExprUtils.cpp @@ -155,15 +155,16 @@ AST_MATCHER_P(DeclRefExpr, doesNotMutateObject, int, Indirections) { if (const auto *const Member = dyn_cast(P)) { if (const auto *const Method = dyn_cast(Member->getMemberDecl())) { - if (!Method->isConst()) { - // The method can mutate our variable. - return false; + if (Method->isConst() || Method->isStatic()) { + // The method call cannot mutate our variable. + continue; } - continue; + return false; } Stack.emplace_back(Member, 0); continue; } + if (const auto *const Op = dyn_cast(P)) { switch (Op->getOpcode()) { case UO_AddrOf: diff --git a/clang-tools-extra/unittests/clang-tidy/DeclRefExprUtilsTest.cpp b/clang-tools-extra/unittests/clang-tidy/DeclRefExprUtilsTest.cpp index 4c9e81ea0f61ac..3d9f51e2e17b09 100644 --- a/clang-tools-extra/unittests/clang-tidy/DeclRefExprUtilsTest.cpp +++ b/clang-tools-extra/unittests/clang-tidy/DeclRefExprUtilsTest.cpp @@ -51,6 +51,8 @@ template void RunTest(StringRef Snippet) { void constMethod() const; void nonConstMethod(); + static void staticMethod(); + void operator()(ConstTag) const; void operator()(NonConstTag); @@ -109,10 +111,12 @@ TEST(ConstReferenceDeclRefExprsTest, ConstValueVar) { useConstPtr(&/*const*/target); useConstPtrConstRef(&/*const*/target); /*const*/target.constMethod(); + /*const*/target.staticMethod(); /*const*/target(ConstTag{}); /*const*/target[42]; useConstRef((/*const*/target)); (/*const*/target).constMethod(); + /*const*/target.staticMethod(); (void)(/*const*/target == /*const*/target); (void)/*const*/target; (void)&/*const*/target; @@ -140,6 +144,7 @@ TEST(ConstReferenceDeclRefExprsTest, ConstRefVar) { 
useConstPtr(&/*const*/target); useConstPtrConstRef(&/*const*/target); /*const*/target.constMethod(); + /*const*/target.staticMethod(); /*const*/target(ConstTag{}); /*const*/target[42]; useConstRef((/*const*/target)); @@ -179,6 +184,7 @@ TEST(ConstReferenceDeclRefExprsTest, ValueVar) { useConstPtr(&/*const*/target); useConstPtrConstRef(&/*const*/target); /*const*/target.constMethod(); + /*const*/target.staticMethod(); target.nonConstMethod(); /*const*/target(ConstTag{}); target[42]; @@ -218,6 +224,7 @@ TEST(ConstReferenceDeclRefExprsTest, RefVar) { useConstPtr(&/*const*/target); useConstPtrConstRef(&/*const*/target); /*const*/target.constMethod(); + /*const*/target.staticMethod(); target.nonConstMethod(); /*const*/target(ConstTag{}); target[42]; @@ -256,6 +263,7 @@ TEST(ConstReferenceDeclRefExprsTest, PtrVar) { useConstPtrConstRef(/*const*/target); usePtrConstPtr(&target); /*const*/target->constMethod(); + /*const*/target->staticMethod(); target->nonConstMethod(); (*/*const*/target)(ConstTag{}); (*target)[42]; @@ -292,6 +300,7 @@ TEST(ConstReferenceDeclRefExprsTest, ConstPtrVar) { useConstPtrConstPtr(&/*const*/target); useConstPtrConstRef(/*const*/target); /*const*/target->constMethod(); + /*const*/target->staticMethod(); (*/*const*/target)(ConstTag{}); (*/*const*/target)[42]; /*const*/target->operator[](42); diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 2b54dffd058a35..06af93fd3c15ca 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -5378,6 +5378,7 @@ The following builtin intrinsics can be used in constant expressions: * ``__builtin_popcount`` * ``__builtin_popcountl`` * ``__builtin_popcountll`` +* ``__builtin_popcountg`` * ``__builtin_rotateleft8`` * ``__builtin_rotateleft16`` * ``__builtin_rotateleft32`` diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 8300a8484585ae..fa23c215790f11 100644 --- a/clang/docs/ReleaseNotes.rst +++ 
b/clang/docs/ReleaseNotes.rst @@ -97,6 +97,8 @@ C++23 Feature Support - Implemented `P2718R0: Lifetime extension in range-based for loops `_. Also materialize temporary object which is a prvalue in discarded-value expression. +- Implemented `P2448R2: Relaxing some constexpr restrictions `_. + C++2c Feature Support ^^^^^^^^^^^^^^^^^^^^^ @@ -155,6 +157,11 @@ Non-comprehensive list of changes in this release - ``__builtin_addc``, ``__builtin_subc``, and the other sizes of those builtins are now constexpr and may be used in constant expressions. +- Added ``__builtin_popcountg`` as a type-generic alternative to + ``__builtin_popcount{,l,ll}`` with support for any unsigned integer type. Like + the previous builtins, this new builtin is constexpr and may be used in + constant expressions. + New Compiler Flags ------------------ @@ -389,6 +396,9 @@ RISC-V Support CUDA/HIP Language Changes ^^^^^^^^^^^^^^^^^^^^^^^^^ +- PTX is no longer included by default when compiling for CUDA. Using + ``--cuda-include-ptx=all`` will return the old behavior. + CUDA Support ^^^^^^^^^^^^ diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index 61117cc5ce71f9..a5879591f4c659 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -4419,7 +4419,7 @@ class FileScopeAsmDecl : public Decl { /// /// \note This is used in libInterpreter, clang -cc1 -fincremental-extensions /// and in tools such as clang-repl. 
-class TopLevelStmtDecl : public Decl { +class TopLevelStmtDecl : public Decl, public DeclContext { friend class ASTDeclReader; friend class ASTDeclWriter; @@ -4427,7 +4427,7 @@ class TopLevelStmtDecl : public Decl { bool IsSemiMissing = false; TopLevelStmtDecl(DeclContext *DC, SourceLocation L, Stmt *S) - : Decl(TopLevelStmt, DC, L), Statement(S) {} + : Decl(TopLevelStmt, DC, L), DeclContext(TopLevelStmt), Statement(S) {} virtual void anchor(); @@ -4438,15 +4438,19 @@ class TopLevelStmtDecl : public Decl { SourceRange getSourceRange() const override LLVM_READONLY; Stmt *getStmt() { return Statement; } const Stmt *getStmt() const { return Statement; } - void setStmt(Stmt *S) { - assert(IsSemiMissing && "Operation supported for printing values only!"); - Statement = S; - } + void setStmt(Stmt *S); bool isSemiMissing() const { return IsSemiMissing; } void setSemiMissing(bool Missing = true) { IsSemiMissing = Missing; } static bool classof(const Decl *D) { return classofKind(D->getKind()); } static bool classofKind(Kind K) { return K == TopLevelStmt; } + + static DeclContext *castToDeclContext(const TopLevelStmtDecl *D) { + return static_cast(const_cast(D)); + } + static TopLevelStmtDecl *castFromDeclContext(const DeclContext *DC) { + return static_cast(const_cast(DC)); + } }; /// Represents a block literal declaration, which is like an diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h index 9a4736019d1b1b..76810a86a78a46 100644 --- a/clang/include/clang/AST/DeclBase.h +++ b/clang/include/clang/AST/DeclBase.h @@ -2120,6 +2120,7 @@ class DeclContext { case Decl::Block: case Decl::Captured: case Decl::ObjCMethod: + case Decl::TopLevelStmt: return true; default: return getDeclKind() >= Decl::firstFunction && diff --git a/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h b/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h index 405e93287a05d3..9a0a00f3c01343 100644 --- 
a/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h +++ b/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h @@ -58,19 +58,36 @@ class ControlFlowContext { return BlockReachable[B.getBlockID()]; } + /// Returns whether `B` contains an expression that is consumed in a + /// different block than `B` (i.e. the parent of the expression is in a + /// different block). + /// This happens if there is control flow within a full-expression (triggered + /// by `&&`, `||`, or the conditional operator). Note that the operands of + /// these operators are not the only expressions that can be consumed in a + /// different block. For example, in the function call + /// `f(&i, cond() ? 1 : 0)`, `&i` is in a different block than the `CallExpr`. + bool containsExprConsumedInDifferentBlock(const CFGBlock &B) const { + return ContainsExprConsumedInDifferentBlock.contains(&B); + } + private: - ControlFlowContext(const Decl &D, std::unique_ptr Cfg, - llvm::DenseMap StmtToBlock, - llvm::BitVector BlockReachable) + ControlFlowContext( + const Decl &D, std::unique_ptr Cfg, + llvm::DenseMap StmtToBlock, + llvm::BitVector BlockReachable, + llvm::DenseSet ContainsExprConsumedInDifferentBlock) : ContainingDecl(D), Cfg(std::move(Cfg)), StmtToBlock(std::move(StmtToBlock)), - BlockReachable(std::move(BlockReachable)) {} + BlockReachable(std::move(BlockReachable)), + ContainsExprConsumedInDifferentBlock( + std::move(ContainsExprConsumedInDifferentBlock)) {} /// The `Decl` containing the statement used to construct the CFG. 
const Decl &ContainingDecl; std::unique_ptr Cfg; llvm::DenseMap StmtToBlock; llvm::BitVector BlockReachable; + llvm::DenseSet ContainsExprConsumedInDifferentBlock; }; } // namespace dataflow diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h index 62e7af7ac219bc..2330697299fdd7 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h @@ -210,6 +210,14 @@ class Environment { bool equivalentTo(const Environment &Other, Environment::ValueModel &Model) const; + /// How to treat expression state (`ExprToLoc` and `ExprToVal`) in a join. + /// If the join happens within a full expression, expression state should be + /// kept; otherwise, we can discard it. + enum ExprJoinBehavior { + DiscardExprState, + KeepExprState, + }; + /// Joins two environments by taking the intersection of storage locations and /// values that are stored in them. Distinct values that are assigned to the /// same storage locations in `EnvA` and `EnvB` are merged using `Model`. @@ -218,7 +226,8 @@ class Environment { /// /// `EnvA` and `EnvB` must use the same `DataflowAnalysisContext`. static Environment join(const Environment &EnvA, const Environment &EnvB, - Environment::ValueModel &Model); + Environment::ValueModel &Model, + ExprJoinBehavior ExprBehavior); /// Widens the environment point-wise, using `PrevEnv` as needed to inform the /// approximation. @@ -436,6 +445,11 @@ class Environment { return createObjectInternal(&D, D.getType(), InitExpr); } + /// Initializes the fields (including synthetic fields) of `Loc` with values, + /// unless values of the field type are not supported or we hit one of the + /// limits at which we stop producing values. + void initializeFieldsWithValues(RecordStorageLocation &Loc); + /// Assigns `Val` as the value of `Loc` in the environment. 
void setValue(const StorageLocation &Loc, Value &Val); diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index fa191c7378dba4..ebb616fbe253fc 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -983,6 +983,8 @@ def Availability : InheritableAttr { .Case("watchos_app_extension", "watchOS (App Extension)") .Case("maccatalyst", "macCatalyst") .Case("maccatalyst_app_extension", "macCatalyst (App Extension)") + .Case("xros", "visionOS") + .Case("xros_app_extension", "visionOS (App Extension)") .Case("swift", "Swift") .Case("shadermodel", "HLSL ShaderModel") .Case("ohos", "OpenHarmony OS") @@ -1000,6 +1002,8 @@ static llvm::StringRef getPlatformNameSourceSpelling(llvm::StringRef Platform) { .Case("watchos_app_extension", "watchOSApplicationExtension") .Case("maccatalyst", "macCatalyst") .Case("maccatalyst_app_extension", "macCatalystApplicationExtension") + .Case("xros", "visionOS") + .Case("xros_app_extension", "visionOSApplicationExtension") .Case("zos", "z/OS") .Case("shadermodel", "ShaderModel") .Default(Platform); @@ -1016,6 +1020,10 @@ static llvm::StringRef canonicalizePlatformName(llvm::StringRef Platform) { .Case("watchOSApplicationExtension", "watchos_app_extension") .Case("macCatalyst", "maccatalyst") .Case("macCatalystApplicationExtension", "maccatalyst_app_extension") + .Case("visionOS", "xros") + .Case("visionOSApplicationExtension", "xros_app_extension") + .Case("visionos", "xros") + .Case("visionos_app_extension", "xros_app_extension") .Case("ShaderModel", "shadermodel") .Default(Platform); } }]; diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index a81131d82c4cb4..9c703377ca8d3e 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -706,7 +706,7 @@ def Popcount : Builtin, BitInt_Long_LongLongTemplate { def Popcountg : Builtin { let Spellings = ["__builtin_popcountg"]; - let Attributes = [NoThrow, 
Const, CustomTypeChecking]; + let Attributes = [NoThrow, Const, Constexpr, CustomTypeChecking]; let Prototype = "int(...)"; } diff --git a/clang/include/clang/Basic/DarwinSDKInfo.h b/clang/include/clang/Basic/DarwinSDKInfo.h index dedfbd934a7b63..db20b968a898ea 100644 --- a/clang/include/clang/Basic/DarwinSDKInfo.h +++ b/clang/include/clang/Basic/DarwinSDKInfo.h @@ -105,6 +105,30 @@ class DarwinSDKInfo { map(const VersionTuple &Key, const VersionTuple &MinimumValue, std::optional MaximumValue) const; + /// Remap the 'introduced' availability version. + /// If None is returned, the 'unavailable' availability should be used + /// instead. + std::optional + mapIntroducedAvailabilityVersion(const VersionTuple &Key) const { + // API_TO_BE_DEPRECATED is 100000. + if (Key.getMajor() == 100000) + return VersionTuple(100000); + // Use None for maximum to force unavailable behavior for + return map(Key, MinimumValue, std::nullopt); + } + + /// Remap the 'deprecated' and 'obsoleted' availability version. + /// If None is returned for 'obsoleted', the 'unavailable' availability + /// should be used instead. If None is returned for 'deprecated', the + /// 'deprecated' version should be dropped. + std::optional + mapDeprecatedObsoletedAvailabilityVersion(const VersionTuple &Key) const { + // API_TO_BE_DEPRECATED is 100000. 
+ if (Key.getMajor() == 100000) + return VersionTuple(100000); + return map(Key, MinimumValue, MaximumValue); + } + static std::optional parseJSON(const llvm::json::Object &Obj, VersionTuple MaximumDeploymentTarget); diff --git a/clang/include/clang/Basic/DeclNodes.td b/clang/include/clang/Basic/DeclNodes.td index 8b1f415dd5fe2c..48396e85c5adac 100644 --- a/clang/include/clang/Basic/DeclNodes.td +++ b/clang/include/clang/Basic/DeclNodes.td @@ -95,7 +95,7 @@ def LinkageSpec : DeclNode, DeclContext; def Export : DeclNode, DeclContext; def ObjCPropertyImpl : DeclNode; def FileScopeAsm : DeclNode; -def TopLevelStmt : DeclNode; +def TopLevelStmt : DeclNode, DeclContext; def AccessSpec : DeclNode; def Friend : DeclNode; def FriendTemplate : DeclNode; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 5a90e631a894c9..c8dfdc08f5ea07 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -9607,13 +9607,10 @@ def err_defaulted_copy_assign_not_ref : Error< "the parameter for an explicitly-defaulted copy assignment operator must be an " "lvalue reference type">; def err_incorrect_defaulted_constexpr : Error< - "defaulted definition of %sub{select_special_member_kind}0 " - "is not constexpr">; + "defaulted definition of %sub{select_special_member_kind}0 cannot be marked %select{constexpr|consteval}1 " + "before C++23">; def err_incorrect_defaulted_constexpr_with_vb: Error< "%sub{select_special_member_kind}0 cannot be 'constexpr' in a class with virtual base class">; -def err_incorrect_defaulted_consteval : Error< - "defaulted declaration of %sub{select_special_member_kind}0 " - "cannot be consteval because implicit definition is not constexpr">; def warn_defaulted_method_deleted : Warning< "explicitly defaulted %sub{select_special_member_kind}0 is implicitly " "deleted">, InGroup; @@ -9724,21 +9721,12 @@ def 
note_defaulted_comparison_cannot_deduce_undeduced_auto : Note< "%select{|member|base class}0 %1 declared here">; def note_defaulted_comparison_cannot_deduce_callee : Note< "selected 'operator<=>' for %select{|member|base class}0 %1 declared here">; -def ext_defaulted_comparison_constexpr_mismatch : Extension< +def err_defaulted_comparison_constexpr_mismatch : Error< "defaulted definition of %select{%sub{select_defaulted_comparison_kind}1|" - "three-way comparison operator}0 that is " - "declared %select{constexpr|consteval}2 but" - "%select{|for which the corresponding implicit 'operator==' }0 " - "invokes a non-constexpr comparison function is a C++23 extension">, - InGroup>; -def warn_cxx23_compat_defaulted_comparison_constexpr_mismatch : Warning< - "defaulted definition of %select{%sub{select_defaulted_comparison_kind}1|" - "three-way comparison operator}0 that is " - "declared %select{constexpr|consteval}2 but" - "%select{|for which the corresponding implicit 'operator==' }0 " - "invokes a non-constexpr comparison function is incompatible with C++ " - "standards before C++23">, - InGroup, DefaultIgnore; + "three-way comparison operator}0 cannot be " + "declared %select{constexpr|consteval}2 because " + "%select{it|for which the corresponding implicit 'operator==' }0 " + "invokes a non-constexpr comparison function ">; def note_defaulted_comparison_not_constexpr : Note< "non-constexpr comparison function would be used to compare " "%select{|member %1|base class %1}0">; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index bef38738fde82e..5b3d366dbcf91b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -7414,7 +7414,9 @@ def ast_view : Flag<["-"], "ast-view">, def emit_module : Flag<["-"], "emit-module">, HelpText<"Generate pre-compiled module file from a module map">; def emit_module_interface : Flag<["-"], "emit-module-interface">, - HelpText<"Generate pre-compiled module 
file from a C++ module interface">; + HelpText<"Generate pre-compiled module file from a standard C++ module interface unit">; +def emit_reduced_module_interface : Flag<["-"], "emit-reduced-module-interface">, + HelpText<"Generate reduced prebuilt module interface from a standard C++ module interface unit">; def emit_header_unit : Flag<["-"], "emit-header-unit">, HelpText<"Generate C++20 header units from header files">; def emit_pch : Flag<["-"], "emit-pch">, diff --git a/clang/include/clang/Frontend/FrontendActions.h b/clang/include/clang/Frontend/FrontendActions.h index fcce31ac0590ff..8441af2ee3e718 100644 --- a/clang/include/clang/Frontend/FrontendActions.h +++ b/clang/include/clang/Frontend/FrontendActions.h @@ -118,6 +118,9 @@ class GenerateModuleAction : public ASTFrontendAction { CreateOutputFile(CompilerInstance &CI, StringRef InFile) = 0; protected: + std::vector> + CreateMultiplexConsumer(CompilerInstance &CI, StringRef InFile); + std::unique_ptr CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override; @@ -147,8 +150,10 @@ class GenerateModuleFromModuleMapAction : public GenerateModuleAction { CreateOutputFile(CompilerInstance &CI, StringRef InFile) override; }; +/// Generates full BMI (which contains full information to generate the object +/// files) for C++20 Named Modules. class GenerateModuleInterfaceAction : public GenerateModuleAction { -private: +protected: bool BeginSourceFileAction(CompilerInstance &CI) override; std::unique_ptr CreateASTConsumer(CompilerInstance &CI, @@ -158,6 +163,14 @@ class GenerateModuleInterfaceAction : public GenerateModuleAction { CreateOutputFile(CompilerInstance &CI, StringRef InFile) override; }; +/// Only generates the reduced BMI. This action is mainly used by tests. 
+class GenerateReducedModuleInterfaceAction + : public GenerateModuleInterfaceAction { +private: + std::unique_ptr CreateASTConsumer(CompilerInstance &CI, + StringRef InFile) override; +}; + class GenerateHeaderUnitAction : public GenerateModuleAction { private: diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h index 53a8681cfdbba0..8085dbcbf671a6 100644 --- a/clang/include/clang/Frontend/FrontendOptions.h +++ b/clang/include/clang/Frontend/FrontendOptions.h @@ -85,9 +85,13 @@ enum ActionKind { /// Generate pre-compiled module from a module map. GenerateModule, - /// Generate pre-compiled module from a C++ module interface file. + /// Generate pre-compiled module from a standard C++ module interface unit. GenerateModuleInterface, + /// Generate reduced module interface for a standard C++ module interface + /// unit. + GenerateReducedModuleInterface, + /// Generate a C++20 header unit module from a header file. GenerateHeaderUnit, diff --git a/clang/include/clang/InstallAPI/Frontend.h b/clang/include/clang/InstallAPI/Frontend.h index 8774321e990c13..cbc2b159ebd17a 100644 --- a/clang/include/clang/InstallAPI/Frontend.h +++ b/clang/include/clang/InstallAPI/Frontend.h @@ -50,12 +50,15 @@ class FrontendRecordsSlice : public llvm::MachO::RecordsSlice { /// \param D The pointer to the declaration from traversing AST. /// \param Access The intended access level of symbol. /// \param Flags The flags that describe attributes of the symbol. + /// \param Inlined Whether declaration is inlined, only applicable to + /// functions. /// \return The non-owning pointer to added record in slice. GlobalRecord *addGlobal(StringRef Name, RecordLinkage Linkage, GlobalRecord::Kind GV, const clang::AvailabilityInfo Avail, const Decl *D, const HeaderType Access, - SymbolFlags Flags = SymbolFlags::None); + SymbolFlags Flags = SymbolFlags::None, + bool Inlined = false); /// Add ObjC Class record with attributes from AST. 
/// diff --git a/clang/include/clang/InstallAPI/Visitor.h b/clang/include/clang/InstallAPI/Visitor.h index ff0a9957aa86bc..71d4d9894f4205 100644 --- a/clang/include/clang/InstallAPI/Visitor.h +++ b/clang/include/clang/InstallAPI/Visitor.h @@ -37,6 +37,9 @@ class InstallAPIVisitor final : public ASTConsumer, /// Collect global variables. bool VisitVarDecl(const VarDecl *D); + /// Collect global functions. + bool VisitFunctionDecl(const FunctionDecl *D); + /// Collect Objective-C Interface declarations. /// Every Objective-C class has an interface declaration that lists all the /// ivars, properties, and methods of the class. diff --git a/clang/include/clang/Interpreter/Interpreter.h b/clang/include/clang/Interpreter/Interpreter.h index 292fa566ae7037..c8f932e95c4798 100644 --- a/clang/include/clang/Interpreter/Interpreter.h +++ b/clang/include/clang/Interpreter/Interpreter.h @@ -48,6 +48,8 @@ class IncrementalCompilerBuilder { UserArgs = Args; } + void SetTargetTriple(std::string TT) { TargetTriple = TT; } + // General C++ llvm::Expected> CreateCpp(); @@ -62,11 +64,12 @@ class IncrementalCompilerBuilder { private: static llvm::Expected> - create(std::vector &ClangArgv); + create(std::string TT, std::vector &ClangArgv); llvm::Expected> createCuda(bool device); std::vector UserArgs; + std::optional TargetTriple; llvm::StringRef OffloadArch; llvm::StringRef CudaSDKPath; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 2d949f3fc9a718..592c7871a4a55d 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -3263,7 +3263,8 @@ class Sema final { Decl *ActOnFileScopeAsmDecl(Expr *expr, SourceLocation AsmLoc, SourceLocation RParenLoc); - Decl *ActOnTopLevelStmtDecl(Stmt *Statement); + TopLevelStmtDecl *ActOnStartTopLevelStmtDecl(Scope *S); + void ActOnFinishTopLevelStmtDecl(TopLevelStmtDecl *D, Stmt *Statement); void ActOnPopScope(SourceLocation Loc, Scope *S); diff --git 
a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index 5e2f305b294caf..e5db486a71a490 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -166,6 +166,10 @@ class ASTWriter : public ASTDeserializationListener, /// Indicates that the AST contained compiler errors. bool ASTHasCompilerErrors = false; + /// Indicates that we're going to generate the reduced BMI for C++20 + /// named modules. + bool GeneratingReducedBMI = false; + /// Mapping from input file entries to the index into the /// offset table where information about that input file is stored. llvm::DenseMap InputFileIDs; @@ -596,7 +600,8 @@ class ASTWriter : public ASTDeserializationListener, ASTWriter(llvm::BitstreamWriter &Stream, SmallVectorImpl &Buffer, InMemoryModuleCache &ModuleCache, ArrayRef> Extensions, - bool IncludeTimestamps = true, bool BuildingImplicitModule = false); + bool IncludeTimestamps = true, bool BuildingImplicitModule = false, + bool GeneratingReducedBMI = false); ~ASTWriter() override; ASTContext &getASTContext() const { @@ -856,6 +861,13 @@ class PCHGenerator : public SemaConsumer { const ASTWriter &getWriter() const { return Writer; } SmallVectorImpl &getPCH() const { return Buffer->Data; } + bool isComplete() const { return Buffer->IsComplete; } + PCHBuffer *getBufferPtr() { return Buffer.get(); } + StringRef getOutputFile() const { return OutputFile; } + DiagnosticsEngine &getDiagnostics() const { + return SemaPtr->getDiagnostics(); + } + public: PCHGenerator(const Preprocessor &PP, InMemoryModuleCache &ModuleCache, StringRef OutputFile, StringRef isysroot, @@ -863,7 +875,8 @@ class PCHGenerator : public SemaConsumer { ArrayRef> Extensions, bool AllowASTWithErrors = false, bool IncludeTimestamps = true, bool BuildingImplicitModule = false, - bool ShouldCacheASTInMemory = false); + bool ShouldCacheASTInMemory = false, + bool GeneratingReducedBMI = false); ~PCHGenerator() 
override; void InitializeSema(Sema &S) override { SemaPtr = &S; } @@ -873,6 +886,21 @@ class PCHGenerator : public SemaConsumer { bool hasEmittedPCH() const { return Buffer->IsComplete; } }; +class ReducedBMIGenerator : public PCHGenerator { +public: + ReducedBMIGenerator(const Preprocessor &PP, InMemoryModuleCache &ModuleCache, + StringRef OutputFile, std::shared_ptr Buffer, + bool IncludeTimestamps); + + void HandleTranslationUnit(ASTContext &Ctx) override; +}; + +/// If we can elide the definition of \param D in reduced BMI. +/// +/// Generally, we can elide the definition of a declaration if it won't affect +/// the ABI. e.g., the non-inline function bodies. +bool CanElideDeclDef(const Decl *D); + /// A simple helper class to pack several bits in order into (a) 32 bit /// integer(s). class BitsPacker { diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 59c039f1f8daeb..d681791d3920c3 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -5552,14 +5552,13 @@ FileScopeAsmDecl *FileScopeAsmDecl::CreateDeserialized(ASTContext &C, void TopLevelStmtDecl::anchor() {} TopLevelStmtDecl *TopLevelStmtDecl::Create(ASTContext &C, Stmt *Statement) { - assert(Statement); assert(C.getLangOpts().IncrementalExtensions && "Must be used only in incremental mode"); - SourceLocation BeginLoc = Statement->getBeginLoc(); + SourceLocation Loc = Statement ? 
Statement->getBeginLoc() : SourceLocation(); DeclContext *DC = C.getTranslationUnitDecl(); - return new (C, DC) TopLevelStmtDecl(DC, BeginLoc, Statement); + return new (C, DC) TopLevelStmtDecl(DC, Loc, Statement); } TopLevelStmtDecl *TopLevelStmtDecl::CreateDeserialized(ASTContext &C, @@ -5572,6 +5571,12 @@ SourceRange TopLevelStmtDecl::getSourceRange() const { return SourceRange(getLocation(), Statement->getEndLoc()); } +void TopLevelStmtDecl::setStmt(Stmt *S) { + assert(S); + Statement = S; + setLocation(Statement->getBeginLoc()); +} + void EmptyDecl::anchor() {} EmptyDecl *EmptyDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L) { diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp index 10fe8bb97ce660..fcedb3cfd176a0 100644 --- a/clang/lib/AST/DeclBase.cpp +++ b/clang/lib/AST/DeclBase.cpp @@ -1352,6 +1352,7 @@ DeclContext *DeclContext::getPrimaryContext() { case Decl::ExternCContext: case Decl::LinkageSpec: case Decl::Export: + case Decl::TopLevelStmt: case Decl::Block: case Decl::Captured: case Decl::OMPDeclareReduction: diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp index b4f2327d9c560a..1c3dcf63465c68 100644 --- a/clang/lib/AST/DeclCXX.cpp +++ b/clang/lib/AST/DeclCXX.cpp @@ -400,10 +400,11 @@ CXXRecordDecl::setBases(CXXBaseSpecifier const * const *Bases, // C++11 [class.ctor]p6: // If that user-written default constructor would satisfy the - // requirements of a constexpr constructor, the implicitly-defined - // default constructor is constexpr. + // requirements of a constexpr constructor/function(C++23), the + // implicitly-defined default constructor is constexpr. 
if (!BaseClassDecl->hasConstexprDefaultConstructor()) - data().DefaultedDefaultConstructorIsConstexpr = false; + data().DefaultedDefaultConstructorIsConstexpr = + C.getLangOpts().CPlusPlus23; // C++1z [class.copy]p8: // The implicitly-declared copy constructor for a class X will have @@ -548,7 +549,8 @@ void CXXRecordDecl::addedClassSubobject(CXXRecordDecl *Subobj) { // -- for every subobject of class type or (possibly multi-dimensional) // array thereof, that class type shall have a constexpr destructor if (!Subobj->hasConstexprDestructor()) - data().DefaultedDestructorIsConstexpr = false; + data().DefaultedDestructorIsConstexpr = + getASTContext().getLangOpts().CPlusPlus23; // C++20 [temp.param]p7: // A structural type is [...] a literal class type [for which] the types @@ -1297,7 +1299,8 @@ void CXXRecordDecl::addedMember(Decl *D) { !FieldRec->hasConstexprDefaultConstructor() && !isUnion()) // The standard requires any in-class initializer to be a constant // expression. We consider this to be a defect. 
- data().DefaultedDefaultConstructorIsConstexpr = false; + data().DefaultedDefaultConstructorIsConstexpr = + Context.getLangOpts().CPlusPlus23; // C++11 [class.copy]p8: // The implicitly-declared copy constructor for a class X will have diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index d8ca35740fbc35..4a7c7755e1d6fd 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12483,6 +12483,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, case Builtin::BI__builtin_popcount: case Builtin::BI__builtin_popcountl: case Builtin::BI__builtin_popcountll: + case Builtin::BI__builtin_popcountg: case Builtin::BI__popcnt16: // Microsoft variants of popcount case Builtin::BI__popcnt: case Builtin::BI__popcnt64: { diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp index d887170cbc5d2d..a384e191464fea 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp @@ -314,11 +314,7 @@ bool ByteCodeExprGen::VisitCastExpr(const CastExpr *CE) { for (unsigned I = 0; I != 2; ++I) { if (!this->emitGetLocal(PT_Ptr, *SubExprOffset, CE)) return false; - if (!this->emitConstUint8(I, CE)) - return false; - if (!this->emitArrayElemPtrPopUint8(CE)) - return false; - if (!this->emitLoadPop(SourceElemT, CE)) + if (!this->emitArrayElemPop(SourceElemT, I, CE)) return false; // Do the cast. 
@@ -393,12 +389,16 @@ bool ByteCodeExprGen::VisitBinaryOperator(const BinaryOperator *BO) { if (BO->isLogicalOp()) return this->VisitLogicalBinOp(BO); - if (BO->getType()->isAnyComplexType()) - return this->VisitComplexBinOp(BO); - const Expr *LHS = BO->getLHS(); const Expr *RHS = BO->getRHS(); + if (BO->getType()->isAnyComplexType()) + return this->VisitComplexBinOp(BO); + if ((LHS->getType()->isAnyComplexType() || + RHS->getType()->isAnyComplexType()) && + BO->isComparisonOp()) + return this->emitComplexComparison(LHS, RHS, BO); + if (BO->isPtrMemOp()) return this->visit(RHS); @@ -725,11 +725,8 @@ bool ByteCodeExprGen::VisitComplexBinOp(const BinaryOperator *E) { if (IsComplex) { if (!this->emitGetLocal(PT_Ptr, Offset, E)) return false; - if (!this->emitConstUint8(ElemIndex, E)) - return false; - if (!this->emitArrayElemPtrPopUint8(E)) - return false; - return this->emitLoadPop(classifyComplexElementType(E->getType()), E); + return this->emitArrayElemPop(classifyComplexElementType(E->getType()), + ElemIndex, E); } if (ElemIndex == 0) return this->emitGetLocal(classifyPrim(E->getType()), Offset, E); @@ -1649,7 +1646,8 @@ bool ByteCodeExprGen::VisitMaterializeTemporaryExpr( SubExpr, *SubExprT, /*IsConst=*/true, /*IsExtended=*/true)) { if (!this->visit(SubExpr)) return false; - this->emitSetLocal(*SubExprT, *LocalIndex, E); + if (!this->emitSetLocal(*SubExprT, *LocalIndex, E)) + return false; return this->emitGetPtrLocal(*LocalIndex, E); } } else { @@ -3123,16 +3121,16 @@ bool ByteCodeExprGen::VisitUnaryOperator(const UnaryOperator *E) { if (!this->visit(SubExpr)) return false; - if (!this->emitConstUint8(1, E)) - return false; - if (!this->emitArrayElemPtrPopUint8(E)) - return false; + + if (SubExpr->isLValue()) { + if (!this->emitConstUint8(1, E)) + return false; + return this->emitArrayElemPtrPopUint8(E); + } // Since our _Complex implementation does not map to a primitive type, // we sometimes have to do the lvalue-to-rvalue conversion here manually. 
- if (!SubExpr->isLValue()) - return this->emitLoadPop(classifyPrim(E->getType()), E); - return true; + return this->emitArrayElemPop(classifyPrim(E->getType()), 1, E); } case UO_Extension: return this->delegate(SubExpr); @@ -3343,17 +3341,15 @@ bool ByteCodeExprGen::emitComplexReal(const Expr *SubExpr) { if (!this->visit(SubExpr)) return false; - if (!this->emitConstUint8(0, SubExpr)) - return false; - if (!this->emitArrayElemPtrPopUint8(SubExpr)) - return false; + if (SubExpr->isLValue()) { + if (!this->emitConstUint8(0, SubExpr)) + return false; + return this->emitArrayElemPtrPopUint8(SubExpr); + } - // Since our _Complex implementation does not map to a primitive type, - // we sometimes have to do the lvalue-to-rvalue conversion here manually. - if (!SubExpr->isLValue()) - return this->emitLoadPop(classifyComplexElementType(SubExpr->getType()), - SubExpr); - return true; + // Rvalue, load the actual element. + return this->emitArrayElemPop(classifyComplexElementType(SubExpr->getType()), + 0, SubExpr); } template @@ -3362,11 +3358,7 @@ bool ByteCodeExprGen::emitComplexBoolCast(const Expr *E) { PrimType ElemT = classifyComplexElementType(E->getType()); // We emit the expression (__real(E) != 0 || __imag(E) != 0) // for us, that means (bool)E[0] || (bool)E[1] - if (!this->emitConstUint8(0, E)) - return false; - if (!this->emitArrayElemPtrUint8(E)) - return false; - if (!this->emitLoadPop(ElemT, E)) + if (!this->emitArrayElem(ElemT, 0, E)) return false; if (ElemT == PT_Float) { if (!this->emitCastFloatingIntegral(PT_Bool, E)) @@ -3381,11 +3373,7 @@ bool ByteCodeExprGen::emitComplexBoolCast(const Expr *E) { if (!this->jumpTrue(LabelTrue)) return false; - if (!this->emitConstUint8(1, E)) - return false; - if (!this->emitArrayElemPtrPopUint8(E)) - return false; - if (!this->emitLoadPop(ElemT, E)) + if (!this->emitArrayElemPop(ElemT, 1, E)) return false; if (ElemT == PT_Float) { if (!this->emitCastFloatingIntegral(PT_Bool, E)) @@ -3410,6 +3398,102 @@ bool 
ByteCodeExprGen::emitComplexBoolCast(const Expr *E) { return true; } +template +bool ByteCodeExprGen::emitComplexComparison(const Expr *LHS, + const Expr *RHS, + const BinaryOperator *E) { + assert(E->isComparisonOp()); + assert(!Initializing); + assert(!DiscardResult); + + PrimType ElemT; + bool LHSIsComplex; + unsigned LHSOffset; + if (LHS->getType()->isAnyComplexType()) { + LHSIsComplex = true; + ElemT = classifyComplexElementType(LHS->getType()); + LHSOffset = allocateLocalPrimitive(LHS, PT_Ptr, /*IsConst=*/true, + /*IsExtended=*/false); + if (!this->visit(LHS)) + return false; + if (!this->emitSetLocal(PT_Ptr, LHSOffset, E)) + return false; + } else { + LHSIsComplex = false; + PrimType LHST = classifyPrim(LHS->getType()); + LHSOffset = this->allocateLocalPrimitive(LHS, LHST, true, false); + if (!this->visit(LHS)) + return false; + if (!this->emitSetLocal(LHST, LHSOffset, E)) + return false; + } + + bool RHSIsComplex; + unsigned RHSOffset; + if (RHS->getType()->isAnyComplexType()) { + RHSIsComplex = true; + ElemT = classifyComplexElementType(RHS->getType()); + RHSOffset = allocateLocalPrimitive(RHS, PT_Ptr, /*IsConst=*/true, + /*IsExtended=*/false); + if (!this->visit(RHS)) + return false; + if (!this->emitSetLocal(PT_Ptr, RHSOffset, E)) + return false; + } else { + RHSIsComplex = false; + PrimType RHST = classifyPrim(RHS->getType()); + RHSOffset = this->allocateLocalPrimitive(RHS, RHST, true, false); + if (!this->visit(RHS)) + return false; + if (!this->emitSetLocal(RHST, RHSOffset, E)) + return false; + } + + auto getElem = [&](unsigned LocalOffset, unsigned Index, + bool IsComplex) -> bool { + if (IsComplex) { + if (!this->emitGetLocal(PT_Ptr, LocalOffset, E)) + return false; + return this->emitArrayElemPop(ElemT, Index, E); + } + return this->emitGetLocal(ElemT, LocalOffset, E); + }; + + for (unsigned I = 0; I != 2; ++I) { + // Get both values. 
+ if (!getElem(LHSOffset, I, LHSIsComplex)) + return false; + if (!getElem(RHSOffset, I, RHSIsComplex)) + return false; + // And compare them. + if (!this->emitEQ(ElemT, E)) + return false; + + if (!this->emitCastBoolUint8(E)) + return false; + } + + // We now have two bool values on the stack. Compare those. + if (!this->emitAddUint8(E)) + return false; + if (!this->emitConstUint8(2, E)) + return false; + + if (E->getOpcode() == BO_EQ) { + if (!this->emitEQUint8(E)) + return false; + } else if (E->getOpcode() == BO_NE) { + if (!this->emitNEUint8(E)) + return false; + } else + return false; + + // In C, this returns an int. + if (PrimType ResT = classifyPrim(E->getType()); ResT != PT_Bool) + return this->emitCast(PT_Bool, ResT, E); + return true; +} + /// When calling this, we have a pointer of the local-to-destroy /// on the stack. /// Emit destruction of record types (or arrays of record types). diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.h b/clang/lib/AST/Interp/ByteCodeExprGen.h index acbbcc3dc9619a..5977bb5e6ff25d 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.h +++ b/clang/lib/AST/Interp/ByteCodeExprGen.h @@ -268,6 +268,8 @@ class ByteCodeExprGen : public ConstStmtVisitor, bool>, bool emitComplexReal(const Expr *SubExpr); bool emitComplexBoolCast(const Expr *E); + bool emitComplexComparison(const Expr *LHS, const Expr *RHS, + const BinaryOperator *E); bool emitRecordDestruction(const Record *R); bool emitDestruction(const Descriptor *Desc); diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h index 43cbc2ff292c09..bb220657c2dadc 100644 --- a/clang/lib/AST/Interp/Interp.h +++ b/clang/lib/AST/Interp/Interp.h @@ -1959,10 +1959,24 @@ inline bool ArrayElemPtrPop(InterpState &S, CodePtr OpPC) { return NarrowPtr(S, OpPC); } +template ::T> +inline bool ArrayElem(InterpState &S, CodePtr OpPC, uint32_t Index) { + const Pointer &Ptr = S.Stk.peek(); + + if (!CheckLoad(S, OpPC, Ptr)) + return false; + + 
S.Stk.push(Ptr.atIndex(Index).deref()); + return true; +} + template ::T> inline bool ArrayElemPop(InterpState &S, CodePtr OpPC, uint32_t Index) { const Pointer &Ptr = S.Stk.pop(); + if (!CheckLoad(S, OpPC, Ptr)) + return false; + S.Stk.push(Ptr.atIndex(Index).deref()); return true; } diff --git a/clang/lib/AST/Interp/Opcodes.td b/clang/lib/AST/Interp/Opcodes.td index ffc54646f0279e..9b99aa0ccb558a 100644 --- a/clang/lib/AST/Interp/Opcodes.td +++ b/clang/lib/AST/Interp/Opcodes.td @@ -368,6 +368,14 @@ def ArrayElemPop : Opcode { let HasGroup = 1; } +def ArrayElem : Opcode { + let Args = [ArgUint32]; + let Types = [AllTypeClass]; + let HasGroup = 1; +} + + + //===----------------------------------------------------------------------===// // Direct field accessors //===----------------------------------------------------------------------===// diff --git a/clang/lib/AST/Interp/Program.h b/clang/lib/AST/Interp/Program.h index 045bf7ab7745b1..50bdb575e805cf 100644 --- a/clang/lib/AST/Interp/Program.h +++ b/clang/lib/AST/Interp/Program.h @@ -34,7 +34,6 @@ class VarDecl; namespace interp { class Context; -class Record; /// The program contains and links the bytecode for all functions. 
class Program final { diff --git a/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp b/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp index 8aed19544be6a2..7c9f8fbb0a7009 100644 --- a/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp +++ b/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp @@ -94,6 +94,38 @@ static llvm::BitVector findReachableBlocks(const CFG &Cfg) { return BlockReachable; } +static llvm::DenseSet +buildContainsExprConsumedInDifferentBlock( + const CFG &Cfg, + const llvm::DenseMap &StmtToBlock) { + llvm::DenseSet Result; + + auto CheckChildExprs = [&Result, &StmtToBlock](const Stmt *S, + const CFGBlock *Block) { + for (const Stmt *Child : S->children()) { + if (!isa(Child)) + continue; + const CFGBlock *ChildBlock = StmtToBlock.lookup(Child); + if (ChildBlock != Block) + Result.insert(ChildBlock); + } + }; + + for (const CFGBlock *Block : Cfg) { + if (Block == nullptr) + continue; + + for (const CFGElement &Element : *Block) + if (auto S = Element.getAs()) + CheckChildExprs(S->getStmt(), Block); + + if (const Stmt *TerminatorCond = Block->getTerminatorCondition()) + CheckChildExprs(TerminatorCond, Block); + } + + return Result; +} + llvm::Expected ControlFlowContext::build(const FunctionDecl &Func) { if (!Func.doesThisDeclarationHaveABody()) @@ -140,8 +172,12 @@ ControlFlowContext::build(const Decl &D, Stmt &S, ASTContext &C) { llvm::BitVector BlockReachable = findReachableBlocks(*Cfg); + llvm::DenseSet ContainsExprConsumedInDifferentBlock = + buildContainsExprConsumedInDifferentBlock(*Cfg, StmtToBlock); + return ControlFlowContext(D, std::move(Cfg), std::move(StmtToBlock), - std::move(BlockReachable)); + std::move(BlockReachable), + std::move(ContainsExprConsumedInDifferentBlock)); } } // namespace dataflow diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp index fd7b06efcc7861..1d2bd9a9b08af3 100644 --- 
a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp @@ -48,6 +48,24 @@ static llvm::DenseMap intersectDeclToLoc( return Result; } +// Performs a join on either `ExprToLoc` or `ExprToVal`. +// The maps must be consistent in the sense that any entries for the same +// expression must map to the same location / value. This is the case if we are +// performing a join for control flow within a full-expression (which is the +// only case when this function should be used). +template MapT joinExprMaps(const MapT &Map1, const MapT &Map2) { + MapT Result = Map1; + + for (const auto &Entry : Map2) { + [[maybe_unused]] auto [It, Inserted] = Result.insert(Entry); + // If there was an existing entry, its value should be the same as for the + // entry we were trying to insert. + assert(It->second == Entry.second); + } + + return Result; +} + // Whether to consider equivalent two values with an unknown relation. // // FIXME: this function is a hack enabling unsoundness to support @@ -414,8 +432,15 @@ void Environment::initialize() { } } else if (MethodDecl->isImplicitObjectMemberFunction()) { QualType ThisPointeeType = MethodDecl->getFunctionObjectParameterType(); - setThisPointeeStorageLocation( - cast(createObject(ThisPointeeType))); + auto &ThisLoc = + cast(createStorageLocation(ThisPointeeType)); + setThisPointeeStorageLocation(ThisLoc); + refreshRecordValue(ThisLoc, *this); + // Initialize fields of `*this` with values, but only if we're not + // analyzing a constructor; after all, it's the constructor's job to do + // this (and we want to be able to test that). 
+ if (!isa(MethodDecl)) + initializeFieldsWithValues(ThisLoc); } } } @@ -627,7 +652,8 @@ LatticeJoinEffect Environment::widen(const Environment &PrevEnv, } Environment Environment::join(const Environment &EnvA, const Environment &EnvB, - Environment::ValueModel &Model) { + Environment::ValueModel &Model, + ExprJoinBehavior ExprBehavior) { assert(EnvA.DACtx == EnvB.DACtx); assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc); assert(EnvA.CallStack == EnvB.CallStack); @@ -675,9 +701,10 @@ Environment Environment::join(const Environment &EnvA, const Environment &EnvB, JoinedEnv.LocToVal = joinLocToVal(EnvA.LocToVal, EnvB.LocToVal, EnvA, EnvB, JoinedEnv, Model); - // We intentionally leave `JoinedEnv.ExprToLoc` and `JoinedEnv.ExprToVal` - // empty, as we never need to access entries in these maps outside of the - // basic block that sets them. + if (ExprBehavior == KeepExprState) { + JoinedEnv.ExprToVal = joinExprMaps(EnvA.ExprToVal, EnvB.ExprToVal); + JoinedEnv.ExprToLoc = joinExprMaps(EnvA.ExprToLoc, EnvB.ExprToLoc); + } return JoinedEnv; } @@ -799,6 +826,16 @@ PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) { return DACtx->getOrCreateNullPointerValue(PointeeType); } +void Environment::initializeFieldsWithValues(RecordStorageLocation &Loc) { + llvm::DenseSet Visited; + int CreatedValuesCount = 0; + initializeFieldsWithValues(Loc, Visited, 0, CreatedValuesCount); + if (CreatedValuesCount > MaxCompositeValueSize) { + llvm::errs() << "Attempting to initialize a huge value of type: " + << Loc.getType() << '\n'; + } +} + void Environment::setValue(const StorageLocation &Loc, Value &Val) { assert(!isa(&Val) || &cast(&Val)->getLoc() == &Loc); diff --git a/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp b/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp index ff4e18de2c70f1..6afd66d9dc6ac5 100644 --- a/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp +++ b/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp @@ -500,7 +500,7 @@ class HTMLLogger : public 
Logger { for (unsigned I = 0; I < CFG.getNumBlockIDs(); ++I) { std::string Name = blockID(I); // Rightwards arrow, vertical line - char ConvergenceMarker[] = u8"\\n\u2192\u007c"; + const char *ConvergenceMarker = (const char *)u8"\\n\u2192\u007c"; if (BlockConverged[I]) Name += ConvergenceMarker; GraphS << " " << blockID(I) << " [id=" << blockID(I) << " label=\"" diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp index 4c88c46142d64d..939247c047c66e 100644 --- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -221,6 +221,7 @@ class PrettyStackTraceCFGElement : public llvm::PrettyStackTraceEntry { // Avoids unneccesary copies of the environment. class JoinedStateBuilder { AnalysisContext &AC; + Environment::ExprJoinBehavior JoinBehavior; std::vector All; std::deque Owned; @@ -228,11 +229,13 @@ class JoinedStateBuilder { join(const TypeErasedDataflowAnalysisState &L, const TypeErasedDataflowAnalysisState &R) { return {AC.Analysis.joinTypeErased(L.Lattice, R.Lattice), - Environment::join(L.Env, R.Env, AC.Analysis)}; + Environment::join(L.Env, R.Env, AC.Analysis, JoinBehavior)}; } public: - JoinedStateBuilder(AnalysisContext &AC) : AC(AC) {} + JoinedStateBuilder(AnalysisContext &AC, + Environment::ExprJoinBehavior JoinBehavior) + : AC(AC), JoinBehavior(JoinBehavior) {} void addOwned(TypeErasedDataflowAnalysisState State) { Owned.push_back(std::move(State)); @@ -248,12 +251,12 @@ class JoinedStateBuilder { // initialize the state of each basic block differently. return {AC.Analysis.typeErasedInitialElement(), AC.InitEnv.fork()}; if (All.size() == 1) - // Join the environment with itself so that we discard the entries from - // `ExprToLoc` and `ExprToVal`. + // Join the environment with itself so that we discard expression state if + // desired. 
// FIXME: We could consider writing special-case code for this that only // does the discarding, but it's not clear if this is worth it. - return {All[0]->Lattice, - Environment::join(All[0]->Env, All[0]->Env, AC.Analysis)}; + return {All[0]->Lattice, Environment::join(All[0]->Env, All[0]->Env, + AC.Analysis, JoinBehavior)}; auto Result = join(*All[0], *All[1]); for (unsigned I = 2; I < All.size(); ++I) @@ -307,7 +310,22 @@ computeBlockInputState(const CFGBlock &Block, AnalysisContext &AC) { } } - JoinedStateBuilder Builder(AC); + // If any of the predecessor blocks contains an expression consumed in a + // different block, we need to keep expression state. + // Note that in this case, we keep expression state for all predecessors, + // rather than only those predecessors that actually contain an expression + // consumed in a different block. While this is potentially suboptimal, it's + // actually likely, if we have control flow within a full expression, that + // all predecessors have expression state consumed in a different block. + Environment::ExprJoinBehavior JoinBehavior = Environment::DiscardExprState; + for (const CFGBlock *Pred : Preds) { + if (Pred && AC.CFCtx.containsExprConsumedInDifferentBlock(*Pred)) { + JoinBehavior = Environment::KeepExprState; + break; + } + } + + JoinedStateBuilder Builder(AC, JoinBehavior); for (const CFGBlock *Pred : Preds) { // Skip if the `Block` is unreachable or control flow cannot get past it. 
if (!Pred || Pred->hasNoReturnElement()) @@ -388,7 +406,6 @@ builtinTransferInitializer(const CFGInitializer &Elt, } } assert(Member != nullptr); - assert(MemberLoc != nullptr); // FIXME: Instead of these case distinctions, we would ideally want to be able // to simply use `Environment::createObject()` here, the same way that we do @@ -404,6 +421,7 @@ builtinTransferInitializer(const CFGInitializer &Elt, ParentLoc->setChild(*Member, InitExprLoc); } else if (auto *InitExprVal = Env.getValue(*InitExpr)) { + assert(MemberLoc != nullptr); if (Member->getType()->isRecordType()) { auto *InitValStruct = cast(InitExprVal); // FIXME: Rather than performing a copy here, we should really be diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index d02875c6a86d77..967319bdfc4571 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -916,7 +916,7 @@ void CodeGenModule::Release() { llvm::ConstantArray::get(ATy, UsedArray), "__clang_gpu_used_external"); addCompilerUsedGlobal(GV); } - if (LangOpts.HIP) { + if (LangOpts.HIP && !getLangOpts().OffloadingNewDriver) { // Emit a unique ID so that host and device binaries from the same // compilation unit can be associated. 
auto *GV = new llvm::GlobalVariable( diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp index 725e8a70fddfe6..85117366de0ee8 100644 --- a/clang/lib/CodeGen/Targets/AArch64.cpp +++ b/clang/lib/CodeGen/Targets/AArch64.cpp @@ -886,9 +886,11 @@ void AArch64ABIInfo::appendAttributeMangling(StringRef AttrStr, return LHS.compare(RHS) < 0; }); + llvm::SmallDenseSet UniqueFeats; for (auto &Feat : Features) if (auto Ext = llvm::AArch64::parseArchExtension(Feat)) - Out << 'M' << Ext->Name; + if (UniqueFeats.insert(Ext->Name).second) + Out << 'M' << Ext->Name; } std::unique_ptr diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index cecd34acbc92c0..96e6ad77f5e50d 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4625,7 +4625,15 @@ Action *Driver::BuildOffloadingActions(Compilation &C, DDeps.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind); OffloadAction::DeviceDependences DDep; DDep.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind); + + // Compiling CUDA in non-RDC mode uses the PTX output if available. + for (Action *Input : A->getInputs()) + if (Kind == Action::OFK_Cuda && A->getType() == types::TY_Object && + !Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, + false)) + DDep.add(*Input, *TCAndArch->first, TCAndArch->second.data(), Kind); OffloadActions.push_back(C.MakeAction(DDep, A->getType())); + ++TCAndArch; } } diff --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp index 99a34d25cfcd56..f9eadfaec88dec 100644 --- a/clang/lib/Driver/OffloadBundler.cpp +++ b/clang/lib/Driver/OffloadBundler.cpp @@ -590,7 +590,8 @@ class ObjectFileHandler final : public FileHandler { // Copy fat object contents to the output when extracting host bundle. 
std::string ModifiedContent; if (Content.size() == 1u && Content.front() == 0) { - auto HostBundleOrErr = getHostBundle(); + auto HostBundleOrErr = getHostBundle( + StringRef(Input.getBufferStart(), Input.getBufferSize())); if (!HostBundleOrErr) return HostBundleOrErr.takeError(); @@ -700,7 +701,7 @@ class ObjectFileHandler final : public FileHandler { return Error::success(); } - Expected getHostBundle() { + Expected getHostBundle(StringRef Input) { TempFileHandlerRAII TempFiles; auto ModifiedObjPathOrErr = TempFiles.Create(std::nullopt); @@ -715,7 +716,24 @@ class ObjectFileHandler final : public FileHandler { ObjcopyArgs.push_back("--regex"); ObjcopyArgs.push_back("--remove-section=__CLANG_OFFLOAD_BUNDLE__.*"); ObjcopyArgs.push_back("--"); - ObjcopyArgs.push_back(BundlerConfig.InputFileNames.front()); + + StringRef ObjcopyInputFileName; + // When unbundling an archive, the content of each object file in the + // archive is passed to this function by parameter Input, which is different + // from the content of the original input archive file, therefore it needs + // to be saved to a temporary file before passed to llvm-objcopy. Otherwise, + // Input is the same as the content of the original input file, therefore + // temporary file is not needed. 
+ if (StringRef(BundlerConfig.FilesType).starts_with("a")) { + auto InputFileOrErr = + TempFiles.Create(ArrayRef(Input.data(), Input.size())); + if (!InputFileOrErr) + return InputFileOrErr.takeError(); + ObjcopyInputFileName = *InputFileOrErr; + } else + ObjcopyInputFileName = BundlerConfig.InputFileNames.front(); + + ObjcopyArgs.push_back(ObjcopyInputFileName); ObjcopyArgs.push_back(ModifiedObjPath); if (Error Err = executeObjcopy(BundlerConfig.ObjcopyPath, ObjcopyArgs)) @@ -1628,10 +1646,8 @@ Error OffloadBundler::UnbundleArchive() { while (!CodeObject.empty()) { SmallVector CompatibleTargets; auto CodeObjectInfo = OffloadTargetInfo(CodeObject, BundlerConfig); - if (CodeObjectInfo.hasHostKind()) { - // Do nothing, we don't extract host code yet. - } else if (getCompatibleOffloadTargets(CodeObjectInfo, CompatibleTargets, - BundlerConfig)) { + if (getCompatibleOffloadTargets(CodeObjectInfo, CompatibleTargets, + BundlerConfig)) { std::string BundleData; raw_string_ostream DataStream(BundleData); if (Error Err = FileHandler->ReadBundle(DataStream, CodeObjectBuffer)) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index deb2dac80afe7c..fa17f6295d6ea7 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -8892,10 +8892,13 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, // Add the linker arguments to be forwarded by the wrapper. 
CmdArgs.push_back(Args.MakeArgString(Twine("--linker-path=") + LinkCommand->getExecutable())); - CmdArgs.push_back("--"); for (const char *LinkArg : LinkCommand->getArguments()) CmdArgs.push_back(LinkArg); + if (Args.hasFlag(options::OPT_offload_compress, + options::OPT_no_offload_compress, false)) + CmdArgs.push_back("--compress"); + const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("clang-linker-wrapper")); diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 177fd6310e7ee2..c6007d3cfab864 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -503,18 +503,20 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA, Exec, CmdArgs, Inputs, Output)); } -static bool shouldIncludePTX(const ArgList &Args, const char *gpu_arch) { - bool includePTX = true; - for (Arg *A : Args) { - if (!(A->getOption().matches(options::OPT_cuda_include_ptx_EQ) || - A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ))) - continue; +static bool shouldIncludePTX(const ArgList &Args, StringRef InputArch) { + // The new driver does not include PTX by default to avoid overhead. 
+ bool includePTX = !Args.hasFlag(options::OPT_offload_new_driver, + options::OPT_no_offload_new_driver, false); + for (Arg *A : Args.filtered(options::OPT_cuda_include_ptx_EQ, + options::OPT_no_cuda_include_ptx_EQ)) { A->claim(); const StringRef ArchStr = A->getValue(); - if (ArchStr == "all" || ArchStr == gpu_arch) { - includePTX = A->getOption().matches(options::OPT_cuda_include_ptx_EQ); - continue; - } + if (A->getOption().matches(options::OPT_cuda_include_ptx_EQ) && + (ArchStr == "all" || ArchStr == InputArch)) + includePTX = true; + else if (A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ) && + (ArchStr == "all" || ArchStr == InputArch)) + includePTX = false; } return includePTX; } diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 691f3b989b81e5..451bdb9386f587 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -2556,6 +2556,8 @@ static const auto &getFrontendActionTable() { {frontend::GenerateModule, OPT_emit_module}, {frontend::GenerateModuleInterface, OPT_emit_module_interface}, + {frontend::GenerateReducedModuleInterface, + OPT_emit_reduced_module_interface}, {frontend::GenerateHeaderUnit, OPT_emit_header_unit}, {frontend::GeneratePCH, OPT_emit_pch}, {frontend::GenerateInterfaceStubs, OPT_emit_interface_stubs}, @@ -4280,6 +4282,7 @@ static bool isStrictlyPreprocessorAction(frontend::ActionKind Action) { case frontend::FixIt: case frontend::GenerateModule: case frontend::GenerateModuleInterface: + case frontend::GenerateReducedModuleInterface: case frontend::GenerateHeaderUnit: case frontend::GeneratePCH: case frontend::GenerateInterfaceStubs: diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp index b9ed5dedfa4223..50338bfa670f83 100644 --- a/clang/lib/Frontend/FrontendActions.cpp +++ b/clang/lib/Frontend/FrontendActions.cpp @@ -184,12 +184,12 @@ bool 
GeneratePCHAction::BeginSourceFileAction(CompilerInstance &CI) { return true; } -std::unique_ptr -GenerateModuleAction::CreateASTConsumer(CompilerInstance &CI, - StringRef InFile) { +std::vector> +GenerateModuleAction::CreateMultiplexConsumer(CompilerInstance &CI, + StringRef InFile) { std::unique_ptr OS = CreateOutputFile(CI, InFile); if (!OS) - return nullptr; + return {}; std::string OutputFile = CI.getFrontendOpts().OutputFile; std::string Sysroot; @@ -210,6 +210,17 @@ GenerateModuleAction::CreateASTConsumer(CompilerInstance &CI, +CI.getFrontendOpts().BuildingImplicitModule)); Consumers.push_back(CI.getPCHContainerWriter().CreatePCHContainerGenerator( CI, std::string(InFile), OutputFile, std::move(OS), Buffer)); + return Consumers; +} + +std::unique_ptr +GenerateModuleAction::CreateASTConsumer(CompilerInstance &CI, + StringRef InFile) { + std::vector> Consumers = + CreateMultiplexConsumer(CI, InFile); + if (Consumers.empty()) + return nullptr; + return std::make_unique(std::move(Consumers)); } @@ -265,7 +276,12 @@ GenerateModuleInterfaceAction::CreateASTConsumer(CompilerInstance &CI, CI.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = true; CI.getHeaderSearchOpts().ModulesSkipPragmaDiagnosticMappings = true; - return GenerateModuleAction::CreateASTConsumer(CI, InFile); + std::vector> Consumers = + CreateMultiplexConsumer(CI, InFile); + if (Consumers.empty()) + return nullptr; + + return std::make_unique(std::move(Consumers)); } std::unique_ptr @@ -274,6 +290,16 @@ GenerateModuleInterfaceAction::CreateOutputFile(CompilerInstance &CI, return CI.createDefaultOutputFile(/*Binary=*/true, InFile, "pcm"); } +std::unique_ptr +GenerateReducedModuleInterfaceAction::CreateASTConsumer(CompilerInstance &CI, + StringRef InFile) { + auto Buffer = std::make_shared(); + return std::make_unique( + CI.getPreprocessor(), CI.getModuleCache(), + CI.getFrontendOpts().OutputFile, Buffer, + /*IncludeTimestamps=*/+CI.getFrontendOpts().IncludeTimestamps); +} + bool 
GenerateHeaderUnitAction::BeginSourceFileAction(CompilerInstance &CI) { if (!CI.getLangOpts().CPlusPlusModules) { CI.getDiagnostics().Report(diag::err_module_interface_requires_cpp_modules); @@ -839,7 +865,6 @@ void DumpModuleInfoAction::ExecuteAction() { const LangOptions &LO = getCurrentASTUnit().getLangOpts(); if (LO.CPlusPlusModules && !LO.CurrentModule.empty()) { - ASTReader *R = getCurrentASTUnit().getASTReader().get(); unsigned SubModuleCount = R->getTotalNumSubmodules(); serialization::ModuleFile &MF = R->getModuleManager().getPrimaryModule(); diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 9b979d810fa127..48ad92063bd461 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -736,7 +736,7 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts, } // C++23 features. if (LangOpts.CPlusPlus23) { - Builder.defineMacro("__cpp_implicit_move", "202011L"); + Builder.defineMacro("__cpp_implicit_move", "202207L"); Builder.defineMacro("__cpp_size_t_suffix", "202011L"); Builder.defineMacro("__cpp_if_consteval", "202106L"); Builder.defineMacro("__cpp_multidimensional_subscript", "202211L"); diff --git a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp index 925879a68cbd09..2446aee571f440 100644 --- a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp +++ b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp @@ -65,6 +65,8 @@ CreateFrontendBaseAction(CompilerInstance &CI) { return std::make_unique(); case GenerateModuleInterface: return std::make_unique(); + case GenerateReducedModuleInterface: + return std::make_unique(); case GenerateHeaderUnit: return std::make_unique(); case GeneratePCH: return std::make_unique(); diff --git a/clang/lib/InstallAPI/Frontend.cpp b/clang/lib/InstallAPI/Frontend.cpp index 240a80e1d3d82c..1edbdf5bb98360 100644 --- a/clang/lib/InstallAPI/Frontend.cpp +++ 
b/clang/lib/InstallAPI/Frontend.cpp @@ -19,9 +19,10 @@ namespace clang::installapi { GlobalRecord *FrontendRecordsSlice::addGlobal( StringRef Name, RecordLinkage Linkage, GlobalRecord::Kind GV, const clang::AvailabilityInfo Avail, const Decl *D, const HeaderType Access, - SymbolFlags Flags) { + SymbolFlags Flags, bool Inlined) { - auto *GR = llvm::MachO::RecordsSlice::addGlobal(Name, Linkage, GV, Flags); + auto *GR = + llvm::MachO::RecordsSlice::addGlobal(Name, Linkage, GV, Flags, Inlined); FrontendRecords.insert({GR, FrontendAttrs{Avail, D, Access}}); return GR; } diff --git a/clang/lib/InstallAPI/Visitor.cpp b/clang/lib/InstallAPI/Visitor.cpp index fbe6f1dabe005d..1f2ef08e5aa252 100644 --- a/clang/lib/InstallAPI/Visitor.cpp +++ b/clang/lib/InstallAPI/Visitor.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "clang/InstallAPI/Visitor.h" +#include "clang/AST/ParentMapContext.h" #include "clang/Basic/Linkage.h" #include "clang/InstallAPI/Frontend.h" #include "llvm/ADT/SmallString.h" @@ -27,6 +28,31 @@ static bool isExported(const NamedDecl *D) { (LV.getVisibility() == DefaultVisibility); } +static bool isInlined(const FunctionDecl *D) { + bool HasInlineAttribute = false; + bool NoCXXAttr = + (!D->getASTContext().getLangOpts().CPlusPlus && + !D->getASTContext().getTargetInfo().getCXXABI().isMicrosoft() && + !D->hasAttr()); + + // Check all redeclarations to find an inline attribute or keyword. 
+ for (const auto *RD : D->redecls()) { + if (!RD->isInlined()) + continue; + HasInlineAttribute = true; + if (!(NoCXXAttr || RD->hasAttr())) + continue; + if (RD->doesThisDeclarationHaveABody() && + RD->isInlineDefinitionExternallyVisible()) + return false; + } + + if (!HasInlineAttribute) + return false; + + return true; +} + static SymbolFlags getFlags(bool WeakDef, bool ThreadLocal) { SymbolFlags Result = SymbolFlags::None; if (WeakDef) @@ -204,4 +230,56 @@ bool InstallAPIVisitor::VisitVarDecl(const VarDecl *D) { return true; } +bool InstallAPIVisitor::VisitFunctionDecl(const FunctionDecl *D) { + if (const CXXMethodDecl *M = dyn_cast(D)) { + // Skip member function in class templates. + if (M->getParent()->getDescribedClassTemplate() != nullptr) + return true; + + // Skip methods in CXX RecordDecls. + for (auto P : D->getASTContext().getParents(*M)) { + if (P.get()) + return true; + } + + // Skip CXX ConstructorDecls and DestructorDecls. + if (isa(M) || isa(M)) + return true; + } + + // Skip templated functions. + switch (D->getTemplatedKind()) { + case FunctionDecl::TK_NonTemplate: + case FunctionDecl::TK_DependentNonTemplate: + break; + case FunctionDecl::TK_MemberSpecialization: + case FunctionDecl::TK_FunctionTemplateSpecialization: + if (auto *TempInfo = D->getTemplateSpecializationInfo()) { + if (!TempInfo->isExplicitInstantiationOrSpecialization()) + return true; + } + break; + case FunctionDecl::TK_FunctionTemplate: + case FunctionDecl::TK_DependentFunctionTemplateSpecialization: + return true; + } + + auto Access = getAccessForDecl(D); + if (!Access) + return true; + auto Name = getMangledName(D); + const AvailabilityInfo Avail = AvailabilityInfo::createFromDecl(D); + const bool ExplicitInstantiation = D->getTemplateSpecializationKind() == + TSK_ExplicitInstantiationDeclaration; + const bool WeakDef = ExplicitInstantiation || D->hasAttr(); + const bool Inlined = isInlined(D); + const RecordLinkage Linkage = (Inlined || !isExported(D)) + ? 
RecordLinkage::Internal + : RecordLinkage::Exported; + Ctx.Slice->addGlobal(Name, Linkage, GlobalRecord::Kind::Function, Avail, D, + *Access, getFlags(WeakDef, /*ThreadLocal=*/false), + Inlined); + return true; +} + } // namespace clang::installapi diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp index 9f97a3c6b0be9e..37696b28976428 100644 --- a/clang/lib/Interpreter/Interpreter.cpp +++ b/clang/lib/Interpreter/Interpreter.cpp @@ -132,7 +132,8 @@ CreateCI(const llvm::opt::ArgStringList &Argv) { } // anonymous namespace llvm::Expected> -IncrementalCompilerBuilder::create(std::vector &ClangArgv) { +IncrementalCompilerBuilder::create(std::string TT, + std::vector &ClangArgv) { // If we don't know ClangArgv0 or the address of main() at this point, try // to guess it anyway (it's possible on some platforms). @@ -162,8 +163,7 @@ IncrementalCompilerBuilder::create(std::vector &ClangArgv) { TextDiagnosticBuffer *DiagsBuffer = new TextDiagnosticBuffer; DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagsBuffer); - driver::Driver Driver(/*MainBinaryName=*/ClangArgv[0], - llvm::sys::getProcessTriple(), Diags); + driver::Driver Driver(/*MainBinaryName=*/ClangArgv[0], TT, Diags); Driver.setCheckInputsExist(false); // the input comes from mem buffers llvm::ArrayRef RF = llvm::ArrayRef(ClangArgv); std::unique_ptr Compilation(Driver.BuildCompilation(RF)); @@ -185,7 +185,8 @@ IncrementalCompilerBuilder::CreateCpp() { Argv.push_back("-xc++"); Argv.insert(Argv.end(), UserArgs.begin(), UserArgs.end()); - return IncrementalCompilerBuilder::create(Argv); + std::string TT = TargetTriple ? *TargetTriple : llvm::sys::getProcessTriple(); + return IncrementalCompilerBuilder::create(TT, Argv); } llvm::Expected> @@ -213,7 +214,8 @@ IncrementalCompilerBuilder::createCuda(bool device) { Argv.insert(Argv.end(), UserArgs.begin(), UserArgs.end()); - return IncrementalCompilerBuilder::create(Argv); + std::string TT = TargetTriple ? 
*TargetTriple : llvm::sys::getProcessTriple(); + return IncrementalCompilerBuilder::create(TT, Argv); } llvm::Expected> diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 81f1c711269445..dd179414a14191 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -1234,8 +1234,11 @@ void Parser::ParseAvailabilityAttribute( } IdentifierLoc *Platform = ParseIdentifierLoc(); if (const IdentifierInfo *const Ident = Platform->Ident) { + // Disallow xrOS for availability attributes. + if (Ident->getName().contains("xrOS") || Ident->getName().contains("xros")) + Diag(Platform->Loc, diag::warn_availability_unknown_platform) << Ident; // Canonicalize platform name from "macosx" to "macos". - if (Ident->getName() == "macosx") + else if (Ident->getName() == "macosx") Platform->Ident = PP.getIdentifierInfo("macos"); // Canonicalize platform name from "macosx_app_extension" to // "macos_app_extension". @@ -5678,24 +5681,32 @@ Parser::DeclGroupPtrTy Parser::ParseTopLevelStmtDecl() { // Parse a top-level-stmt. Parser::StmtVector Stmts; ParsedStmtContext SubStmtCtx = ParsedStmtContext(); - Actions.PushFunctionScope(); + ParseScope FnScope(this, Scope::FnScope | Scope::DeclScope | + Scope::CompoundStmtScope); + TopLevelStmtDecl *TLSD = Actions.ActOnStartTopLevelStmtDecl(getCurScope()); StmtResult R = ParseStatementOrDeclaration(Stmts, SubStmtCtx); - Actions.PopFunctionScopeInfo(); if (!R.isUsable()) return nullptr; - SmallVector DeclsInGroup; - DeclsInGroup.push_back(Actions.ActOnTopLevelStmtDecl(R.get())); + Actions.ActOnFinishTopLevelStmtDecl(TLSD, R.get()); if (Tok.is(tok::annot_repl_input_end) && Tok.getAnnotationValue() != nullptr) { ConsumeAnnotationToken(); - cast(DeclsInGroup.back())->setSemiMissing(); + TLSD->setSemiMissing(); } - // Currently happens for things like -fms-extensions and use `__if_exists`. 
- for (Stmt *S : Stmts) - DeclsInGroup.push_back(Actions.ActOnTopLevelStmtDecl(S)); + SmallVector DeclsInGroup; + DeclsInGroup.push_back(TLSD); + + // Currently happens for things like -fms-extensions and use `__if_exists`. + for (Stmt *S : Stmts) { + // Here we should be safe as `__if_exists` and friends are not introducing + // new variables which need to live outside file scope. + TopLevelStmtDecl *D = Actions.ActOnStartTopLevelStmtDecl(getCurScope()); + Actions.ActOnFinishTopLevelStmtDecl(D, S); + DeclsInGroup.push_back(D); + } return Actions.BuildDeclaratorGroup(DeclsInGroup); } diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index 4bf954b5cc4db5..1f07eddb0fb378 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -3863,7 +3863,8 @@ std::optional Parser::ParseAvailabilitySpec() { StringRef Platform = AvailabilityAttr::canonicalizePlatformName(GivenPlatform); - if (AvailabilityAttr::getPrettyPlatformName(Platform).empty()) { + if (AvailabilityAttr::getPrettyPlatformName(Platform).empty() || + (GivenPlatform.contains("xros") || GivenPlatform.contains("xrOS"))) { Diag(PlatformIdentifier->Loc, diag::err_avail_query_unrecognized_platform_name) << GivenPlatform; diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 6b81ee183cc440..67e56a917a51de 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -20519,12 +20519,22 @@ Decl *Sema::ActOnFileScopeAsmDecl(Expr *expr, return New; } -Decl *Sema::ActOnTopLevelStmtDecl(Stmt *Statement) { - auto *New = TopLevelStmtDecl::Create(Context, Statement); - Context.getTranslationUnitDecl()->addDecl(New); +TopLevelStmtDecl *Sema::ActOnStartTopLevelStmtDecl(Scope *S) { + auto *New = TopLevelStmtDecl::Create(Context, /*Statement=*/nullptr); + CurContext->addDecl(New); + PushDeclContext(S, New); + PushFunctionScope(); + PushCompoundScope(false); return New; } +void Sema::ActOnFinishTopLevelStmtDecl(TopLevelStmtDecl *D, Stmt 
*Statement) { + D->setStmt(Statement); + PopCompoundScope(); + PopFunctionScopeInfo(); + PopDeclContext(); +} + void Sema::ActOnPragmaRedefineExtname(IdentifierInfo* Name, IdentifierInfo* AliasName, SourceLocation PragmaLoc, diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 199f2523cfb5d2..e258a4f7c89415 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -1715,6 +1715,8 @@ static bool CheckLiteralType(Sema &SemaRef, Sema::CheckConstexprKind Kind, static bool CheckConstexprDestructorSubobjects(Sema &SemaRef, const CXXDestructorDecl *DD, Sema::CheckConstexprKind Kind) { + assert(!SemaRef.getLangOpts().CPlusPlus23 && + "this check is obsolete for C++23"); auto Check = [&](SourceLocation Loc, QualType T, const FieldDecl *FD) { const CXXRecordDecl *RD = T->getBaseElementTypeUnsafe()->getAsCXXRecordDecl(); @@ -1746,6 +1748,8 @@ static bool CheckConstexprDestructorSubobjects(Sema &SemaRef, static bool CheckConstexprParameterTypes(Sema &SemaRef, const FunctionDecl *FD, Sema::CheckConstexprKind Kind) { + assert(!SemaRef.getLangOpts().CPlusPlus23 && + "this check is obsolete for C++23"); unsigned ArgIndex = 0; const auto *FT = FD->getType()->castAs(); for (FunctionProtoType::param_type_iterator i = FT->param_type_begin(), @@ -1767,6 +1771,8 @@ static bool CheckConstexprParameterTypes(Sema &SemaRef, /// true. If not, produce a suitable diagnostic and return false. 
static bool CheckConstexprReturnType(Sema &SemaRef, const FunctionDecl *FD, Sema::CheckConstexprKind Kind) { + assert(!SemaRef.getLangOpts().CPlusPlus23 && + "this check is obsolete for C++23"); if (CheckLiteralType(SemaRef, Kind, FD->getLocation(), FD->getReturnType(), diag::err_constexpr_non_literal_return, FD->isConsteval())) @@ -1856,16 +1862,18 @@ bool Sema::CheckConstexprFunctionDefinition(const FunctionDecl *NewFD, } } - // - its return type shall be a literal type; - if (!CheckConstexprReturnType(*this, NewFD, Kind)) + // - its return type shall be a literal type; (removed in C++23) + if (!getLangOpts().CPlusPlus23 && + !CheckConstexprReturnType(*this, NewFD, Kind)) return false; } if (auto *Dtor = dyn_cast(NewFD)) { // A destructor can be constexpr only if the defaulted destructor could be; // we don't need to check the members and bases if we already know they all - // have constexpr destructors. - if (!Dtor->getParent()->defaultedDestructorIsConstexpr()) { + // have constexpr destructors. (removed in C++23) + if (!getLangOpts().CPlusPlus23 && + !Dtor->getParent()->defaultedDestructorIsConstexpr()) { if (Kind == CheckConstexprKind::CheckValid) return false; if (!CheckConstexprDestructorSubobjects(*this, Dtor, Kind)) @@ -1873,8 +1881,9 @@ bool Sema::CheckConstexprFunctionDefinition(const FunctionDecl *NewFD, } } - // - each of its parameter types shall be a literal type; - if (!CheckConstexprParameterTypes(*this, NewFD, Kind)) + // - each of its parameter types shall be a literal type; (removed in C++23) + if (!getLangOpts().CPlusPlus23 && + !CheckConstexprParameterTypes(*this, NewFD, Kind)) return false; Stmt *Body = NewFD->getBody(); @@ -2457,7 +2466,8 @@ static bool CheckConstexprFunctionBody(Sema &SemaRef, const FunctionDecl *Dcl, // function", so is not checked in CheckValid mode. 
SmallVector Diags; if (Kind == Sema::CheckConstexprKind::Diagnose && - !Expr::isPotentialConstantExpr(Dcl, Diags)) { + !Expr::isPotentialConstantExpr(Dcl, Diags) && + !SemaRef.getLangOpts().CPlusPlus23) { SemaRef.Diag(Dcl->getLocation(), diag::ext_constexpr_function_never_constant_expr) << isa(Dcl) << Dcl->isConsteval() @@ -7535,21 +7545,23 @@ static bool defaultedSpecialMemberIsConstexpr( // C++1y [class.copy]p26: // -- [the class] is a literal type, and - if (!Ctor && !ClassDecl->isLiteral()) + if (!Ctor && !ClassDecl->isLiteral() && !S.getLangOpts().CPlusPlus23) return false; // -- every constructor involved in initializing [...] base class // sub-objects shall be a constexpr constructor; // -- the assignment operator selected to copy/move each direct base // class is a constexpr function, and - for (const auto &B : ClassDecl->bases()) { - const RecordType *BaseType = B.getType()->getAs(); - if (!BaseType) - continue; - CXXRecordDecl *BaseClassDecl = cast(BaseType->getDecl()); - if (!specialMemberIsConstexpr(S, BaseClassDecl, CSM, 0, ConstArg, - InheritedCtor, Inherited)) - return false; + if (!S.getLangOpts().CPlusPlus23) { + for (const auto &B : ClassDecl->bases()) { + const RecordType *BaseType = B.getType()->getAs(); + if (!BaseType) + continue; + CXXRecordDecl *BaseClassDecl = cast(BaseType->getDecl()); + if (!specialMemberIsConstexpr(S, BaseClassDecl, CSM, 0, ConstArg, + InheritedCtor, Inherited)) + return false; + } } // -- every constructor involved in initializing non-static data members @@ -7559,20 +7571,22 @@ static bool defaultedSpecialMemberIsConstexpr( // -- for each non-static data member of X that is of class type (or array // thereof), the assignment operator selected to copy/move that member is // a constexpr function - for (const auto *F : ClassDecl->fields()) { - if (F->isInvalidDecl()) - continue; - if (CSM == Sema::CXXDefaultConstructor && F->hasInClassInitializer()) - continue; - QualType BaseType = 
S.Context.getBaseElementType(F->getType()); - if (const RecordType *RecordTy = BaseType->getAs()) { - CXXRecordDecl *FieldRecDecl = cast(RecordTy->getDecl()); - if (!specialMemberIsConstexpr(S, FieldRecDecl, CSM, - BaseType.getCVRQualifiers(), - ConstArg && !F->isMutable())) + if (!S.getLangOpts().CPlusPlus23) { + for (const auto *F : ClassDecl->fields()) { + if (F->isInvalidDecl()) + continue; + if (CSM == Sema::CXXDefaultConstructor && F->hasInClassInitializer()) + continue; + QualType BaseType = S.Context.getBaseElementType(F->getType()); + if (const RecordType *RecordTy = BaseType->getAs()) { + CXXRecordDecl *FieldRecDecl = cast(RecordTy->getDecl()); + if (!specialMemberIsConstexpr(S, FieldRecDecl, CSM, + BaseType.getCVRQualifiers(), + ConstArg && !F->isMutable())) + return false; + } else if (CSM == Sema::CXXDefaultConstructor) { return false; - } else if (CSM == Sema::CXXDefaultConstructor) { - return false; + } } } @@ -7858,18 +7872,17 @@ bool Sema::CheckExplicitlyDefaultedSpecialMember(CXXMethodDecl *MD, MD->isConstexpr() && !Constexpr && MD->getTemplatedKind() == FunctionDecl::TK_NonTemplate) { if (!MD->isConsteval() && RD->getNumVBases()) { - Diag(MD->getBeginLoc(), diag::err_incorrect_defaulted_constexpr_with_vb) + Diag(MD->getBeginLoc(), + diag::err_incorrect_defaulted_constexpr_with_vb) << CSM; for (const auto &I : RD->vbases()) Diag(I.getBeginLoc(), diag::note_constexpr_virtual_base_here); } else { - Diag(MD->getBeginLoc(), MD->isConsteval() - ? diag::err_incorrect_defaulted_consteval - : diag::err_incorrect_defaulted_constexpr) - << CSM; + Diag(MD->getBeginLoc(), diag::err_incorrect_defaulted_constexpr) + << CSM << MD->isConsteval(); } - // FIXME: Explain why the special member can't be constexpr. - HadError = true; + HadError = true; + // FIXME: Explain why the special member can't be constexpr. 
} if (First) { @@ -9101,13 +9114,11 @@ bool Sema::CheckExplicitlyDefaultedComparison(Scope *S, FunctionDecl *FD, // - if the function is a constructor or destructor, its class does not // have any virtual base classes. if (FD->isConstexpr()) { - if (CheckConstexprReturnType(*this, FD, CheckConstexprKind::Diagnose) && + if (!getLangOpts().CPlusPlus23 && + CheckConstexprReturnType(*this, FD, CheckConstexprKind::Diagnose) && CheckConstexprParameterTypes(*this, FD, CheckConstexprKind::Diagnose) && !Info.Constexpr) { - Diag(FD->getBeginLoc(), - getLangOpts().CPlusPlus23 - ? diag::warn_cxx23_compat_defaulted_comparison_constexpr_mismatch - : diag::ext_defaulted_comparison_constexpr_mismatch) + Diag(FD->getBeginLoc(), diag::err_defaulted_comparison_constexpr_mismatch) << FD->isImplicit() << (int)DCK << FD->isConsteval(); DefaultedComparisonAnalyzer(*this, RD, FD, DCK, DefaultedComparisonAnalyzer::ExplainConstexpr) diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index a9edc7e68b53b3..6904c924c2fd3d 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -4623,10 +4623,12 @@ ASTWriter::ASTWriter(llvm::BitstreamWriter &Stream, SmallVectorImpl &Buffer, InMemoryModuleCache &ModuleCache, ArrayRef> Extensions, - bool IncludeTimestamps, bool BuildingImplicitModule) + bool IncludeTimestamps, bool BuildingImplicitModule, + bool GeneratingReducedBMI) : Stream(Stream), Buffer(Buffer), ModuleCache(ModuleCache), IncludeTimestamps(IncludeTimestamps), - BuildingImplicitModule(BuildingImplicitModule) { + BuildingImplicitModule(BuildingImplicitModule), + GeneratingReducedBMI(GeneratingReducedBMI) { for (const auto &Ext : Extensions) { if (auto Writer = Ext->createExtensionWriter(*this)) ModuleFileExtensionWriters.push_back(std::move(Writer)); @@ -5457,18 +5459,20 @@ void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) { // Add a trailing update record, if any. 
These must go last because we // lazily load their attached statement. - if (HasUpdatedBody) { - const auto *Def = cast(D); - Record.push_back(UPD_CXX_ADDED_FUNCTION_DEFINITION); - Record.push_back(Def->isInlined()); - Record.AddSourceLocation(Def->getInnerLocStart()); - Record.AddFunctionDefinition(Def); - } else if (HasAddedVarDefinition) { - const auto *VD = cast(D); - Record.push_back(UPD_CXX_ADDED_VAR_DEFINITION); - Record.push_back(VD->isInline()); - Record.push_back(VD->isInlineSpecified()); - Record.AddVarDeclInit(VD); + if (!GeneratingReducedBMI || !CanElideDeclDef(D)) { + if (HasUpdatedBody) { + const auto *Def = cast(D); + Record.push_back(UPD_CXX_ADDED_FUNCTION_DEFINITION); + Record.push_back(Def->isInlined()); + Record.AddSourceLocation(Def->getInnerLocStart()); + Record.AddFunctionDefinition(Def); + } else if (HasAddedVarDefinition) { + const auto *VD = cast(D); + Record.push_back(UPD_CXX_ADDED_VAR_DEFINITION); + Record.push_back(VD->isInline()); + Record.push_back(VD->isInlineSpecified()); + Record.AddVarDeclInit(VD); + } } OffsetsRecord.push_back(GetDeclRef(D)); diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index e73800100e3ccf..e1862de4a35b8f 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -16,6 +16,7 @@ #include "clang/AST/DeclTemplate.h" #include "clang/AST/DeclVisitor.h" #include "clang/AST/Expr.h" +#include "clang/AST/ODRHash.h" #include "clang/AST/OpenMPClause.h" #include "clang/AST/PrettyDeclStackTrace.h" #include "clang/Basic/SourceManager.h" @@ -40,11 +41,14 @@ namespace clang { serialization::DeclCode Code; unsigned AbbrevToUse; + bool GeneratingReducedBMI = false; + public: ASTDeclWriter(ASTWriter &Writer, ASTContext &Context, - ASTWriter::RecordDataImpl &Record) + ASTWriter::RecordDataImpl &Record, bool GeneratingReducedBMI) : Writer(Writer), Context(Context), Record(Writer, Record), - Code((serialization::DeclCode)0), 
AbbrevToUse(0) {} + Code((serialization::DeclCode)0), AbbrevToUse(0), + GeneratingReducedBMI(GeneratingReducedBMI) {} uint64_t Emit(Decl *D) { if (!Code) @@ -270,6 +274,27 @@ namespace clang { }; } +bool clang::CanElideDeclDef(const Decl *D) { + if (auto *FD = dyn_cast(D)) { + if (FD->isInlined() || FD->isConstexpr()) + return false; + + if (FD->isDependentContext()) + return false; + } + + if (auto *VD = dyn_cast(D)) { + if (!VD->getDeclContext()->getRedeclContext()->isFileContext() || + VD->isInline() || VD->isConstexpr() || isa(VD)) + return false; + + if (VD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation) + return false; + } + + return true; +} + void ASTDeclWriter::Visit(Decl *D) { DeclVisitor::Visit(D); @@ -285,9 +310,12 @@ void ASTDeclWriter::Visit(Decl *D) { // have been written. We want it last because we will not read it back when // retrieving it from the AST, we'll just lazily set the offset. if (auto *FD = dyn_cast(D)) { - Record.push_back(FD->doesThisDeclarationHaveABody()); - if (FD->doesThisDeclarationHaveABody()) - Record.AddFunctionDefinition(FD); + if (!GeneratingReducedBMI || !CanElideDeclDef(FD)) { + Record.push_back(FD->doesThisDeclarationHaveABody()); + if (FD->doesThisDeclarationHaveABody()) + Record.AddFunctionDefinition(FD); + } else + Record.push_back(0); } // Similar to FunctionDecls, handle VarDecl's initializer here and write it @@ -295,7 +323,10 @@ void ASTDeclWriter::Visit(Decl *D) { // we have finished recursive deserialization, because it can recursively // refer back to the variable. if (auto *VD = dyn_cast(D)) { - Record.AddVarDeclInit(VD); + if (!GeneratingReducedBMI || !CanElideDeclDef(VD)) + Record.AddVarDeclInit(VD); + else + Record.push_back(0); } // And similarly for FieldDecls. 
We already serialized whether there is a @@ -2729,7 +2760,7 @@ void ASTWriter::WriteDecl(ASTContext &Context, Decl *D) { assert(ID >= FirstDeclID && "invalid decl ID"); RecordData Record; - ASTDeclWriter W(*this, Context, Record); + ASTDeclWriter W(*this, Context, Record, GeneratingReducedBMI); // Build a record for this declaration W.Visit(D); diff --git a/clang/lib/Serialization/GeneratePCH.cpp b/clang/lib/Serialization/GeneratePCH.cpp index cf8084333811f1..2b511b2d5a90a2 100644 --- a/clang/lib/Serialization/GeneratePCH.cpp +++ b/clang/lib/Serialization/GeneratePCH.cpp @@ -12,9 +12,11 @@ //===----------------------------------------------------------------------===// #include "clang/AST/ASTContext.h" +#include "clang/Frontend/FrontendDiagnostic.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/Preprocessor.h" #include "clang/Sema/SemaConsumer.h" +#include "clang/Serialization/ASTReader.h" #include "clang/Serialization/ASTWriter.h" #include "llvm/Bitstream/BitstreamWriter.h" @@ -25,11 +27,12 @@ PCHGenerator::PCHGenerator( StringRef OutputFile, StringRef isysroot, std::shared_ptr Buffer, ArrayRef> Extensions, bool AllowASTWithErrors, bool IncludeTimestamps, - bool BuildingImplicitModule, bool ShouldCacheASTInMemory) + bool BuildingImplicitModule, bool ShouldCacheASTInMemory, + bool GeneratingReducedBMI) : PP(PP), OutputFile(OutputFile), isysroot(isysroot.str()), SemaPtr(nullptr), Buffer(std::move(Buffer)), Stream(this->Buffer->Data), Writer(Stream, this->Buffer->Data, ModuleCache, Extensions, - IncludeTimestamps, BuildingImplicitModule), + IncludeTimestamps, BuildingImplicitModule, GeneratingReducedBMI), AllowASTWithErrors(AllowASTWithErrors), ShouldCacheASTInMemory(ShouldCacheASTInMemory) { this->Buffer->IsComplete = false; @@ -78,3 +81,33 @@ ASTMutationListener *PCHGenerator::GetASTMutationListener() { ASTDeserializationListener *PCHGenerator::GetASTDeserializationListener() { return &Writer; } + +ReducedBMIGenerator::ReducedBMIGenerator(const 
Preprocessor &PP, + InMemoryModuleCache &ModuleCache, + StringRef OutputFile, + std::shared_ptr Buffer, + bool IncludeTimestamps) + : PCHGenerator( + PP, ModuleCache, OutputFile, llvm::StringRef(), Buffer, + /*Extensions=*/ArrayRef>(), + /*AllowASTWithErrors*/ false, /*IncludeTimestamps=*/IncludeTimestamps, + /*BuildingImplicitModule=*/false, /*ShouldCacheASTInMemory=*/false, + /*GeneratingReducedBMI=*/true) {} + +void ReducedBMIGenerator::HandleTranslationUnit(ASTContext &Ctx) { + PCHGenerator::HandleTranslationUnit(Ctx); + + if (!isComplete()) + return; + + std::error_code EC; + auto OS = std::make_unique(getOutputFile(), EC); + if (EC) { + getDiagnostics().Report(diag::err_fe_unable_to_open_output) + << getOutputFile() << EC.message() << "\n"; + return; + } + + *OS << getBufferPtr()->Data; + OS->flush(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp index 01b191ab0eeaf4..287f6a52870056 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp @@ -253,6 +253,19 @@ class TrivialFunctionAnalysisVisitor return true; } + template + bool WithCachedResult(const Stmt *S, CheckFunction Function) { + // If the statement isn't in the cache, conservatively assume that + // it's not trivial until analysis completes. Insert false to the cache + // first to avoid infinite recursion. + auto [It, IsNew] = Cache.insert(std::make_pair(S, false)); + if (!IsNew) + return It->second; + bool Result = Function(); + Cache[S] = Result; + return Result; + } + public: using CacheTy = TrivialFunctionAnalysis::CacheTy; @@ -267,7 +280,7 @@ class TrivialFunctionAnalysisVisitor bool VisitCompoundStmt(const CompoundStmt *CS) { // A compound statement is allowed as long each individual sub-statement // is trivial. 
- return VisitChildren(CS); + return WithCachedResult(CS, [&]() { return VisitChildren(CS); }); } bool VisitReturnStmt(const ReturnStmt *RS) { @@ -279,17 +292,36 @@ class TrivialFunctionAnalysisVisitor bool VisitDeclStmt(const DeclStmt *DS) { return VisitChildren(DS); } bool VisitDoStmt(const DoStmt *DS) { return VisitChildren(DS); } - bool VisitIfStmt(const IfStmt *IS) { return VisitChildren(IS); } + bool VisitIfStmt(const IfStmt *IS) { + return WithCachedResult(IS, [&]() { return VisitChildren(IS); }); + } + bool VisitForStmt(const ForStmt *FS) { + return WithCachedResult(FS, [&]() { return VisitChildren(FS); }); + } + bool VisitCXXForRangeStmt(const CXXForRangeStmt *FS) { + return WithCachedResult(FS, [&]() { return VisitChildren(FS); }); + } + bool VisitWhileStmt(const WhileStmt *WS) { + return WithCachedResult(WS, [&]() { return VisitChildren(WS); }); + } bool VisitSwitchStmt(const SwitchStmt *SS) { return VisitChildren(SS); } bool VisitCaseStmt(const CaseStmt *CS) { return VisitChildren(CS); } bool VisitDefaultStmt(const DefaultStmt *DS) { return VisitChildren(DS); } bool VisitUnaryOperator(const UnaryOperator *UO) { // Operator '*' and '!' are allowed as long as the operand is trivial. - if (UO->getOpcode() == UO_Deref || UO->getOpcode() == UO_AddrOf || - UO->getOpcode() == UO_LNot) + auto op = UO->getOpcode(); + if (op == UO_Deref || op == UO_AddrOf || op == UO_LNot) return Visit(UO->getSubExpr()); + if (UO->isIncrementOp() || UO->isDecrementOp()) { + // Allow increment or decrement of a POD type. + if (auto *RefExpr = dyn_cast(UO->getSubExpr())) { + if (auto *Decl = dyn_cast(RefExpr->getDecl())) + return Decl->isLocalVarDeclOrParm() && + Decl->getType().isPODType(Decl->getASTContext()); + } + } // Other operators are non-trivial. 
return false; } @@ -304,22 +336,6 @@ class TrivialFunctionAnalysisVisitor return VisitChildren(CO); } - bool VisitDeclRefExpr(const DeclRefExpr *DRE) { - if (auto *decl = DRE->getDecl()) { - if (isa(decl)) - return true; - if (isa(decl)) - return true; - if (auto *VD = dyn_cast(decl)) { - if (VD->hasConstantInitialization() && VD->getEvaluatedValue()) - return true; - auto *Init = VD->getInit(); - return !Init || Visit(Init); - } - } - return false; - } - bool VisitAtomicExpr(const AtomicExpr *E) { return VisitChildren(E); } bool VisitStaticAssertDecl(const StaticAssertDecl *SAD) { @@ -436,6 +452,11 @@ class TrivialFunctionAnalysisVisitor return true; } + bool VisitDeclRefExpr(const DeclRefExpr *DRE) { + // The use of a variable is trivial. + return true; + } + // Constant literal expressions are always trivial bool VisitIntegerLiteral(const IntegerLiteral *E) { return true; } bool VisitFloatingLiteral(const FloatingLiteral *E) { return true; } @@ -449,7 +470,7 @@ class TrivialFunctionAnalysisVisitor } private: - CacheTy Cache; + CacheTy &Cache; }; bool TrivialFunctionAnalysis::isTrivialImpl( @@ -474,4 +495,17 @@ bool TrivialFunctionAnalysis::isTrivialImpl( return Result; } +bool TrivialFunctionAnalysis::isTrivialImpl( + const Stmt *S, TrivialFunctionAnalysis::CacheTy &Cache) { + // If the statement isn't in the cache, conservatively assume that + // it's not trivial until analysis completes. Unlike a function case, + // we don't insert an entry into the cache until Visit returns + // since Visit* functions themselves make use of the cache. 
+ + TrivialFunctionAnalysisVisitor V(Cache); + bool Result = V.Visit(S); + assert(Cache.contains(S) && "Top-level statement not properly cached!"); + return Result; +} + } // namespace clang diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h index e07cd31395747d..9ed8e7cab6abb9 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h @@ -11,6 +11,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PointerUnion.h" #include namespace clang { @@ -19,6 +20,7 @@ class CXXMethodDecl; class CXXRecordDecl; class Decl; class FunctionDecl; +class Stmt; class Type; // Ref-countability of a type is implicitly defined by Ref and RefPtr @@ -71,14 +73,17 @@ class TrivialFunctionAnalysis { public: /// \returns true if \p D is a "trivial" function. bool isTrivial(const Decl *D) const { return isTrivialImpl(D, TheCache); } + bool isTrivial(const Stmt *S) const { return isTrivialImpl(S, TheCache); } private: friend class TrivialFunctionAnalysisVisitor; - using CacheTy = llvm::DenseMap; + using CacheTy = + llvm::DenseMap, bool>; mutable CacheTy TheCache{}; static bool isTrivialImpl(const Decl *D, CacheTy &Cache); + static bool isTrivialImpl(const Stmt *S, CacheTy &Cache); }; } // namespace clang diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp index 5a72f53b12edaa..6036ad58cf253c 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp @@ -26,28 +26,6 @@ using namespace ento; namespace { -// for ( int a = ...) ... true -// for ( int a : ...) ... true -// if ( int* a = ) ... true -// anything else ... 
false -bool isDeclaredInForOrIf(const VarDecl *Var) { - assert(Var); - auto &ASTCtx = Var->getASTContext(); - auto parent = ASTCtx.getParents(*Var); - - if (parent.size() == 1) { - if (auto *DS = parent.begin()->get()) { - DynTypedNodeList grandParent = ASTCtx.getParents(*DS); - if (grandParent.size() == 1) { - return grandParent.begin()->get() || - grandParent.begin()->get() || - grandParent.begin()->get(); - } - } - } - return false; -} - // FIXME: should be defined by anotations in the future bool isRefcountedStringsHack(const VarDecl *V) { assert(V); @@ -143,6 +121,11 @@ class UncountedLocalVarsChecker // want to visit those, so we make our own RecursiveASTVisitor. struct LocalVisitor : public RecursiveASTVisitor { const UncountedLocalVarsChecker *Checker; + + TrivialFunctionAnalysis TFA; + + using Base = RecursiveASTVisitor; + explicit LocalVisitor(const UncountedLocalVarsChecker *Checker) : Checker(Checker) { assert(Checker); @@ -155,6 +138,36 @@ class UncountedLocalVarsChecker Checker->visitVarDecl(V); return true; } + + bool TraverseIfStmt(IfStmt *IS) { + if (!TFA.isTrivial(IS)) + return Base::TraverseIfStmt(IS); + return true; + } + + bool TraverseForStmt(ForStmt *FS) { + if (!TFA.isTrivial(FS)) + return Base::TraverseForStmt(FS); + return true; + } + + bool TraverseCXXForRangeStmt(CXXForRangeStmt *FRS) { + if (!TFA.isTrivial(FRS)) + return Base::TraverseCXXForRangeStmt(FRS); + return true; + } + + bool TraverseWhileStmt(WhileStmt *WS) { + if (!TFA.isTrivial(WS)) + return Base::TraverseWhileStmt(WS); + return true; + } + + bool TraverseCompoundStmt(CompoundStmt *CS) { + if (!TFA.isTrivial(CS)) + return Base::TraverseCompoundStmt(CS); + return true; + } }; LocalVisitor visitor(this); @@ -189,18 +202,16 @@ class UncountedLocalVarsChecker dyn_cast_or_null(Ref->getFoundDecl())) { const auto *MaybeGuardianArgType = MaybeGuardian->getType().getTypePtr(); - if (!MaybeGuardianArgType) - return; - const CXXRecordDecl *const MaybeGuardianArgCXXRecord = - 
MaybeGuardianArgType->getAsCXXRecordDecl(); - if (!MaybeGuardianArgCXXRecord) - return; - - if (MaybeGuardian->isLocalVarDecl() && - (isRefCounted(MaybeGuardianArgCXXRecord) || - isRefcountedStringsHack(MaybeGuardian)) && - isGuardedScopeEmbeddedInGuardianScope(V, MaybeGuardian)) { - return; + if (MaybeGuardianArgType) { + const CXXRecordDecl *const MaybeGuardianArgCXXRecord = + MaybeGuardianArgType->getAsCXXRecordDecl(); + if (MaybeGuardianArgCXXRecord) { + if (MaybeGuardian->isLocalVarDecl() && + (isRefCounted(MaybeGuardianArgCXXRecord) || + isRefcountedStringsHack(MaybeGuardian)) && + isGuardedScopeEmbeddedInGuardianScope(V, MaybeGuardian)) + return; + } } // Parameters are guaranteed to be safe for the duration of the call @@ -219,9 +230,6 @@ class UncountedLocalVarsChecker if (!V->isLocalVarDecl()) return true; - if (isDeclaredInForOrIf(V)) - return true; - return false; } diff --git a/clang/test/AST/Interp/complex.c b/clang/test/AST/Interp/complex.c index b07d0241da12d6..c9c2efb5974531 100644 --- a/clang/test/AST/Interp/complex.c +++ b/clang/test/AST/Interp/complex.c @@ -1,9 +1,6 @@ // RUN: %clang_cc1 -fexperimental-new-constant-interpreter -verify=expected,both -Wno-unused-value %s // RUN: %clang_cc1 -verify=ref,both -Wno-unused-value %s -// expected-no-diagnostics -// ref-no-diagnostics - void blah() { __complex__ unsigned xx; __complex__ signed yy; @@ -12,3 +9,8 @@ void blah() { /// The following line calls into the constant interpreter. 
result = xx * yy; } + + +_Static_assert((0.0 + 0.0j) == (0.0 + 0.0j), ""); +_Static_assert((0.0 + 0.0j) != (0.0 + 0.0j), ""); // both-error {{static assertion}} \ + // both-note {{evaluates to}} diff --git a/clang/test/AST/Interp/complex.cpp b/clang/test/AST/Interp/complex.cpp index 8acce7b734d85a..6a42afc68d26c7 100644 --- a/clang/test/AST/Interp/complex.cpp +++ b/clang/test/AST/Interp/complex.cpp @@ -266,3 +266,50 @@ namespace Builtin { constexpr _Complex float C = __builtin_complex(10.0f, 20.0); // both-error {{arguments are of different types}} } + +namespace Cmp { + static_assert((0.0 + 0.0j) == (0.0 + 0.0j)); + static_assert((0.0 + 0.0j) != (0.0 + 0.0j)); // both-error {{static assertion}} \ + // both-note {{evaluates to}} + + static_assert((0.0 + 0.0j) == 0.0); + static_assert(0.0 == (0.0 + 0.0j)); + static_assert(0.0 == 0.0j); + static_assert((0.0 + 1.0j) != 0.0); + static_assert(1.0 != (0.0 + 0.0j)); + static_assert(0.0 != 1.0j); + + // Walk around the complex plane stepping between angular differences and + // equality. 
+ static_assert((1.0 + 0.0j) == (0.0 + 0.0j)); // both-error {{static assertion}} \ + // both-note {{evaluates to}} + static_assert((1.0 + 0.0j) == (1.0 + 0.0j)); + static_assert((1.0 + 1.0j) == (1.0 + 0.0j)); // both-error {{static assertion}} \ + // both-note {{evaluates to}} + static_assert((1.0 + 1.0j) == (1.0 + 1.0j)); + static_assert((0.0 + 1.0j) == (1.0 + 1.0j)); // both-error {{static assertion}} \ + // both-note {{evaluates to}} + static_assert((0.0 + 1.0j) == (0.0 + 1.0j)); + static_assert((-1.0 + 1.0j) == (0.0 + 1.0j)); // both-error {{static assertion}} \ + // both-note {{evaluates to}} + static_assert((-1.0 + 1.0j) == (-1.0 + 1.0j)); + static_assert((-1.0 + 0.0j) == (-1.0 + 1.0j)); // both-error {{static assertion}} \ + // both-note {{evaluates to}} + static_assert((-1.0 + 0.0j) == (-1.0 + 0.0j)); + static_assert((-1.0 - 1.0j) == (-1.0 + 0.0j)); // both-error {{static assertion}} \ + // both-note {{evaluates to}} + static_assert((-1.0 - 1.0j) == (-1.0 - 1.0j)); + static_assert((0.0 - 1.0j) == (-1.0 - 1.0j)); // both-error {{static assertion}} \ + // both-note {{evaluates to}} + static_assert((0.0 - 1.0j) == (0.0 - 1.0j)); + static_assert((1.0 - 1.0j) == (0.0 - 1.0j)); // both-error {{static assertion}} \ + // both-note {{evaluates to}} + static_assert((1.0 - 1.0j) == (1.0 - 1.0j)); + + /// Make sure these are rejected before reaching the constexpr interpreter. 
+ static_assert((0.0 + 0.0j) & (0.0 + 0.0j)); // both-error {{invalid operands to binary expression}} + static_assert((0.0 + 0.0j) | (0.0 + 0.0j)); // both-error {{invalid operands to binary expression}} + static_assert((0.0 + 0.0j) < (0.0 + 0.0j)); // both-error {{invalid operands to binary expression}} + static_assert((0.0 + 0.0j) > (0.0 + 0.0j)); // both-error {{invalid operands to binary expression}} + static_assert((0.0 + 0.0j) ^ (0.0 + 0.0j)); // both-error {{invalid operands to binary expression}} +} diff --git a/clang/test/AST/Interp/cxx23.cpp b/clang/test/AST/Interp/cxx23.cpp index f1df936a5abe74..127b58915127cf 100644 --- a/clang/test/AST/Interp/cxx23.cpp +++ b/clang/test/AST/Interp/cxx23.cpp @@ -1,82 +1,58 @@ -// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -verify=ref20,all %s +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -verify=ref20,all,all-20 %s // RUN: %clang_cc1 -std=c++23 -fsyntax-only -fcxx-exceptions -verify=ref23,all %s -// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -verify=expected20,all %s -fexperimental-new-constant-interpreter +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -verify=expected20,all,all-20 %s -fexperimental-new-constant-interpreter // RUN: %clang_cc1 -std=c++23 -fsyntax-only -fcxx-exceptions -verify=expected23,all %s -fexperimental-new-constant-interpreter /// FIXME: The new interpreter is missing all the 'control flows through...' diagnostics. 
constexpr int f(int n) { // ref20-error {{constexpr function never produces a constant expression}} \ - // ref23-error {{constexpr function never produces a constant expression}} \ - // expected20-error {{constexpr function never produces a constant expression}} \ - // expected23-error {{constexpr function never produces a constant expression}} + // expected20-error {{constexpr function never produces a constant expression}} static const int m = n; // ref20-note {{control flows through the definition of a static variable}} \ // ref20-warning {{is a C++23 extension}} \ - // ref23-note {{control flows through the definition of a static variable}} \ // expected20-warning {{is a C++23 extension}} \ // expected20-note {{declared here}} \ - // expected23-note {{declared here}} - return m; // expected20-note {{initializer of 'm' is not a constant expression}} \ - // expected23-note {{initializer of 'm' is not a constant expression}} + return m; // expected20-note {{initializer of 'm' is not a constant expression}} } constexpr int g(int n) { // ref20-error {{constexpr function never produces a constant expression}} \ - // ref23-error {{constexpr function never produces a constant expression}} \ - // expected20-error {{constexpr function never produces a constant expression}} \ - // expected23-error {{constexpr function never produces a constant expression}} + // expected20-error {{constexpr function never produces a constant expression}} thread_local const int m = n; // ref20-note {{control flows through the definition of a thread_local variable}} \ // ref20-warning {{is a C++23 extension}} \ - // ref23-note {{control flows through the definition of a thread_local variable}} \ // expected20-warning {{is a C++23 extension}} \ - // expected20-note {{declared here}} \ - // expected23-note {{declared here}} - return m; // expected20-note {{initializer of 'm' is not a constant expression}} \ - // expected23-note {{initializer of 'm' is not a constant expression}} + // 
expected20-note {{declared here}} + return m; // expected20-note {{initializer of 'm' is not a constant expression}} } constexpr int c_thread_local(int n) { // ref20-error {{constexpr function never produces a constant expression}} \ - // ref23-error {{constexpr function never produces a constant expression}} \ - // expected20-error {{constexpr function never produces a constant expression}} \ - // expected23-error {{constexpr function never produces a constant expression}} + // expected20-error {{constexpr function never produces a constant expression}} static _Thread_local int m = 0; // ref20-note {{control flows through the definition of a thread_local variable}} \ // ref20-warning {{is a C++23 extension}} \ - // ref23-note {{control flows through the definition of a thread_local variable}} \ // expected20-warning {{is a C++23 extension}} \ - // expected20-note {{declared here}} \ - // expected23-note {{declared here}} - return m; // expected20-note {{read of non-const variable}} \ - // expected23-note {{read of non-const variable}} + // expected20-note {{declared here}} + return m; // expected20-note {{read of non-const variable}} } constexpr int gnu_thread_local(int n) { // ref20-error {{constexpr function never produces a constant expression}} \ - // ref23-error {{constexpr function never produces a constant expression}} \ - // expected20-error {{constexpr function never produces a constant expression}} \ - // expected23-error {{constexpr function never produces a constant expression}} + // expected20-error {{constexpr function never produces a constant expression}} static __thread int m = 0; // ref20-note {{control flows through the definition of a thread_local variable}} \ // ref20-warning {{is a C++23 extension}} \ - // ref23-note {{control flows through the definition of a thread_local variable}} \ // expected20-warning {{is a C++23 extension}} \ - // expected20-note {{declared here}} \ - // expected23-note {{declared here}} - return m; // expected20-note 
{{read of non-const variable}} \ - // expected23-note {{read of non-const variable}} + // expected20-note {{declared here}} + return m; // expected20-note {{read of non-const variable}} } -constexpr int h(int n) { // ref20-error {{constexpr function never produces a constant expression}} \ - // ref23-error {{constexpr function never produces a constant expression}} +constexpr int h(int n) { // ref20-error {{constexpr function never produces a constant expression}} static const int m = n; // ref20-note {{control flows through the definition of a static variable}} \ // ref20-warning {{is a C++23 extension}} \ - // ref23-note {{control flows through the definition of a static variable}} \ // expected20-warning {{is a C++23 extension}} return &m - &m; } -constexpr int i(int n) { // ref20-error {{constexpr function never produces a constant expression}} \ - // ref23-error {{constexpr function never produces a constant expression}} +constexpr int i(int n) { // ref20-error {{constexpr function never produces a constant expression}} thread_local const int m = n; // ref20-note {{control flows through the definition of a thread_local variable}} \ // ref20-warning {{is a C++23 extension}} \ - // ref23-note {{control flows through the definition of a thread_local variable}} \ // expected20-warning {{is a C++23 extension}} return &m - &m; } @@ -132,8 +108,9 @@ namespace StaticOperators { static_assert(f2() == 3); struct S1 { - constexpr S1() { // all-error {{never produces a constant expression}} - throw; // all-note 2{{not valid in a constant expression}} + constexpr S1() { // all-20-error {{never produces a constant expression}} + throw; // all-note {{not valid in a constant expression}} \ + // all-20-note {{not valid in a constant expression}} } static constexpr int operator()() { return 3; } // ref20-warning {{C++23 extension}} \ // expected20-warning {{C++23 extension}} diff --git a/clang/test/Analysis/Checkers/WebKit/mock-types.h 
b/clang/test/Analysis/Checkers/WebKit/mock-types.h index e2b3401d407392..aab99197dfa49e 100644 --- a/clang/test/Analysis/Checkers/WebKit/mock-types.h +++ b/clang/test/Analysis/Checkers/WebKit/mock-types.h @@ -62,6 +62,8 @@ struct RefCountable { static Ref create(); void ref() {} void deref() {} + void method(); + int trivial() { return 123; } }; template T *downcast(T *t) { return t; } diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp b/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp index 0fcd3b21376caf..00673e91f471ea 100644 --- a/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp +++ b/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp @@ -2,6 +2,8 @@ #include "mock-types.h" +void someFunction(); + namespace raw_ptr { void foo() { RefCountable *bar; @@ -16,6 +18,13 @@ void foo_ref() { RefCountable automatic; RefCountable &bar = automatic; // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + someFunction(); + bar.method(); +} + +void foo_ref_trivial() { + RefCountable automatic; + RefCountable &bar = automatic; } void bar_ref(RefCountable &) {} @@ -32,6 +41,8 @@ void foo2() { // missing embedded scope here RefCountable *bar = foo.get(); // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + someFunction(); + bar->method(); } void foo3() { @@ -47,11 +58,35 @@ void foo4() { { RefCountable *bar = foo.get(); } } } + +void foo5() { + RefPtr foo; + auto* bar = foo.get(); + bar->trivial(); +} + +void foo6() { + RefPtr foo; + auto* bar = foo.get(); + // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + bar->method(); +} + +struct SelfReferencingStruct { + SelfReferencingStruct* ptr; + RefCountable* obj { nullptr }; +}; + +void foo7(RefCountable* obj) { + SelfReferencingStruct bar = { &bar, obj }; + bar.obj->method(); +} + } // namespace 
guardian_scopes namespace auto_keyword { class Foo { - RefCountable *provide_ref_ctnbl() { return nullptr; } + RefCountable *provide_ref_ctnbl(); void evil_func() { RefCountable *bar = provide_ref_ctnbl(); @@ -62,13 +97,24 @@ class Foo { // expected-warning@-1{{Local variable 'baz2' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} [[clang::suppress]] auto *baz_suppressed = provide_ref_ctnbl(); // no-warning } + + void func() { + RefCountable *bar = provide_ref_ctnbl(); + // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + if (bar) + bar->method(); + } }; } // namespace auto_keyword namespace guardian_casts { void foo1() { RefPtr foo; - { RefCountable *bar = downcast(foo.get()); } + { + RefCountable *bar = downcast(foo.get()); + bar->method(); + } + foo->method(); } void foo2() { @@ -76,6 +122,7 @@ void foo2() { { RefCountable *bar = static_cast(downcast(foo.get())); + someFunction(); } } } // namespace guardian_casts @@ -83,7 +130,11 @@ void foo2() { namespace guardian_ref_conversion_operator { void foo() { Ref rc; - { RefCountable &rr = rc; } + { + RefCountable &rr = rc; + rr.method(); + someFunction(); + } } } // namespace guardian_ref_conversion_operator @@ -92,9 +143,47 @@ RefCountable *provide_ref_ctnbl() { return nullptr; } void foo() { // no warnings - if (RefCountable *a = provide_ref_ctnbl()) { } - for (RefCountable *a = provide_ref_ctnbl(); a != nullptr;) { } + if (RefCountable *a = provide_ref_ctnbl()) + a->trivial(); + for (RefCountable *b = provide_ref_ctnbl(); b != nullptr;) + b->trivial(); RefCountable *array[1]; - for (RefCountable *a : array) { } + for (RefCountable *c : array) + c->trivial(); + while (RefCountable *d = provide_ref_ctnbl()) + d->trivial(); + do { + RefCountable *e = provide_ref_ctnbl(); + e->trivial(); + } while (1); + someFunction(); } + +void bar() { + if (RefCountable *a = provide_ref_ctnbl()) { + // expected-warning@-1{{Local variable 'a' is 
uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + a->method(); + } + for (RefCountable *b = provide_ref_ctnbl(); b != nullptr;) { + // expected-warning@-1{{Local variable 'b' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + b->method(); + } + RefCountable *array[1]; + for (RefCountable *c : array) { + // expected-warning@-1{{Local variable 'c' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + c->method(); + } + + while (RefCountable *d = provide_ref_ctnbl()) { + // expected-warning@-1{{Local variable 'd' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + d->method(); + } + do { + RefCountable *e = provide_ref_ctnbl(); + // expected-warning@-1{{Local variable 'e' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + e->method(); + } while (1); + someFunction(); +} + } // namespace ignore_for_if diff --git a/clang/test/CXX/basic/basic.link/p10-ex2.cpp b/clang/test/CXX/basic/basic.link/p10-ex2.cpp index 95fdb56f78d625..e985ce37a93495 100644 --- a/clang/test/CXX/basic/basic.link/p10-ex2.cpp +++ b/clang/test/CXX/basic/basic.link/p10-ex2.cpp @@ -5,7 +5,9 @@ // // RUN: %clang_cc1 -std=c++20 M.cpp -fsyntax-only -DTEST_INTERFACE -verify // RUN: %clang_cc1 -std=c++20 M.cpp -emit-module-interface -o M.pcm +// RUN: %clang_cc1 -std=c++20 M.cpp -emit-reduced-module-interface -o M.reduced.pcm // RUN: %clang_cc1 -std=c++20 useM.cpp -fsyntax-only -fmodule-file=M=M.pcm -verify +// RUN: %clang_cc1 -std=c++20 useM.cpp -fsyntax-only -fmodule-file=M=M.reduced.pcm -verify //--- decls.h int f(); // #1, attached to the global module diff --git a/clang/test/CXX/basic/basic.lookup/basic.lookup.argdep/p4-friend-in-reachable-class.cpp b/clang/test/CXX/basic/basic.lookup/basic.lookup.argdep/p4-friend-in-reachable-class.cpp index 638057cbd681f0..3c120654f2ee5d 100644 --- a/clang/test/CXX/basic/basic.lookup/basic.lookup.argdep/p4-friend-in-reachable-class.cpp +++ 
b/clang/test/CXX/basic/basic.lookup/basic.lookup.argdep/p4-friend-in-reachable-class.cpp @@ -8,7 +8,10 @@ // RUN: split-file %s %t // // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/Friend-in-reachable-class.cppm -o %t/X.pcm -// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/Friend-in-reachable-class.cppm \ +// RUN: -o %t/X.reduced.pcm +// RUN: %clang_cc1 -std=c++20 -fmodule-file=X=%t/X.pcm %t/Use.cpp -verify -fsyntax-only +// RUN: %clang_cc1 -std=c++20 -fmodule-file=X=%t/X.reduced.pcm %t/Use.cpp -verify -fsyntax-only // //--- Friend-in-reachable-class.cppm module; diff --git a/clang/test/CXX/class/class.compare/class.compare.default/p3.cpp b/clang/test/CXX/class/class.compare/class.compare.default/p3.cpp index 166bd97e2731cb..c73eb0dee99515 100644 --- a/clang/test/CXX/class/class.compare/class.compare.default/p3.cpp +++ b/clang/test/CXX/class/class.compare/class.compare.default/p3.cpp @@ -1,8 +1,8 @@ // This test is for the [class.compare.default]p3 added by P2002R0 -// Also covers modifications made by P2448R2 and extension warnings +// Also covers modifications made by P2448R2 -// RUN: %clang_cc1 -std=c++2a -verify %s -// RUN: %clang_cc1 -std=c++2a -Wc++23-default-comp-relaxed-constexpr -verify=expected,extension %s +// RUN: %clang_cc1 -std=c++2a -verify=expected,cxx2a %s +// RUN: %clang_cc1 -std=c++23 -verify=expected %s namespace std { struct strong_ordering { @@ -82,10 +82,12 @@ struct TestB { }; struct C { - friend bool operator==(const C&, const C&); // expected-note {{previous}} extension-note 2{{non-constexpr comparison function declared here}} + friend bool operator==(const C&, const C&); // expected-note {{previous}} \ + // cxx2a-note 2{{declared here}} friend bool operator!=(const C&, const C&) = default; // expected-note {{previous}} - friend std::strong_ordering operator<=>(const C&, const C&); // expected-note {{previous}} extension-note 
2{{non-constexpr comparison function declared here}} + friend std::strong_ordering operator<=>(const C&, const C&); // expected-note {{previous}} \ + // cxx2a-note 2{{declared here}} friend bool operator<(const C&, const C&) = default; // expected-note {{previous}} friend bool operator<=(const C&, const C&) = default; // expected-note {{previous}} friend bool operator>(const C&, const C&) = default; // expected-note {{previous}} @@ -129,23 +131,23 @@ struct TestD { struct E { A a; - C c; // extension-note 2{{non-constexpr comparison function would be used to compare member 'c'}} + C c; // cxx2a-note 2{{non-constexpr comparison function would be used to compare member 'c'}} A b; - friend constexpr bool operator==(const E&, const E&) = default; // extension-warning {{declared constexpr but invokes a non-constexpr comparison function is a C++23 extension}} + friend constexpr bool operator==(const E&, const E&) = default; // cxx2a-error {{cannot be declared constexpr}} friend constexpr bool operator!=(const E&, const E&) = default; - friend constexpr std::strong_ordering operator<=>(const E&, const E&) = default; // extension-warning {{declared constexpr but invokes a non-constexpr comparison function is a C++23 extension}} + friend constexpr std::strong_ordering operator<=>(const E&, const E&) = default; // cxx2a-error {{cannot be declared constexpr}} friend constexpr bool operator<(const E&, const E&) = default; friend constexpr bool operator<=(const E&, const E&) = default; friend constexpr bool operator>(const E&, const E&) = default; friend constexpr bool operator>=(const E&, const E&) = default; }; -struct E2 : A, C { // extension-note 2{{non-constexpr comparison function would be used to compare base class 'C'}} - friend constexpr bool operator==(const E2&, const E2&) = default; // extension-warning {{declared constexpr but invokes a non-constexpr comparison function is a C++23 extension}} +struct E2 : A, C { // cxx2a-note 2{{non-constexpr comparison function 
would be used to compare base class 'C'}} + friend constexpr bool operator==(const E2&, const E2&) = default; // cxx2a-error {{cannot be declared constexpr}} friend constexpr bool operator!=(const E2&, const E2&) = default; - friend constexpr std::strong_ordering operator<=>(const E2&, const E2&) = default; // extension-warning {{declared constexpr but invokes a non-constexpr comparison function is a C++23 extension}} + friend constexpr std::strong_ordering operator<=>(const E2&, const E2&) = default; // cxx2a-error {{cannot be declared constexpr}} friend constexpr bool operator<(const E2&, const E2&) = default; friend constexpr bool operator<=(const E2&, const E2&) = default; friend constexpr bool operator>(const E2&, const E2&) = default; @@ -153,14 +155,14 @@ struct E2 : A, C { // extension-note 2{{non-constexpr comparison function would }; struct F { - friend bool operator==(const F&, const F&); // extension-note {{non-constexpr comparison function declared here}} - friend constexpr bool operator!=(const F&, const F&) = default; // extension-warning {{declared constexpr but invokes a non-constexpr comparison function is a C++23 extension}} - - friend std::strong_ordering operator<=>(const F&, const F&); // extension-note 4{{non-constexpr comparison function declared here}} - friend constexpr bool operator<(const F&, const F&) = default; // extension-warning {{declared constexpr but invokes a non-constexpr comparison function is a C++23 extension}} - friend constexpr bool operator<=(const F&, const F&) = default; // extension-warning {{declared constexpr but invokes a non-constexpr comparison function is a C++23 extension}} - friend constexpr bool operator>(const F&, const F&) = default; // extension-warning {{declared constexpr but invokes a non-constexpr comparison function is a C++23 extension}} - friend constexpr bool operator>=(const F&, const F&) = default; // extension-warning {{declared constexpr but invokes a non-constexpr comparison function is a C++23 
extension}} + friend bool operator==(const F&, const F&); // cxx2a-note {{declared here}} + friend constexpr bool operator!=(const F&, const F&) = default; // cxx2a-error {{cannot be declared constexpr}} + + friend std::strong_ordering operator<=>(const F&, const F&); // cxx2a-note 4{{non-constexpr comparison function declared here}} + friend constexpr bool operator<(const F&, const F&) = default; // cxx2a-error {{cannot be declared constexpr}} + friend constexpr bool operator<=(const F&, const F&) = default; // cxx2a-error {{cannot be declared constexpr}} + friend constexpr bool operator>(const F&, const F&) = default; // cxx2a-error {{cannot be declared constexpr}} + friend constexpr bool operator>=(const F&, const F&) = default; // cxx2a-error {{cannot be declared constexpr}} }; // No implicit 'constexpr' if it's not the first declaration. diff --git a/clang/test/CXX/class/class.compare/class.compare.default/p4.cpp b/clang/test/CXX/class/class.compare/class.compare.default/p4.cpp index 02cdd7f85aebfa..534c3b34d8832a 100644 --- a/clang/test/CXX/class/class.compare/class.compare.default/p4.cpp +++ b/clang/test/CXX/class/class.compare/class.compare.default/p4.cpp @@ -1,9 +1,9 @@ -// RUN: %clang_cc1 -std=c++2a -verify %s -// RUN: %clang_cc1 -std=c++2a -Wc++23-default-comp-relaxed-constexpr -verify=expected,extension %s +// RUN: %clang_cc1 -std=c++2a -verify=expected,cxx2a %s +// RUN: %clang_cc1 -std=c++23 -verify=expected %s // This test is for [class.compare.default]p3 as modified and renumbered to p4 // by P2002R0. 
-// Also covers modifications made by P2448R2 and extension warnings +// Also covers modifications made by P2448R2 namespace std { struct strong_ordering { @@ -78,13 +78,13 @@ void use_g(G g) { } struct H { - bool operator==(const H&) const; // extension-note {{non-constexpr comparison function declared here}} + bool operator==(const H&) const; // cxx2a-note {{non-constexpr comparison function declared here}} constexpr std::strong_ordering operator<=>(const H&) const { return std::strong_ordering::equal; } }; struct I { - H h; // extension-note {{non-constexpr comparison function would be used to compare member 'h'}} - constexpr std::strong_ordering operator<=>(const I&) const = default; // extension-warning {{implicit 'operator==' invokes a non-constexpr comparison function is a C++23 extension}} + H h; // cxx2a-note {{non-constexpr comparison function would be used to compare member 'h'}} + constexpr std::strong_ordering operator<=>(const I&) const = default; // cxx2a-error {{cannot be declared constexpr}} }; struct J { @@ -148,16 +148,16 @@ namespace NoInjectionIfOperatorEqualsDeclared { namespace GH61238 { template struct my_struct { - A value; // extension-note {{non-constexpr comparison function would be used to compare member 'value'}} + A value; // cxx2a-note {{non-constexpr comparison function would be used to compare member 'value'}} - constexpr friend bool operator==(const my_struct &, const my_struct &) noexcept = default; // extension-warning {{declared constexpr but invokes a non-constexpr comparison function is a C++23 extension}} + constexpr friend bool operator==(const my_struct &, const my_struct &) noexcept = default; // cxx2a-error {{cannot be declared constexpr}} }; struct non_constexpr_type { - friend bool operator==(non_constexpr_type, non_constexpr_type) noexcept { // extension-note {{non-constexpr comparison function declared here}} + friend bool operator==(non_constexpr_type, non_constexpr_type) noexcept { // cxx2a-note {{non-constexpr 
comparison function declared here}} return false; } }; -my_struct obj; // extension-note {{in instantiation of template class 'GH61238::my_struct' requested here}} +my_struct obj; // cxx2a-note {{in instantiation of template class 'GH61238::my_struct' requested here}} } diff --git a/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/dtor.cpp b/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/dtor.cpp index 7ad2e582a81268..48bc8fb426bcb1 100644 --- a/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/dtor.cpp +++ b/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/dtor.cpp @@ -58,12 +58,12 @@ namespace subobject { struct A { ~A(); }; - struct B : A { // expected-note {{here}} - constexpr ~B() {} // expected-error {{destructor cannot be declared constexpr because base class 'A' does not have a constexpr destructor}} + struct B : A { // cxx2a-note {{here}} + constexpr ~B() {} // cxx2a-error {{destructor cannot be declared constexpr because base class 'A' does not have a constexpr destructor}} }; struct C { - A a; // expected-note {{here}} - constexpr ~C() {} // expected-error {{destructor cannot be declared constexpr because data member 'a' does not have a constexpr destructor}} + A a; // cxx2a-note {{here}} + constexpr ~C() {} // cxx2a-error {{destructor cannot be declared constexpr because data member 'a' does not have a constexpr destructor}} }; struct D : A { A a; diff --git a/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p3-2b.cpp b/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p3-2b.cpp index c07502c0555b50..8cb37ae6d1cdec 100644 --- a/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p3-2b.cpp +++ b/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p3-2b.cpp @@ -14,9 +14,8 @@ constexpr int i(int n) { return m; } -constexpr int g() { // expected-error {{constexpr function never produces a constant expression}} - goto test; // expected-note {{subexpression not valid in a constant expression}} \ - // expected-warning {{use of this statement in a constexpr function is incompatible with C++ 
standards before C++23}} +constexpr int g() { + goto test; // expected-warning {{use of this statement in a constexpr function is incompatible with C++ standards before C++23}} test: return 0; } @@ -29,9 +28,8 @@ struct NonLiteral { // expected-note 2 {{'NonLiteral' is not literal}} NonLiteral() {} }; -constexpr void non_literal() { // expected-error {{constexpr function never produces a constant expression}} - NonLiteral n; // expected-note {{non-literal type 'NonLiteral' cannot be used in a constant expression}} \ - // expected-warning {{definition of a variable of non-literal type in a constexpr function is incompatible with C++ standards before C++23}} +constexpr void non_literal() { + NonLiteral n; // expected-warning {{definition of a variable of non-literal type in a constexpr function is incompatible with C++ standards before C++23}} } constexpr void non_literal2(bool b) { diff --git a/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p3.cpp b/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p3.cpp index 6214ff8006d67f..4416c825226494 100644 --- a/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p3.cpp +++ b/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p3.cpp @@ -1,6 +1,6 @@ // RUN: %clang_cc1 -fcxx-exceptions -verify=expected,beforecxx14,beforecxx20,beforecxx23 -std=c++11 %s -// RUN: %clang_cc1 -fcxx-exceptions -verify=expected,aftercxx14,beforecxx20,beforecxx23 -std=c++14 %s -// RUN: %clang_cc1 -fcxx-exceptions -verify=expected,aftercxx14,aftercxx20,beforecxx23 -std=c++20 %s +// RUN: %clang_cc1 -fcxx-exceptions -verify=expected,aftercxx14,beforecxx20,beforecxx23,cxx14_20 -std=c++14 %s +// RUN: %clang_cc1 -fcxx-exceptions -verify=expected,aftercxx14,aftercxx20,beforecxx23,cxx14_20 -std=c++20 %s // RUN: %clang_cc1 -fcxx-exceptions -verify=expected,aftercxx14,aftercxx20 -std=c++23 %s namespace N { @@ -11,7 +11,7 @@ namespace M { typedef double D; } -struct NonLiteral { // expected-note 2{{no constexpr constructors}} +struct NonLiteral { // beforecxx23-note 2{{no 
constexpr constructors}} NonLiteral() {} NonLiteral(int) {} }; @@ -43,7 +43,7 @@ struct T : SS, NonLiteral { // - its return type shall be a literal type; // Once we support P2448R2 constexpr functions will be allowd to return non-literal types // The destructor will also be allowed - constexpr NonLiteral NonLiteralReturn() const { return {}; } // expected-error {{constexpr function's return type 'NonLiteral' is not a literal type}} + constexpr NonLiteral NonLiteralReturn() const { return {}; } // beforecxx23-error {{constexpr function's return type 'NonLiteral' is not a literal type}} constexpr void VoidReturn() const { return; } // beforecxx14-error {{constexpr function's return type 'void' is not a literal type}} constexpr ~T(); // beforecxx20-error {{destructor cannot be declared constexpr}} @@ -52,7 +52,7 @@ struct T : SS, NonLiteral { // - each of its parameter types shall be a literal type; // Once we support P2448R2 constexpr functions will be allowd to have parameters of non-literal types - constexpr int NonLiteralParam(NonLiteral) const { return 0; } // expected-error {{constexpr function's 1st parameter type 'NonLiteral' is not a literal type}} + constexpr int NonLiteralParam(NonLiteral) const { return 0; } // beforecxx23-error {{constexpr function's 1st parameter type 'NonLiteral' is not a literal type}} typedef int G(NonLiteral) const; constexpr G NonLiteralParam2; // ok until definition @@ -66,7 +66,7 @@ struct T : SS, NonLiteral { // constexpr since they can't be const. 
constexpr T &operator=(const T &) = default; // beforecxx14-error {{an explicitly-defaulted copy assignment operator may not have 'const', 'constexpr' or 'volatile' qualifiers}} \ // beforecxx14-warning {{C++14}} \ - // aftercxx14-error{{defaulted definition of copy assignment operator is not constexpr}} + // cxx14_20-error{{defaulted definition of copy assignment operator cannot be marked constexpr}} }; constexpr int T::OutOfLineVirtual() const { return 0; } @@ -229,9 +229,9 @@ namespace DR1364 { return k; // ok, even though lvalue-to-rvalue conversion of a function // parameter is not allowed in a constant expression. } - int kGlobal; // expected-note {{here}} - constexpr int f() { // expected-error {{constexpr function never produces a constant expression}} - return kGlobal; // expected-note {{read of non-const}} + int kGlobal; // beforecxx23-note {{here}} + constexpr int f() { // beforecxx23-error {{constexpr function never produces a constant expression}} + return kGlobal; // beforecxx23-note {{read of non-const}} } } diff --git a/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p4.cpp b/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p4.cpp index f1f677ebfcd341..92698ec1c7387d 100644 --- a/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p4.cpp +++ b/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p4.cpp @@ -272,7 +272,7 @@ struct X { union XU1 { int a; constexpr XU1() = default; }; #ifndef CXX2A -// expected-error@-2{{not constexpr}} +// expected-error@-2{{cannot be marked constexpr}} #endif union XU2 { int a = 1; constexpr XU2() = default; }; @@ -282,7 +282,7 @@ struct XU3 { }; constexpr XU3() = default; #ifndef CXX2A - // expected-error@-2{{not constexpr}} + // expected-error@-2{{cannot be marked constexpr}} #endif }; struct XU4 { @@ -333,7 +333,7 @@ namespace CtorLookup { constexpr B(B&); }; constexpr B::B(const B&) = default; - constexpr B::B(B&) = default; // expected-error {{not constexpr}} + constexpr B::B(B&) = default; // expected-error {{cannot be marked 
constexpr}} struct C { A a; @@ -342,7 +342,7 @@ namespace CtorLookup { constexpr C(C&); }; constexpr C::C(const C&) = default; - constexpr C::C(C&) = default; // expected-error {{not constexpr}} + constexpr C::C(C&) = default; // expected-error {{cannot be marked constexpr}} } namespace PR14503 { diff --git a/clang/test/CXX/dcl.decl/dcl.fct.def/dcl.fct.def.default/p2.cpp b/clang/test/CXX/dcl.decl/dcl.fct.def/dcl.fct.def.default/p2.cpp index 5b525fc91aba1c..849594307390f4 100644 --- a/clang/test/CXX/dcl.decl/dcl.fct.def/dcl.fct.def.default/p2.cpp +++ b/clang/test/CXX/dcl.decl/dcl.fct.def/dcl.fct.def.default/p2.cpp @@ -3,7 +3,7 @@ // An explicitly-defaulted function may be declared constexpr only if it would // have been implicitly declared as constexpr. struct S1 { - constexpr S1() = default; // expected-error {{defaulted definition of default constructor is not constexpr}} + constexpr S1() = default; // expected-error {{defaulted definition of default constructor cannot be marked constexpr}} constexpr S1(const S1&) = default; constexpr S1(S1&&) = default; constexpr S1 &operator=(const S1&) const = default; // expected-error {{explicitly-defaulted copy assignment operator may not have}} @@ -18,8 +18,8 @@ struct NoCopyMove { }; struct S2 { constexpr S2() = default; - constexpr S2(const S2&) = default; // expected-error {{defaulted definition of copy constructor is not constexpr}} - constexpr S2(S2&&) = default; // expected-error {{defaulted definition of move constructor is not constexpr}} + constexpr S2(const S2&) = default; // expected-error {{defaulted definition of copy constructor cannot be marked constexpr}} + constexpr S2(S2&&) = default; // expected-error {{defaulted definition of move constructor cannot be marked}} NoCopyMove ncm; }; diff --git a/clang/test/CXX/drs/dr13xx.cpp b/clang/test/CXX/drs/dr13xx.cpp index effdc53040d0b0..d8e3b5d87bd149 100644 --- a/clang/test/CXX/drs/dr13xx.cpp +++ b/clang/test/CXX/drs/dr13xx.cpp @@ -1,8 +1,8 @@ // RUN: %clang_cc1 
-std=c++98 %s -verify=expected,cxx98-14,cxx98 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++11 %s -verify=expected,cxx11-17,cxx11-14,cxx98-14,since-cxx11,cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++14 %s -verify=expected,cxx11-17,cxx11-14,since-cxx14,cxx98-14,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++17 %s -verify=expected,cxx11-17,since-cxx14,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++20 %s -verify=expected,since-cxx14,since-cxx20,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++11 %s -verify=expected,cxx11-20,cxx11-17,cxx11-14,cxx98-14,since-cxx11,cxx11 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++14 %s -verify=expected,cxx11-20,cxx11-17,cxx11-14,since-cxx14,cxx98-14,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++17 %s -verify=expected,cxx11-20,cxx11-17,since-cxx14,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++20 %s -verify=expected,cxx11-20,since-cxx14,since-cxx20,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors // RUN: %clang_cc1 -std=c++23 %s -verify=expected,since-cxx14,since-cxx20,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors // RUN: %clang_cc1 -std=c++2c %s -verify=expected,since-cxx14,since-cxx20,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors @@ -485,11 +485,11 @@ namespace dr1358 { // dr1358: 3.1 struct B : Virt { int member; constexpr B(NonLit u) : member(u) {} - // since-cxx11-error@-1 {{constexpr constructor's 1st parameter type 'NonLit' is not a literal type}} - // since-cxx11-note@#dr1358-NonLit {{'NonLit' is not literal because it is not an aggregate and has no constexpr constructors other than copy or move constructors}} + // cxx11-20-error@-1 
{{constexpr constructor's 1st parameter type 'NonLit' is not a literal type}} + // cxx11-20-note@#dr1358-NonLit {{'NonLit' is not literal because it is not an aggregate and has no constexpr constructors other than copy or move constructors}} constexpr NonLit f(NonLit u) const { return NonLit(); } - // since-cxx11-error@-1 {{constexpr function's return type 'NonLit' is not a literal type}} - // since-cxx11-note@#dr1358-NonLit {{'NonLit' is not literal because it is not an aggregate and has no constexpr constructors other than copy or move constructors}} + // cxx11-20-error@-1 {{constexpr function's return type 'NonLit' is not a literal type}} + // cxx11-20-note@#dr1358-NonLit {{'NonLit' is not literal because it is not an aggregate and has no constexpr constructors other than copy or move constructors}} }; #endif } @@ -498,13 +498,13 @@ namespace dr1359 { // dr1359: 3.5 #if __cplusplus >= 201103L union A { constexpr A() = default; }; union B { constexpr B() = default; int a; }; // #dr1359-B - // cxx11-17-error@-1 {{defaulted definition of default constructor is not constexpr}} + // cxx11-17-error@-1 {{defaulted definition of default constructor cannot be marked constexpr before C++23}} union C { constexpr C() = default; int a, b; }; // #dr1359-C - // cxx11-17-error@-1 {{defaulted definition of default constructor is not constexpr}} + // cxx11-17-error@-1 {{defaulted definition of default constructor cannot be marked constexpr}} struct X { constexpr X() = default; union {}; }; // since-cxx11-error@-1 {{declaration does not declare anything}} struct Y { constexpr Y() = default; union { int a; }; }; // #dr1359-Y - // cxx11-17-error@-1 {{defaulted definition of default constructor is not constexpr}} + // cxx11-17-error@-1 {{defaulted definition of default constructor cannot be marked constexpr}} constexpr A a = A(); constexpr B b = B(); diff --git a/clang/test/CXX/drs/dr14xx.cpp b/clang/test/CXX/drs/dr14xx.cpp index 58a2b3a0d0275d..ed6dda731fd518 100644 --- 
a/clang/test/CXX/drs/dr14xx.cpp +++ b/clang/test/CXX/drs/dr14xx.cpp @@ -153,16 +153,16 @@ namespace dr1460 { // dr1460: 3.5 namespace Defaulted { union A { constexpr A() = default; }; union B { int n; constexpr B() = default; }; - // cxx11-17-error@-1 {{defaulted definition of default constructor is not constexpr}} + // cxx11-17-error@-1 {{defaulted definition of default constructor cannot be marked constexpr}} union C { int n = 0; constexpr C() = default; }; struct D { union {}; constexpr D() = default; }; // expected-error@-1 {{declaration does not declare anything}} struct E { union { int n; }; constexpr E() = default; }; - // cxx11-17-error@-1 {{defaulted definition of default constructor is not constexpr}} + // cxx11-17-error@-1 {{defaulted definition of default constructor cannot be marked constexpr}} struct F { union { int n = 0; }; constexpr F() = default; }; struct G { union { int n = 0; }; union { int m; }; constexpr G() = default; }; - // cxx11-17-error@-1 {{defaulted definition of default constructor is not constexpr}} + // cxx11-17-error@-1 {{defaulted definition of default constructor cannot be marked constexpr}} struct H { union { int n = 0; diff --git a/clang/test/CXX/drs/dr15xx.cpp b/clang/test/CXX/drs/dr15xx.cpp index ac503db625ba0e..195c0fa610d579 100644 --- a/clang/test/CXX/drs/dr15xx.cpp +++ b/clang/test/CXX/drs/dr15xx.cpp @@ -1,10 +1,10 @@ // RUN: %clang_cc1 -std=c++98 -triple x86_64-unknown-unknown %s -verify=expected -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx11,cxx11-14 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++14 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx11,cxx11-14,cxx14-17 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx11,since-cxx17 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 
-std=c++20 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx20,since-cxx11,since-cxx17 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++23 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx20,since-cxx11,since-cxx17 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++2c -triple x86_64-unknown-unknown %s -verify=expected,since-cxx20,since-cxx11,since-cxx17 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-unknown %s -verify=expected,cxx11-20,since-cxx11,cxx11-14 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++14 -triple x86_64-unknown-unknown %s -verify=expected,cxx11-20,since-cxx11,cxx11-14,cxx14-17 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-unknown %s -verify=expected,cxx11-20,since-cxx11,since-cxx17 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown %s -verify=expected,cxx11-20,since-cxx20,since-cxx11,since-cxx17 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++23 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx23,since-cxx20,since-cxx11,since-cxx17 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++2c -triple x86_64-unknown-unknown %s -verify=expected,since-cxx23,since-cxx20,since-cxx11,since-cxx17 -fexceptions -fcxx-exceptions -pedantic-errors namespace dr1512 { // dr1512: 4 void f(char *p) { @@ -407,7 +407,7 @@ namespace dr1573 { // dr1573: 3.9 B b(1, 'x', 4.0, "hello"); // ok // inherited constructor is effectively constexpr if the user-written constructor would be - struct C { C(); constexpr C(int) {} }; + struct C { C(); constexpr C(int) {} }; // #dr1573-C struct D : C { using C::C; }; constexpr D d = D(0); // ok struct E : C { using C::C; A a; }; // #dr1573-E @@ -420,8 +420,11 @@ namespace dr1573 { // dr1573: 3.9 struct F : C { using 
C::C; C c; }; // #dr1573-F constexpr F f = F(0); // since-cxx11-error@-1 {{constexpr variable 'f' must be initialized by a constant expression}} - // since-cxx11-note@-2 {{constructor inherited from base class 'C' cannot be used in a constant expression; derived class cannot be implicitly initialized}} - // since-cxx11-note@#dr1573-F {{declared here}} + // cxx11-20-note@-2 {{constructor inherited from base class 'C' cannot be used in a constant expression; derived class cannot be implicitly initialized}} + // since-cxx23-note@-3 {{in implicit initialization for inherited constructor of 'F'}} + // since-cxx23-note@#dr1573-F {{non-constexpr constructor 'C' cannot be used in a constant expression}} + // cxx11-20-note@#dr1573-F {{declared here}} + // since-cxx23-note@#dr1573-C {{declared here}} // inherited constructor is effectively deleted if the user-written constructor would be struct G { G(int); }; diff --git a/clang/test/CXX/drs/dr16xx.cpp b/clang/test/CXX/drs/dr16xx.cpp index 2dd7d1502e59fb..766c90d3bc7bda 100644 --- a/clang/test/CXX/drs/dr16xx.cpp +++ b/clang/test/CXX/drs/dr16xx.cpp @@ -1,10 +1,10 @@ // RUN: %clang_cc1 -std=c++98 -triple x86_64-unknown-unknown %s -verify=expected,cxx98-14,cxx98 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-unknown %s -verify=expected,cxx98-14,since-cxx11,cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++14 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx14,cxx98-14,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx14,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx14,since-cxx20,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++23 -triple x86_64-unknown-unknown %s 
-verify=expected,since-cxx14,since-cxx20,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++2c -triple x86_64-unknown-unknown %s -verify=expected,since-cxx14,since-cxx20,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-unknown %s -verify=expected,cxx11-20,cxx98-14,since-cxx11,cxx11 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++14 -triple x86_64-unknown-unknown %s -verify=expected,cxx11-20,since-cxx14,cxx98-14,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-unknown %s -verify=expected,cxx11-20,since-cxx14,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown %s -verify=expected,cxx11-20,since-cxx14,since-cxx20,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++23 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx23,since-cxx14,since-cxx20,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++2c -triple x86_64-unknown-unknown %s -verify=expected,since-cxx23,since-cxx14,since-cxx20,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors #if __cplusplus == 199711L #define static_assert(...) 
__extension__ _Static_assert(__VA_ARGS__) @@ -256,12 +256,12 @@ namespace dr1658 { // dr1658: 5 struct A { A(A&); }; struct B : virtual A { virtual void f() = 0; }; struct C : virtual A { virtual void f(); }; - struct D : A { virtual void f() = 0; }; + struct D : A { virtual void f() = 0; }; // since-cxx23-note {{previous declaration is here}} struct X { friend B::B(const B&) throw(); friend C::C(C&); - friend D::D(D&); + friend D::D(D&); // since-cxx23-error {{non-constexpr declaration of 'D' follows constexpr declaration}} }; } @@ -350,8 +350,8 @@ namespace dr1684 { // dr1684: 3.6 }; constexpr int f(NonLiteral &) { return 0; } constexpr int f(NonLiteral) { return 0; } - // since-cxx11-error@-1 {{constexpr function's 1st parameter type 'NonLiteral' is not a literal type}} - // since-cxx11-note@#dr1684-struct {{'NonLiteral' is not literal because it is not an aggregate and has no constexpr constructors other than copy or move constructors}} + // cxx11-20-error@-1 {{constexpr function's 1st parameter type 'NonLiteral' is not a literal type}} + // cxx11-20-note@#dr1684-struct {{'NonLiteral' is not literal because it is not an aggregate and has no constexpr constructors other than copy or move constructors}} #endif } diff --git a/clang/test/CXX/drs/dr519.cpp b/clang/test/CXX/drs/dr519.cpp new file mode 100644 index 00000000000000..67c01d95ef7c6f --- /dev/null +++ b/clang/test/CXX/drs/dr519.cpp @@ -0,0 +1,36 @@ +// RUN: %clang_cc1 -std=c++98 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++11 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++14 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++17 %s 
-triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++20 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++23 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++2c %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK + +namespace dr519 { // dr519: 2.7 +void f() { + int *a = 0; + void *v = a; + bool c1 = v == static_cast(0); + + void *w = 0; + int *b = static_cast(w); + bool c2 = b == static_cast(0); +} +} // namespace dr519 + +// We're checking that `null`s that were initially stored in `a` and `w` +// are simply copied over all the way to respective comparisons with `null`. 
+ +// CHECK-LABEL: define {{.*}} void @dr519::f()() +// CHECK: store ptr null, ptr [[A:%.+]], +// CHECK-NEXT: [[TEMP_A:%.+]] = load ptr, ptr [[A]] +// CHECK-NEXT: store ptr [[TEMP_A]], ptr [[V:%.+]], +// CHECK-NEXT: [[TEMP_V:%.+]] = load ptr, ptr [[V]] +// CHECK-NEXT: {{.+}} = icmp eq ptr [[TEMP_V]], null + +// CHECK: store ptr null, ptr [[W:%.+]], +// CHECK-NEXT: [[TEMP_W:%.+]] = load ptr, ptr [[W]] +// CHECK-NEXT: store ptr [[TEMP_W]], ptr [[B:%.+]], +// CHECK-NEXT: [[TEMP_B:%.+]] = load ptr, ptr [[B]] +// CHECK-NEXT: {{.+}} = icmp eq ptr [[TEMP_B]], null +// CHECK-LABEL: } diff --git a/clang/test/CXX/drs/dr571.cpp b/clang/test/CXX/drs/dr571.cpp new file mode 100644 index 00000000000000..19a85b7ddc3508 --- /dev/null +++ b/clang/test/CXX/drs/dr571.cpp @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -std=c++98 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++11 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++14 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++17 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++20 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++23 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++2c %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck 
%s --check-prefixes CHECK + +namespace dr571 { // dr571: 2.7 + typedef int &ir; + int n; + const ir r = n; + // expected-warning@-1 {{'const' qualifier on reference type 'ir' (aka 'int &') has no effect}} + ir r2 = n; +} + +// Entities have external linkage by default. + +// CHECK: @dr571::r = constant ptr @dr571::n +// CHECK: @dr571::r2 = constant ptr @dr571::n diff --git a/clang/test/CXX/drs/dr5xx.cpp b/clang/test/CXX/drs/dr5xx.cpp index 0e1de342f6706f..426b368b390ae6 100644 --- a/clang/test/CXX/drs/dr5xx.cpp +++ b/clang/test/CXX/drs/dr5xx.cpp @@ -141,15 +141,7 @@ namespace dr518 { // dr518: yes c++11 // cxx98-error@-1 {{commas at the end of enumerator lists are a C++11 extension}} } -namespace dr519 { // dr519: yes -// FIXME: Add a codegen test. -#if __cplusplus >= 201103L -#define fold(x) (__builtin_constant_p(x) ? (x) : (x)) - int test[fold((int*)(void*)0) ? -1 : 1]; -#undef fold -#endif -} - +// dr519 is in dr519.cpp // dr520: na // dr521: no @@ -800,14 +792,7 @@ namespace dr570 { // dr570: dup 633 // expected-note@#dr570-r {{previous definition is here}} } -namespace dr571 { // dr571 unknown - // FIXME: Add a codegen test. - typedef int &ir; - int n; - // FIXME: Test if this has internal linkage. 
- const ir r = n; - // expected-warning@-1 {{'const' qualifier on reference type 'ir' (aka 'int &') has no effect}} -} +// dr571 is in dr571.cpp namespace dr572 { // dr572: yes enum E { a = 1, b = 2 }; diff --git a/clang/test/CXX/drs/dr6xx.cpp b/clang/test/CXX/drs/dr6xx.cpp index b35d3051ab554c..190e05784f32be 100644 --- a/clang/test/CXX/drs/dr6xx.cpp +++ b/clang/test/CXX/drs/dr6xx.cpp @@ -1,8 +1,8 @@ // RUN: %clang_cc1 -std=c++98 %s -verify=expected,cxx98-17,cxx98-14,cxx98 -fexceptions -fcxx-exceptions -pedantic-errors -fno-spell-checking -// RUN: %clang_cc1 -std=c++11 %s -verify=expected,cxx98-17,cxx11-17,cxx98-14,since-cxx11,cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -fno-spell-checking -// RUN: %clang_cc1 -std=c++14 %s -verify=expected,cxx98-17,cxx11-17,cxx98-14,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -fno-spell-checking -// RUN: %clang_cc1 -std=c++17 %s -verify=expected,cxx98-17,cxx11-17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -fno-spell-checking -// RUN: %clang_cc1 -std=c++20 %s -verify=expected,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -fno-spell-checking +// RUN: %clang_cc1 -std=c++11 %s -verify=expected,cxx11-20,cxx98-17,cxx11-17,cxx98-14,since-cxx11,cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -fno-spell-checking +// RUN: %clang_cc1 -std=c++14 %s -verify=expected,cxx11-20,cxx98-17,cxx11-17,cxx98-14,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -fno-spell-checking +// RUN: %clang_cc1 -std=c++17 %s -verify=expected,cxx11-20,cxx98-17,cxx11-17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -fno-spell-checking +// RUN: %clang_cc1 -std=c++20 %s -verify=expected,cxx11-20,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -fno-spell-checking // RUN: %clang_cc1 -std=c++23 %s -verify=expected,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors -fno-spell-checking namespace dr600 { // dr600: 2.8 @@ -584,8 +584,8 @@ namespace dr647 { // dr647: 3.1 struct C 
{ constexpr C(NonLiteral); constexpr C(NonLiteral, int) {} - // since-cxx11-error@-1 {{constexpr constructor's 1st parameter type 'NonLiteral' is not a literal type}} - // since-cxx11-note@#dr647-NonLiteral {{'NonLiteral' is not literal because it is not an aggregate and has no constexpr constructors other than copy or move constructors}} + // cxx11-20-error@-1 {{constexpr constructor's 1st parameter type 'NonLiteral' is not a literal type}} + // cxx11-20-note@#dr647-NonLiteral {{'NonLiteral' is not literal because it is not an aggregate and has no constexpr constructors other than copy or move constructors}} constexpr C() try {} catch (...) {} // cxx11-17-error@-1 {{function try block in constexpr constructor is a C++20 extension}} // cxx11-error@-2 {{use of this statement in a constexpr constructor is a C++14 extension}} @@ -609,15 +609,15 @@ namespace dr647 { // dr647: 3.1 d(0) {} constexpr E(int) - // since-cxx11-error@-1 {{constexpr constructor never produces a constant expression}} - // since-cxx11-note@#dr647-int-d {{non-constexpr constructor 'D' cannot be used in a constant expression}} - // since-cxx11-note@#dr647-D-float-ctor {{declared here}} + // cxx11-20-error@-1 {{constexpr constructor never produces a constant expression}} + // cxx11-20-note@#dr647-int-d {{non-constexpr constructor 'D' cannot be used in a constant expression}} + // cxx11-20-note@#dr647-D-float-ctor {{declared here}} : n(0), d(0.0f) {} // #dr647-int-d constexpr E(float f) - // since-cxx11-error@-1 {{never produces a constant expression}} - // since-cxx11-note@#dr647-float-d {{non-constexpr constructor}} - // since-cxx11-note@#dr647-D-float-ctor {{declared here}} + // cxx11-20-error@-1 {{never produces a constant expression}} + // cxx11-20-note@#dr647-float-d {{non-constexpr constructor}} + // cxx11-20-note@#dr647-D-float-ctor {{declared here}} : n(get()), d(D(0) + f) {} // #dr647-float-d }; diff --git a/clang/test/CXX/expr/expr.const/p5-26.cpp 
b/clang/test/CXX/expr/expr.const/p5-26.cpp index de2afa71b42669..3624b1e5a3e3df 100644 --- a/clang/test/CXX/expr/expr.const/p5-26.cpp +++ b/clang/test/CXX/expr/expr.const/p5-26.cpp @@ -5,11 +5,11 @@ struct S {}; struct T : S {} t; -consteval void test() { // cxx23-error{{consteval function never produces a constant expression}} +consteval void test() { void* a = &t; const void* b = &t; volatile void* c = &t; - (void)static_cast(a); //cxx23-note {{cast from 'void *' is not allowed in a constant expression in C++ standards before C++2c}} + (void)static_cast(a); (void)static_cast(a); (void)static_cast(a); diff --git a/clang/test/CXX/special/class.copy/p13-0x.cpp b/clang/test/CXX/special/class.copy/p13-0x.cpp index 16c8a4029cbac6..013d5b56582380 100644 --- a/clang/test/CXX/special/class.copy/p13-0x.cpp +++ b/clang/test/CXX/special/class.copy/p13-0x.cpp @@ -125,7 +125,7 @@ namespace Mutable { mutable A a; }; struct C { - constexpr C(const C &) = default; // expected-error {{not constexpr}} + constexpr C(const C &) = default; // expected-error {{cannot be marked constexpr}} A a; }; } diff --git a/clang/test/CodeGen/attr-availability-visionos.c b/clang/test/CodeGen/attr-availability-visionos.c new file mode 100644 index 00000000000000..09b98fb4a7d5e3 --- /dev/null +++ b/clang/test/CodeGen/attr-availability-visionos.c @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 -triple arm64-apple-xros1 -emit-llvm -o - %s 2>&1 | FileCheck %s + +__attribute__((availability(visionOS, introduced=1.1))) +void introduced_1_1(); + +void use() { + if (__builtin_available(visionOS 1.2, *)) + introduced_1_1(); + // CHECK: call i32 @__isPlatformVersionAtLeast(i32 11, i32 1, i32 2, i32 0) +} diff --git a/clang/test/CodeGen/attr-target-version.c b/clang/test/CodeGen/attr-target-version.c index ae1a8772f6cc07..b7112c783da913 100644 --- a/clang/test/CodeGen/attr-target-version.c +++ b/clang/test/CodeGen/attr-target-version.c @@ -273,7 +273,7 @@ int hoo(void) { // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, 
[[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: -// CHECK-NEXT: ret ptr @fmv_inline._MfcmaMfp16Mfp16MrdmMsme +// CHECK-NEXT: ret ptr @fmv_inline._MfcmaMfp16MrdmMsme // CHECK: resolver_else: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 // CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 864726312827224064 @@ -582,7 +582,7 @@ int hoo(void) { // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: define {{[^@]+}}@fmv_inline._MfcmaMfp16Mfp16MrdmMsme +// CHECK-LABEL: define {{[^@]+}}@fmv_inline._MfcmaMfp16MrdmMsme // CHECK-SAME: () #[[ATTR13:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 2 diff --git a/clang/test/CodeGen/tbaa-struct-relaxed-aliasing-with-tsan.cpp b/clang/test/CodeGen/tbaa-struct-relaxed-aliasing-with-tsan.cpp new file mode 100644 index 00000000000000..931ff2476cd1bb --- /dev/null +++ b/clang/test/CodeGen/tbaa-struct-relaxed-aliasing-with-tsan.cpp @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm -o - -O1 -relaxed-aliasing -fsanitize=thread -disable-llvm-optzns %s | \ +// RUN: FileCheck %s +// RUN: %clang_cc1 -triple x86_64-apple-darwin -new-struct-path-tbaa \ +// RUN: -emit-llvm -o - -O1 -relaxed-aliasing -fsanitize=thread -disable-llvm-optzns %s | \ +// RUN: FileCheck %s +// +// Check that we do not create tbaa for instructions generated for copies. +// FIXME: !tbaa.struct is generated with null node as tag. 
+ +// CHECK: !tbaa.struct +// CHECK-NOT: !tbaa + +struct A { + short s; + int i; + char c; + int j; +}; + +void copyStruct(A *a1, A *a2) { + *a1 = *a2; +} + +void copyInt(int *a, int *b) { + *a = *b; +} diff --git a/clang/test/CodeGen/tbaa-struct.cpp b/clang/test/CodeGen/tbaa-struct.cpp index 883c982be26c8f..63e4097946448e 100644 --- a/clang/test/CodeGen/tbaa-struct.cpp +++ b/clang/test/CodeGen/tbaa-struct.cpp @@ -151,6 +151,38 @@ void copy10(NamedBitfields3 *a1, NamedBitfields3 *a2) { *a1 = *a2; } +union U2 { + double d; + float f; +}; + +struct UnionMember1 { + U2 u; + int p; +}; + +void copy11(UnionMember1 *a1, UnionMember1 *a2) { +// CHECK-LABEL: _Z6copy11P12UnionMember1S0_ +// CHECK: tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) %a1, ptr noundef nonnull align 8 dereferenceable(16) %a2, i64 16, i1 false), +// CHECK-OLD-SAME: !tbaa.struct [[TS9:!.*]] +// CHECK-NEW-SAME: !tbaa [[TAG_UnionMember1:!.+]], !tbaa.struct + *a1 = *a2; +} + +struct UnionMember2 { + int p; + U2 u; +}; + +void copy12(UnionMember2 *a1, UnionMember2 *a2) { +// CHECK-LABEL: _Z6copy12P12UnionMember2S0_ +// CHECK: tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) %a1, ptr noundef nonnull align 8 dereferenceable(16) %a2, i64 16, i1 false), +// CHECK-OLD-SAME: !tbaa.struct [[TS10:!.*]] +// CHECK-NEW-SAME: !tbaa [[TAG_UnionMember2:!.+]], !tbaa.struct + + *a1 = *a2; +} + // CHECK-OLD: [[TS]] = !{i64 0, i64 2, !{{.*}}, i64 4, i64 4, !{{.*}}, i64 8, i64 1, !{{.*}}, i64 12, i64 4, !{{.*}}} // CHECK-OLD: [[CHAR:!.*]] = !{!"omnipotent char", !{{.*}}} // CHECK-OLD: [[TAG_INT:!.*]] = !{[[INT:!.*]], [[INT]], i64 0} @@ -167,6 +199,10 @@ void copy10(NamedBitfields3 *a1, NamedBitfields3 *a2) { // CHECK-OLD [[DOUBLE]] = !{!"double", [[CHAR]], i64 0} // CHECK-OLD: [[TS7]] = !{i64 0, i64 1, [[TAG_CHAR]], i64 1, i64 1, [[TAG_CHAR]], i64 2, i64 1, [[TAG_CHAR]], i64 3, i64 1, [[TAG_CHAR]], i64 4, i64 1, [[TAG_CHAR]], i64 8, i64 8, 
[[TAG_DOUBLE]], i64 16, i64 1, [[TAG_CHAR]]} // CHECK-OLD: [[TS8]] = !{i64 0, i64 4, [[TAG_CHAR]], i64 8, i64 8, [[TAG_DOUBLE]]} +// CHECK-OLD: [[TS9]] = !{i64 0, i64 8, [[TAG_DOUBLE]], i64 0, i64 4, [[TAG_FLOAT:!.+]], i64 8, i64 4, [[TAG_INT]]} +// CHECK-OLD: [[TAG_FLOAT]] = !{[[FLOAT:!.+]], [[FLOAT]], i64 0} +// CHECK-OLD: [[FLOAT]] = !{!"float", [[CHAR]], i64 0} +// CHECK-OLD: [[TS10]] = !{i64 0, i64 4, [[TAG_INT]], i64 8, i64 8, [[TAG_DOUBLE]], i64 8, i64 4, [[TAG_FLOAT:!.+]]} // CHECK-NEW-DAG: [[TYPE_char:!.*]] = !{{{.*}}, i64 1, !"omnipotent char"} // CHECK-NEW-DAG: [[TAG_char]] = !{[[TYPE_char]], [[TYPE_char]], i64 0, i64 0} @@ -188,3 +224,7 @@ void copy10(NamedBitfields3 *a1, NamedBitfields3 *a2) { // CHECK-NEW-DAG: [[TYPE_NamedBitfields2]] = !{[[TYPE_char]], i64 24, !"_ZTS15NamedBitfields2", [[TYPE_char]], i64 0, i64 1, [[TYPE_char]], i64 1, i64 1, [[TYPE_char]], i64 2, i64 1, [[TYPE_int]], i64 3, i64 4, [[TYPE_int]], i64 3, i64 4, [[TYPE_char]], i64 4, i64 1, [[TYPE_double]], i64 8, i64 8, [[TYPE_int]], i64 16, i64 4} // CHECK-NEW-DAG: [[TAG_NamedBitfields3]] = !{[[TYPE_NamedBitfields3:!.+]], [[TYPE_NamedBitfields3]], i64 0, i64 16} // CHECK-NEW-DAG: [[TYPE_NamedBitfields3]] = !{[[TYPE_char]], i64 16, !"_ZTS15NamedBitfields3", [[TYPE_int]], i64 1, i64 4, [[TYPE_int]], i64 2, i64 4, [[TYPE_double]], i64 8, i64 8} +// CHECK-NEW-DAG: [[TAG_UnionMember1]] = !{[[TYPE_UnionMember1:!.+]], [[TYPE_UnionMember1]], i64 0, i64 16} +// CHECK-NEW-DAG: [[TYPE_UnionMember1]] = !{[[TYPE_char]], i64 16, !"_ZTS12UnionMember1", [[TYPE_char]], i64 0, i64 8, [[TYPE_int]], i64 8, i64 4} +// CHECK-NEW-DAG: [[TAG_UnionMember2]] = !{[[TYPE_UnionMember2:!.+]], [[TYPE_UnionMember2]], i64 0, i64 16} +// CHECK-NEW-DAG: [[TYPE_UnionMember2]] = !{[[TYPE_char]], i64 16, !"_ZTS12UnionMember2", [[TYPE_int]], i64 0, i64 4, [[TYPE_char]], i64 8, i64 8} diff --git a/clang/test/Driver/amdgpu-openmp-toolchain.c b/clang/test/Driver/amdgpu-openmp-toolchain.c index 4975e2f8a52399..849afb871ddbfc 
100644 --- a/clang/test/Driver/amdgpu-openmp-toolchain.c +++ b/clang/test/Driver/amdgpu-openmp-toolchain.c @@ -11,7 +11,7 @@ // CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c" // CHECK: "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"{{.*}}"-target-cpu" "gfx906"{{.*}}"-fcuda-is-device"{{.*}} // CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-obj" -// CHECK: clang-linker-wrapper{{.*}}"--"{{.*}} "-o" "a.out" +// CHECK: clang-linker-wrapper{{.*}} "-o" "a.out" // RUN: %clang -ccc-print-phases --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-PHASES %s diff --git a/clang/test/Driver/clang-offload-bundler.c b/clang/test/Driver/clang-offload-bundler.c index 9d8b81ee9806ee..f3cd2493e05277 100644 --- a/clang/test/Driver/clang-offload-bundler.c +++ b/clang/test/Driver/clang-offload-bundler.c @@ -13,6 +13,19 @@ // RUN: obj2yaml %t.o > %t.o.yaml // RUN: %clang -O0 -target %itanium_abi_triple %s -emit-ast -o %t.ast +// RUN: echo 'void a() {}' >%t.a.cpp +// RUN: echo 'void b() {}' >%t.b.cpp +// RUN: %clang -target %itanium_abi_triple %t.a.cpp -c -o %t.a.o +// RUN: %clang -target %itanium_abi_triple %t.b.cpp -c -o %t.b.o +// +// Remove .llvm_addrsig section since its offset changes after llvm-objcopy +// removes clang-offload-bundler sections, therefore not good for comparison. +// +// RUN: llvm-objcopy --remove-section=.llvm_addrsig %t.a.o +// RUN: llvm-objcopy --remove-section=.llvm_addrsig %t.b.o +// RUN: obj2yaml %t.a.o > %t.a.yaml +// RUN: obj2yaml %t.b.o > %t.b.yaml + // // Generate an empty file to help with the checks of empty files. 
// @@ -414,6 +427,25 @@ // HIP-AR-906-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx906 // HIP-AR-906-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx906 +// +// Check unbundling archive for host target +// +// RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,hip-amdgcn-amd-amdhsa--gfx900 \ +// RUN: -input=%t.a.o -input=%t.tgt1 -output=%t.a.bundled.o +// RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,hip-amdgcn-amd-amdhsa--gfx900 \ +// RUN: -input=%t.b.o -input=%t.tgt1 -output=%t.b.bundled.o +// RUN: rm -f %t.bundled.a +// RUN: llvm-ar cr %t.bundled.a %t.a.bundled.o %t.b.bundled.o +// RUN: cp %t.bundled.a %t.bundled.a.bak +// RUN: clang-offload-bundler -unbundle --targets=host-%itanium_abi_triple -type=a -input=%t.bundled.a -output=%t.host.a +// RUN: rm -f *%itanium_abi_triple*.a.bundled.o *%itanium_abi_triple*.b.bundled.o +// RUN: llvm-ar -x %t.host.a +// RUN: diff %t.bundled.a %t.bundled.a.bak +// RUN: obj2yaml *%itanium_abi_triple*.a.bundled.o > %t.a.unbundled.yaml +// RUN: diff %t.a.unbundled.yaml %t.a.yaml +// RUN: obj2yaml *%itanium_abi_triple*.b.bundled.o > %t.b.unbundled.yaml +// RUN: diff %t.b.unbundled.yaml %t.b.yaml +// // Check clang-offload-bundler reporting an error when trying to unbundle an archive but // the input file is not an archive. 
// diff --git a/clang/test/Driver/cuda-phases.cu b/clang/test/Driver/cuda-phases.cu index 9a231091de2bdc..a1c3c9b51b1e41 100644 --- a/clang/test/Driver/cuda-phases.cu +++ b/clang/test/Driver/cuda-phases.cu @@ -244,31 +244,32 @@ // NEW-DRIVER-RDC-NEXT: 18: assembler, {17}, object, (host-cuda) // NEW-DRIVER-RDC-NEXT: 19: clang-linker-wrapper, {18}, image, (host-cuda) -// RUN: %clang -### -target powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver -fgpu-rdc \ +// RUN: %clang -### -target powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver \ // RUN: --offload-arch=sm_52 --offload-arch=sm_70 %s 2>&1 | FileCheck --check-prefix=NEW-DRIVER %s -// NEW-DRIVER: 0: input, "[[INPUT:.+]]", cuda -// NEW-DRIVER-NEXT: 1: preprocessor, {0}, cuda-cpp-output -// NEW-DRIVER-NEXT: 2: compiler, {1}, ir -// NEW-DRIVER-NEXT: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_52) +// NEW-DRIVER: 0: input, "[[CUDA:.+]]", cuda, (host-cuda) +// NEW-DRIVER-NEXT: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) +// NEW-DRIVER-NEXT: 2: compiler, {1}, ir, (host-cuda) +// NEW-DRIVER-NEXT: 3: input, "[[CUDA]]", cuda, (device-cuda, sm_52) // NEW-DRIVER-NEXT: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_52) // NEW-DRIVER-NEXT: 5: compiler, {4}, ir, (device-cuda, sm_52) // NEW-DRIVER-NEXT: 6: backend, {5}, assembler, (device-cuda, sm_52) // NEW-DRIVER-NEXT: 7: assembler, {6}, object, (device-cuda, sm_52) -// NEW-DRIVER-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object -// NEW-DRIVER-NEXT: 9: input, "[[INPUT]]", cuda, (device-cuda, sm_70) +// NEW-DRIVER-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, object +// NEW-DRIVER-NEXT: 9: input, "[[CUDA]]", cuda, (device-cuda, sm_70) // NEW-DRIVER-NEXT: 10: preprocessor, {9}, cuda-cpp-output, (device-cuda, sm_70) // NEW-DRIVER-NEXT: 11: compiler, {10}, ir, (device-cuda, sm_70) // NEW-DRIVER-NEXT: 12: backend, {11}, assembler, (device-cuda, 
sm_70) // NEW-DRIVER-NEXT: 13: assembler, {12}, object, (device-cuda, sm_70) -// NEW-DRIVER-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, object -// NEW-DRIVER-NEXT: 15: clang-offload-packager, {8, 14}, image -// NEW-DRIVER-NEXT: 16: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (powerpc64le-ibm-linux-gnu)" {15}, ir +// NEW-DRIVER-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {12}, object +// NEW-DRIVER-NEXT: 15: linker, {8, 14}, cuda-fatbin, (device-cuda) +// NEW-DRIVER-NEXT: 16: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {15}, ir // NEW-DRIVER-NEXT: 17: backend, {16}, assembler, (host-cuda) // NEW-DRIVER-NEXT: 18: assembler, {17}, object, (host-cuda) // NEW-DRIVER-NEXT: 19: clang-linker-wrapper, {18}, image, (host-cuda) // RUN: %clang -### --target=powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver \ // RUN: --offload-arch=sm_52 --offload-arch=sm_70 %s %S/Inputs/empty.cpp 2>&1 | FileCheck --check-prefix=NON-CUDA-INPUT %s + // NON-CUDA-INPUT: 0: input, "[[CUDA:.+]]", cuda, (host-cuda) // NON-CUDA-INPUT-NEXT: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) // NON-CUDA-INPUT-NEXT: 2: compiler, {1}, ir, (host-cuda) @@ -277,13 +278,13 @@ // NON-CUDA-INPUT-NEXT: 5: compiler, {4}, ir, (device-cuda, sm_52) // NON-CUDA-INPUT-NEXT: 6: backend, {5}, assembler, (device-cuda, sm_52) // NON-CUDA-INPUT-NEXT: 7: assembler, {6}, object, (device-cuda, sm_52) -// NON-CUDA-INPUT-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object +// NON-CUDA-INPUT-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, object // NON-CUDA-INPUT-NEXT: 9: input, "[[CUDA]]", cuda, (device-cuda, sm_70) // NON-CUDA-INPUT-NEXT: 10: preprocessor, {9}, cuda-cpp-output, (device-cuda, sm_70) // NON-CUDA-INPUT-NEXT: 11: compiler, {10}, ir, (device-cuda, sm_70) // 
NON-CUDA-INPUT-NEXT: 12: backend, {11}, assembler, (device-cuda, sm_70) // NON-CUDA-INPUT-NEXT: 13: assembler, {12}, object, (device-cuda, sm_70) -// NON-CUDA-INPUT-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, object +// NON-CUDA-INPUT-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {12}, object // NON-CUDA-INPUT-NEXT: 15: linker, {8, 14}, cuda-fatbin, (device-cuda) // NON-CUDA-INPUT-NEXT: 16: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {15}, ir // NON-CUDA-INPUT-NEXT: 17: backend, {16}, assembler, (host-cuda) diff --git a/clang/test/Driver/linker-wrapper-image.c b/clang/test/Driver/linker-wrapper-image.c index 08f860f6cab0de..75475264135224 100644 --- a/clang/test/Driver/linker-wrapper-image.c +++ b/clang/test/Driver/linker-wrapper-image.c @@ -8,11 +8,11 @@ // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ // RUN: -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=OPENMP,OPENMP-ELF +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=OPENMP,OPENMP-ELF // RUN: clang-linker-wrapper --print-wrapped-module --dry-run -r --host-triple=x86_64-unknown-linux-gnu \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=OPENMP-ELF,OPENMP-REL +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=OPENMP-ELF,OPENMP-REL // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=OPENMP,OPENMP-COFF +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=OPENMP,OPENMP-COFF // OPENMP-ELF: 
@__start_omp_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry] // OPENMP-ELF-NEXT: @__stop_omp_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry] @@ -45,11 +45,11 @@ // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ // RUN: -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=CUDA,CUDA-ELF +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=CUDA,CUDA-ELF // RUN: clang-linker-wrapper --print-wrapped-module --dry-run -r --host-triple=x86_64-unknown-linux-gnu \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=CUDA,CUDA-ELF +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=CUDA,CUDA-ELF // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=CUDA,CUDA-COFF +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=CUDA,CUDA-COFF // CUDA-ELF: @__start_cuda_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry] // CUDA-ELF-NEXT: @__stop_cuda_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry] @@ -145,11 +145,11 @@ // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ // RUN: -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-ELF +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-ELF // RUN: clang-linker-wrapper --print-wrapped-module --dry-run 
--host-triple=x86_64-unknown-linux-gnu -r \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-ELF +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-ELF // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-COFF +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-COFF // HIP-ELF: @__start_hip_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry] // HIP-ELF-NEXT: @__stop_hip_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry] diff --git a/clang/test/Driver/linker-wrapper-libs.c b/clang/test/Driver/linker-wrapper-libs.c index 2073092bdbcf9e..9a78200d7d3cfc 100644 --- a/clang/test/Driver/linker-wrapper-libs.c +++ b/clang/test/Driver/linker-wrapper-libs.c @@ -43,7 +43,7 @@ int bar() { return weak; } // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t.o %t.a -o a.out 2>&1 \ +// RUN: --linker-path=/usr/bin/ld %t.o %t.a -o a.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=LIBRARY-RESOLVES // LIBRARY-RESOLVES: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o @@ -65,7 +65,7 @@ int bar() { return weak; } // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t.o %t.a -o a.out 2>&1 \ +// RUN: --linker-path=/usr/bin/ld 
%t.o %t.a -o a.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=LIBRARY-GLOBAL // LIBRARY-GLOBAL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o @@ -88,7 +88,7 @@ int bar() { return weak; } // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t.o %t.a -o a.out 2>&1 \ +// RUN: --linker-path=/usr/bin/ld %t.o %t.a -o a.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=LIBRARY-GLOBAL-NONE // LIBRARY-GLOBAL-NONE-NOT: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o @@ -109,7 +109,7 @@ int bar() { return weak; } // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t.o %t.a -o a.out 2>&1 \ +// RUN: --linker-path=/usr/bin/ld %t.o %t.a -o a.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=LIBRARY-WEAK // LIBRARY-WEAK: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 @@ -131,7 +131,7 @@ int bar() { return weak; } // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t.o %t.a -o a.out 2>&1 \ +// RUN: --linker-path=/usr/bin/ld %t.o %t.a -o a.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=LIBRARY-HIDDEN // LIBRARY-HIDDEN: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 @@ -154,7 +154,7 @@ int bar() { return weak; } // RUN: 
--image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t.o %t.a %t.a -o a.out 2>&1 \ +// RUN: --linker-path=/usr/bin/ld %t.o %t.a %t.a -o a.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=LIBRARY-GLOBAL-DEFINED // LIBRARY-GLOBAL-DEFINED: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o @@ -178,7 +178,7 @@ int bar() { return weak; } // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t.o --whole-archive %t.a -o a.out 2>&1 \ +// RUN: --linker-path=/usr/bin/ld %t.o --whole-archive %t.a -o a.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=LIBRARY-WHOLE-ARCHIVE // LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c index 83df2b84adefed..0e6fd80b429846 100644 --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -16,10 +16,10 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK // RUN: %clang -cc1 %s -triple 
x86_64-unknown-linux-gnu -emit-llvm-bc -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK // NVPTX-LINK: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 {{.*}}.o {{.*}}.o @@ -28,7 +28,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --device-debug -O0 \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK-DEBUG +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK-DEBUG // NVPTX-LINK-DEBUG: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 {{.*}}.o {{.*}}.o -g @@ -37,7 +37,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LINK +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LINK // AMDGPU-LINK: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o @@ -46,7 +46,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu 
-emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --save-temps -O2 \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LTO-TEMPS +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LTO-TEMPS // AMDGPU-LTO-TEMPS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -Wl,--no-undefined {{.*}}.s -save-temps @@ -56,14 +56,14 @@ __attribute__((visibility("protected"), used)) int x; // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: llvm-ar rcs %t.a %t.o // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld.lld -- --whole-archive %t.a --no-whole-archive \ +// RUN: --linker-path=/usr/bin/ld.lld --whole-archive %t.a --no-whole-archive \ // RUN: %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CPU-LINK // CPU-LINK: clang{{.*}} -o {{.*}}.img --target=x86_64-unknown-linux-gnu -march=native -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o -Wl,-Bsymbolic -shared -Wl,--whole-archive {{.*}}.a -Wl,--no-whole-archive // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o // RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu -mllvm -openmp-opt-disable \ -// RUN: --linker-path=/usr/bin/ld.lld -- -a -b -c %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HOST-LINK +// RUN: --linker-path=/usr/bin/ld.lld -a -b -c %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HOST-LINK // HOST-LINK: ld.lld{{.*}}-a -b -c {{.*}}.o -o a.out // HOST-LINK-NOT: ld.lld{{.*}}-abc @@ -77,7 +77,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t-obj.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper 
--host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t.a %t-obj.o -o a.out 2>&1 | FileCheck %s --check-prefix=STATIC-LIBRARY +// RUN: --linker-path=/usr/bin/ld %t.a %t-obj.o -o a.out 2>&1 | FileCheck %s --check-prefix=STATIC-LIBRARY // STATIC-LIBRARY: clang{{.*}} -march=sm_70 // STATIC-LIBRARY-NOT: clang{{.*}} -march=sm_50 @@ -89,7 +89,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ // RUN: -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA // CUDA: clang{{.*}} -o [[IMG_SM52:.+]] --target=nvptx64-nvidia-cuda -march=sm_52 // CUDA: clang{{.*}} -o [[IMG_SM70:.+]] --target=nvptx64-nvidia-cuda -march=sm_70 @@ -104,7 +104,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ // RUN: -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu --wrapper-jobs=4 \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA-PAR +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA-PAR // CUDA-PAR: fatbinary{{.*}}-64 --create {{.*}}.fatbin @@ -114,12 +114,12 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --image=file=%t.elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx908 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ // RUN: -fembed-offload-object=%t.out -// RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HIP +// RUN: clang-linker-wrapper --dry-run 
--host-triple=x86_64-unknown-linux-gnu --compress \ +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HIP // HIP: clang{{.*}} -o [[IMG_GFX908:.+]] --target=amdgcn-amd-amdhsa -mcpu=gfx908 // HIP: clang{{.*}} -o [[IMG_GFX90A:.+]] --target=amdgcn-amd-amdhsa -mcpu=gfx90a -// HIP: clang-offload-bundler{{.*}}-type=o -bundle-align=4096 -targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx90a,hipv4-amdgcn-amd-amdhsa--gfx908 -input=/dev/null -input=[[IMG_GFX90A]] -input=[[IMG_GFX908]] -output={{.*}}.hipfb +// HIP: clang-offload-bundler{{.*}}-type=o -bundle-align=4096 -compress -targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx90a,hipv4-amdgcn-amd-amdhsa--gfx908 -input=/dev/null -input=[[IMG_GFX90A]] -input=[[IMG_GFX908]] -output={{.*}}.hipfb // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \ @@ -127,14 +127,14 @@ __attribute__((visibility("protected"), used)) int x; // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ // RUN: -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu \ -// RUN: --linker-path=/usr/bin/ld --device-linker=a --device-linker=nvptx64-nvidia-cuda=b -- \ +// RUN: --linker-path=/usr/bin/ld --device-linker=a --device-linker=nvptx64-nvidia-cuda=b \ // RUN: %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=LINKER-ARGS // LINKER-ARGS: clang{{.*}}--target=amdgcn-amd-amdhsa{{.*}}a // LINKER-ARGS: clang{{.*}}--target=nvptx64-nvidia-cuda{{.*}}a b // RUN: not clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu -ldummy \ -// RUN: --linker-path=/usr/bin/ld --device-linker=a --device-linker=nvptx64-nvidia-cuda=b -- \ +// RUN: --linker-path=/usr/bin/ld --device-linker=a --device-linker=nvptx64-nvidia-cuda=b \ // RUN: -o a.out 2>&1 | FileCheck %s --check-prefix=MISSING-LIBRARY // MISSING-LIBRARY: error: unable to find library -ldummy @@ -144,7 
+144,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --clang-backend \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CLANG-BACKEND +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CLANG-BACKEND // CLANG-BACKEND: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -Wl,--no-undefined {{.*}}.bc @@ -152,7 +152,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 // RUN: %clang -cc1 %s -triple x86_64-unknown-windows-msvc -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-windows-msvc --dry-run \ -// RUN: --linker-path=/usr/bin/lld-link -- %t.o -libpath:./ -out:a.exe 2>&1 | FileCheck %s --check-prefix=COFF +// RUN: --linker-path=/usr/bin/lld-link %t.o -libpath:./ -out:a.exe 2>&1 | FileCheck %s --check-prefix=COFF // COFF: "/usr/bin/lld-link" {{.*}}.o -libpath:./ -out:a.exe {{.*}}openmp.image.wrapper{{.*}} @@ -167,7 +167,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90a:xnack- // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t-off.o -fembed-offload-object=%t-off.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t-on.o %t-off.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=AMD-TARGET-ID +// RUN: --linker-path=/usr/bin/ld %t-on.o %t-off.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=AMD-TARGET-ID // AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ 
-O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o // AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack- -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o @@ -183,7 +183,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t2.o -fembed-offload-object=%t2.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld -- %t1.o %t2.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=ARCH-ALL +// RUN: --linker-path=/usr/bin/ld %t1.o %t2.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=ARCH-ALL // ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o // ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o @@ -193,7 +193,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld.lld -- -r %t.o \ +// RUN: --linker-path=/usr/bin/ld.lld -r %t.o \ // RUN: %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=RELOCATABLE-LINK // RELOCATABLE-LINK: clang{{.*}} -o {{.*}}.img --target=x86_64-unknown-linux-gnu @@ -205,7 +205,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --image=file=%t.elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx90a // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld.lld -- -r %t.o \ +// RUN: --linker-path=/usr/bin/ld.lld -r %t.o \ // RUN: 
%t.o -o a.out 2>&1 | FileCheck %s --check-prefix=RELOCATABLE-LINK-HIP // RELOCATABLE-LINK-HIP: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa @@ -218,7 +218,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --image=file=%t.elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_89 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld.lld -- -r %t.o \ +// RUN: --linker-path=/usr/bin/ld.lld -r %t.o \ // RUN: %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=RELOCATABLE-LINK-CUDA // RELOCATABLE-LINK-CUDA: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda diff --git a/clang/test/Driver/openmp-offload-gpu.c b/clang/test/Driver/openmp-offload-gpu.c index f7b06c9ec59580..d705be44e595d8 100644 --- a/clang/test/Driver/openmp-offload-gpu.c +++ b/clang/test/Driver/openmp-offload-gpu.c @@ -233,7 +233,7 @@ // CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c" // CHECK: "-cc1" "-triple" "nvptx64-nvidia-cuda" "-aux-triple" "x86_64-unknown-linux-gnu"{{.*}}"-target-cpu" "sm_52" // CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-obj" -// CHECK: clang-linker-wrapper{{.*}}"--"{{.*}} "-o" "a.out" +// CHECK: clang-linker-wrapper{{.*}} "-o" "a.out" // RUN: %clang -ccc-print-phases --target=x86_64-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-PHASES %s diff --git a/clang/test/Driver/openmp-offload-infer.c b/clang/test/Driver/openmp-offload-infer.c index 9a949f52e2e97d..50333293eb7dbd 100644 --- a/clang/test/Driver/openmp-offload-infer.c +++ b/clang/test/Driver/openmp-offload-infer.c @@ -13,7 +13,7 @@ // CHECK: "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"{{.*}}"-target-cpu" "gfx803" // CHECK: "-cc1" 
"-triple" "nvptx64-nvidia-cuda" "-aux-triple" "x86_64-unknown-linux-gnu"{{.*}}"-target-cpu" "sm_52" // CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-obj" -// CHECK: clang-linker-wrapper{{.*}}"--"{{.*}} "-o" "a.out" +// CHECK: clang-linker-wrapper{{.*}} "-o" "a.out" // RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp \ // RUN: --offload-arch=sm_70 --offload-arch=gfx908:sramecc+:xnack- \ diff --git a/clang/test/InstallAPI/functions.test b/clang/test/InstallAPI/functions.test new file mode 100644 index 00000000000000..527965303cb351 --- /dev/null +++ b/clang/test/InstallAPI/functions.test @@ -0,0 +1,78 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: sed -e "s|DSTROOT|%/t|g" %t/inputs.json.in > %t/inputs.json + +// RUN: clang-installapi -target arm64-apple-macos13.1 \ +// RUN: -I%t/usr/include -I%t/usr/local/include \ +// RUN: -install_name @rpath/lib/libfunctions.dylib \ +// RUN: %t/inputs.json -o %t/outputs.tbd 2>&1 | FileCheck %s --allow-empty +// RUN: llvm-readtapi -compare %t/outputs.tbd %t/expected.tbd 2>&1 | FileCheck %s --allow-empty + +// CHECK-NOT: error: +// CHECK-NOT: warning: + +//--- usr/include/functions.h +inline int inlined_func(void) { return 1;} +int public(int a); + +//--- usr/local/include/private_functions.h +__attribute__((visibility("hidden"))) +void hidden(void); + +//--- inputs.json.in +{ + "headers": [ { + "path" : "DSTROOT/usr/include/functions.h", + "type" : "public" + }, + { + "path" : "DSTROOT/usr/local/include/private_functions.h", + "type" : "private" + } + ], + "version": "3" +} + +//--- expected.tbd +{ + "main_library": { + "compatibility_versions": [ + { + "version": "0" + } + ], + "current_versions": [ + { + "version": "0" + } + ], + "exported_symbols": [ + { + "text": { + "global": [ + "_public" + ] + } + } + ], + "flags": [ + { + "attributes": [ + "not_app_extension_safe" + ] + } + ], + "install_names": [ + { + "name": "@rpath/lib/libfunctions.dylib" + } + ], + 
"target_info": [ + { + "min_deployment": "13.1", + "target": "arm64-macos" + } + ] + }, + "tapi_tbd_version": 5 +} diff --git a/clang/test/Interpreter/execute-stmts.cpp b/clang/test/Interpreter/execute-stmts.cpp index 2d4c17e0c91e66..433c6811777dac 100644 --- a/clang/test/Interpreter/execute-stmts.cpp +++ b/clang/test/Interpreter/execute-stmts.cpp @@ -9,7 +9,6 @@ //CODEGEN-CHECK-COUNT-2: define internal void @__stmts__ //CODEGEN-CHECK-NOT: define internal void @__stmts__ - extern "C" int printf(const char*,...); template T call() { printf("called\n"); return T(); } @@ -41,3 +40,26 @@ for (; i > 4; --i) { printf("i = %d\n", i); }; int j = i; printf("j = %d\n", j); // CHECK-NEXT: j = 4 + +{i = 0; printf("i = %d (global scope)\n", i);} +// CHECK-NEXT: i = 0 + +while (int i = 1) { printf("i = %d (while condition)\n", i--); break; } +// CHECK-NEXT: i = 1 + +if (int i = 2) printf("i = %d (if condition)\n", i); +// CHECK-NEXT: i = 2 + +switch (int i = 3) { default: printf("i = %d (switch condition)\n", i); } +// CHECK-NEXT: i = 3 + +for (int i = 4; i > 3; --i) printf("i = %d (for-init)\n", i); +// CHECK-NEXT: i = 4 + +for (const auto &i : "5") printf("i = %c (range-based for-init)\n", i); +// CHECK-NEXT: i = 5 + +int *aa=nullptr; +if (auto *b=aa) *b += 1; +while (auto *b=aa) ; +for (auto *b=aa; b; *b+=1) ; diff --git a/clang/test/Lexer/cxx-features.cpp b/clang/test/Lexer/cxx-features.cpp index 2650a3a82252ba..9496746c6fd663 100644 --- a/clang/test/Lexer/cxx-features.cpp +++ b/clang/test/Lexer/cxx-features.cpp @@ -45,7 +45,7 @@ #endif -#if check(implicit_move, 0, 0, 0, 0, 0, 202011, 202011) +#if check(implicit_move, 0, 0, 0, 0, 0, 202207, 202207) #error "wrong value for __cpp_implicit_move" #endif diff --git a/clang/test/Modules/InheritDefaultArguments.cppm b/clang/test/Modules/InheritDefaultArguments.cppm index 0afb46319ff850..0ef6390204c4b9 100644 --- a/clang/test/Modules/InheritDefaultArguments.cppm +++ b/clang/test/Modules/InheritDefaultArguments.cppm @@ -5,6 +5,9 @@ 
// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-module-interface -o %t/A.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t -I%t %t/Use.cppm -verify -fsyntax-only +// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-reduced-module-interface -o %t/A.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t -I%t %t/Use.cppm -verify -fsyntax-only + //--- foo.h template class Templ; diff --git a/clang/test/Modules/Reachability-Private.cpp b/clang/test/Modules/Reachability-Private.cpp index 9a7c3ba231f179..3ce108dc5c5509 100644 --- a/clang/test/Modules/Reachability-Private.cpp +++ b/clang/test/Modules/Reachability-Private.cpp @@ -9,6 +9,16 @@ // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp \ // RUN: -DTEST_BADINLINE -verify -fsyntax-only +// Test again with reduced BMI. +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/Private.cppm -emit-reduced-module-interface \ +// RUN: -o %t/Private.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp \ +// RUN: -DTEST_BADINLINE -verify -fsyntax-only + //--- Private.cppm export module Private; #ifdef TEST_BADINLINE diff --git a/clang/test/Modules/Reachability-func-default-arg.cpp b/clang/test/Modules/Reachability-func-default-arg.cpp index 0d6d8655d53293..bc0cafdebb7a4e 100644 --- a/clang/test/Modules/Reachability-func-default-arg.cpp +++ b/clang/test/Modules/Reachability-func-default-arg.cpp @@ -4,6 +4,9 @@ // // RUN: %clang_cc1 -std=c++20 %t/func_default_arg.cppm -emit-module-interface -o %t/func_default_arg.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only + +// RUN: %clang_cc1 -std=c++20 %t/func_default_arg.cppm -emit-reduced-module-interface -o %t/func_default_arg.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only // //--- func_default_arg.cppm export module func_default_arg; diff --git a/clang/test/Modules/Reachability-func-ret.cpp 
b/clang/test/Modules/Reachability-func-ret.cpp index ca5bbc68d759f9..7d34387726f683 100644 --- a/clang/test/Modules/Reachability-func-ret.cpp +++ b/clang/test/Modules/Reachability-func-ret.cpp @@ -4,6 +4,9 @@ // // RUN: %clang_cc1 -std=c++20 %t/func_ret.cppm -emit-module-interface -o %t/func_ret.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only + +// RUN: %clang_cc1 -std=c++20 %t/func_ret.cppm -emit-reduced-module-interface -o %t/func_ret.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only // //--- func_ret.cppm export module func_ret; diff --git a/clang/test/Modules/Reachability-template-default-arg.cpp b/clang/test/Modules/Reachability-template-default-arg.cpp index 6fb109e41fcf0a..35c647d0d344ba 100644 --- a/clang/test/Modules/Reachability-template-default-arg.cpp +++ b/clang/test/Modules/Reachability-template-default-arg.cpp @@ -4,6 +4,9 @@ // // RUN: %clang_cc1 -std=c++20 %t/template_default_arg.cppm -emit-module-interface -o %t/template_default_arg.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only -verify + +// RUN: %clang_cc1 -std=c++20 %t/template_default_arg.cppm -emit-reduced-module-interface -o %t/template_default_arg.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only -verify // //--- template_default_arg.cppm export module template_default_arg; diff --git a/clang/test/Modules/Reachability-template-instantiation.cpp b/clang/test/Modules/Reachability-template-instantiation.cpp index 2170c7b92a370a..6f363ed00b6e36 100644 --- a/clang/test/Modules/Reachability-template-instantiation.cpp +++ b/clang/test/Modules/Reachability-template-instantiation.cpp @@ -5,6 +5,10 @@ // RUN: %clang_cc1 -std=c++20 %t/Templ.cppm -emit-module-interface -o %t/Templ.pcm // RUN: %clang_cc1 -std=c++20 %t/Use.cppm -fprebuilt-module-path=%t -emit-module-interface -o %t/Use.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t 
-I%t %t/Use.cpp -verify -fsyntax-only + +// RUN: %clang_cc1 -std=c++20 %t/Templ.cppm -emit-reduced-module-interface -o %t/Templ.pcm +// RUN: %clang_cc1 -std=c++20 %t/Use.cppm -fprebuilt-module-path=%t -emit-reduced-module-interface -o %t/Use.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t -I%t %t/Use.cpp -verify -fsyntax-only // //--- Templ.h #ifndef TEMPL_H diff --git a/clang/test/Modules/Reachability-using-templates.cpp b/clang/test/Modules/Reachability-using-templates.cpp index f530e15bd4d2ba..65601c1cfe4e2d 100644 --- a/clang/test/Modules/Reachability-using-templates.cpp +++ b/clang/test/Modules/Reachability-using-templates.cpp @@ -4,6 +4,9 @@ // // RUN: %clang_cc1 -std=c++20 %t/mod.templates.cppm -emit-module-interface -o %t/mod.templates.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only -verify + +// RUN: %clang_cc1 -std=c++20 %t/mod.templates.cppm -emit-reduced-module-interface -o %t/mod.templates.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only -verify // //--- mod.templates.cppm export module mod.templates; diff --git a/clang/test/Modules/Reachability-using.cpp b/clang/test/Modules/Reachability-using.cpp index 642b97dd8432c3..8301bfbedf8704 100644 --- a/clang/test/Modules/Reachability-using.cpp +++ b/clang/test/Modules/Reachability-using.cpp @@ -4,6 +4,9 @@ // // RUN: %clang_cc1 -std=c++20 %t/mod.cppm -emit-module-interface -o %t/mod.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only -verify + +// RUN: %clang_cc1 -std=c++20 %t/mod.cppm -emit-reduced-module-interface -o %t/mod.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only -verify // //--- mod.cppm export module mod; diff --git a/clang/test/Modules/concept.cppm b/clang/test/Modules/concept.cppm index 0fdb5ea8968085..4464cf7c0a416c 100644 --- a/clang/test/Modules/concept.cppm +++ b/clang/test/Modules/concept.cppm @@ -11,7 +11,6 @@ // RUN: %clang_cc1 
-std=c++20 -fskip-odr-check-in-gmf -fprebuilt-module-path=%t -I%t \ // RUN: -DDIFFERENT -DSKIP_ODR_CHECK_IN_GMF %t/B.cppm -verify - //--- foo.h #ifndef FOO_H #define FOO_H diff --git a/clang/test/Modules/concept_differ.cppm b/clang/test/Modules/concept_differ.cppm index ccb29d26e53d13..525ee2d4edcc8e 100644 --- a/clang/test/Modules/concept_differ.cppm +++ b/clang/test/Modules/concept_differ.cppm @@ -5,6 +5,11 @@ // RUN: %clang_cc1 -x c++ -std=c++20 %t/A.cppm -I%t -emit-module-interface -o %t/A.pcm // RUN: %clang_cc1 -x c++ -std=c++20 %t/B.cppm -I%t -emit-module-interface -o %t/B.pcm // RUN: %clang_cc1 -x c++ -std=c++20 -fprebuilt-module-path=%t %t/foo.cpp -verify +// +// RUN: rm %t/A.pcm %t/B.pcm +// RUN: %clang_cc1 -x c++ -std=c++20 %t/A.cppm -I%t -emit-reduced-module-interface -o %t/A.pcm +// RUN: %clang_cc1 -x c++ -std=c++20 %t/B.cppm -I%t -emit-reduced-module-interface -o %t/B.pcm +// RUN: %clang_cc1 -x c++ -std=c++20 -fprebuilt-module-path=%t %t/foo.cpp -verify //--- foo.h template diff --git a/clang/test/Modules/ctor.arg.dep.cppm b/clang/test/Modules/ctor.arg.dep.cppm index 0e5b1a694f6a5e..10924bfe0f1bdc 100644 --- a/clang/test/Modules/ctor.arg.dep.cppm +++ b/clang/test/Modules/ctor.arg.dep.cppm @@ -5,6 +5,10 @@ // RUN: %clang_cc1 -std=c++20 %t/A.cppm -I%t -emit-module-interface -o %t/A.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only // +// RUN: rm %t/A.pcm +// RUN: %clang_cc1 -std=c++20 %t/A.cppm -I%t -emit-reduced-module-interface -o %t/A.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only +// //--- foo.h namespace ns { diff --git a/clang/test/Modules/cxx20-10-1-ex1.cpp b/clang/test/Modules/cxx20-10-1-ex1.cpp index b330e0a6c9a9d8..4445b19ea86cf4 100644 --- a/clang/test/Modules/cxx20-10-1-ex1.cpp +++ b/clang/test/Modules/cxx20-10-1-ex1.cpp @@ -19,6 +19,22 @@ // RUN: -fmodule-file=A=%t/A.pcm -fmodule-file=A:Foo=%t/A_Foo.pcm \ // RUN: 
-fmodule-file=A:Internals=%t/A_Internals.pcm -o %t/ex1.o +// RUN: rm %t/A_Internals.pcm %t/A_Foo.pcm %t/A.pcm +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std10-1-ex1-tu1.cpp \ +// RUN: -o %t/A_Internals.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std10-1-ex1-tu2.cpp \ +// RUN: -fmodule-file=A:Internals=%t/A_Internals.pcm -o %t/A_Foo.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std10-1-ex1-tu3.cpp \ +// RUN: -fmodule-file=A:Internals=%t/A_Internals.pcm \ +// RUN: -fmodule-file=A:Foo=%t/A_Foo.pcm -o %t/A.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-obj %t/std10-1-ex1-tu4.cpp \ +// RUN: -fmodule-file=A:Internals=%t/A_Internals.pcm \ +// RUN: -fmodule-file=A:Foo=%t/A_Foo.pcm \ +// RUN: -fmodule-file=A=%t/A.pcm -o %t/ex1.o + // expected-no-diagnostics //--- std10-1-ex1-tu1.cpp diff --git a/clang/test/Modules/cxx20-10-1-ex2.cpp b/clang/test/Modules/cxx20-10-1-ex2.cpp index 8b908d5fa2eda6..fc61d89926d448 100644 --- a/clang/test/Modules/cxx20-10-1-ex2.cpp +++ b/clang/test/Modules/cxx20-10-1-ex2.cpp @@ -5,26 +5,50 @@ // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/std10-1-ex2-tu1.cpp \ // RUN: -o %t/B_Y.pcm - +// // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/std10-1-ex2-tu2.cpp \ // RUN: -fmodule-file=B:Y=%t/B_Y.pcm -o %t/B.pcm - +// // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/std10-1-ex2-tu3.cpp \ // RUN: -o %t/B_X1.pcm -verify - +// // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/std10-1-ex2-tu4.cpp \ // RUN: -fmodule-file=B=%t/B.pcm -fmodule-file=B:Y=%t/B_Y.pcm -o %t/B_X2.pcm - +// // RUN: %clang_cc1 -std=c++20 -emit-obj %t/std10-1-ex2-tu5.cpp \ // RUN: -fmodule-file=B=%t/B.pcm -fmodule-file=B:Y=%t/B_Y.pcm -o %t/b_tu5.o - +// // RUN: %clang_cc1 -std=c++20 -S %t/std10-1-ex2-tu6.cpp \ // RUN: -fmodule-file=B=%t/B.pcm -fmodule-file=B:Y=%t/B_Y.pcm -o %t/b_tu6.s -verify - +// // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/std10-1-ex2-tu7.cpp \ // RUN: 
-fmodule-file=B:X2=%t/B_X2.pcm -fmodule-file=B=%t/B.pcm \ // RUN: -fmodule-file=B:Y=%t/B_Y.pcm -o %t/B_X3.pcm -verify +// Test again with reduced BMI. +// RUN: rm %t/B_X2.pcm %t/B.pcm %t/B_Y.pcm +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std10-1-ex2-tu1.cpp \ +// RUN: -o %t/B_Y.pcm +// +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std10-1-ex2-tu2.cpp \ +// RUN: -fmodule-file=B:Y=%t/B_Y.pcm -o %t/B.pcm +// +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std10-1-ex2-tu3.cpp \ +// RUN: -o %t/B_X1.pcm -verify +// +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std10-1-ex2-tu4.cpp \ +// RUN: -fmodule-file=B=%t/B.pcm -fmodule-file=B:Y=%t/B_Y.pcm -o %t/B_X2.pcm +// +// RUN: %clang_cc1 -std=c++20 -emit-obj %t/std10-1-ex2-tu5.cpp \ +// RUN: -fmodule-file=B=%t/B.pcm -fmodule-file=B:Y=%t/B_Y.pcm -o %t/b_tu5.o +// +// RUN: %clang_cc1 -std=c++20 -S %t/std10-1-ex2-tu6.cpp \ +// RUN: -fmodule-file=B=%t/B.pcm -fmodule-file=B:Y=%t/B_Y.pcm -o %t/b_tu6.s -verify +// +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std10-1-ex2-tu7.cpp \ +// RUN: -fmodule-file=B:X2=%t/B_X2.pcm -fmodule-file=B=%t/B.pcm \ +// RUN: -fmodule-file=B:Y=%t/B_Y.pcm -o %t/B_X3.pcm -verify + //--- std10-1-ex2-tu1.cpp module B:Y; int y(); diff --git a/clang/test/Modules/cxx20-10-2-ex2.cpp b/clang/test/Modules/cxx20-10-2-ex2.cpp index bc66d6a2ec1a92..b48d96478b9a65 100644 --- a/clang/test/Modules/cxx20-10-2-ex2.cpp +++ b/clang/test/Modules/cxx20-10-2-ex2.cpp @@ -14,6 +14,18 @@ // RUN: -fmodule-file=%t/std-10-2-ex2-c.pcm -fmodule-file=X=%t/X.pcm \ // RUN: -pedantic-errors -verify -o %t/M.pcm +// Test again with reduced BMI. 
+// RUN: %clang_cc1 -std=c++20 -emit-header-unit -I %t \ +// RUN: -xc++-user-header std-10-2-ex2-c.h -o %t/std-10-2-ex2-c.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std-10-2-ex2-tu1.cpp \ +// RUN: -o %t/X.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std-10-2-ex2-tu2.cpp \ +// RUN: -fmodule-file=%t/std-10-2-ex2-c.pcm -fmodule-file=X=%t/X.pcm \ +// RUN: -pedantic-errors -verify -o %t/M.pcm + + //--- std-10-2-ex2-b.h int f(); diff --git a/clang/test/Modules/cxx20-10-2-ex5.cpp b/clang/test/Modules/cxx20-10-2-ex5.cpp index 49c5934c8f2172..f222568072393f 100644 --- a/clang/test/Modules/cxx20-10-2-ex5.cpp +++ b/clang/test/Modules/cxx20-10-2-ex5.cpp @@ -13,6 +13,18 @@ // RUN: %clang_cc1 -std=c++20 -emit-obj %t/std-10-2-ex5-tu3.cpp \ // RUN: -fmodule-file=M=%t/M.pcm -verify -o %t/main.o +// Test again with reduced BMI. +// RUN: rm %t/M.pcm +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std-10-2-ex5-tu1.cpp \ +// RUN: -o %t/M.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-obj %t/std-10-2-ex5-tu2.cpp \ +// RUN: -fmodule-file=M=%t/M.pcm -o %t/tu-2.o + +// RUN: %clang_cc1 -std=c++20 -emit-obj %t/std-10-2-ex5-tu3.cpp \ +// RUN: -fmodule-file=M=%t/M.pcm -verify -o %t/main.o + + //--- std-10-2-ex5-tu1.cpp export module M; export struct X { diff --git a/clang/test/Modules/cxx20-10-3-ex1.cpp b/clang/test/Modules/cxx20-10-3-ex1.cpp index 5d6e2554f753b0..99b88c7e442ffd 100644 --- a/clang/test/Modules/cxx20-10-3-ex1.cpp +++ b/clang/test/Modules/cxx20-10-3-ex1.cpp @@ -14,6 +14,20 @@ // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/std10-3-ex1-tu4.cpp \ // RUN: -fmodule-file=M:Part=%t/M_Part.pcm -o %t/M.pcm +// Test again with reduced BMI. 
+// RUN: rm %t/M_PartImpl.pcm %t/M.pcm %t/M_Part.pcm +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std10-3-ex1-tu1.cpp \ +// RUN: -o %t/M_PartImpl.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std10-3-ex1-tu2.cpp \ +// RUN: -fmodule-file=M:PartImpl=%t/M_PartImpl.pcm -o %t/M.pcm -verify + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std10-3-ex1-tu3.cpp \ +// RUN: -o %t/M_Part.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std10-3-ex1-tu4.cpp \ +// RUN: -fmodule-file=M:Part=%t/M_Part.pcm -o %t/M.pcm + //--- std10-3-ex1-tu1.cpp module M:PartImpl; diff --git a/clang/test/Modules/cxx20-10-3-ex2.cpp b/clang/test/Modules/cxx20-10-3-ex2.cpp index b1d6d669c0a0e6..40566c00f578c2 100644 --- a/clang/test/Modules/cxx20-10-3-ex2.cpp +++ b/clang/test/Modules/cxx20-10-3-ex2.cpp @@ -11,6 +11,16 @@ // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/std10-3-ex2-tu3.cpp \ // RUN: -o %t/M.pcm -verify +// Test again with reduced BMI. +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std10-3-ex2-tu1.cpp \ +// RUN: -o %t/M.pcm + +// RUN: %clang_cc1 -std=c++20 -S %t/std10-3-ex2-tu2.cpp \ +// RUN: -fmodule-file=M=%t/M.pcm -o %t/tu_8.s -verify + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/std10-3-ex2-tu3.cpp \ +// RUN: -o %t/M.pcm -verify + //--- std10-3-ex2-tu1.cpp export module M; diff --git a/clang/test/Modules/cxx20-10-5-ex1.cpp b/clang/test/Modules/cxx20-10-5-ex1.cpp index a83162c5c15017..0435b3a64c075d 100644 --- a/clang/test/Modules/cxx20-10-5-ex1.cpp +++ b/clang/test/Modules/cxx20-10-5-ex1.cpp @@ -11,6 +11,18 @@ // RUN: %clang_cc1 -std=c++20 std-10-5-ex1-use.cpp -fmodule-file=A=A.pcm \ // RUN: -fsyntax-only -verify +// Test again with reduced BMI. 
+// RUN: rm A.pcm +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface std-10-5-ex1-interface.cpp \ +// RUN: -DBAD_FWD_DECL -fsyntax-only -verify + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface std-10-5-ex1-interface.cpp \ +// RUN: -o A.pcm + +// RUN: %clang_cc1 -std=c++20 std-10-5-ex1-use.cpp -fmodule-file=A=A.pcm \ +// RUN: -fsyntax-only -verify + + //--- std-10-5-ex1-interface.cpp export module A; diff --git a/clang/test/Modules/cxx20-import-diagnostics-a.cpp b/clang/test/Modules/cxx20-import-diagnostics-a.cpp index a5cf44ed82d5ff..1b38259e0358c0 100644 --- a/clang/test/Modules/cxx20-import-diagnostics-a.cpp +++ b/clang/test/Modules/cxx20-import-diagnostics-a.cpp @@ -36,6 +36,45 @@ // RUN: %clang_cc1 -std=c++20 -emit-obj %t/import-diags-tu11.cpp \ // RUN: -fmodule-file=C=%t/C.pcm -o %t/impl.o +// Test again with reduced BMI. +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/import-diags-tu1.cpp \ +// RUN: -o %t/B.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/import-diags-tu2.cpp \ +// RUN: -o %t/C.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/import-diags-tu3.cpp \ +// RUN: -fmodule-file=B=%t/B.pcm -fmodule-file=C=%t/C.pcm -o %t/AOK1.pcm + +// RUN: %clang_cc1 -std=c++20 -S %t/import-diags-tu4.cpp \ +// RUN: -fmodule-file=AOK1=%t/AOK1.pcm -fmodule-file=B=%t/B.pcm \ +// RUN: -fmodule-file=C=%t/C.pcm -o %t/tu_3.s -verify + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/import-diags-tu5.cpp \ +// RUN: -fmodule-file=B=%t/B.pcm -fmodule-file=C=%t/C.pcm -o %t/BC.pcm -verify + +// RUN: %clang_cc1 -std=c++20 -S %t/import-diags-tu6.cpp \ +// RUN: -fmodule-file=B=%t/B.pcm -fmodule-file=C=%t/C.pcm -o %t/tu_5.s -verify + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/import-diags-tu7.cpp \ +// RUN: -fmodule-file=B=%t/B.pcm -o %t/D.pcm -verify + +// RUN: %clang_cc1 -std=c++20 
-emit-reduced-module-interface %t/import-diags-tu8.cpp \ +// RUN: -fmodule-file=B=%t/B.pcm -o %t/D.pcm -verify + +// RUN: %clang_cc1 -std=c++20 -S %t/import-diags-tu9.cpp \ +// RUN: -fmodule-file=B=%t/B.pcm -o %t/tu_8.s -verify + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/import-diags-tu10.cpp \ +// RUN: -o %t/B.pcm -verify + +// RUN: %clang_cc1 -std=c++20 -emit-obj %t/import-diags-tu11.cpp \ +// RUN: -fmodule-file=C=%t/C.pcm -o %t/impl.o + // Test diagnostics for incorrect module import sequences. //--- import-diags-tu1.cpp diff --git a/clang/test/Modules/cxx20-import-diagnostics-b.cpp b/clang/test/Modules/cxx20-import-diagnostics-b.cpp index 7d432633552a25..db522d7babd3ae 100644 --- a/clang/test/Modules/cxx20-import-diagnostics-b.cpp +++ b/clang/test/Modules/cxx20-import-diagnostics-b.cpp @@ -22,6 +22,31 @@ // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/g.cpp \ // RUN: -fmodule-file=a=%t/a.pcm -o %t/g.pcm -verify +// Test again with reduced BMI. +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/a.cpp -o %t/a.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/c.cpp \ +// RUN: -fmodule-file=a=%t/a.pcm -o %t/c.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/d.cpp \ +// RUN: -fmodule-file=a=%t/a.pcm -o %t/d.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/e.cpp \ +// RUN: -fmodule-file=a=%t/a.pcm -o %t/e.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/a-part.cpp \ +// RUN: -o %t/a-part.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/f.cpp \ +// RUN: -fmodule-file=a=%t/a.pcm -o %t/f.pcm -verify + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/g.cpp \ +// RUN: -fmodule-file=a=%t/a.pcm -o %t/g.pcm -verify + //--- a.cpp export module a; diff --git a/clang/test/Modules/cxx20-module-file-info-macros.cpp 
b/clang/test/Modules/cxx20-module-file-info-macros.cpp index bc7df1c9f50b59..3b67e9b9acd410 100644 --- a/clang/test/Modules/cxx20-module-file-info-macros.cpp +++ b/clang/test/Modules/cxx20-module-file-info-macros.cpp @@ -17,6 +17,9 @@ // RUN: %clang_cc1 -std=c++20 %t/named_module.cppm -emit-module-interface -o %t/M.pcm // RUN: %clang_cc1 -module-file-info %t/M.pcm | FileCheck %t/named_module.cppm +// RUN: %clang_cc1 -std=c++20 %t/named_module.cppm -emit-reduced-module-interface -o %t/M.pcm +// RUN: %clang_cc1 -module-file-info %t/M.pcm | FileCheck %t/named_module.cppm + //--- foo.h #pragma once #define FOO diff --git a/clang/test/Modules/deduction-guide.cppm b/clang/test/Modules/deduction-guide.cppm index 9c959a71365dac..02ac2c0053cff5 100644 --- a/clang/test/Modules/deduction-guide.cppm +++ b/clang/test/Modules/deduction-guide.cppm @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -std=c++20 %t/Templ.cppm -emit-module-interface -o %t/Templ.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only +// RUN: %clang_cc1 -std=c++20 %t/Templ.cppm -emit-reduced-module-interface -o %t/Templ.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only + //--- foo.h template class Templ { diff --git a/clang/test/Modules/deduction-guide2.cppm b/clang/test/Modules/deduction-guide2.cppm index a163c365683101..889670b973f0d3 100644 --- a/clang/test/Modules/deduction-guide2.cppm +++ b/clang/test/Modules/deduction-guide2.cppm @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -std=c++20 %t/Templ.cppm -emit-module-interface -o %t/Templ.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only +// RUN: %clang_cc1 -std=c++20 %t/Templ.cppm -emit-reduced-module-interface -o %t/Templ.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only + //--- Templ.cppm export module Templ; export template diff --git a/clang/test/Modules/deduction-guide3.cppm 
b/clang/test/Modules/deduction-guide3.cppm index 8fa08a0625d7c8..1165dd40bcfb8c 100644 --- a/clang/test/Modules/deduction-guide3.cppm +++ b/clang/test/Modules/deduction-guide3.cppm @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -std=c++20 %t/Templ.cppm -emit-module-interface -o %t/Templ.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only +// RUN: %clang_cc1 -std=c++20 %t/Templ.cppm -emit-reduced-module-interface -o %t/Templ.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only + //--- Templ.cppm export module Templ; template diff --git a/clang/test/Modules/derived_class.cpp b/clang/test/Modules/derived_class.cpp index ee9e0ae4637ec7..e0c5a652eba4ea 100644 --- a/clang/test/Modules/derived_class.cpp +++ b/clang/test/Modules/derived_class.cpp @@ -4,6 +4,9 @@ // // RUN: %clang_cc1 -std=c++20 %t/foo.cppm -emit-module-interface -o %t/foo.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only -verify + +// RUN: %clang_cc1 -std=c++20 %t/foo.cppm -emit-reduced-module-interface -o %t/foo.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only -verify // //--- bar.h struct bar_base { diff --git a/clang/test/Modules/duplicated-module-file-eq-module-name.cppm b/clang/test/Modules/duplicated-module-file-eq-module-name.cppm index e86dbe2b941ef8..57ffb560ab540a 100644 --- a/clang/test/Modules/duplicated-module-file-eq-module-name.cppm +++ b/clang/test/Modules/duplicated-module-file-eq-module-name.cppm @@ -8,6 +8,10 @@ // RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-module-interface -o %t/a.pcm // RUN: %clang_cc1 -std=c++20 %t/u.cpp -fmodule-file=a=%t/unexist.pcm \ // RUN: -fmodule-file=a=%t/a.pcm -verify -fsyntax-only +// +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/u.cpp -fmodule-file=a=%t/unexist.pcm \ +// RUN: -fmodule-file=a=%t/a.pcm -verify -fsyntax-only //--- a.cppm export 
module a; diff --git a/clang/test/Modules/enum-class.cppm b/clang/test/Modules/enum-class.cppm index 01ae8c0d8814da..992eb9d5e55100 100644 --- a/clang/test/Modules/enum-class.cppm +++ b/clang/test/Modules/enum-class.cppm @@ -6,6 +6,9 @@ // // RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-module-interface -o %t/A.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only +// +// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-reduced-module-interface -o %t/A.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only //--- foo.h enum class foo { diff --git a/clang/test/Modules/explicitly-specialized-template.cpp b/clang/test/Modules/explicitly-specialized-template.cpp index 89677254ea739a..2450bbe31bd9b7 100644 --- a/clang/test/Modules/explicitly-specialized-template.cpp +++ b/clang/test/Modules/explicitly-specialized-template.cpp @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -std=c++20 %t/X.cppm -emit-module-interface -o %t/X.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only -verify // +// RUN: %clang_cc1 -std=c++20 %t/X.cppm -emit-reduced-module-interface -o %t/X.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only -verify +// //--- foo.h #ifndef FOO_H #define FOO_H diff --git a/clang/test/Modules/export-language-linkage.cppm b/clang/test/Modules/export-language-linkage.cppm index 462b28d36cb44b..f389d9604ef3a8 100644 --- a/clang/test/Modules/export-language-linkage.cppm +++ b/clang/test/Modules/export-language-linkage.cppm @@ -8,6 +8,11 @@ // RUN: %clang_cc1 -std=c++20 %t/c.cppm -emit-module-interface -o %t/c.pcm // RUN: %clang_cc1 -std=c++20 %t/d.cpp -fsyntax-only -verify -fmodule-file=c=%t/c.pcm +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cpp -fmodule-file=a=%t/a.pcm -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/c.cppm -fsyntax-only -verify +// RUN: %clang_cc1 
-module-file-info %t/a.pcm | FileCheck %t/a.cppm + //--- a.cppm export module a; export extern "C++" int foo() { return 43; } @@ -43,6 +48,7 @@ int use() { } //--- c.cppm +// expected-no-diagnostics export module c; extern "C++" { export int f(); @@ -59,5 +65,5 @@ int use() { int use_of_nonexported() { return h(); // expected-error {{declaration of 'h' must be imported from module 'c' before it is required}} - // expected-note@c.cppm:4 {{declaration here is not visible}} + // expected-note@c.cppm:5 {{declaration here is not visible}} } diff --git a/clang/test/Modules/ftime-trace.cppm b/clang/test/Modules/ftime-trace.cppm index 48cd4113ec7826..8882e85be15156 100644 --- a/clang/test/Modules/ftime-trace.cppm +++ b/clang/test/Modules/ftime-trace.cppm @@ -9,5 +9,14 @@ // RUN: %clang_cc1 -std=c++20 %t/a.pcm -ftime-trace=%t/a.json -o - // RUN: ls %t | grep "a.json" +// Test again with reduced BMI. +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/a.pcm -ftime-trace=%t/a.json -o - +// RUN: ls %t | grep "a.json" + //--- a.cppm export module a; diff --git a/clang/test/Modules/inconsistent-deduction-guide-linkage.cppm b/clang/test/Modules/inconsistent-deduction-guide-linkage.cppm index abcbec07f97de0..3991e47ce21513 100644 --- a/clang/test/Modules/inconsistent-deduction-guide-linkage.cppm +++ b/clang/test/Modules/inconsistent-deduction-guide-linkage.cppm @@ -8,6 +8,12 @@ // RUN: %clang_cc1 -std=c++20 %t/D.cppm -I%t -emit-module-interface -o %t/D.pcm // RUN: %clang_cc1 -std=c++20 -fsyntax-only %t/D-part.cppm -I%t -fprebuilt-module-path=%t -verify +// RUN: %clang_cc1 -std=c++20 %t/B.cppm -I%t -emit-reduced-module-interface -o %t/B.pcm +// RUN: %clang_cc1 -std=c++20 -fsyntax-only %t/A.cppm -I%t -fprebuilt-module-path=%t -verify +// +// RUN: %clang_cc1 -std=c++20 %t/D.cppm -I%t -emit-reduced-module-interface -o %t/D.pcm +// RUN: %clang_cc1 
-std=c++20 -fsyntax-only %t/D-part.cppm -I%t -fprebuilt-module-path=%t -verify + //--- A.cppm module; export module baz:A; diff --git a/clang/test/Modules/inconsistent-export.cppm b/clang/test/Modules/inconsistent-export.cppm index 5e94d2b37b7578..0c74ba9037702a 100644 --- a/clang/test/Modules/inconsistent-export.cppm +++ b/clang/test/Modules/inconsistent-export.cppm @@ -9,6 +9,19 @@ // RUN: -fprebuilt-module-path=%t // RUN: %clang_cc1 -std=c++20 %t/use.cppm -fprebuilt-module-path=%t -emit-obj +// Test again with reduced BMI. +// RUN: rm -fr %t +// RUN: mkdir %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-reduced-module-interface -o %t/m-a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-reduced-module-interface -o %t/m-b.pcm \ +// RUN: -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 %t/m.cppm -emit-reduced-module-interface -o %t/m.pcm \ +// RUN: -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 %t/use.cppm -fprebuilt-module-path=%t -emit-obj + + //--- a.cppm export module m:a; namespace n { diff --git a/clang/test/Modules/inherited_arg.cppm b/clang/test/Modules/inherited_arg.cppm index eb66b70cdce336..a9b6efabb1e6f7 100644 --- a/clang/test/Modules/inherited_arg.cppm +++ b/clang/test/Modules/inherited_arg.cppm @@ -7,6 +7,14 @@ // RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-module-interface -fprebuilt-module-path=%t -o %t/A.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only +// Test again with reduced BMI. 
+// +// RUN: %clang_cc1 -std=c++20 %t/A-B.cppm -I%t -emit-reduced-module-interface -o %t/A-B.pcm +// RUN: %clang_cc1 -std=c++20 %t/A-C.cppm -I%t -emit-reduced-module-interface -o %t/A-C.pcm +// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-reduced-module-interface -fprebuilt-module-path=%t -o %t/A.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only + + //--- foo.h template class pair {}; diff --git a/clang/test/Modules/instantiation-argdep-lookup.cppm b/clang/test/Modules/instantiation-argdep-lookup.cppm index fc9009a5bc13d5..62dabfb6efddcf 100644 --- a/clang/test/Modules/instantiation-argdep-lookup.cppm +++ b/clang/test/Modules/instantiation-argdep-lookup.cppm @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -std=c++20 %t/A.cppm -I%t -emit-module-interface -o %t/A.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only // +// RUN: %clang_cc1 -std=c++20 %t/A.cppm -I%t -emit-reduced-module-interface -o %t/A.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only +// //--- foo.h namespace ns { diff --git a/clang/test/Modules/lambdas.cppm b/clang/test/Modules/lambdas.cppm index 7f00cf6f8682ac..be614b0519161a 100644 --- a/clang/test/Modules/lambdas.cppm +++ b/clang/test/Modules/lambdas.cppm @@ -11,6 +11,21 @@ // RUN: -o %t/lambdas2.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only \ // RUN: -verify -DUSE_LAMBDA2 +// +// Test again with reduced BMI. 
+// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/lambdas.cppm -emit-reduced-module-interface \ +// RUN: -o %t/lambdas.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only \ +// RUN: -verify +// +// RUN: %clang_cc1 -std=c++20 %t/lambdas2.cppm -emit-reduced-module-interface \ +// RUN: -o %t/lambdas2.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only \ +// RUN: -verify -DUSE_LAMBDA2 //--- lambdas.h auto l1 = []() constexpr -> int { diff --git a/clang/test/Modules/merge-concepts-cxx-modules.cpp b/clang/test/Modules/merge-concepts-cxx-modules.cpp index 3d4f8435531a88..0127e8baad6b94 100644 --- a/clang/test/Modules/merge-concepts-cxx-modules.cpp +++ b/clang/test/Modules/merge-concepts-cxx-modules.cpp @@ -8,6 +8,18 @@ // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/conflicting.cppm -o %t/conflicting.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cppm -fsyntax-only -verify +// Test again with reduced BMI. 
+// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/same_as.cppm -o %t/same_as.pcm +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface -fprebuilt-module-path=%t %t/concepts.cppm -o %t/concepts.pcm +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface -fprebuilt-module-path=%t %t/format.cppm -o %t/format.pcm +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/conflicting.cppm -o %t/conflicting.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cppm -fsyntax-only -verify + + //--- same_as.cppm export module same_as; export template diff --git a/clang/test/Modules/merge-constrained-friends.cpp b/clang/test/Modules/merge-constrained-friends.cpp index 8f0e9ed83cf296..d0317b99801e97 100644 --- a/clang/test/Modules/merge-constrained-friends.cpp +++ b/clang/test/Modules/merge-constrained-friends.cpp @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -std=c++23 %t/A.cppm -emit-module-interface -o %t/A.pcm // RUN: %clang_cc1 -std=c++23 %t/Use.cpp -fprebuilt-module-path=%t -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++23 %t/A.cppm -emit-reduced-module-interface -o %t/A.pcm +// RUN: %clang_cc1 -std=c++23 %t/Use.cpp -fprebuilt-module-path=%t -fsyntax-only -verify + //--- A.cppm module; export module A; diff --git a/clang/test/Modules/merge-lambdas.cppm b/clang/test/Modules/merge-lambdas.cppm index a1d04ab4e234dd..4363e452c2bcd3 100644 --- a/clang/test/Modules/merge-lambdas.cppm +++ b/clang/test/Modules/merge-lambdas.cppm @@ -6,6 +6,10 @@ // RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-module-interface -o %t/B.pcm // RUN: %clang_cc1 -std=c++20 %t/use.cppm -fprebuilt-module-path=%t -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-reduced-module-interface -o %t/A.pcm +// RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-reduced-module-interface -o %t/B.pcm +// RUN: %clang_cc1 -std=c++20 %t/use.cppm -fprebuilt-module-path=%t -fsyntax-only -verify + //--- 
lambda.h inline auto cmp = [](auto l, auto r) { return l < r; diff --git a/clang/test/Modules/merge-requires-with-lambdas.cppm b/clang/test/Modules/merge-requires-with-lambdas.cppm index 5767492047684b..c4d6e0539f41ea 100644 --- a/clang/test/Modules/merge-requires-with-lambdas.cppm +++ b/clang/test/Modules/merge-requires-with-lambdas.cppm @@ -17,6 +17,25 @@ // RUN: %clang_cc1 -std=c++20 %t/A3.cppm -emit-module-interface -o %t/A3.pcm // RUN: %clang_cc1 -std=c++20 %t/TestA3.cpp -fprebuilt-module-path=%t -fsyntax-only -verify +// Test again with reduced BMI. +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-reduced-module-interface -o %t/A.pcm +// RUN: %clang_cc1 -std=c++20 %t/A0.cppm -emit-reduced-module-interface -o %t/A0.pcm +// RUN: %clang_cc1 -std=c++20 %t/TestA.cpp -fprebuilt-module-path=%t -fsyntax-only -verify +// +// RUN: %clang_cc1 -std=c++20 %t/A1.cppm -emit-reduced-module-interface -o %t/A1.pcm +// RUN: %clang_cc1 -std=c++20 %t/TestA1.cpp -fprebuilt-module-path=%t -fsyntax-only -verify +// +// RUN: %clang_cc1 -std=c++20 %t/A2.cppm -emit-reduced-module-interface -o %t/A2.pcm +// RUN: %clang_cc1 -std=c++20 %t/TestA2.cpp -fprebuilt-module-path=%t -fsyntax-only -verify +// +// RUN: %clang_cc1 -std=c++20 %t/A3.cppm -emit-reduced-module-interface -o %t/A3.pcm +// RUN: %clang_cc1 -std=c++20 %t/TestA3.cpp -fprebuilt-module-path=%t -fsyntax-only -verify + + //--- A.h template concept A = requires(const _Tp& __t) { [](const __Up&) {}(__t); }; diff --git a/clang/test/Modules/merge-var-template-spec-cxx-modules.cppm b/clang/test/Modules/merge-var-template-spec-cxx-modules.cppm index a451bfe7804d33..db3f4cd5187169 100644 --- a/clang/test/Modules/merge-var-template-spec-cxx-modules.cppm +++ b/clang/test/Modules/merge-var-template-spec-cxx-modules.cppm @@ -7,6 +7,11 @@ // RUN: %clang_cc1 -std=c++20 -emit-module-interface -fprebuilt-module-path=%t %t/reexport2.cppm -o %t/reexport2.pcm // RUN: %clang_cc1 
-std=c++20 -fprebuilt-module-path=%t %t/use.cppm -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/var_def.cppm -o %t/var_def.pcm +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface -fprebuilt-module-path=%t %t/reexport1.cppm -o %t/reexport1.pcm +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface -fprebuilt-module-path=%t %t/reexport2.cppm -o %t/reexport2.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/use.cppm -fsyntax-only -verify + //--- use.cppm import reexport1; import reexport2; diff --git a/clang/test/Modules/mismatch-diagnostics.cpp b/clang/test/Modules/mismatch-diagnostics.cpp index f8ce987cfba572..5a026aa1f6c020 100644 --- a/clang/test/Modules/mismatch-diagnostics.cpp +++ b/clang/test/Modules/mismatch-diagnostics.cpp @@ -13,6 +13,17 @@ // RUN: -fprebuilt-module-path=%t/prebuilt_modules -DCHECK_MISMATCH \ // RUN: %t/use.cpp 2>&1 | FileCheck %s +// Test again with reduced BMI. +// RUN: %clang_cc1 -triple %itanium_abi_triple \ +// RUN: -std=c++20 -fprebuilt-module-path=%t/prebuilt-modules \ +// RUN: -emit-reduced-module-interface -pthread -DBUILD_MODULE \ +// RUN: %t/mismatching_module.cppm -o \ +// RUN: %t/prebuilt_modules/mismatching_module.pcm +// +// RUN: not %clang_cc1 -triple %itanium_abi_triple -std=c++20 \ +// RUN: -fprebuilt-module-path=%t/prebuilt_modules -DCHECK_MISMATCH \ +// RUN: %t/use.cpp 2>&1 | FileCheck %s + //--- mismatching_module.cppm export module mismatching_module; diff --git a/clang/test/Modules/module-init-duplicated-import.cppm b/clang/test/Modules/module-init-duplicated-import.cppm index 7adce11779566e..1326402bb4ded3 100644 --- a/clang/test/Modules/module-init-duplicated-import.cppm +++ b/clang/test/Modules/module-init-duplicated-import.cppm @@ -9,6 +9,17 @@ // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/m.pcm \ // RUN: -fmodule-file=a=%t/a.pcm -S -emit-llvm -o - | FileCheck %t/m.cppm +// Test again with reduced BMI. 
+// Note that we can't use reduced BMI here for m.cppm since it is required +// to generate the backend code. +// RUN: rm %t/a.pcm %t/m.pcm +// RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/a.cppm \ +// RUN: -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/m.cppm \ +// RUN: -emit-module-interface -fmodule-file=a=%t/a.pcm -o %t/m.pcm +// RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/m.pcm \ +// RUN: -fmodule-file=a=%t/a.pcm -S -emit-llvm -o - | FileCheck %t/m.cppm + //--- a.cppm export module a; export struct A { diff --git a/clang/test/Modules/named-modules-adl-2.cppm b/clang/test/Modules/named-modules-adl-2.cppm index 655acfcd93f69a..a14b9a68d74e41 100644 --- a/clang/test/Modules/named-modules-adl-2.cppm +++ b/clang/test/Modules/named-modules-adl-2.cppm @@ -6,6 +6,10 @@ // RUN: %clang_cc1 -std=c++20 %t/b.cppm -fmodule-file=a=%t/a.pcm -emit-module-interface -o %t/b.pcm // RUN: %clang_cc1 -std=c++20 %t/c.cppm -fmodule-file=a=%t/a.pcm -fmodule-file=b=%t/b.pcm -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -fmodule-file=a=%t/a.pcm -emit-reduced-module-interface -o %t/b.pcm +// RUN: %clang_cc1 -std=c++20 %t/c.cppm -fmodule-file=a=%t/a.pcm -fmodule-file=b=%t/b.pcm -fsyntax-only -verify + //--- a.cppm export module a; diff --git a/clang/test/Modules/named-modules-adl-3.cppm b/clang/test/Modules/named-modules-adl-3.cppm index 2fc2962c926b1b..d70946fa068b3a 100644 --- a/clang/test/Modules/named-modules-adl-3.cppm +++ b/clang/test/Modules/named-modules-adl-3.cppm @@ -14,6 +14,20 @@ // RUN: %clang_cc1 -std=c++20 -DEXPORT_OPERATOR %t/c.cppm -fmodule-file=a=%t/a.pcm \ // RUN: -fmodule-file=b=%t/b.pcm -fsyntax-only -verify +// Test again with reduced BMI. 
+// +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -fmodule-file=a=%t/a.pcm -emit-reduced-module-interface \ +// RUN: -o %t/b.pcm +// RUN: %clang_cc1 -std=c++20 %t/c.cppm -fmodule-file=a=%t/a.pcm -fmodule-file=b=%t/b.pcm \ +// RUN: -fsyntax-only -verify +// +// RUN: %clang_cc1 -std=c++20 -DEXPORT_OPERATOR %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 -DEXPORT_OPERATOR %t/b.cppm -fmodule-file=a=%t/a.pcm \ +// RUN: -emit-reduced-module-interface -o %t/b.pcm +// RUN: %clang_cc1 -std=c++20 -DEXPORT_OPERATOR %t/c.cppm -fmodule-file=a=%t/a.pcm \ +// RUN: -fmodule-file=b=%t/b.pcm -fsyntax-only -verify + //--- foo.h namespace n { diff --git a/clang/test/Modules/named-modules-adl.cppm b/clang/test/Modules/named-modules-adl.cppm index d5133ef367265a..ef250023f91e75 100644 --- a/clang/test/Modules/named-modules-adl.cppm +++ b/clang/test/Modules/named-modules-adl.cppm @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-module-interface -o %t/a.pcm // RUN: %clang_cc1 -std=c++20 %t/b.cppm -fmodule-file=a=%t/a.pcm -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -fmodule-file=a=%t/a.pcm -fsyntax-only -verify + //--- a.h namespace n { diff --git a/clang/test/Modules/no-duplicate-codegen-in-GMF.cppm b/clang/test/Modules/no-duplicate-codegen-in-GMF.cppm index a743b64cb18d6e..36a2d8bc8c95ce 100644 --- a/clang/test/Modules/no-duplicate-codegen-in-GMF.cppm +++ b/clang/test/Modules/no-duplicate-codegen-in-GMF.cppm @@ -10,6 +10,16 @@ // RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/B.pcm -S -emit-llvm -o - \ // RUN: -fprebuilt-module-path=%t | FileCheck %t/B.cppm +// Test again with reduced BMI. Note that we need to generate full BMI for B.cppm +// since it is required to generate backend codes. 
+// RUN: rm %t/A.pcm %t/B.pcm +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/A.cppm -emit-reduced-module-interface -o %t/A.pcm +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/B.cppm -emit-module-interface -o %t/B.pcm \ +// RUN: -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/B.pcm -S -emit-llvm -o - \ +// RUN: -fprebuilt-module-path=%t | FileCheck %t/B.cppm + + //--- foo.h template diff --git a/clang/test/Modules/pair-unambiguous-ctor.cppm b/clang/test/Modules/pair-unambiguous-ctor.cppm index eb242244260cbd..24fb15959577b0 100644 --- a/clang/test/Modules/pair-unambiguous-ctor.cppm +++ b/clang/test/Modules/pair-unambiguous-ctor.cppm @@ -10,6 +10,15 @@ // RUN: %clang_cc1 -std=c++20 %t/algorithm.cppm -I%t -emit-module-interface -o %t/std-algorithm.pcm // RUN: %clang_cc1 -std=c++20 %t/Use.cppm -I%t -fprebuilt-module-path=%t -emit-module-interface -verify -o %t/Use.pcm +// Test again with reduced BMI. +// RUN: rm -fr %t +// RUN: mkdir %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/string.cppm -I%t -emit-reduced-module-interface -o %t/std-string.pcm +// RUN: %clang_cc1 -std=c++20 %t/algorithm.cppm -I%t -emit-reduced-module-interface -o %t/std-algorithm.pcm +// RUN: %clang_cc1 -std=c++20 %t/Use.cppm -I%t -fprebuilt-module-path=%t -emit-reduced-module-interface -verify -o %t/Use.pcm + //--- Use.cppm // expected-no-diagnostics module; diff --git a/clang/test/Modules/partial_specialization.cppm b/clang/test/Modules/partial_specialization.cppm index 3a01857172112e..1d65a375643a28 100644 --- a/clang/test/Modules/partial_specialization.cppm +++ b/clang/test/Modules/partial_specialization.cppm @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/A.cppm -o %t/A.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only -verify // +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/A.cppm -o %t/A.pcm +// RUN: %clang_cc1 -std=c++20 
-fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only -verify +// //--- foo.h template inline constexpr bool IsSame = false; diff --git a/clang/test/Modules/placement-new-reachable.cpp b/clang/test/Modules/placement-new-reachable.cpp index 29263173d78f45..6b495a60306bc1 100644 --- a/clang/test/Modules/placement-new-reachable.cpp +++ b/clang/test/Modules/placement-new-reachable.cpp @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-module-interface -o %t/A.pcm // RUN: %clang_cc1 -std=c++20 %t/Use.cpp -fprebuilt-module-path=%t -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-reduced-module-interface -o %t/A.pcm +// RUN: %clang_cc1 -std=c++20 %t/Use.cpp -fprebuilt-module-path=%t -fsyntax-only -verify + //--- placement.h namespace std { using size_t = decltype(sizeof(0)); diff --git a/clang/test/Modules/polluted-operator.cppm b/clang/test/Modules/polluted-operator.cppm index 721ca061c939f4..2179fa098064ae 100644 --- a/clang/test/Modules/polluted-operator.cppm +++ b/clang/test/Modules/polluted-operator.cppm @@ -11,6 +11,9 @@ // RUN: %clang_cc1 -std=c++20 -fskip-odr-check-in-gmf %t/b.cppm -fprebuilt-module-path=%t \ // RUN: -emit-module-interface -DSKIP_ODR_CHECK_IN_GMF -o %t/b.pcm -verify +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/a.cppm -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -fprebuilt-module-path=%t -emit-reduced-module-interface -o %t/b.pcm -verify + //--- foo.h namespace std diff --git a/clang/test/Modules/pr54457.cppm b/clang/test/Modules/pr54457.cppm index ed67ec1065376e..d55bdfbf3b7582 100644 --- a/clang/test/Modules/pr54457.cppm +++ b/clang/test/Modules/pr54457.cppm @@ -9,6 +9,9 @@ // RUN: %clang_cc1 -std=c++20 %t/C.cppm -emit-module-interface -o %t/C.pcm // RUN: %clang_cc1 -std=c++20 %t/UseC.cppm -fprebuilt-module-path=%t -verify -S -o - +// RUN: %clang_cc1 -std=c++20 %t/C.cppm -emit-reduced-module-interface -o %t/C.pcm +// RUN: %clang_cc1 -std=c++20 %t/UseC.cppm -fprebuilt-module-path=%t -verify 
-S -o - + //--- A.cppm // expected-no-diagnostics export module A; diff --git a/clang/test/Modules/pr56916.cppm b/clang/test/Modules/pr56916.cppm index a435b06d5cf152..09cea6720427b3 100644 --- a/clang/test/Modules/pr56916.cppm +++ b/clang/test/Modules/pr56916.cppm @@ -8,6 +8,18 @@ // RUN: -fprebuilt-module-path=%t // RUN: %clang_cc1 -std=c++20 %t/Use.cpp -fsyntax-only -fprebuilt-module-path=%t -verify +// Test again with reduced BMI. +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-reduced-module-interface -o %t/M-A.pcm +// RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-reduced-module-interface -o %t/M-B.pcm +// RUN: %clang_cc1 -std=c++20 %t/M.cppm -emit-reduced-module-interface -o %t/M.pcm \ +// RUN: -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 %t/Use.cpp -fsyntax-only -fprebuilt-module-path=%t -verify + + //--- foo.h template class Templ { diff --git a/clang/test/Modules/pr58532.cppm b/clang/test/Modules/pr58532.cppm index cf530b4ac2ccce..35bebb41431e7b 100644 --- a/clang/test/Modules/pr58532.cppm +++ b/clang/test/Modules/pr58532.cppm @@ -7,6 +7,12 @@ // RUN: %clang_cc1 -std=c++20 %t/implementation.cpp -fmodule-file=m=%t/m.pcm \ // RUN: -fsyntax-only -verify +// Test again with reduced BMI. +// RUN: %clang_cc1 -std=c++20 %t/interface.cppm -emit-reduced-module-interface \ +// RUN: -o %t/m.pcm +// RUN: %clang_cc1 -std=c++20 %t/implementation.cpp -fmodule-file=m=%t/m.pcm \ +// RUN: -fsyntax-only -verify + //--- invisible.h #pragma once // This breaks things. 
const int kInvisibleSymbol = 0; diff --git a/clang/test/Modules/pr58716.cppm b/clang/test/Modules/pr58716.cppm index 3f97fca7d5e8a3..177802fe3afcb8 100644 --- a/clang/test/Modules/pr58716.cppm +++ b/clang/test/Modules/pr58716.cppm @@ -8,7 +8,7 @@ // // RUN: %clang_cc1 -triple=x86_64-linux-gnu -std=c++20 -emit-module-interface %t/m.cppm -o %t/m.pcm // RUN: %clang_cc1 -triple=x86_64-linux-gnu -std=c++20 %t/m.pcm -S -emit-llvm -o - | FileCheck %t/m.cppm -// + //--- m.cppm module; #include "fail.h" diff --git a/clang/test/Modules/pr59719.cppm b/clang/test/Modules/pr59719.cppm index 5aea8992a0ca85..5a600c8e36a4b6 100644 --- a/clang/test/Modules/pr59719.cppm +++ b/clang/test/Modules/pr59719.cppm @@ -7,6 +7,9 @@ // RUN: %clang_cc1 -std=c++20 %t/data.cppm -emit-module-interface -o %t/data.pcm // RUN: %clang_cc1 -std=c++20 %t/main.cpp -fprebuilt-module-path=%t -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/data.cppm -emit-reduced-module-interface -o %t/data.pcm +// RUN: %clang_cc1 -std=c++20 %t/main.cpp -fprebuilt-module-path=%t -fsyntax-only -verify + //--- foo.h namespace std { diff --git a/clang/test/Modules/pr59780.cppm b/clang/test/Modules/pr59780.cppm index d4bbd52c13f1a4..ee81ca575d7bf6 100644 --- a/clang/test/Modules/pr59780.cppm +++ b/clang/test/Modules/pr59780.cppm @@ -9,6 +9,16 @@ // RUN: -triple %itanium_abi_triple -emit-llvm -o - | FileCheck %t/use.cpp // RUN: %clang_cc1 -std=c++20 %t/a.pcm -triple %itanium_abi_triple -emit-llvm -o - | FileCheck %t/a.cppm +// Test again with reduced BMI. 
+// RUN: %clang_cc1 -std=c++20 %t/a.cppm -triple %itanium_abi_triple -emit-module-interface \ +// RUN: -o %t/a.full.pcm +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -triple %itanium_abi_triple -emit-reduced-module-interface \ +// RUN: -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/use.cpp -fprebuilt-module-path=%t -S \ +// RUN: -triple %itanium_abi_triple -emit-llvm -o - | FileCheck %t/use.cpp +// RUN: %clang_cc1 -std=c++20 %t/a.full.pcm -triple %itanium_abi_triple -emit-llvm -o - | FileCheck %t/a.cppm + + //--- a.cppm export module a; diff --git a/clang/test/Modules/pr59999.cppm b/clang/test/Modules/pr59999.cppm index 23710de9fe1c55..54452c26de4710 100644 --- a/clang/test/Modules/pr59999.cppm +++ b/clang/test/Modules/pr59999.cppm @@ -11,6 +11,19 @@ // RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Object.pcm \ // RUN: -fmodule-file=Module=%t/Module.pcm -S -emit-llvm -o - | FileCheck %t/Object.cppm +// Test again with reduced BMI. +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Module.cppm \ +// RUN: -emit-reduced-module-interface -o %t/Module.pcm +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Object.cppm \ +// RUN: -fmodule-file=Module=%t/Module.pcm -emit-module-interface -o %t/Object.pcm +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Object.pcm \ +// RUN: -fmodule-file=Module=%t/Module.pcm -S -emit-llvm -o - | FileCheck %t/Object.cppm + + //--- Module.cppm export module Module; diff --git a/clang/test/Modules/pr60036.cppm b/clang/test/Modules/pr60036.cppm index 297132cfde60bd..ffbc5fd56c2730 100644 --- a/clang/test/Modules/pr60036.cppm +++ b/clang/test/Modules/pr60036.cppm @@ -24,6 +24,20 @@ // RUN: -fmodule-file=c=%t/c.pcm -fmodule-file=d=%t/d.pcm -fmodule-file=e=%t/e.pcm \ // RUN: -fmodule-file=f=%t/f.pcm -verify -fsyntax-only +// Test again with reduced BMI +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: 
%clang_cc1 -std=c++20 %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-reduced-module-interface -fprebuilt-module-path=%t -o %t/b.pcm +// RUN: %clang_cc1 -std=c++20 %t/c.cppm -emit-reduced-module-interface -fprebuilt-module-path=%t -o %t/c.pcm +// RUN: %clang_cc1 -std=c++20 %t/d.cppm -emit-reduced-module-interface -fprebuilt-module-path=%t -o %t/d.pcm +// RUN: %clang_cc1 -std=c++20 %t/e.cppm -emit-reduced-module-interface -fprebuilt-module-path=%t -o %t/e.pcm +// RUN: %clang_cc1 -std=c++20 %t/f.cppm -emit-reduced-module-interface -fprebuilt-module-path=%t -o %t/f.pcm +// RUN: %clang_cc1 -std=c++20 %t/g.cppm -fprebuilt-module-path=%t -verify -fsyntax-only + + //--- a.cppm export module a; diff --git a/clang/test/Modules/pr60085.cppm b/clang/test/Modules/pr60085.cppm index fd6fd914a543c3..37d8b09350b42b 100644 --- a/clang/test/Modules/pr60085.cppm +++ b/clang/test/Modules/pr60085.cppm @@ -14,6 +14,23 @@ // RUN: -S -emit-llvm -disable-llvm-passes -o - -fprebuilt-module-path=%t \ // RUN: | FileCheck %t/a.cppm +// Test again with reduced BMI. 
+// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/d.cppm \ +// RUN: -emit-reduced-module-interface -o %t/d.pcm +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/c.cppm \ +// RUN: -emit-reduced-module-interface -o %t/c.pcm -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/b.cppm \ +// RUN: -emit-reduced-module-interface -o %t/b.pcm -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/a.cppm \ +// RUN: -emit-module-interface -o %t/a.pcm -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/a.pcm \ +// RUN: -S -emit-llvm -disable-llvm-passes -o - -fprebuilt-module-path=%t \ +// RUN: | FileCheck %t/a.cppm + //--- d.cppm export module d; diff --git a/clang/test/Modules/pr60275.cppm b/clang/test/Modules/pr60275.cppm index 57b31c6952bea9..eb1ebc0e4330ac 100644 --- a/clang/test/Modules/pr60275.cppm +++ b/clang/test/Modules/pr60275.cppm @@ -5,7 +5,12 @@ // RUN: split-file %s %t // // RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple -emit-module-interface %t/a.cppm -o %t/a.pcm -// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/b.cpp -fmodule-file=%t/a.pcm -emit-llvm -o - | FileCheck %t/b.cpp +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/b.cpp -fmodule-file=a=%t/a.pcm -emit-llvm -o - | FileCheck %t/b.cpp + +// Test again with reduced BMI +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple -emit-reduced-module-interface %t/a.cppm -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/b.cpp -fmodule-file=a=%t/a.pcm -emit-llvm -o - | FileCheck %t/b.cpp + //--- foo.h consteval void global() {} diff --git a/clang/test/Modules/pr60486.cppm b/clang/test/Modules/pr60486.cppm index 13802a4917e6e7..1100662c43211e 100644 --- a/clang/test/Modules/pr60486.cppm +++ b/clang/test/Modules/pr60486.cppm @@ -7,6 +7,9 @@ // RUN: %clang_cc1 
-std=c++20 %t/a.cppm -emit-module-interface -o %t/a.pcm // RUN: %clang_cc1 -std=c++20 -fmodule-file=a=%t/a.pcm %t/b.cppm -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 -fmodule-file=a=%t/a.pcm %t/b.cppm -fsyntax-only -verify + //--- foo.h template struct s { diff --git a/clang/test/Modules/pr60693.cppm b/clang/test/Modules/pr60693.cppm index c50791083a5bea..6fb3de60e59b08 100644 --- a/clang/test/Modules/pr60693.cppm +++ b/clang/test/Modules/pr60693.cppm @@ -7,6 +7,10 @@ // RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/a.cppm -emit-module-interface -o %t/a.pcm // RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple -fmodule-file=a=%t/a.pcm %t/c.cpp -S -emit-llvm -disable-llvm-passes -o - | FileCheck %t/c.cpp +// Test again with reduced BMI +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple -fmodule-file=a=%t/a.pcm %t/c.cpp -S -emit-llvm -disable-llvm-passes -o - | FileCheck %t/c.cpp + //--- a.cppm export module a; diff --git a/clang/test/Modules/pr60775.cppm b/clang/test/Modules/pr60775.cppm index 4db027ba3600a9..35eb92512f4277 100644 --- a/clang/test/Modules/pr60775.cppm +++ b/clang/test/Modules/pr60775.cppm @@ -12,6 +12,19 @@ // RUN: %clang_cc1 -std=c++20 %t/f.cppm -emit-module-interface -fmodule-file=c=%t/c.pcm -o %t/f.pcm // RUN: %clang_cc1 -std=c++20 %t/g.cpp -fmodule-file=f=%t/f.pcm -fmodule-file=c=%t/c.pcm -verify -fsyntax-only +// Test again with reduced BMI +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -I%t -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cpp -fmodule-file=a=%t/a.pcm -verify -fsyntax-only +// RUN: %clang_cc1 -std=c++20 %t/c.cppm -I%t -emit-reduced-module-interface -o %t/c.pcm +// RUN: %clang_cc1 -std=c++20 %t/d.cppm 
-emit-reduced-module-interface -fmodule-file=c=%t/c.pcm -o %t/d.pcm +// RUN: %clang_cc1 -std=c++20 %t/e.cpp -fmodule-file=d=%t/d.pcm -fmodule-file=c=%t/c.pcm -verify -fsyntax-only +// RUN: %clang_cc1 -std=c++20 %t/f.cppm -emit-reduced-module-interface -fmodule-file=c=%t/c.pcm -o %t/f.pcm +// RUN: %clang_cc1 -std=c++20 %t/g.cpp -fmodule-file=f=%t/f.pcm -fmodule-file=c=%t/c.pcm -verify -fsyntax-only + //--- initializer_list.h namespace std { typedef decltype(sizeof(int)) size_t; diff --git a/clang/test/Modules/pr60890.cppm b/clang/test/Modules/pr60890.cppm index 2560bec5b43351..488b512aaac293 100644 --- a/clang/test/Modules/pr60890.cppm +++ b/clang/test/Modules/pr60890.cppm @@ -9,6 +9,12 @@ // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/c.cppm -fprebuilt-module-path=%t -o %t/c.pcm // RUN: %clang_cc1 -std=c++20 %t/d.cpp -fprebuilt-module-path=%t -S -emit-llvm -o - +// Test again with reduced BMI +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/a.cppm -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/b.cppm -fprebuilt-module-path=%t -o %t/b.pcm +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/c.cppm -fprebuilt-module-path=%t -o %t/c.pcm +// RUN: %clang_cc1 -std=c++20 %t/d.cpp -fprebuilt-module-path=%t -S -emit-llvm -o - + //--- a.cppm export module a; diff --git a/clang/test/Modules/pr61065.cppm b/clang/test/Modules/pr61065.cppm index cf6fcdda78cd44..c79d7ac4457a11 100644 --- a/clang/test/Modules/pr61065.cppm +++ b/clang/test/Modules/pr61065.cppm @@ -10,6 +10,19 @@ // DISABLED: -fprebuilt-module-path=%t // DISABLED: %clang_cc1 -std=c++20 %t/d.cpp -fsyntax-only -verify -fprebuilt-module-path=%t +// Test again with reduced BMI +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-reduced-module-interface -o %t/b.pcm \ +// RUN: -fprebuilt-module-path=%t +// 
DISABLED: %clang_cc1 -std=c++20 %t/c.cppm -emit-reduced-module-interface -o %t/c.pcm \ +// DISABLED: -fprebuilt-module-path=%t +// DISABLED: %clang_cc1 -std=c++20 %t/d.cpp -fsyntax-only -verify -fprebuilt-module-path=%t + + //--- a.cppm export module a; diff --git a/clang/test/Modules/pr61065_2.cppm b/clang/test/Modules/pr61065_2.cppm index 10cc1a06b7e450..e898f4086af1de 100644 --- a/clang/test/Modules/pr61065_2.cppm +++ b/clang/test/Modules/pr61065_2.cppm @@ -11,6 +11,21 @@ // RUN: -fprebuilt-module-path=%t // RUN: %clang_cc1 -std=c++20 %t/e.cpp -fsyntax-only -verify -fprebuilt-module-path=%t +// Test again with reduced BMI +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-reduced-module-interface -o %t/b.pcm \ +// RUN: -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 %t/c.cppm -emit-reduced-module-interface -o %t/c.pcm \ +// RUN: -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 %t/d.cppm -emit-reduced-module-interface -o %t/d.pcm \ +// RUN: -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 %t/e.cpp -fsyntax-only -verify -fprebuilt-module-path=%t + + //--- a.cppm export module a; diff --git a/clang/test/Modules/pr61067.cppm b/clang/test/Modules/pr61067.cppm index baee4b83de5660..b7f9d22e253854 100644 --- a/clang/test/Modules/pr61067.cppm +++ b/clang/test/Modules/pr61067.cppm @@ -12,6 +12,20 @@ // RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/c.cpp -fmodule-file=a=%t/a.pcm \ // RUN: -S -emit-llvm -disable-llvm-passes -o - | FileCheck %t/c.cpp +// Test again with reduced BMI +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/a.cppm \ +// RUN: -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/b.cppm \ +// RUN: -emit-module-interface 
-fmodule-file=a=%t/a.pcm -o %t/b.pcm +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/b.pcm -S \ +// RUN: -emit-llvm -fmodule-file=a=%t/a.pcm -disable-llvm-passes -o - | FileCheck %t/b.cppm +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/c.cpp -fmodule-file=a=%t/a.pcm \ +// RUN: -S -emit-llvm -disable-llvm-passes -o - | FileCheck %t/c.cpp + //--- a.cppm export module a; diff --git a/clang/test/Modules/pr61317.cppm b/clang/test/Modules/pr61317.cppm index 4b54d26dc5a63b..9ed20e7947062f 100644 --- a/clang/test/Modules/pr61317.cppm +++ b/clang/test/Modules/pr61317.cppm @@ -8,6 +8,15 @@ // RUN: -fprebuilt-module-path=%t // RUN: %clang_cc1 -std=c++20 %t/Use.cpp -fprebuilt-module-path=%t -fsyntax-only -verify +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-reduced-module-interface -o %t/A.pcm +// RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-reduced-module-interface -o %t/B.pcm \ +// RUN: -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 %t/Use.cpp -fprebuilt-module-path=%t -fsyntax-only -verify + //--- foo.h #ifndef _FOO #define _FOO diff --git a/clang/test/Modules/pr61783.cppm b/clang/test/Modules/pr61783.cppm index 9cf773b0b282ba..c3bc853d2dee8e 100644 --- a/clang/test/Modules/pr61783.cppm +++ b/clang/test/Modules/pr61783.cppm @@ -9,6 +9,14 @@ // RUN: %clang_cc1 -std=c++20 -triple x86_64-pc-windows-msvc19.11.0 -fms-extensions %t/user.cpp -fmodule-file=mod=%t/mod.pcm \ // RUN: -S -emit-llvm -o - | FileCheck %t/user.cpp +// Test again with reduced BMI +// RUN: %clang_cc1 -std=c++20 -triple x86_64-pc-windows-msvc19.11.0 -fms-extensions %t/mod.cppm -emit-reduced-module-interface \ +// RUN: -o %t/mod.pcm +// RUN: %clang_cc1 -std=c++20 -triple x86_64-pc-windows-msvc19.11.0 -fms-extensions %t/mod.pcm -S -emit-llvm -o - | \ +// RUN: FileCheck %t/mod.cppm +// RUN: %clang_cc1 -std=c++20 -triple x86_64-pc-windows-msvc19.11.0 -fms-extensions %t/user.cpp -fmodule-file=mod=%t/mod.pcm 
\ +// RUN: -S -emit-llvm -o - | FileCheck %t/user.cpp + //--- mod.cppm module; diff --git a/clang/test/Modules/pr61892.cppm b/clang/test/Modules/pr61892.cppm index 99d02f36b2b54b..7b8905036cd449 100644 --- a/clang/test/Modules/pr61892.cppm +++ b/clang/test/Modules/pr61892.cppm @@ -2,11 +2,25 @@ // RUN: mkdir -p %t // RUN: split-file %s %t // +// RUNX: %clang_cc1 -std=c++20 -triple %itanium_abi_triple \ +// RUNX: -emit-module-interface %t/a.cppm -o %t/a.pcm +// RUNX: %clang_cc1 -std=c++20 -triple %itanium_abi_triple \ +// RUNX: %t/b.cpp -fmodule-file=a=%t/a.pcm -disable-llvm-passes \ +// RUNX: -emit-llvm -o - | FileCheck %t/b.cpp +// RUNX: %clang_cc1 -std=c++20 -triple %itanium_abi_triple \ +// RUNX: %t/c.cpp -fmodule-file=a=%t/a.pcm -disable-llvm-passes \ +// RUNX: -emit-llvm -o - | FileCheck %t/c.cpp + +// Test again with reduced BMI. +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// // RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple \ -// RUN: -emit-module-interface %t/a.cppm -o %t/a.pcm -// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple \ -// RUN: %t/b.cpp -fmodule-file=a=%t/a.pcm -disable-llvm-passes \ -// RUN: -emit-llvm -o - | FileCheck %t/b.cpp +// RUN: -emit-reduced-module-interface %t/a.cppm -o %t/a.pcm +// RUNX: %clang_cc1 -std=c++20 -triple %itanium_abi_triple \ +// RUNX: %t/b.cpp -fmodule-file=a=%t/a.pcm -disable-llvm-passes \ +// RUNX: -emit-llvm -o - | FileCheck %t/b.cpp // RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple \ // RUN: %t/c.cpp -fmodule-file=a=%t/a.pcm -disable-llvm-passes \ // RUN: -emit-llvm -o - | FileCheck %t/c.cpp @@ -23,20 +37,10 @@ struct integer { export template int a = static_cast(integer()); -struct s { - ~s(); - operator int() const; -}; - -export template -auto d = s(); - int aa() { - return a + d; + return a; } -int dynamic_func(); -export inline int dynamic_var = dynamic_func(); //--- b.cpp import a; @@ -53,13 +57,9 @@ void b() {} //--- c.cpp import a; int c() { - return a + d + 
dynamic_var; + return a; } // The used variables are generated normally // CHECK-DAG: @_ZW1a1aIvE = -// CHECK-DAG: @_ZW1a1dIvE = -// CHECK-DAG: @_ZW1a11dynamic_var = linkonce_odr // CHECK-DAG: @_ZGVW1a1aIvE = -// CHECk-DAG: @_ZGVW1a1dIvE = -// CHECK-DAG: @_ZGVW1a11dynamic_var = linkonce_odr diff --git a/clang/test/Modules/pr62158.cppm b/clang/test/Modules/pr62158.cppm index 7a0761df771580..bb488fff108f28 100644 --- a/clang/test/Modules/pr62158.cppm +++ b/clang/test/Modules/pr62158.cppm @@ -6,6 +6,15 @@ // RUN: %clang_cc1 -std=c++20 %t/main.cpp -fmodule-file=lib=%t/lib.pcm \ // RUN: -verify -fsyntax-only +// Test again with reduced BMI +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/lib.cppm -o %t/lib.pcm +// RUN: %clang_cc1 -std=c++20 %t/main.cpp -fmodule-file=lib=%t/lib.pcm \ +// RUN: -verify -fsyntax-only + //--- header.h namespace lib::inline __1 { template diff --git a/clang/test/Modules/pr62359.cppm b/clang/test/Modules/pr62359.cppm index 4632457e57f189..69acc3ce303a57 100644 --- a/clang/test/Modules/pr62359.cppm +++ b/clang/test/Modules/pr62359.cppm @@ -12,6 +12,22 @@ // RUN: %clang_cc1 -std=c++20 -fopenmp %t/use.cpp -fmodule-file=hello=%t/Hello.pcm -fsyntax-only -verify // RUN: %clang_cc1 -std=c++20 -fopenmp %t/use2.cpp -fmodule-file=hello=%t/Hello.pcm -fsyntax-only -verify +// Test again with reduced BMI +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/Hello.cppm -o %t/Hello.pcm +// RUN: not %clang_cc1 -std=c++20 -fopenmp %t/use.cpp -fmodule-file=hello=%t/Hello.pcm -fsyntax-only \ +// RUN: 2>&1 | FileCheck %t/use.cpp +// RUN: not %clang_cc1 -std=c++20 -fopenmp %t/use2.cpp -fmodule-file=hello=%t/Hello.pcm -fsyntax-only \ +// RUN: 2>&1 | FileCheck %t/use2.cpp +// +// RUN: %clang_cc1 -std=c++20 -fopenmp -emit-reduced-module-interface %t/Hello.cppm -o %t/Hello.pcm +// RUN: %clang_cc1 
-std=c++20 -fopenmp %t/use.cpp -fmodule-file=hello=%t/Hello.pcm -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 -fopenmp %t/use2.cpp -fmodule-file=hello=%t/Hello.pcm -fsyntax-only -verify + + //--- Hello.cppm export module hello; export void hello() { diff --git a/clang/test/Modules/pr62589.cppm b/clang/test/Modules/pr62589.cppm index 4164c3405ac0e3..c5aec3ed81846f 100644 --- a/clang/test/Modules/pr62589.cppm +++ b/clang/test/Modules/pr62589.cppm @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -std=c++23 -emit-module-interface %t/a.cppm -o %t/a.pcm // RUN: %clang_cc1 -std=c++23 %t/b.cpp -fmodule-file=a=%t/a.pcm -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++23 -emit-reduced-module-interface %t/a.cppm -o %t/a.pcm +// RUN: %clang_cc1 -std=c++23 %t/b.cpp -fmodule-file=a=%t/a.pcm -fsyntax-only -verify + //--- foo.h class TypeA {}; diff --git a/clang/test/Modules/pr62705.cppm b/clang/test/Modules/pr62705.cppm index 00769d2277f4f1..9d996ae297d7af 100644 --- a/clang/test/Modules/pr62705.cppm +++ b/clang/test/Modules/pr62705.cppm @@ -10,6 +10,14 @@ // RUN: %clang_cc1 %t/b.pcm -std=c++20 -triple %itanium_abi_triple \ // RUN: -fmodule-file=a=%t/a.pcm -emit-llvm -o - | FileCheck %t/b.cppm +// RUN: %clang_cc1 %t/a.cppm -std=c++20 -triple %itanium_abi_triple \ +// RUN: -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 %t/b.cppm -std=c++20 -triple %itanium_abi_triple \ +// RUN: -emit-module-interface -o %t/b.pcm \ +// RUN: -fmodule-file=a=%t/a.pcm +// RUN: %clang_cc1 %t/b.pcm -std=c++20 -triple %itanium_abi_triple \ +// RUN: -fmodule-file=a=%t/a.pcm -emit-llvm -o - | FileCheck %t/b.cppm + //--- foo.h namespace n { diff --git a/clang/test/Modules/pr62796.cppm b/clang/test/Modules/pr62796.cppm index f96e54bc6adede..58b72164e88bfc 100644 --- a/clang/test/Modules/pr62796.cppm +++ b/clang/test/Modules/pr62796.cppm @@ -6,6 +6,10 @@ // RUN: %clang_cc1 -std=c++20 %t/Use.cpp -fmodule-file=Fibonacci.Cache=%t/Cache.pcm \ // RUN: -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 
-emit-reduced-module-interface %t/Cache.cppm -o %t/Cache.pcm +// RUN: %clang_cc1 -std=c++20 %t/Use.cpp -fmodule-file=Fibonacci.Cache=%t/Cache.pcm \ +// RUN: -fsyntax-only -verify + //--- Cache.cppm export module Fibonacci.Cache; diff --git a/clang/test/Modules/pr62943.cppm b/clang/test/Modules/pr62943.cppm index 27868b78220f5c..c3a373814a4398 100644 --- a/clang/test/Modules/pr62943.cppm +++ b/clang/test/Modules/pr62943.cppm @@ -9,6 +9,18 @@ // RUN: %clang_cc1 -std=c++20 %t/use.cpp -fprebuilt-module-path=%t \ // RUN: -fsyntax-only -verify +// Test again with reduced BMI. +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-reduced-module-interface -o %t/b.pcm +// RUN: %clang_cc1 -std=c++20 %t/c.cppm -emit-reduced-module-interface \ +// RUN: -fprebuilt-module-path=%t -o %t/c.pcm +// RUN: %clang_cc1 -std=c++20 %t/use.cpp -fprebuilt-module-path=%t \ +// RUN: -fsyntax-only -verify + //--- foo.h #ifndef FOO_H #define FOO_H diff --git a/clang/test/Modules/pr63544.cppm b/clang/test/Modules/pr63544.cppm index 16224cfd010949..f079abaed09df8 100644 --- a/clang/test/Modules/pr63544.cppm +++ b/clang/test/Modules/pr63544.cppm @@ -8,6 +8,18 @@ // RUN: -fprebuilt-module-path=%t // RUN: %clang_cc1 -std=c++23 %t/pr63544.cpp -fprebuilt-module-path=%t -fsyntax-only -verify +// Test again with reduced BMI. 
+// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++23 %t/a.cppm -emit-reduced-module-interface -o %t/m-a.pcm +// RUN: %clang_cc1 -std=c++23 %t/b.cppm -emit-reduced-module-interface -o %t/m-b.pcm +// RUN: %clang_cc1 -std=c++23 %t/m.cppm -emit-reduced-module-interface -o %t/m.pcm \ +// RUN: -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++23 %t/pr63544.cpp -fprebuilt-module-path=%t -fsyntax-only -verify + + //--- foo.h namespace std { diff --git a/clang/test/Modules/pr63595.cppm b/clang/test/Modules/pr63595.cppm index 13a5f84a3e71f2..7c5395e065de54 100644 --- a/clang/test/Modules/pr63595.cppm +++ b/clang/test/Modules/pr63595.cppm @@ -6,6 +6,16 @@ // RUN: %clang_cc1 -std=c++20 -emit-module-interface -I%t %t/module2.cppm -o %t/module2.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/merge.cpp -verify -fsyntax-only +// Test again with reduced BMI. +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface -I%t %t/module1.cppm -o %t/module1.pcm +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface -I%t %t/module2.cppm -o %t/module2.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/merge.cpp -verify -fsyntax-only + + //--- header.h namespace NS { template diff --git a/clang/test/Modules/pr67627.cppm b/clang/test/Modules/pr67627.cppm index 3d4410229080a9..d3f8496c47c2a7 100644 --- a/clang/test/Modules/pr67627.cppm +++ b/clang/test/Modules/pr67627.cppm @@ -5,6 +5,10 @@ // RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-module-interface -o %t/A.pcm // RUN: %clang_cc1 -std=c++20 %t/B.cppm -fmodule-file=A=%t/A.pcm -fsyntax-only -verify +// RUN: rm %t/A.pcm +// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-reduced-module-interface -o %t/A.pcm +// RUN: %clang_cc1 -std=c++20 %t/B.cppm -fmodule-file=A=%t/A.pcm -fsyntax-only -verify + //--- A.cppm export module A; diff --git a/clang/test/Modules/pr67893.cppm 
b/clang/test/Modules/pr67893.cppm index 00b024ecc2eb11..58990cec01d666 100644 --- a/clang/test/Modules/pr67893.cppm +++ b/clang/test/Modules/pr67893.cppm @@ -9,6 +9,15 @@ // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/m.pcm \ // RUN: -fprebuilt-module-path=%t -S -emit-llvm -o - | FileCheck %t/m.cppm +// Test again with reduced BMI +// +// RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/a.cppm \ +// RUN: -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/m.cppm \ +// RUN: -emit-reduced-module-interface -fprebuilt-module-path=%t -o %t/m.pcm +// RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/m.pcm \ +// RUN: -fprebuilt-module-path=%t -S -emit-llvm -o - | FileCheck %t/m.cppm + //--- a.cppm export module a; export struct A { diff --git a/clang/test/Modules/predefined.cpp b/clang/test/Modules/predefined.cpp index fbe0c4e23ca59c..8f897f5ace938f 100644 --- a/clang/test/Modules/predefined.cpp +++ b/clang/test/Modules/predefined.cpp @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -x c++ -std=c++20 -emit-module-interface a.h -o a.pcm -fms-extensions -verify // RUN: %clang_cc1 -std=c++20 a.cpp -fmodule-file=A=a.pcm -fms-extensions -fsyntax-only -verify +// RUN: %clang_cc1 -x c++ -std=c++20 -emit-reduced-module-interface a.h -o a.pcm -fms-extensions -verify +// RUN: %clang_cc1 -std=c++20 a.cpp -fmodule-file=A=a.pcm -fms-extensions -fsyntax-only -verify + //--- a.h // expected-no-diagnostics diff --git a/clang/test/Modules/preferred_name.cppm b/clang/test/Modules/preferred_name.cppm index 46ad96cb1abc33..2f17058678455c 100644 --- a/clang/test/Modules/preferred_name.cppm +++ b/clang/test/Modules/preferred_name.cppm @@ -8,6 +8,16 @@ // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t -I%t %t/Use.cppm -verify -fsyntax-only // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t -I%t %t/Use1.cpp -verify -fsyntax-only // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t -I%t %t/Use2.cpp -verify 
-fsyntax-only + +// Test again with reduced BMI. +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-reduced-module-interface -o %t/A.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t -I%t %t/Use.cppm -verify -fsyntax-only +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t -I%t %t/Use1.cpp -verify -fsyntax-only +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t -I%t %t/Use2.cpp -verify -fsyntax-only // //--- foo.h template diff --git a/clang/test/Modules/redefinition-merges.cppm b/clang/test/Modules/redefinition-merges.cppm index 9ab4006f985fa9..13032b22ee60e4 100644 --- a/clang/test/Modules/redefinition-merges.cppm +++ b/clang/test/Modules/redefinition-merges.cppm @@ -12,6 +12,12 @@ // RUN: %clang_cc1 -std=c++20 -I%t %t/M.cppm -emit-module-interface -o %t/M.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use1.cpp -verify -fsyntax-only // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use2.cpp -verify -fsyntax-only + +// Test again with reduced BMI.
+// RUN: %clang_cc1 -std=c++20 -I%t %t/M.cppm -emit-reduced-module-interface -o %t/M.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use1.cpp -verify -fsyntax-only +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use2.cpp -verify -fsyntax-only + // //--- foo.h #ifndef FOO diff --git a/clang/test/Modules/redundant-template-default-arg.cpp b/clang/test/Modules/redundant-template-default-arg.cpp index 6807b45e513954..20a806c4c818ab 100644 --- a/clang/test/Modules/redundant-template-default-arg.cpp +++ b/clang/test/Modules/redundant-template-default-arg.cpp @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -std=c++20 %t/foo.cppm -I%t -emit-module-interface -o %t/foo.pcm // RUN: %clang_cc1 -fprebuilt-module-path=%t -std=c++20 %t/use.cpp -I%t -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/foo.cppm -I%t -emit-reduced-module-interface -o %t/foo.pcm +// RUN: %clang_cc1 -fprebuilt-module-path=%t -std=c++20 %t/use.cpp -I%t -fsyntax-only -verify + //--- foo.h template T u; diff --git a/clang/test/Modules/redundant-template-default-arg2.cpp b/clang/test/Modules/redundant-template-default-arg2.cpp index 41deb112cfa6ea..ae1f0c7e69cc06 100644 --- a/clang/test/Modules/redundant-template-default-arg2.cpp +++ b/clang/test/Modules/redundant-template-default-arg2.cpp @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -std=c++20 %t/foo.cppm -I%t -emit-module-interface -o %t/foo.pcm // RUN: %clang_cc1 -fprebuilt-module-path=%t -std=c++20 %t/use.cpp -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/foo.cppm -I%t -emit-reduced-module-interface -o %t/foo.pcm +// RUN: %clang_cc1 -fprebuilt-module-path=%t -std=c++20 %t/use.cpp -fsyntax-only -verify + //--- foo.cppm export module foo; export template diff --git a/clang/test/Modules/redundant-template-default-arg3.cpp b/clang/test/Modules/redundant-template-default-arg3.cpp index 8bb222ac91ffce..e4464c40e97687 100644 --- a/clang/test/Modules/redundant-template-default-arg3.cpp +++ 
b/clang/test/Modules/redundant-template-default-arg3.cpp @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -std=c++20 %t/foo.cppm -I%t -emit-module-interface -o %t/foo.pcm // RUN: %clang_cc1 -fprebuilt-module-path=%t -std=c++20 %t/use.cpp -I%t/. -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/foo.cppm -I%t -emit-reduced-module-interface -o %t/foo.pcm +// RUN: %clang_cc1 -fprebuilt-module-path=%t -std=c++20 %t/use.cpp -I%t/. -fsyntax-only -verify + //--- foo.h template T v; diff --git a/clang/test/Modules/search-partitions.cpp b/clang/test/Modules/search-partitions.cpp index 571160def7e9b7..92732958db94e6 100644 --- a/clang/test/Modules/search-partitions.cpp +++ b/clang/test/Modules/search-partitions.cpp @@ -14,6 +14,22 @@ // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/moduleA.cpp \ // RUN: -fprebuilt-module-path=%t +// Test again with reduced BMI +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/partition1.cpp \ +// RUN: -o %t/A-Part1.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/partition2.cpp \ +// RUN: -o %t/A-Part2.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/partition3.cpp \ +// RUN: -o %t/A-Part3.pcm + +// RUN: %clang_cc1 -std=c++20 -fsyntax-only %t/moduleA.cpp -fprebuilt-module-path=%t + // expected-no-diagnostics //--- partition1.cpp diff --git a/clang/test/Modules/seperated-member-function-definition-for-template-class.cppm b/clang/test/Modules/seperated-member-function-definition-for-template-class.cppm index e32da39d9df1af..1465c33c3625c8 100644 --- a/clang/test/Modules/seperated-member-function-definition-for-template-class.cppm +++ b/clang/test/Modules/seperated-member-function-definition-for-template-class.cppm @@ -12,6 +12,18 @@ // RUN: -fprebuilt-module-path=%t // RUN: %clang_cc1 -std=c++20 %t/use.cpp -fsyntax-only -verify -fprebuilt-module-path=%t +// Test again with reduced BMI +// RUN: rm -rf %t +// RUN: mkdir %t 
+// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/base.cppm -emit-reduced-module-interface -o %t/package-base.pcm +// RUN: %clang_cc1 -std=c++20 %t/child.cppm -emit-reduced-module-interface -o %t/package-child.pcm \ +// RUN: -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 %t/package.cppm -emit-reduced-module-interface -o %t/package.pcm \ +// RUN: -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 %t/use.cpp -fsyntax-only -verify -fprebuilt-module-path=%t + //--- base.cppm export module package:base; diff --git a/clang/test/Modules/template-function-specialization.cpp b/clang/test/Modules/template-function-specialization.cpp index 3eac92e7edb94c..1b6bf2de6ba1d9 100644 --- a/clang/test/Modules/template-function-specialization.cpp +++ b/clang/test/Modules/template-function-specialization.cpp @@ -4,7 +4,10 @@ // // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/foo.cppm -o %t/foo.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only -// + +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/foo.cppm -o %t/foo.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only + //--- foo.cppm module; # 3 __FILE__ 1 // use the next physical line number here (and below) diff --git a/clang/test/Modules/template-lambdas.cppm b/clang/test/Modules/template-lambdas.cppm index 69117a1a04fc7b..e82cb1f3ad85ac 100644 --- a/clang/test/Modules/template-lambdas.cppm +++ b/clang/test/Modules/template-lambdas.cppm @@ -12,6 +12,21 @@ // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only \ // RUN: -verify -DUSE_LAMBDA2 +// Test again with reduced BMI +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/template_lambdas.cppm -emit-reduced-module-interface \ +// RUN: -o %t/lambdas.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only \ +// RUN: -verify +// +// RUN: 
%clang_cc1 -std=c++20 %t/template_lambdas2.cppm -emit-reduced-module-interface \ +// RUN: -o %t/lambdas2.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only \ +// RUN: -verify -DUSE_LAMBDA2 + //--- lambdas.h auto l1 = []() constexpr -> int { return I; diff --git a/clang/test/Modules/template-pack.cppm b/clang/test/Modules/template-pack.cppm index eca17f31f015e5..278c1c2d54ccf5 100644 --- a/clang/test/Modules/template-pack.cppm +++ b/clang/test/Modules/template-pack.cppm @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/a.cppm -o %t/a.pcm // RUN: %clang_cc1 -std=c++20 %t/b.cppm -fprebuilt-module-path=%t -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/a.cppm -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -fprebuilt-module-path=%t -fsyntax-only -verify + //--- foo.h namespace std diff --git a/clang/test/Modules/template_default_argument.cpp b/clang/test/Modules/template_default_argument.cpp index 5a7d1c04cf1817..202f8dd40d7a94 100644 --- a/clang/test/Modules/template_default_argument.cpp +++ b/clang/test/Modules/template_default_argument.cpp @@ -4,6 +4,9 @@ // // RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-module-interface -o %t/B.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only -verify + +// RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-reduced-module-interface -o %t/B.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -fsyntax-only -verify // //--- templ.h template diff --git a/clang/test/Sema/attr-availability-visionos.c b/clang/test/Sema/attr-availability-visionos.c new file mode 100644 index 00000000000000..2c388c5d529073 --- /dev/null +++ b/clang/test/Sema/attr-availability-visionos.c @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -triple arm64-apple-xros1 -fapplication-extension -verify=visionos %s 2>&1 + +__attribute__((availability(xros, unavailable))) // visionos-warning {{unknown platform 'xros' in availability 
macro}} +void xros_unavail(); // visionos-note {{}} + +__attribute__((availability(xros_app_extension, unavailable))) // visionos-warning {{unknown platform 'xros_app_extension' in availability macro}} +void xros_ext_unavail(); // visionos-note {{}} + +__attribute__((availability(visionOSApplicationExtension, unavailable))) +void visionos_ext_unavail(); // visionos-note {{}} + +void use() { + xros_unavail(); // visionos-error {{'xros_unavail' is unavailable: not available on visionOS}} + xros_ext_unavail(); // visionos-error {{'xros_ext_unavail' is unavailable: not available on visionOS}} + visionos_ext_unavail(); // visionos-error {{'visionos_ext_unavail' is unavailable: not available on visionOS}} +} + +__attribute__((availability(visionOS, introduced=1.0))) +void visionos_introduced_1(); + +__attribute__((availability(visionos, introduced=1.1))) +void visionos_introduced_1_1(); // visionos-note 4 {{'visionos_introduced_1_1' has been marked as being introduced in visionOS 1.1 here, but the deployment target is visionOS 1}} + +void use2() { + if (__builtin_available(iOS 16.1, *)) + visionos_introduced_1_1(); // visionos-warning {{'visionos_introduced_1_1' is only available on visionOS 1.1 or newer}} visionos-note {{enclose}} + + if (__builtin_available(xrOS 1.1, *)) // visionos-error {{unrecognized platform name xrOS}} + visionos_introduced_1_1(); // visionos-warning {{'visionos_introduced_1_1' is only available on visionOS 1.1 or newer}} visionos-note {{enclose}} + + if (__builtin_available(xros_app_extension 1, *)) // visionos-error {{unrecognized platform name xros_app_extension}} + visionos_introduced_1_1(); // visionos-warning {{'visionos_introduced_1_1' is only available on visionOS 1.1 or newer}} visionos-note {{enclose}} + + if (__builtin_available(visionOS 1.1, *)) + visionos_introduced_1_1(); + + visionos_introduced_1(); + visionos_introduced_1_1(); // visionos-warning {{'visionos_introduced_1_1' is only available on visionOS 1.1 or newer}} visionos-note 
{{enclose}} +} diff --git a/clang/test/Sema/constant-builtins-2.c b/clang/test/Sema/constant-builtins-2.c index 2bdd7b06daabfe..0935abe4c65fbe 100644 --- a/clang/test/Sema/constant-builtins-2.c +++ b/clang/test/Sema/constant-builtins-2.c @@ -237,6 +237,13 @@ char popcount7[__builtin_popcountl(~0L) == BITSIZE(long) ? 1 : -1]; char popcount8[__builtin_popcountll(0LL) == 0 ? 1 : -1]; char popcount9[__builtin_popcountll(0xF0F0LL) == 8 ? 1 : -1]; char popcount10[__builtin_popcountll(~0LL) == BITSIZE(long long) ? 1 : -1]; +char popcount11[__builtin_popcountg(0U) == 0 ? 1 : -1]; +char popcount12[__builtin_popcountg(0xF0F0U) == 8 ? 1 : -1]; +char popcount13[__builtin_popcountg(~0U) == BITSIZE(int) ? 1 : -1]; +char popcount14[__builtin_popcountg(~0UL) == BITSIZE(long) ? 1 : -1]; +char popcount15[__builtin_popcountg(~0ULL) == BITSIZE(long long) ? 1 : -1]; +char popcount16[__builtin_popcountg(~(unsigned __int128)0) == BITSIZE(__int128) ? 1 : -1]; +char popcount17[__builtin_popcountg(~(unsigned _BitInt(128))0) == BITSIZE(_BitInt(128)) ? 1 : -1]; char parity1[__builtin_parity(0) == 0 ? 1 : -1]; char parity2[__builtin_parity(0xb821) == 0 ? 
1 : -1]; diff --git a/clang/test/SemaCXX/constant-expression-cxx11.cpp b/clang/test/SemaCXX/constant-expression-cxx11.cpp index 9e2ae07cbe4c9c..efb391ba0922d8 100644 --- a/clang/test/SemaCXX/constant-expression-cxx11.cpp +++ b/clang/test/SemaCXX/constant-expression-cxx11.cpp @@ -1273,8 +1273,8 @@ namespace PR11595 { struct B { B(); A& x; }; static_assert(B().x == 3, ""); // expected-error {{constant expression}} expected-note {{non-literal type 'B' cannot be used in a constant expression}} - constexpr bool f(int k) { // expected-error {{constexpr function never produces a constant expression}} - return B().x == k; // expected-note {{non-literal type 'B' cannot be used in a constant expression}} + constexpr bool f(int k) { // cxx11_20-error {{constexpr function never produces a constant expression}} + return B().x == k; // cxx11_20-note {{non-literal type 'B' cannot be used in a constant expression}} } } @@ -1326,8 +1326,8 @@ namespace ExternConstexpr { constexpr int g() { return q; } // expected-note {{outside its lifetime}} constexpr int q = g(); // expected-error {{constant expression}} expected-note {{in call}} - extern int r; // expected-note {{here}} - constexpr int h() { return r; } // expected-error {{never produces a constant}} expected-note {{read of non-const}} + extern int r; // cxx11_20-note {{here}} + constexpr int h() { return r; } // cxx11_20-error {{never produces a constant}} cxx11_20-note {{read of non-const}} struct S { int n; }; extern const S s; @@ -1678,7 +1678,7 @@ namespace ImplicitConstexpr { struct R { constexpr R() noexcept; constexpr R(const R&) noexcept; constexpr R(R&&) noexcept; ~R() noexcept; }; struct S { R r; }; // expected-note 3{{here}} struct T { T(const T&) noexcept; T(T &&) noexcept; ~T() noexcept; }; - struct U { T t; }; // expected-note 3{{here}} + struct U { T t; }; // cxx11_20-note 3{{here}} static_assert(!__is_literal_type(Q), ""); static_assert(!__is_literal_type(R), ""); static_assert(!__is_literal_type(S), ""); @@ 
-1691,9 +1691,9 @@ namespace ImplicitConstexpr { friend S::S() noexcept; // expected-error {{follows constexpr}} friend S::S(S&&) noexcept; // expected-error {{follows constexpr}} friend S::S(const S&) noexcept; // expected-error {{follows constexpr}} - friend constexpr U::U() noexcept; // expected-error {{follows non-constexpr}} - friend constexpr U::U(U&&) noexcept; // expected-error {{follows non-constexpr}} - friend constexpr U::U(const U&) noexcept; // expected-error {{follows non-constexpr}} + friend constexpr U::U() noexcept; // cxx11_20-error {{follows non-constexpr}} + friend constexpr U::U(U&&) noexcept; // cxx11_20-error {{follows non-constexpr}} + friend constexpr U::U(const U&) noexcept; // cxx11_20-error {{follows non-constexpr}} }; } @@ -1906,9 +1906,9 @@ namespace StmtExpr { }); } static_assert(g(123) == 15129, ""); - constexpr int h() { // expected-error {{never produces a constant}} + constexpr int h() { // cxx11_20-error {{never produces a constant}} return ({ // expected-warning {{extension}} - return 0; // expected-note {{not supported}} + return 0; // cxx11_20-note {{not supported}} 1; }); } @@ -2093,8 +2093,8 @@ namespace ZeroSizeTypes { // expected-note@-2 {{subtraction of pointers to type 'int[0]' of zero size}} int arr[5][0]; - constexpr int f() { // expected-error {{never produces a constant expression}} - return &arr[3] - &arr[0]; // expected-note {{subtraction of pointers to type 'int[0]' of zero size}} + constexpr int f() { // cxx11_20-error {{never produces a constant expression}} + return &arr[3] - &arr[0]; // cxx11_20-note {{subtraction of pointers to type 'int[0]' of zero size}} } } @@ -2118,8 +2118,8 @@ namespace NeverConstantTwoWays { // If we see something non-constant but foldable followed by something // non-constant and not foldable, we want the first diagnostic, not the // second. - constexpr int f(int n) { // expected-error {{never produces a constant expression}} - return (int *)(long)&n == &n ? 
// expected-note {{reinterpret_cast}} + constexpr int f(int n) { // cxx11_20-error {{never produces a constant expression}} + return (int *)(long)&n == &n ? // cxx11_20-note {{reinterpret_cast}} 1 / 0 : // expected-warning {{division by zero}} 0; } @@ -2277,7 +2277,8 @@ namespace InheritedCtor { struct A { constexpr A(int) {} }; struct B : A { int n; using A::A; }; // expected-note {{here}} - constexpr B b(0); // expected-error {{constant expression}} expected-note {{derived class}} + constexpr B b(0); // expected-error {{constant expression}} cxx11_20-note {{derived class}}\ + // cxx23-note {{not initialized}} struct C : A { using A::A; struct { union { int n, m = 0; }; union { int a = 0; }; int k = 0; }; struct {}; union {}; }; // expected-warning 6{{}} constexpr C c(0); @@ -2316,10 +2317,11 @@ namespace InheritedCtor { namespace PR28366 { namespace ns1 { -void f(char c) { //expected-note2{{declared here}} +void f(char c) { //expected-note{{declared here}} + //cxx11_20-note@-1{{declared here}} struct X { - static constexpr char f() { //expected-error{{never produces a constant expression}} - return c; //expected-error{{reference to local}} expected-note{{function parameter}} + static constexpr char f() { // cxx11_20-error {{never produces a constant expression}} + return c; //expected-error{{reference to local}} cxx11_20-note{{function parameter}} } }; int I = X::f(); diff --git a/clang/test/SemaCXX/constant-expression-cxx14.cpp b/clang/test/SemaCXX/constant-expression-cxx14.cpp index 273d7ff3a208e2..80a7a2dd31531c 100644 --- a/clang/test/SemaCXX/constant-expression-cxx14.cpp +++ b/clang/test/SemaCXX/constant-expression-cxx14.cpp @@ -44,13 +44,13 @@ constexpr int g(int k) { return 3 * k3 + 5 * k2 + n * k - 20; } static_assert(g(2) == 42, ""); -constexpr int h(int n) { // expected-error {{constexpr function never produces a constant expression}} - static const int m = n; // expected-note {{control flows through the definition of a static variable}} \ +constexpr 
int h(int n) { // cxx14_20-error {{constexpr function never produces a constant expression}} + static const int m = n; // cxx14_20-note {{control flows through the definition of a static variable}} \ // cxx14_20-warning {{definition of a static variable in a constexpr function is a C++23 extension}} return m; } -constexpr int i(int n) { // expected-error {{constexpr function never produces a constant expression}} - thread_local const int m = n; // expected-note {{control flows through the definition of a thread_local variable}} \ +constexpr int i(int n) { // cxx14_20-error {{constexpr function never produces a constant expression}} + thread_local const int m = n; // cxx14_20-note {{control flows through the definition of a thread_local variable}} \ // cxx14_20-warning {{definition of a thread_local variable in a constexpr function is a C++23 extension}} return m; } @@ -68,6 +68,7 @@ constexpr int j(int k) { } } } // expected-note 2{{control reached end of constexpr function}} + // cxx23-warning@-1 {{does not return a value in all control paths}} static_assert(j(0) == -3, ""); static_assert(j(1) == 5, ""); static_assert(j(2), ""); // expected-error {{constant expression}} expected-note {{in call to 'j(2)'}} @@ -104,10 +105,10 @@ static_assert(l(false) == 5, ""); static_assert(l(true), ""); // expected-error {{constant expression}} expected-note {{in call to 'l(true)'}} // Potential constant expression checking is still applied where possible. 
-constexpr int htonl(int x) { // expected-error {{never produces a constant expression}} +constexpr int htonl(int x) { // cxx14_20-error {{never produces a constant expression}} typedef unsigned char uchar; uchar arr[4] = { uchar(x >> 24), uchar(x >> 16), uchar(x >> 8), uchar(x) }; - return *reinterpret_cast(arr); // expected-note {{reinterpret_cast is not allowed in a constant expression}} + return *reinterpret_cast(arr); // cxx14_20-note {{reinterpret_cast is not allowed in a constant expression}} } constexpr int maybe_htonl(bool isBigEndian, int x) { @@ -183,7 +184,7 @@ namespace string_assign { static_assert(!test1(100), ""); static_assert(!test1(101), ""); // expected-error {{constant expression}} expected-note {{in call to 'test1(101)'}} - constexpr void f() { // expected-error{{constexpr function never produces a constant expression}} expected-note@+2{{assignment to dereferenced one-past-the-end pointer is not allowed in a constant expression}} + constexpr void f() { // cxx14_20-error{{constexpr function never produces a constant expression}} cxx14_20-note@+2{{assignment to dereferenced one-past-the-end pointer is not allowed in a constant expression}} char foo[10] = { "z" }; // expected-note {{here}} foo[10] = 'x'; // expected-warning {{past the end}} } @@ -207,14 +208,14 @@ namespace array_resize { namespace potential_const_expr { constexpr void set(int &n) { n = 1; } constexpr int div_zero_1() { int z = 0; set(z); return 100 / z; } // no error - constexpr int div_zero_2() { // expected-error {{never produces a constant expression}} + constexpr int div_zero_2() { // cxx14_20-error {{never produces a constant expression}} int z = 0; - return 100 / (set(z), 0); // expected-note {{division by zero}} + return 100 / (set(z), 0); // cxx14_20-note {{division by zero}} } - int n; // expected-note {{declared here}} - constexpr int ref() { // expected-error {{never produces a constant expression}} + int n; // cxx14_20-note {{declared here}} + constexpr int ref() { 
// cxx14_20-error {{never produces a constant expression}} int &r = n; - return r; // expected-note {{read of non-const variable 'n'}} + return r; // cxx14_20-note {{read of non-const variable 'n'}} } } @@ -846,8 +847,8 @@ namespace StmtExpr { static_assert(g() == 0, ""); // expected-error {{constant expression}} expected-note {{in call}} // FIXME: We should handle the void statement expression case. - constexpr int h() { // expected-error {{never produces a constant}} - ({ if (true) {} }); // expected-note {{not supported}} + constexpr int h() { // cxx14_20-error {{never produces a constant}} + ({ if (true) {} }); // cxx14_20-note {{not supported}} return 0; } } @@ -1043,9 +1044,9 @@ static_assert(sum(Cs) == 'a' + 'b', ""); // expected-error{{not an integral cons constexpr int S = sum(Cs); // expected-error{{must be initialized by a constant expression}} expected-note{{in call}} } -constexpr void PR28739(int n) { // expected-error {{never produces a constant}} +constexpr void PR28739(int n) { // cxx14_20-error {{never produces a constant}} int *p = &n; // expected-note {{array 'p' declared here}} - p += (__int128)(unsigned long)-1; // expected-note {{cannot refer to element 18446744073709551615 of non-array object in a constant expression}} + p += (__int128)(unsigned long)-1; // cxx14_20-note {{cannot refer to element 18446744073709551615 of non-array object in a constant expression}} // expected-warning@-1 {{the pointer incremented by 18446744073709551615 refers past the last possible element for an array in 64-bit address space containing 32-bit (4-byte) elements (max possible 4611686018427387904 elements)}} } diff --git a/clang/test/SemaCXX/constant-expression-cxx2b.cpp b/clang/test/SemaCXX/constant-expression-cxx2b.cpp index 2ee1d48d1cd697..2519839b7ac578 100644 --- a/clang/test/SemaCXX/constant-expression-cxx2b.cpp +++ b/clang/test/SemaCXX/constant-expression-cxx2b.cpp @@ -10,36 +10,36 @@ struct Constexpr{}; #if __cplusplus > 202002L -constexpr int f(int n) { 
// expected-error {{constexpr function never produces a constant expression}} - static const int m = n; // expected-note {{control flows through the definition of a static variable}} \ +constexpr int f(int n) { // cxx2a-error {{constexpr function never produces a constant expression}} + static const int m = n; // cxx2a-note {{control flows through the definition of a static variable}} \ // cxx23-warning {{definition of a static variable in a constexpr function is incompatible with C++ standards before C++23}} return m; } -constexpr int g(int n) { // expected-error {{constexpr function never produces a constant expression}} - thread_local const int m = n; // expected-note {{control flows through the definition of a thread_local variable}} \ +constexpr int g(int n) { // cxx2a-error {{constexpr function never produces a constant expression}} + thread_local const int m = n; // cxx2a-note {{control flows through the definition of a thread_local variable}} \ // cxx23-warning {{definition of a thread_local variable in a constexpr function is incompatible with C++ standards before C++23}} return m; } -constexpr int c_thread_local(int n) { // expected-error {{constexpr function never produces a constant expression}} - static _Thread_local int m = 0; // expected-note {{control flows through the definition of a thread_local variable}} \ +constexpr int c_thread_local(int n) { // cxx2a-error {{constexpr function never produces a constant expression}} + static _Thread_local int m = 0; // cxx2a-note {{control flows through the definition of a thread_local variable}} \ // cxx23-warning {{definition of a static variable in a constexpr function is incompatible with C++ standards before C++23}} return m; } -constexpr int gnu_thread_local(int n) { // expected-error {{constexpr function never produces a constant expression}} - static __thread int m = 0; // expected-note {{control flows through the definition of a thread_local variable}} \ +constexpr int gnu_thread_local(int n) { // 
cxx2a-error {{constexpr function never produces a constant expression}} + static __thread int m = 0; // cxx2a-note {{control flows through the definition of a thread_local variable}} \ // cxx23-warning {{definition of a static variable in a constexpr function is incompatible with C++ standards before C++23}} return m; } -constexpr int h(int n) { // expected-error {{constexpr function never produces a constant expression}} - static const int m = n; // expected-note {{control flows through the definition of a static variable}} \ +constexpr int h(int n) { // cxx2a-error {{constexpr function never produces a constant expression}} + static const int m = n; // cxx2a-note {{control flows through the definition of a static variable}} \ // cxx23-warning {{definition of a static variable in a constexpr function is incompatible with C++ standards before C++23}} return &m - &m; } -constexpr int i(int n) { // expected-error {{constexpr function never produces a constant expression}} - thread_local const int m = n; // expected-note {{control flows through the definition of a thread_local variable}} \ +constexpr int i(int n) { // cxx2a-error {{constexpr function never produces a constant expression}} + thread_local const int m = n; // cxx2a-note {{control flows through the definition of a thread_local variable}} \ // cxx23-warning {{definition of a thread_local variable in a constexpr function is incompatible with C++ standards before C++23}} return &m - &m; } diff --git a/clang/test/SemaCXX/cxx23-invalid-constexpr.cpp b/clang/test/SemaCXX/cxx23-invalid-constexpr.cpp new file mode 100644 index 00000000000000..4dc16c59d8058d --- /dev/null +++ b/clang/test/SemaCXX/cxx23-invalid-constexpr.cpp @@ -0,0 +1,159 @@ +// RUN: %clang_cc1 -fsyntax-only -verify=expected -std=c++23 %s + +// This test covers modifications made by P2448R2. 
+ +// Check that there is no error when a constexpr function that never produces a +// constant expression, but still an error if such function is called from +// constexpr context. +constexpr int F(int N) { + double D = 2.0 / 0.0; // expected-note {{division by zero}} + return 1; +} + +constexpr int F0(int N) { + if (N == 0) + double d2 = 2.0 / 0.0; // expected-note {{division by zero}} + return 1; +} + +template +constexpr int FT(T N) { + double D = 2.0 / 0.0; // expected-note {{division by zero}} + return 1; +} + +class NonLiteral { // expected-note {{'NonLiteral' is not literal because it is not an aggregate and has no constexpr constructors}} +public: + NonLiteral() {} + ~NonLiteral() {} +}; + +constexpr NonLiteral F1() { + return NonLiteral{}; +} + +constexpr int F2(NonLiteral N) { + return 8; +} + +class Derived : public NonLiteral { + constexpr ~Derived() {}; +}; + +class Derived1 : public NonLiteral { + constexpr Derived1() : NonLiteral () {} +}; + + +struct X { + X(); + X(const X&); + X(X&&); + X& operator=(X&); + X& operator=(X&& other); + bool operator==(X const&) const; +}; + +template +struct Wrapper { + constexpr Wrapper() = default; + constexpr Wrapper(Wrapper const&) = default; + constexpr Wrapper(T const& t) : t(t) { } + constexpr Wrapper(Wrapper &&) = default; + constexpr X get() const { return t; } + constexpr bool operator==(Wrapper const&) const = default; + private: + T t; +}; + +struct WrapperNonT { + constexpr WrapperNonT() = default; + constexpr WrapperNonT(WrapperNonT const&) = default; + constexpr WrapperNonT(X const& t) : t(t) { } + constexpr WrapperNonT(WrapperNonT &&) = default; + constexpr WrapperNonT& operator=(WrapperNonT &) = default; + constexpr WrapperNonT& operator=(WrapperNonT&& other) = default; + constexpr X get() const { return t; } + constexpr bool operator==(WrapperNonT const&) const = default; + private: + X t; +}; + +struct NonDefaultMembers { + constexpr NonDefaultMembers() {}; // expected-note {{non-literal type 'X' 
cannot be used in a constant expression}} + constexpr NonDefaultMembers(NonDefaultMembers const&) {}; + constexpr NonDefaultMembers(NonDefaultMembers &&) {}; + constexpr NonDefaultMembers& operator=(NonDefaultMembers &other) {this->t = other.t; return *this;} + constexpr NonDefaultMembers& operator=(NonDefaultMembers&& other) {this->t = other.t; return *this;} + constexpr bool operator==(NonDefaultMembers const& other) const {return this->t == other.t;} + X t; +}; + +int Glob = 0; +class C1 { +public: + constexpr C1() : D(Glob) {}; +private: + int D; +}; + +void test() { + + constexpr int A = F(3); // expected-error {{constexpr variable 'A' must be initialized by a constant expression}} + // expected-note@-1 {{in call}} + F(3); + constexpr int B = F0(0); // expected-error {{constexpr variable 'B' must be initialized by a constant expression}} + // expected-note@-1 {{in call}} + F0(0); + constexpr auto C = F1(); // expected-error {{constexpr variable cannot have non-literal type 'const NonLiteral'}} + F1(); + NonLiteral L; + constexpr auto D = F2(L); // expected-error {{constexpr variable 'D' must be initialized by a constant expression}} + // expected-note@-1 {{non-literal type 'NonLiteral' cannot be used in a constant expression}} + + constexpr auto E = FT(1); // expected-error {{constexpr variable 'E' must be initialized by a constant expression}} + // expected-note@-1 {{in call}} + F2(L); + + Wrapper x; + WrapperNonT x1; + NonDefaultMembers x2; + + // TODO these produce notes with an invalid source location. 
+ // static_assert((Wrapper(), true)); + // static_assert((WrapperNonT(), true),""); + + static_assert((NonDefaultMembers(), true),""); // expected-error{{expression is not an integral constant expression}} \ + // expected-note {{in call to}} + constexpr bool FFF = (NonDefaultMembers() == NonDefaultMembers()); // expected-error{{must be initialized by a constant expression}} \ + // expected-note{{non-literal}} +} + +struct A { + A (); + ~A(); +}; + +template +struct opt +{ + union { + char c; + T data; + }; + + constexpr opt() {} + + constexpr ~opt() { + if (engaged) + data.~T(); + } + + bool engaged = false; +}; + +consteval void foo() { + opt a; +} + +void bar() { foo(); } diff --git a/clang/test/SemaCXX/cxx2a-consteval.cpp b/clang/test/SemaCXX/cxx2a-consteval.cpp index d8482ec53f0ed4..192621225a543c 100644 --- a/clang/test/SemaCXX/cxx2a-consteval.cpp +++ b/clang/test/SemaCXX/cxx2a-consteval.cpp @@ -54,7 +54,7 @@ struct C { struct D { C c; - consteval D() = default; // expected-error {{cannot be consteval}} + consteval D() = default; // expected-error {{cannot be marked consteval}} consteval ~D() = default; // expected-error {{destructor cannot be declared consteval}} }; diff --git a/clang/test/SemaCXX/cxx2b-p2266-disable-with-msvc-compat.cpp b/clang/test/SemaCXX/cxx2b-p2266-disable-with-msvc-compat.cpp index d40491834d3988..9323dea24bd75b 100644 --- a/clang/test/SemaCXX/cxx2b-p2266-disable-with-msvc-compat.cpp +++ b/clang/test/SemaCXX/cxx2b-p2266-disable-with-msvc-compat.cpp @@ -9,7 +9,7 @@ #if __INCLUDE_LEVEL__ == 0 -#if __cpluscplus > 202002L && __cpp_implicit_move < 202011L +#if __cpluscplus > 202002L && __cpp_implicit_move < 202207L #error "__cpp_implicit_move not defined correctly" #endif diff --git a/clang/test/SemaCXX/deduced-return-type-cxx14.cpp b/clang/test/SemaCXX/deduced-return-type-cxx14.cpp index 415bbbf1a0bc50..431d77ca785b8e 100644 --- a/clang/test/SemaCXX/deduced-return-type-cxx14.cpp +++ b/clang/test/SemaCXX/deduced-return-type-cxx14.cpp @@ 
-1,8 +1,8 @@ // RUN: %clang_cc1 -std=c++23 -fsyntax-only -verify=expected,since-cxx20,since-cxx14,cxx20_23,cxx23 %s // RUN: %clang_cc1 -std=c++23 -fsyntax-only -verify=expected,since-cxx20,since-cxx14,cxx20_23,cxx23 %s -fdelayed-template-parsing -DDELAYED_TEMPLATE_PARSING -// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify=expected,since-cxx20,since-cxx14,cxx14_20,cxx20_23 %s -// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify=expected,since-cxx20,since-cxx14,cxx14_20,cxx20_23 %s -fdelayed-template-parsing -DDELAYED_TEMPLATE_PARSING +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify=expected,cxx20,since-cxx20,since-cxx14,cxx14_20,cxx20_23 %s +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify=expected,cxx20,since-cxx20,since-cxx14,cxx14_20,cxx20_23 %s -fdelayed-template-parsing -DDELAYED_TEMPLATE_PARSING // RUN: %clang_cc1 -std=c++14 -fsyntax-only -verify=expected,since-cxx14,cxx14_20,cxx14 %s // RUN: %clang_cc1 -std=c++14 -fsyntax-only -verify=expected,since-cxx14,cxx14_20,cxx14 %s -fdelayed-template-parsing -DDELAYED_TEMPLATE_PARSING @@ -299,8 +299,8 @@ namespace Constexpr { constexpr int q = Y().f(); // expected-error {{must be initialized by a constant expression}} expected-note {{in call to 'Y().f()'}} } struct NonLiteral { ~NonLiteral(); } nl; // cxx14-note {{user-provided destructor}} - // cxx20_23-note@-1 {{'NonLiteral' is not literal because its destructor is not constexpr}} - constexpr auto f2(int n) { return nl; } // expected-error {{return type 'struct NonLiteral' is not a literal type}} + // cxx20-note@-1 {{'NonLiteral' is not literal because its destructor is not constexpr}} + constexpr auto f2(int n) { return nl; } // cxx14_20-error {{constexpr function's return type 'struct NonLiteral' is not a literal type}} } // It's not really clear whether these are valid, but this matches g++. 
diff --git a/clang/test/SemaOpenCLCXX/addrspace-constructors.clcpp b/clang/test/SemaOpenCLCXX/addrspace-constructors.clcpp index 1b97484767b1a5..067a404c489aa6 100644 --- a/clang/test/SemaOpenCLCXX/addrspace-constructors.clcpp +++ b/clang/test/SemaOpenCLCXX/addrspace-constructors.clcpp @@ -54,5 +54,5 @@ struct Z { struct W { int w; - constexpr W() __constant = default; // expected-error {{defaulted definition of default constructor is not constexpr}} + constexpr W() __constant = default; // expected-error {{defaulted definition of default constructor cannot be marked constexpr}} }; diff --git a/clang/tools/clang-installapi/Options.cpp b/clang/tools/clang-installapi/Options.cpp index 7d45e999448d9f..b9c36eab2ad3b7 100644 --- a/clang/tools/clang-installapi/Options.cpp +++ b/clang/tools/clang-installapi/Options.cpp @@ -112,7 +112,9 @@ Options::Options(DiagnosticsEngine &Diag, FileManager *FM, for (const Arg *A : ArgList) { if (A->isClaimed()) continue; - FrontendArgs.emplace_back(A->getAsString(ArgList)); + + FrontendArgs.emplace_back(A->getSpelling()); + llvm::copy(A->getValues(), std::back_inserter(FrontendArgs)); } FrontendArgs.push_back("-fsyntax-only"); } diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 576e8f2cd7f8fd..7e6e289c50d872 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -405,12 +405,19 @@ fatbinary(ArrayRef> InputFiles, CmdArgs.push_back("-type=o"); CmdArgs.push_back("-bundle-align=4096"); + if (Args.hasArg(OPT_compress)) + CmdArgs.push_back("-compress"); + SmallVector Targets = {"-targets=host-x86_64-unknown-linux"}; for (const auto &[File, Arch] : InputFiles) Targets.push_back(Saver.save("hipv4-amdgcn-amd-amdhsa--" + Arch)); CmdArgs.push_back(Saver.save(llvm::join(Targets, ","))); +#ifdef _WIN32 + CmdArgs.push_back("-input=NUL"); +#else CmdArgs.push_back("-input=/dev/null"); +#endif 
for (const auto &[File, Arch] : InputFiles) CmdArgs.push_back(Saver.save("-input=" + File)); diff --git a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td index 763426570c2a6f..473fb19d922385 100644 --- a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td +++ b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td @@ -58,6 +58,8 @@ def print_wrapped_module : Flag<["--"], "print-wrapped-module">, HelpText<"Print the wrapped module's IR for testing">; def save_temps : Flag<["--"], "save-temps">, Flags<[WrapperOnlyOption]>, HelpText<"Save intermediate results">; +def compress : Flag<["--"], "compress">, + Flags<[WrapperOnlyOption]>, HelpText<"Compress bundled files">; def wrapper_time_trace_eq : Joined<["--"], "wrapper-time-trace=">, Flags<[WrapperOnlyOption]>, MetaVarName<"">, @@ -84,10 +86,6 @@ def linker_arg_EQ : Joined<["--"], "linker-arg=">, Flags<[DeviceOnlyOption, HelpHidden]>, HelpText<"An extra argument to be passed to the linker">; -// Separator between the linker wrapper and host linker flags. -def separator : Flag<["--"], "">, Flags<[WrapperOnlyOption]>, - HelpText<"The separator for the wrapped linker arguments">; - // Arguments for the LLVM backend. 
def mllvm : Separate<["-"], "mllvm">, Flags<[WrapperOnlyOption]>, MetaVarName<"">, HelpText<"Arguments passed to the LLVM invocation">; diff --git a/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp b/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp index 8799d03dfd3c58..465a8e21690c4a 100644 --- a/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp @@ -190,7 +190,8 @@ TEST_F(EnvironmentTest, JoinRecords) { Env2.setValue(Loc, Val2); Environment::ValueModel Model; - Environment EnvJoined = Environment::join(Env1, Env2, Model); + Environment EnvJoined = + Environment::join(Env1, Env2, Model, Environment::DiscardExprState); auto *JoinedVal = cast(EnvJoined.getValue(Loc)); EXPECT_NE(JoinedVal, &Val1); EXPECT_NE(JoinedVal, &Val2); diff --git a/clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp b/clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp index 09f5524e152c9f..5c4d42c6ccdcf8 100644 --- a/clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp @@ -179,7 +179,10 @@ llvm::Error test::checkDataflowWithNoopAnalysis( // -fnodelayed-template-parsing is the default everywhere but on Windows. // Set it explicitly so that tests behave the same on Windows as on other // platforms. - "-fsyntax-only", "-fno-delayed-template-parsing", + // Set -Wno-unused-value because it's often desirable in tests to write + // expressions with unused value, and we don't want the output to be + // cluttered with warnings about them. 
+ "-fsyntax-only", "-fno-delayed-template-parsing", "-Wno-unused-value", "-std=" + std::string(LangStandard::getLangStandardForKind(Std).getName())}; AnalysisInputs AI( diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp index f534ccb1254701..a8c282f140b4cd 100644 --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -1476,6 +1476,69 @@ TEST(TransferTest, BaseClassInitializer) { llvm::Succeeded()); } +TEST(TransferTest, FieldsDontHaveValuesInConstructor) { + // In a constructor, unlike in regular member functions, we don't want fields + // to be pre-initialized with values, because doing so is the job of the + // constructor. + std::string Code = R"( + struct target { + target() { + 0; + // [[p]] + // Mention the field so it is modeled; + Val; + } + + int Val; + }; + )"; + runDataflow( + Code, + [](const llvm::StringMap> &Results, + ASTContext &ASTCtx) { + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + EXPECT_EQ(getFieldValue(Env.getThisPointeeStorageLocation(), "Val", + ASTCtx, Env), + nullptr); + }); +} + +TEST(TransferTest, FieldsDontHaveValuesInConstructorWithBaseClass) { + // See above, but for a class with a base class. + std::string Code = R"( + struct Base { + int BaseVal; + }; + + struct target : public Base { + target() { + 0; + // [[p]] + // Mention the fields so they are modeled. + BaseVal; + Val; + } + + int Val; + }; + )"; + runDataflow( + Code, + [](const llvm::StringMap> &Results, + ASTContext &ASTCtx) { + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + // FIXME: The field of the base class should already have been + // initialized with a value by the base constructor. This test documents + // the current buggy behavior. 
+ EXPECT_EQ(getFieldValue(Env.getThisPointeeStorageLocation(), "BaseVal", + ASTCtx, Env), + nullptr); + EXPECT_EQ(getFieldValue(Env.getThisPointeeStorageLocation(), "Val", + ASTCtx, Env), + nullptr); + }); +} + TEST(TransferTest, StructModeledFieldsWithAccessor) { std::string Code = R"( class S { @@ -5672,6 +5735,39 @@ TEST(TransferTest, ContextSensitiveReturnInt) { {BuiltinOptions{ContextSensitiveOptions{}}}); } +TEST(TransferTest, ContextSensitiveReturnRecord) { + std::string Code = R"( + struct S { + bool B; + }; + + S makeS(bool BVal) { return {BVal}; } + + void target() { + S FalseS = makeS(false); + S TrueS = makeS(true); + // [[p]] + } + )"; + runDataflow( + Code, + [](const llvm::StringMap> &Results, + ASTContext &ASTCtx) { + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + + auto &FalseSLoc = + getLocForDecl(ASTCtx, Env, "FalseS"); + auto &TrueSLoc = + getLocForDecl(ASTCtx, Env, "TrueS"); + + EXPECT_EQ(getFieldValue(&FalseSLoc, "B", ASTCtx, Env), + &Env.getBoolLiteralValue(false)); + EXPECT_EQ(getFieldValue(&TrueSLoc, "B", ASTCtx, Env), + &Env.getBoolLiteralValue(true)); + }, + {BuiltinOptions{ContextSensitiveOptions{}}}); +} + TEST(TransferTest, ContextSensitiveMethodLiteral) { std::string Code = R"( class MyClass { diff --git a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp index 34f9b0b23719fe..9d05a0d6ca4010 100644 --- a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp @@ -244,15 +244,17 @@ TEST_F(DiscardExprStateTest, WhileStatement) { EXPECT_NE(NotEqOpState.Env.getValue(NotEqOp), nullptr); // In the block that calls `foo(p)`, the value for `p != nullptr` is discarded - // because it is not consumed by this block. + // because it is not consumed outside the block it is in. 
const auto &CallFooState = blockStateForStmt(BlockStates, CallFoo); EXPECT_EQ(CallFooState.Env.getValue(NotEqOp), nullptr); } TEST_F(DiscardExprStateTest, BooleanOperator) { std::string Code = R"( - bool target(bool b1, bool b2) { - return b1 && b2; + void f(); + void target(bool b1, bool b2) { + if (b1 && b2) + f(); } )"; auto BlockStates = llvm::cantFail(runAnalysis( @@ -260,46 +262,80 @@ TEST_F(DiscardExprStateTest, BooleanOperator) { const auto &AndOp = matchNode(binaryOperator(hasOperatorName("&&"))); - const auto &Return = matchNode(returnStmt()); + const auto &CallF = + matchNode(callExpr(callee(functionDecl(hasName("f"))))); // In the block that evaluates the LHS of the `&&` operator, the LHS is // associated with a value, while the right-hand side is not (unsurprisingly, // as it hasn't been evaluated yet). const auto &LHSState = blockStateForStmt(BlockStates, *AndOp.getLHS()); auto *LHSValue = cast(LHSState.Env.getValue(*AndOp.getLHS())); - ASSERT_NE(LHSValue, nullptr); + EXPECT_NE(LHSValue, nullptr); EXPECT_EQ(LHSState.Env.getValue(*AndOp.getRHS()), nullptr); - // In the block that evaluates the RHS, the RHS is associated with a - // value. The value for the LHS has been discarded as it is not consumed by - // this block. + // In the block that evaluates the RHS, both the LHS and RHS are associated + // with values, as they are both subexpressions of the `&&` operator, which + // is evaluated in a later block. const auto &RHSState = blockStateForStmt(BlockStates, *AndOp.getRHS()); - EXPECT_EQ(RHSState.Env.getValue(*AndOp.getLHS()), nullptr); - auto *RHSValue = cast(RHSState.Env.getValue(*AndOp.getRHS())); - ASSERT_NE(RHSValue, nullptr); - - // In the block that evaluates the return statement, the expression `b1 && b2` - // is associated with a value (and check that it's the right one). 
- // The expressions `b1` and `b2` are _not_ associated with a value in this - // block, even though they are consumed by the block, because: - // * This block has two prececessor blocks (the one that evaluates `b1` and - // the one that evaluates `b2`). - // * `b1` is only associated with a value in the block that evaluates `b1` but - // not the block that evalutes `b2`, so the join operation discards the - // value for `b1`. - // * `b2` is only associated with a value in the block that evaluates `b2` but - // not the block that evaluates `b1`, the the join operation discards the - // value for `b2`. - // Nevertheless, the analysis generates the correct formula for `b1 && b2` - // because the transfer function for the `&&` operator retrieves the values - // for its operands from the environments for the blocks that compute the - // operands, rather than from the environment for the block that contains the - // `&&`. - const auto &ReturnState = blockStateForStmt(BlockStates, Return); - EXPECT_EQ(ReturnState.Env.getValue(*AndOp.getLHS()), nullptr); - EXPECT_EQ(ReturnState.Env.getValue(*AndOp.getRHS()), nullptr); - EXPECT_EQ(ReturnState.Env.getValue(AndOp), - &ReturnState.Env.makeAnd(*LHSValue, *RHSValue)); + EXPECT_EQ(RHSState.Env.getValue(*AndOp.getLHS()), LHSValue); + auto *RHSValue = RHSState.Env.get(*AndOp.getRHS()); + EXPECT_NE(RHSValue, nullptr); + + // In the block that evaluates `b1 && b2`, the `&&` as well as its operands + // are associated with values. + const auto &AndOpState = blockStateForStmt(BlockStates, AndOp); + EXPECT_EQ(AndOpState.Env.getValue(*AndOp.getLHS()), LHSValue); + EXPECT_EQ(AndOpState.Env.getValue(*AndOp.getRHS()), RHSValue); + EXPECT_EQ(AndOpState.Env.getValue(AndOp), + &AndOpState.Env.makeAnd(*LHSValue, *RHSValue)); + + // In the block that calls `f()`, none of `b1`, `b2`, or `b1 && b2` should be + // associated with values. 
+ const auto &CallFState = blockStateForStmt(BlockStates, CallF); + EXPECT_EQ(CallFState.Env.getValue(*AndOp.getLHS()), nullptr); + EXPECT_EQ(CallFState.Env.getValue(*AndOp.getRHS()), nullptr); + EXPECT_EQ(CallFState.Env.getValue(AndOp), nullptr); +} + +TEST_F(DiscardExprStateTest, ConditionalOperator) { + std::string Code = R"( + void f(int*, int); + void g(); + bool cond(); + + void target() { + int i = 0; + if (cond()) + f(&i, cond() ? 1 : 0); + g(); + } + )"; + auto BlockStates = llvm::cantFail(runAnalysis( + Code, [](ASTContext &C) { return NoopAnalysis(C); })); + + const auto &AddrOfI = + matchNode(unaryOperator(hasOperatorName("&"))); + const auto &CallF = + matchNode(callExpr(callee(functionDecl(hasName("f"))))); + const auto &CallG = + matchNode(callExpr(callee(functionDecl(hasName("g"))))); + + // In the block that evaluates `&i`, it should obviously have a value. + const auto &AddrOfIState = blockStateForStmt(BlockStates, AddrOfI); + auto *AddrOfIVal = AddrOfIState.Env.get(AddrOfI); + EXPECT_NE(AddrOfIVal, nullptr); + + // Because of the conditional operator, the `f(...)` call is evaluated in a + // different block than `&i`, but `&i` still needs to have a value here + // because it's a subexpression of the call. + const auto &CallFState = blockStateForStmt(BlockStates, CallF); + EXPECT_NE(&CallFState, &AddrOfIState); + EXPECT_EQ(CallFState.Env.get(AddrOfI), AddrOfIVal); + + // In the block that calls `g()`, `&i` should no longer be associated with a + // value. 
+ const auto &CallGState = blockStateForStmt(BlockStates, CallG); + EXPECT_EQ(CallGState.Env.get(AddrOfI), nullptr); } struct NonConvergingLattice { diff --git a/clang/unittests/Basic/DarwinSDKInfoTest.cpp b/clang/unittests/Basic/DarwinSDKInfoTest.cpp index 5f24e6eae515d2..7214f3bc8e19f4 100644 --- a/clang/unittests/Basic/DarwinSDKInfoTest.cpp +++ b/clang/unittests/Basic/DarwinSDKInfoTest.cpp @@ -168,6 +168,16 @@ TEST(DarwinSDKInfoTest, ParseAndTestMappingIOSDerived) { EXPECT_EQ( *Mapping->map(VersionTuple(13, 0), VersionTuple(), VersionTuple(99, 99)), VersionTuple(99, 99)); + + // Verify introduced, deprecated, and obsoleted mappings. + EXPECT_EQ(Mapping->mapIntroducedAvailabilityVersion(VersionTuple(10, 1)), + VersionTuple(10.0)); + EXPECT_EQ(Mapping->mapDeprecatedObsoletedAvailabilityVersion( + VersionTuple(100000, 0)), + VersionTuple(100000)); + EXPECT_EQ( + Mapping->mapDeprecatedObsoletedAvailabilityVersion(VersionTuple(13.0)), + VersionTuple(15, 0, 99)); } TEST(DarwinSDKInfoTest, MissingKeys) { diff --git a/clang/unittests/Interpreter/CMakeLists.txt b/clang/unittests/Interpreter/CMakeLists.txt index 712641afb976dd..0ddedb283e07d1 100644 --- a/clang/unittests/Interpreter/CMakeLists.txt +++ b/clang/unittests/Interpreter/CMakeLists.txt @@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS ) add_clang_unittest(ClangReplInterpreterTests + IncrementalCompilerBuilderTest.cpp IncrementalProcessingTest.cpp InterpreterTest.cpp CodeCompletionTest.cpp diff --git a/clang/unittests/Interpreter/IncrementalCompilerBuilderTest.cpp b/clang/unittests/Interpreter/IncrementalCompilerBuilderTest.cpp new file mode 100644 index 00000000000000..f729566f7efde6 --- /dev/null +++ b/clang/unittests/Interpreter/IncrementalCompilerBuilderTest.cpp @@ -0,0 +1,47 @@ +//=== unittests/Interpreter/IncrementalCompilerBuilderTest.cpp ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/TargetOptions.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Interpreter/Interpreter.h" +#include "clang/Lex/PreprocessorOptions.h" +#include "llvm/Support/Error.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace clang; + +namespace { + +// Usually FrontendAction takes the raw pointers and wraps them back into +// unique_ptrs in InitializeFileRemapping() +static void cleanupRemappedFileBuffers(CompilerInstance &CI) { + for (const auto &RB : CI.getPreprocessorOpts().RemappedFileBuffers) { + delete RB.second; + } + CI.getPreprocessorOpts().clearRemappedFiles(); +} + +TEST(IncrementalCompilerBuilder, SetCompilerArgs) { + std::vector ClangArgv = {"-Xclang", "-ast-dump-all"}; + auto CB = clang::IncrementalCompilerBuilder(); + CB.SetCompilerArgs(ClangArgv); + auto CI = cantFail(CB.CreateCpp()); + EXPECT_TRUE(CI->getFrontendOpts().ASTDumpAll); + cleanupRemappedFileBuffers(*CI); +} + +TEST(IncrementalCompilerBuilder, SetTargetTriple) { + auto CB = clang::IncrementalCompilerBuilder(); + CB.SetTargetTriple("armv6-none-eabi"); + auto CI = cantFail(CB.CreateCpp()); + EXPECT_EQ(CI->getTargetOpts().Triple, "armv6-none-unknown-eabi"); + cleanupRemappedFileBuffers(*CI); +} + +} // end anonymous namespace diff --git a/clang/unittests/Sema/SemaNoloadLookupTest.cpp b/clang/unittests/Sema/SemaNoloadLookupTest.cpp index b24c72cba407f3..cf89c7331e4e0f 100644 --- a/clang/unittests/Sema/SemaNoloadLookupTest.cpp +++ b/clang/unittests/Sema/SemaNoloadLookupTest.cpp @@ -64,7 +64,7 @@ class NoloadLookupTest : public ::testing::Test { CIOpts.VFS = llvm::vfs::createPhysicalFileSystem(); std::string CacheBMIPath = - llvm::Twine(TestDir + "/" + ModuleName + " .pcm").str(); + llvm::Twine(TestDir + "/" + ModuleName + ".pcm").str(); std::string PrebuiltModulePath = "-fprebuilt-module-path=" + 
TestDir.str().str(); const char *Args[] = {"clang++", @@ -75,9 +75,7 @@ class NoloadLookupTest : public ::testing::Test { TestDir.c_str(), "-I", TestDir.c_str(), - FileName.c_str(), - "-o", - CacheBMIPath.c_str()}; + FileName.c_str()}; std::shared_ptr Invocation = createInvocation(Args, CIOpts); EXPECT_TRUE(Invocation); @@ -85,7 +83,8 @@ class NoloadLookupTest : public ::testing::Test { CompilerInstance Instance; Instance.setDiagnostics(Diags.get()); Instance.setInvocation(Invocation); - GenerateModuleInterfaceAction Action; + Instance.getFrontendOpts().OutputFile = CacheBMIPath; + GenerateReducedModuleInterfaceAction Action; EXPECT_TRUE(Instance.ExecuteAction(Action)); EXPECT_FALSE(Diags->hasErrorOccurred()); diff --git a/clang/unittests/Serialization/ForceCheckFileInputTest.cpp b/clang/unittests/Serialization/ForceCheckFileInputTest.cpp index ed0daa43436eb6..ad8892b8c8be1e 100644 --- a/clang/unittests/Serialization/ForceCheckFileInputTest.cpp +++ b/clang/unittests/Serialization/ForceCheckFileInputTest.cpp @@ -69,9 +69,9 @@ export int aa = 43; CIOpts.Diags = Diags; CIOpts.VFS = llvm::vfs::createPhysicalFileSystem(); - const char *Args[] = { - "clang++", "-std=c++20", "--precompile", "-working-directory", - TestDir.c_str(), "a.cppm", "-o", BMIPath.c_str()}; + const char *Args[] = {"clang++", "-std=c++20", + "--precompile", "-working-directory", + TestDir.c_str(), "a.cppm"}; std::shared_ptr Invocation = createInvocation(Args, CIOpts); EXPECT_TRUE(Invocation); @@ -88,6 +88,8 @@ export int aa = 43; Instance.setDiagnostics(Diags.get()); Instance.setInvocation(Invocation); + Instance.getFrontendOpts().OutputFile = BMIPath; + if (auto VFSWithRemapping = createVFSFromCompilerInvocation( Instance.getInvocation(), Instance.getDiagnostics(), CIOpts.VFS)) CIOpts.VFS = VFSWithRemapping; @@ -95,7 +97,7 @@ export int aa = 43; Instance.getHeaderSearchOpts().ValidateASTInputFilesContent = true; - GenerateModuleInterfaceAction Action; + GenerateReducedModuleInterfaceAction Action; 
EXPECT_TRUE(Instance.ExecuteAction(Action)); EXPECT_FALSE(Diags->hasErrorOccurred()); } diff --git a/clang/unittests/Serialization/NoCommentsTest.cpp b/clang/unittests/Serialization/NoCommentsTest.cpp index 2632a6337807ac..a0a564aeff9a15 100644 --- a/clang/unittests/Serialization/NoCommentsTest.cpp +++ b/clang/unittests/Serialization/NoCommentsTest.cpp @@ -90,9 +90,9 @@ void foo() {} CIOpts.VFS = llvm::vfs::createPhysicalFileSystem(); std::string CacheBMIPath = llvm::Twine(TestDir + "/Comments.pcm").str(); - const char *Args[] = { - "clang++", "-std=c++20", "--precompile", "-working-directory", - TestDir.c_str(), "Comments.cppm", "-o", CacheBMIPath.c_str()}; + const char *Args[] = {"clang++", "-std=c++20", + "--precompile", "-working-directory", + TestDir.c_str(), "Comments.cppm"}; std::shared_ptr Invocation = createInvocation(Args, CIOpts); ASSERT_TRUE(Invocation); @@ -100,7 +100,8 @@ void foo() {} CompilerInstance Instance; Instance.setDiagnostics(Diags.get()); Instance.setInvocation(Invocation); - GenerateModuleInterfaceAction Action; + Instance.getFrontendOpts().OutputFile = CacheBMIPath; + GenerateReducedModuleInterfaceAction Action; ASSERT_TRUE(Instance.ExecuteAction(Action)); ASSERT_FALSE(Diags->hasErrorOccurred()); diff --git a/clang/unittests/Serialization/VarDeclConstantInitTest.cpp b/clang/unittests/Serialization/VarDeclConstantInitTest.cpp index 7efa1c1d64a964..5cbbfb9ff003b3 100644 --- a/clang/unittests/Serialization/VarDeclConstantInitTest.cpp +++ b/clang/unittests/Serialization/VarDeclConstantInitTest.cpp @@ -96,10 +96,9 @@ export namespace Fibonacci CIOpts.Diags = Diags; CIOpts.VFS = llvm::vfs::createPhysicalFileSystem(); - std::string CacheBMIPath = llvm::Twine(TestDir + "/Cached.pcm").str(); - const char *Args[] = { - "clang++", "-std=c++20", "--precompile", "-working-directory", - TestDir.c_str(), "Cached.cppm", "-o", CacheBMIPath.c_str()}; + const char *Args[] = {"clang++", "-std=c++20", + "--precompile", "-working-directory", + TestDir.c_str(), 
"Cached.cppm"}; std::shared_ptr Invocation = createInvocation(Args, CIOpts); ASSERT_TRUE(Invocation); @@ -108,7 +107,11 @@ export namespace Fibonacci CompilerInstance Instance; Instance.setDiagnostics(Diags.get()); Instance.setInvocation(Invocation); - GenerateModuleInterfaceAction Action; + + std::string CacheBMIPath = llvm::Twine(TestDir + "/Cached.pcm").str(); + Instance.getFrontendOpts().OutputFile = CacheBMIPath; + + GenerateReducedModuleInterfaceAction Action; ASSERT_TRUE(Instance.ExecuteAction(Action)); ASSERT_FALSE(Diags->hasErrorOccurred()); diff --git a/clang/utils/TableGen/ClangOpcodesEmitter.cpp b/clang/utils/TableGen/ClangOpcodesEmitter.cpp index 1c41301ab3aeeb..120e1e2efa32b4 100644 --- a/clang/utils/TableGen/ClangOpcodesEmitter.cpp +++ b/clang/utils/TableGen/ClangOpcodesEmitter.cpp @@ -274,7 +274,7 @@ void ClangOpcodesEmitter::EmitGroup(raw_ostream &OS, StringRef N, // Emit the prototype of the group emitter in the header. OS << "#if defined(GET_EVAL_PROTO) || defined(GET_LINK_PROTO)\n"; - OS << "bool " << EmitFuncName << "("; + OS << "[[nodiscard]] bool " << EmitFuncName << "("; for (size_t I = 0, N = Types->size(); I < N; ++I) OS << "PrimType, "; for (auto *Arg : Args) diff --git a/clang/www/cxx_dr_status.html b/clang/www/cxx_dr_status.html index 774c71bc1cb6b7..503472a2cae4eb 100755 --- a/clang/www/cxx_dr_status.html +++ b/clang/www/cxx_dr_status.html @@ -3154,7 +3154,7 @@

C++ defect report implementation status

519 CD1 Null pointer preservation in void* conversions - Yes + Clang 2.7 520 @@ -3468,7 +3468,7 @@

C++ defect report implementation status

571 CD2 References declared const - Unknown + Clang 2.7 572 diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html index 421b3426b006f9..fa00e7685610a6 100755 --- a/clang/www/cxx_status.html +++ b/clang/www/cxx_status.html @@ -197,7 +197,7 @@

C++23 implementation status

Simpler implicit move - P2266R1 + P2266R3 Clang 13 @@ -356,14 +356,7 @@

C++23 implementation status

Relaxing some constexpr restrictions P2448R2 - -
Clang 17 (Partial) - We do not support outside of defaulted special memeber functions the change that constexpr functions no - longer have to be constexpr compatible but rather support a less restricted requirements for constexpr - functions. Which include allowing non-literal types as return values and parameters, allow calling of - non-constexpr functions and constructors. -
- + Clang 19 Using unknown pointers and references in constant expressions diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake index 2ca18ebb4ad489..4f47142850a55e 100644 --- a/compiler-rt/cmake/config-ix.cmake +++ b/compiler-rt/cmake/config-ix.cmake @@ -103,7 +103,7 @@ check_cxx_compiler_flag("-Werror -msse4.2" COMPILER_RT_HAS_MSSE4_2_FLAG) check_cxx_compiler_flag(--sysroot=. COMPILER_RT_HAS_SYSROOT_FLAG) check_cxx_compiler_flag("-Werror -mcrc" COMPILER_RT_HAS_MCRC_FLAG) check_cxx_compiler_flag(-fno-partial-inlining COMPILER_RT_HAS_FNO_PARTIAL_INLINING_FLAG) -check_cxx_compiler_flag(-Werror -ftrivial-auto-var-init=pattern COMPILER_RT_HAS_TRIVIAL_AUTO_INIT) +check_cxx_compiler_flag("-Werror -ftrivial-auto-var-init=pattern" COMPILER_RT_HAS_TRIVIAL_AUTO_INIT) if(NOT WIN32 AND NOT CYGWIN) # MinGW warns if -fvisibility-inlines-hidden is used. @@ -150,21 +150,21 @@ check_cxx_compiler_flag(/wd4391 COMPILER_RT_HAS_WD4391_FLAG) check_cxx_compiler_flag(/wd4722 COMPILER_RT_HAS_WD4722_FLAG) check_cxx_compiler_flag(/wd4800 COMPILER_RT_HAS_WD4800_FLAG) -check_cxx_compiler_flag(-Werror -Warray-bounds COMPILER_RT_HAS_ARRAY_BOUNDS_FLAG) -check_cxx_compiler_flag(-Werror -Wuninitialized COMPILER_RT_HAS_UNINITIALIZED_FLAG) -check_cxx_compiler_flag(-Werror -Wshadow COMPILER_RT_HAS_SHADOW_FLAG) -check_cxx_compiler_flag(-Werror -Wempty-body COMPILER_RT_HAS_EMPTY_BODY_FLAG) -check_cxx_compiler_flag(-Werror -Wsizeof-pointer-memaccess COMPILER_RT_HAS_SIZEOF_POINTER_MEMACCESS_FLAG) -check_cxx_compiler_flag(-Werror -Wsizeof-array-argument COMPILER_RT_HAS_SIZEOF_ARRAY_ARGUMENT_FLAG) -check_cxx_compiler_flag(-Werror -Wsuspicious-memaccess COMPILER_RT_HAS_SUSPICIOUS_MEMACCESS_FLAG) -check_cxx_compiler_flag(-Werror -Wbuiltin-memcpy-chk-size COMPILER_RT_HAS_BUILTIN_MEMCPY_CHK_SIZE_FLAG) -check_cxx_compiler_flag(-Werror -Warray-bounds-pointer-arithmetic COMPILER_RT_HAS_ARRAY_BOUNDS_POINTER_ARITHMETIC_FLAG) -check_cxx_compiler_flag(-Werror -Wreturn-stack-address 
COMPILER_RT_HAS_RETURN_STACK_ADDRESS_FLAG) -check_cxx_compiler_flag(-Werror -Wsizeof-array-decay COMPILER_RT_HAS_SIZEOF_ARRAY_DECAY_FLAG) -check_cxx_compiler_flag(-Werror -Wformat-insufficient-args COMPILER_RT_HAS_FORMAT_INSUFFICIENT_ARGS_FLAG) -check_cxx_compiler_flag(-Werror -Wformat-security COMPILER_RT_HAS_BUILTIN_FORMAL_SECURITY_FLAG) -check_cxx_compiler_flag(-Werror -Wsizeof-array-div COMPILER_RT_HAS_SIZEOF_ARRAY_DIV_FLAG) -check_cxx_compiler_flag(-Werror -Wsizeof-pointer-div COMPILER_RT_HAS_SIZEOF_POINTER_DIV_FLAG) +check_cxx_compiler_flag("-Werror -Warray-bounds" COMPILER_RT_HAS_ARRAY_BOUNDS_FLAG) +check_cxx_compiler_flag("-Werror -Wuninitialized" COMPILER_RT_HAS_UNINITIALIZED_FLAG) +check_cxx_compiler_flag("-Werror -Wshadow" COMPILER_RT_HAS_SHADOW_FLAG) +check_cxx_compiler_flag("-Werror -Wempty-body" COMPILER_RT_HAS_EMPTY_BODY_FLAG) +check_cxx_compiler_flag("-Werror -Wsizeof-pointer-memaccess" COMPILER_RT_HAS_SIZEOF_POINTER_MEMACCESS_FLAG) +check_cxx_compiler_flag("-Werror -Wsizeof-array-argument" COMPILER_RT_HAS_SIZEOF_ARRAY_ARGUMENT_FLAG) +check_cxx_compiler_flag("-Werror -Wsuspicious-memaccess" COMPILER_RT_HAS_SUSPICIOUS_MEMACCESS_FLAG) +check_cxx_compiler_flag("-Werror -Wbuiltin-memcpy-chk-size" COMPILER_RT_HAS_BUILTIN_MEMCPY_CHK_SIZE_FLAG) +check_cxx_compiler_flag("-Werror -Warray-bounds-pointer-arithmetic" COMPILER_RT_HAS_ARRAY_BOUNDS_POINTER_ARITHMETIC_FLAG) +check_cxx_compiler_flag("-Werror -Wreturn-stack-address" COMPILER_RT_HAS_RETURN_STACK_ADDRESS_FLAG) +check_cxx_compiler_flag("-Werror -Wsizeof-array-decay" COMPILER_RT_HAS_SIZEOF_ARRAY_DECAY_FLAG) +check_cxx_compiler_flag("-Werror -Wformat-insufficient-args" COMPILER_RT_HAS_FORMAT_INSUFFICIENT_ARGS_FLAG) +check_cxx_compiler_flag("-Werror -Wformat-security" COMPILER_RT_HAS_BUILTIN_FORMAL_SECURITY_FLAG) +check_cxx_compiler_flag("-Werror -Wsizeof-array-div" COMPILER_RT_HAS_SIZEOF_ARRAY_DIV_FLAG) +check_cxx_compiler_flag("-Werror -Wsizeof-pointer-div" COMPILER_RT_HAS_SIZEOF_POINTER_DIV_FLAG) # 
Symbols. check_symbol_exists(__func__ "" COMPILER_RT_HAS_FUNC_SYMBOL) diff --git a/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp b/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp index 71770166805f78..db80eb383885e6 100644 --- a/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp @@ -21,10 +21,15 @@ #include #include #include +// clang-format off #include - -// This must be included after windows.h. +// These must be included after windows.h. +// archicture need to be set before including +// libloaderapi +#include +#include #include +// clang-format on namespace fuzzer { @@ -234,8 +239,20 @@ size_t PageSize() { } void SetThreadName(std::thread &thread, const std::string &name) { - // TODO ? - // to UTF-8 then SetThreadDescription ? + typedef HRESULT(WINAPI * proc)(HANDLE, PCWSTR); + HMODULE kbase = GetModuleHandleA("KernelBase.dll"); + proc ThreadNameProc = + reinterpret_cast(GetProcAddress(kbase, "SetThreadDescription")); + if (ThreadNameProc) { + std::wstring buf; + auto sz = MultiByteToWideChar(CP_UTF8, 0, name.data(), -1, nullptr, 0); + if (sz > 0) { + buf.resize(sz); + if (MultiByteToWideChar(CP_UTF8, 0, name.data(), -1, &buf[0], sz) > 0) { + (void)ThreadNameProc(thread.native_handle(), buf.c_str()); + } + } + } } } // namespace fuzzer diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_syscalls.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_syscalls.inc index c10943b3e48793..b3161690f3ce8a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_syscalls.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_syscalls.inc @@ -2808,6 +2808,15 @@ PRE_SYSCALL(fchownat) POST_SYSCALL(fchownat) (long res, long dfd, const void *filename, long user, long group, long flag) {} +PRE_SYSCALL(fchmodat2)(long dfd, const void *filename, long mode, long flag) { + if (filename) + PRE_READ(filename, + __sanitizer::internal_strlen((const char *)filename) + 1); +} + +POST_SYSCALL(fchmodat2) +(long res, long 
dfd, const void *filename, long mode, long flag) {} + PRE_SYSCALL(openat)(long dfd, const void *filename, long flags, long mode) { if (filename) PRE_READ(filename, diff --git a/cross-project-tests/debuginfo-tests/llgdb-tests/static-member-2.cpp b/cross-project-tests/debuginfo-tests/llgdb-tests/static-member-2.cpp index 79ff74cb2d0aeb..3f11ae018fc858 100644 --- a/cross-project-tests/debuginfo-tests/llgdb-tests/static-member-2.cpp +++ b/cross-project-tests/debuginfo-tests/llgdb-tests/static-member-2.cpp @@ -2,7 +2,7 @@ // RUN: %clangxx %target_itanium_abi_host_triple %t -o %t.out // RUN: %test_debuginfo %s %t.out // XFAIL: gdb-clang-incompatibility -// XFAIL: system-darwin && target-aarch64 +// XFAIL: system-darwin // DEBUGGER: delete breakpoints // DEBUGGER: break static-member.cpp:33 diff --git a/cross-project-tests/debuginfo-tests/llgdb-tests/static-member.cpp b/cross-project-tests/debuginfo-tests/llgdb-tests/static-member.cpp index abfa8e3337f64d..57316dfd640404 100644 --- a/cross-project-tests/debuginfo-tests/llgdb-tests/static-member.cpp +++ b/cross-project-tests/debuginfo-tests/llgdb-tests/static-member.cpp @@ -2,7 +2,7 @@ // RUN: %clangxx %target_itanium_abi_host_triple %t -o %t.out // RUN: %test_debuginfo %s %t.out // XFAIL: !system-darwin && gdb-clang-incompatibility -// XFAIL: system-darwin && target-aarch64 +// XFAIL: system-darwin // DEBUGGER: delete breakpoints // DEBUGGER: break static-member.cpp:33 // DEBUGGER: r diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 185e0316870e94..5cff95c7d125b0 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1120,7 +1120,21 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, if (auto refType = baseOp.getType().dyn_cast()) eleType = refType.getElementType(); - if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) { + // If a variable is specified in declare target link and if device + // type is not specified as `nohost`, it 
needs to be mapped tofrom + mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); + mlir::Operation *op = mod.lookupSymbol(converter.mangleName(sym)); + auto declareTargetOp = + llvm::dyn_cast_if_present(op); + if (declareTargetOp && declareTargetOp.isDeclareTarget()) { + if (declareTargetOp.getDeclareTargetCaptureClause() == + mlir::omp::DeclareTargetCaptureClause::link && + declareTargetOp.getDeclareTargetDeviceType() != + mlir::omp::DeclareTargetDeviceType::nohost) { + mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; + mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; + } + } else if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) { captureKind = mlir::omp::VariableCaptureKind::ByCopy; } else if (!fir::isa_builtin_cptr_type(eleType)) { mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; diff --git a/flang/runtime/complex-reduction.c b/flang/runtime/complex-reduction.c index 72c31ce08b875a..c91d1253991176 100644 --- a/flang/runtime/complex-reduction.c +++ b/flang/runtime/complex-reduction.c @@ -82,7 +82,8 @@ static long_double_Complex_t CMPLXL(long double r, long double i) { * supports __builtin_complex. For Clang, require >=12.0. * Otherwise, rely on the memory layout compatibility. */ -#if (defined(__clang_major__) && (__clang_major__ >= 12)) || defined(__GNUC__) +#if (defined(__clang_major__) && (__clang_major__ >= 12)) || \ + (defined(__GNUC__) && !defined(__clang__)) #define CMPLXF128 __builtin_complex #else static CFloat128ComplexType CMPLXF128(CFloat128Type r, CFloat128Type i) { diff --git a/flang/test/Lower/OpenMP/FIR/delayed-privatization-firstprivate.f90 b/flang/test/Lower/OpenMP/FIR/delayed-privatization-firstprivate.f90 index 122542345f104b..50938342dee7c2 100644 --- a/flang/test/Lower/OpenMP/FIR/delayed-privatization-firstprivate.f90 +++ b/flang/test/Lower/OpenMP/FIR/delayed-privatization-firstprivate.f90 @@ -1,6 +1,9 @@ ! Test delayed privatization for the `private` clause. -! 
RUN: bbc -emit-fir -hlfir=false -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s +! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -mmlir \ +! RUN: --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s +! RUN: bbc -emit-fir -hlfir=false -fopenmp --openmp-enable-delayed-privatization \ +! RUN: -o - %s 2>&1 | FileCheck %s subroutine delayed_privatization_firstprivate implicit none diff --git a/flang/test/Lower/OpenMP/FIR/delayed-privatization-private.f90 b/flang/test/Lower/OpenMP/FIR/delayed-privatization-private.f90 index 2e9995ea1fd4c4..b13687faa3f26d 100644 --- a/flang/test/Lower/OpenMP/FIR/delayed-privatization-private.f90 +++ b/flang/test/Lower/OpenMP/FIR/delayed-privatization-private.f90 @@ -1,6 +1,9 @@ ! Test delayed privatization for the `private` clause. -! RUN: bbc -emit-fir -hlfir=false -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s +! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -mmlir \ +! RUN: --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s +! RUN: bbc -emit-fir -hlfir=false -fopenmp --openmp-enable-delayed-privatization \ +! 
RUN: -o - %s 2>&1 | FileCheck %s subroutine delayed_privatization_private implicit none diff --git a/flang/test/Lower/OpenMP/declare-target-link-tarop-cap.f90 b/flang/test/Lower/OpenMP/declare-target-link-tarop-cap.f90 new file mode 100644 index 00000000000000..7cd0597161578d --- /dev/null +++ b/flang/test/Lower/OpenMP/declare-target-link-tarop-cap.f90 @@ -0,0 +1,55 @@ +!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s +!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-device %s -o - | FileCheck %s +!RUN: bbc -emit-hlfir -fopenmp %s -o - | FileCheck %s +!RUN: bbc -emit-hlfir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s + +program test_link + + integer :: test_int = 1 + !$omp declare target link(test_int) + + integer :: test_array_1d(3) = (/1,2,3/) + !$omp declare target link(test_array_1d) + + integer, pointer :: test_ptr1 + !$omp declare target link(test_ptr1) + + integer, target :: test_target = 1 + !$omp declare target link(test_target) + + integer, pointer :: test_ptr2 + !$omp declare target link(test_ptr2) + + !CHECK-DAG: {{%.*}} = omp.map_info var_ptr({{%.*}} : !fir.ref, i32) map_clauses(implicit, tofrom) capture(ByRef) -> !fir.ref {name = "test_int"} + !$omp target + test_int = test_int + 1 + !$omp end target + + + !CHECK-DAG: {{%.*}} = omp.map_info var_ptr({{%.*}} : !fir.ref>, !fir.array<3xi32>) map_clauses(implicit, tofrom) capture(ByRef) bounds({{%.*}}) -> !fir.ref> {name = "test_array_1d"} + !$omp target + do i = 1,3 + test_array_1d(i) = i * 2 + end do + !$omp end target + + allocate(test_ptr1) + test_ptr1 = 1 + !CHECK-DAG: {{%.*}} = omp.map_info var_ptr({{%.*}} : !fir.ref>>, !fir.box>) map_clauses(implicit, tofrom) capture(ByRef) members({{%.*}} : !fir.llvm_ptr>) -> !fir.ref>> {name = "test_ptr1"} + !$omp target + test_ptr1 = test_ptr1 + 1 + !$omp end target + + !CHECK-DAG: {{%.*}} = omp.map_info var_ptr({{%.*}} : !fir.ref, i32) map_clauses(implicit, tofrom) capture(ByRef) -> !fir.ref {name = "test_target"} + !$omp target + 
test_target = test_target + 1 + !$omp end target + + + !CHECK-DAG: {{%.*}} = omp.map_info var_ptr({{%.*}} : !fir.ref>>, !fir.box>) map_clauses(implicit, tofrom) capture(ByRef) members({{%.*}} : !fir.llvm_ptr>) -> !fir.ref>> {name = "test_ptr2"} + test_ptr2 => test_target + !$omp target + test_ptr2 = test_ptr2 + 1 + !$omp end target + +end diff --git a/flang/test/Lower/OpenMP/delayed-privatization-firstprivate.f90 b/flang/test/Lower/OpenMP/delayed-privatization-firstprivate.f90 index e3d2a5a8af2608..0fb81d68016a48 100644 --- a/flang/test/Lower/OpenMP/delayed-privatization-firstprivate.f90 +++ b/flang/test/Lower/OpenMP/delayed-privatization-firstprivate.f90 @@ -1,6 +1,9 @@ ! Test delayed privatization for the `firstprivate` clause. -! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s +! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \ +! RUN: -o - %s 2>&1 | FileCheck %s +! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 \ +! RUN: | FileCheck %s subroutine delayed_privatization_firstprivate implicit none diff --git a/flang/test/Lower/OpenMP/delayed-privatization-private-firstprivate.f90 b/flang/test/Lower/OpenMP/delayed-privatization-private-firstprivate.f90 index 46eef6eb3bcf6a..337e7d5ec885cb 100644 --- a/flang/test/Lower/OpenMP/delayed-privatization-private-firstprivate.f90 +++ b/flang/test/Lower/OpenMP/delayed-privatization-private-firstprivate.f90 @@ -1,6 +1,9 @@ ! Test delayed privatization for both `private` and `firstprivate` clauses. -! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s +! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \ +! RUN: -o - %s 2>&1 | FileCheck %s +! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 \ +! 
RUN: | FileCheck %s subroutine delayed_privatization_private_firstprivate implicit none diff --git a/flang/test/Lower/OpenMP/delayed-privatization-private.f90 b/flang/test/Lower/OpenMP/delayed-privatization-private.f90 index 240e0e71bfcd16..7208521bcd77e4 100644 --- a/flang/test/Lower/OpenMP/delayed-privatization-private.f90 +++ b/flang/test/Lower/OpenMP/delayed-privatization-private.f90 @@ -1,6 +1,9 @@ ! Test delayed privatization for the `private` clause. -! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s +! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \ +! RUN: -o - %s 2>&1 | FileCheck %s +! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 \ +! RUN: | FileCheck %s subroutine delayed_privatization_private implicit none diff --git a/flang/test/Lower/OpenMP/delayed-privatization-reduction.f90 b/flang/test/Lower/OpenMP/delayed-privatization-reduction.f90 index c61f352b9b055a..a7eeb1faceadef 100644 --- a/flang/test/Lower/OpenMP/delayed-privatization-reduction.f90 +++ b/flang/test/Lower/OpenMP/delayed-privatization-reduction.f90 @@ -3,7 +3,10 @@ ! that the block arguments are added in the proper order (reductions first and ! then delayed privatization. -! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s +! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \ +! RUN: -o - %s 2>&1 | FileCheck %s +! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 \ +! 
RUN: | FileCheck %s subroutine red_and_delayed_private integer :: red diff --git a/libc/config/baremetal/api.td b/libc/config/baremetal/api.td index 008eb45386f242..33b3a03828e9c7 100644 --- a/libc/config/baremetal/api.td +++ b/libc/config/baremetal/api.td @@ -2,7 +2,7 @@ include "config/public_api.td" include "spec/stdc.td" include "spec/stdc_ext.td" -include "spec/llvm_libc_ext.td" +include "spec/llvm_libc_stdfix_ext.td" def AssertMacro : MacroDef<"assert"> { let Defn = [{ diff --git a/libc/config/baremetal/arm/entrypoints.txt b/libc/config/baremetal/arm/entrypoints.txt index 99796ad5edf5d5..6e4fdb03626436 100644 --- a/libc/config/baremetal/arm/entrypoints.txt +++ b/libc/config/baremetal/arm/entrypoints.txt @@ -288,6 +288,8 @@ if(LIBC_COMPILER_HAS_FIXED_POINT) libc.src.stdfix.absr libc.src.stdfix.abslk libc.src.stdfix.abslr + libc.src.stdfix.exphk + libc.src.stdfix.expk libc.src.stdfix.roundhk libc.src.stdfix.roundhr libc.src.stdfix.roundk diff --git a/libc/config/baremetal/riscv/entrypoints.txt b/libc/config/baremetal/riscv/entrypoints.txt index 99796ad5edf5d5..6e4fdb03626436 100644 --- a/libc/config/baremetal/riscv/entrypoints.txt +++ b/libc/config/baremetal/riscv/entrypoints.txt @@ -288,6 +288,8 @@ if(LIBC_COMPILER_HAS_FIXED_POINT) libc.src.stdfix.absr libc.src.stdfix.abslk libc.src.stdfix.abslr + libc.src.stdfix.exphk + libc.src.stdfix.expk libc.src.stdfix.roundhk libc.src.stdfix.roundhr libc.src.stdfix.roundk diff --git a/libc/config/gpu/api.td b/libc/config/gpu/api.td index dbd212be56a3f1..607b8b6d5900c8 100644 --- a/libc/config/gpu/api.td +++ b/libc/config/gpu/api.td @@ -4,6 +4,7 @@ include "spec/stdc.td" include "spec/posix.td" include "spec/gpu_ext.td" include "spec/gnu_ext.td" +include "spec/stdc_ext.td" include "spec/llvm_libc_ext.td" def AssertMacro : MacroDef<"assert"> { diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 06832a41221dd8..c32773f67cda53 100644 --- 
a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -51,6 +51,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.string.mempcpy libc.src.string.memrchr libc.src.string.memset + libc.src.string.memset_explicit libc.src.string.rindex libc.src.string.stpcpy libc.src.string.stpncpy diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td index 526fd03f94f6a5..75432a2a298652 100644 --- a/libc/config/linux/api.td +++ b/libc/config/linux/api.td @@ -7,6 +7,7 @@ include "spec/gnu_ext.td" include "spec/bsd_ext.td" include "spec/stdc_ext.td" include "spec/llvm_libc_ext.td" +include "spec/llvm_libc_stdfix_ext.td" def AssertMacro : MacroDef<"assert"> { let Defn = [{ diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 705ec10960c4d2..0b77a9e170aae1 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -51,6 +51,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.string.mempcpy libc.src.string.memrchr libc.src.string.memset + libc.src.string.memset_explicit libc.src.string.rindex libc.src.string.stpcpy libc.src.string.stpncpy @@ -152,6 +153,11 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdbit.stdc_bit_width_ui libc.src.stdbit.stdc_bit_width_ul libc.src.stdbit.stdc_bit_width_ull + libc.src.stdbit.stdc_bit_floor_uc + libc.src.stdbit.stdc_bit_floor_us + libc.src.stdbit.stdc_bit_floor_ui + libc.src.stdbit.stdc_bit_floor_ul + libc.src.stdbit.stdc_bit_floor_ull # stdlib.h entrypoints libc.src.stdlib.abs @@ -477,6 +483,8 @@ if(LIBC_COMPILER_HAS_FIXED_POINT) libc.src.stdfix.absr libc.src.stdfix.abslk libc.src.stdfix.abslr + libc.src.stdfix.exphk + libc.src.stdfix.expk libc.src.stdfix.roundhk libc.src.stdfix.roundhr libc.src.stdfix.roundk diff --git a/libc/docs/c23.rst b/libc/docs/c23.rst index ec9d40947cc567..24cef8539393df 100644 --- a/libc/docs/c23.rst +++ b/libc/docs/c23.rst @@ -15,36 +15,15 @@ Implementation Status (It's helpful to review 'Annex B 
(Informative) Library Summary' for these.) -New headers: - -* stdbit.h -* stdckdint.h (|check|, macros are only defined with `__GNUC__` builtins) Additions: -* uchar.h - - * mbrtoc8 - * c8rtomb - * char*_t - -* string.h - - * memset_explicit - * memccpy - * strdup - * strndup - -* time.h - - * gmtime_r - * localtime_r - * timegm - * timespec_getres - * strftime conversion specifiers +* fenv.h - * 0b - * 0B + * fesetexcept + * fetestexceptflag + * fegetmode + * fesetmode * math.h * acospi* @@ -96,20 +75,87 @@ Additions: * dfmal * fsqrt* * dsqrtl -* fenv.h - - * fesetexcept - * fetestexceptflag - * fegetmode - * fesetmode +* stdbit.h (New header) +* stdckdint.h (New header) |check| * stddef.h * unreachable * stdlib.h + * strfromd + * strfromf + * strfroml * free_sized * free_aligned_sized * memalignment +* string.h + + * memset_explicit |check| + * memccpy + * strdup + * strndup * tgmath.h - * + * acospi + * asinpi + * atan2pi + * atanpi + * compoundn + * cospi + * erf + * exp10m1 + * exp10 + * exp2m1 + * fmaximum + * fmaximum_mag + * fmaximum_num + * fmaximum_mag_num + * fminimum + * fminimum_mag + * fminimum_num + * fminimum_mag_num + * fromfpx + * fromfp + * llogb + * log10p1 + * log2p1 + * logp1 + * nextdown + * nextup + * pown + * powr + * rootn + * roundeven + * rsqrt + * scalbn + * sinpi + * tanpi + * ufromfpx + * ufromfp + * fadd + * dadd + * fsub + * dsub + * fmul + * dmul + * fdiv + * ddiv + * ffma + * dfma + * fsqrt + * dsqrt +* time.h + + * gmtime_r + * localtime_r + * timegm + * timespec_getres + * strftime conversion specifiers + + * 0b + * 0B +* uchar.h + + * mbrtoc8 + * c8rtomb + * char*_t diff --git a/libc/docs/index.rst b/libc/docs/index.rst index a50eb080c9ee49..370fcd843974e8 100644 --- a/libc/docs/index.rst +++ b/libc/docs/index.rst @@ -78,6 +78,7 @@ stages there is no ABI stability in any form. dev/index.rst porting contributing + talks .. 
toctree:: :hidden: diff --git a/libc/docs/math/stdfix.rst b/libc/docs/math/stdfix.rst index 5e39d5c01d1e53..d8dcb0cfa4c521 100644 --- a/libc/docs/math/stdfix.rst +++ b/libc/docs/math/stdfix.rst @@ -110,7 +110,7 @@ floating point types, but are not part of the ISO/IEC TR 18037:2008 spec. +===============+================+=============+===============+============+================+=============+================+=============+===============+============+================+=============+ | cos | | | | | | | | | | | | | +---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ -| exp | | | | | | | | | | | | | +| exp | | | | | | | | |check| | | |check| | | | +---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ | log | | | | | | | | | | | | | +---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ diff --git a/libc/docs/stdbit.rst b/libc/docs/stdbit.rst index ccd1393ef5d926..3ec46cf8d8ffa4 100644 --- a/libc/docs/stdbit.rst +++ b/libc/docs/stdbit.rst @@ -91,11 +91,11 @@ stdc_bit_width_us |check| stdc_bit_width_ui |check| stdc_bit_width_ul |check| stdc_bit_width_ull |check| -stdc_bit_floor_uc -stdc_bit_floor_us -stdc_bit_floor_ui -stdc_bit_floor_ul -stdc_bit_floor_ull +stdc_bit_floor_uc |check| +stdc_bit_floor_us |check| +stdc_bit_floor_ui |check| +stdc_bit_floor_ul |check| +stdc_bit_floor_ull |check| stdc_bit_ceil_uc stdc_bit_ceil_us stdc_bit_ceil_ui @@ -126,7 +126,7 @@ stdc_count_zeros |check| stdc_count_ones |check| stdc_has_single_bit |check| stdc_bit_width |check| -stdc_bit_floor +stdc_bit_floor |check| stdc_bit_ceil ========================= 
========= diff --git a/libc/docs/talks.rst b/libc/docs/talks.rst new file mode 100644 index 00000000000000..6daae9f1e7b69a --- /dev/null +++ b/libc/docs/talks.rst @@ -0,0 +1,29 @@ +===== +Talks +===== +---- +2023 +---- +* Math functions in LLVM libc or yet another correctly rounded libm - Tue Ly + + * `video `__ +* The LLVM C Library for GPUs - Joseph Huber + + * `slides `__ + * `video `__ + +---- +2022 +---- +* Using LLVM's libc - Sivachandra Reddy, Michael Jones, Tue Ly + + * `slides `__ + * `video `__ +* Using modern CPU instructions to improve LLVM's libc math library - Tue Ly + + * `slides `__ + * `video `__ +* Approximating at Scale: How strto float in LLVM’s libc is faster - Michael Jones + + * `slides `__ + * `video `__ diff --git a/libc/include/llvm-libc-macros/stdbit-macros.h b/libc/include/llvm-libc-macros/stdbit-macros.h index 104418ca4856ba..5b51068f866b71 100644 --- a/libc/include/llvm-libc-macros/stdbit-macros.h +++ b/libc/include/llvm-libc-macros/stdbit-macros.h @@ -181,6 +181,19 @@ inline unsigned stdc_bit_width(unsigned long x) { return stdc_bit_width_ul(x); } inline unsigned stdc_bit_width(unsigned long long x) { return stdc_bit_width_ull(x); } +inline unsigned char stdc_bit_floor(unsigned char x) { + return stdc_bit_floor_uc(x); +} +inline unsigned short stdc_bit_floor(unsigned short x) { + return stdc_bit_floor_us(x); +} +inline unsigned stdc_bit_floor(unsigned x) { return stdc_bit_floor_ui(x); } +inline unsigned long stdc_bit_floor(unsigned long x) { + return stdc_bit_floor_ul(x); +} +inline unsigned long long stdc_bit_floor(unsigned long long x) { + return stdc_bit_floor_ull(x); +} #else #define stdc_leading_zeros(x) \ _Generic((x), \ @@ -266,6 +279,13 @@ inline unsigned stdc_bit_width(unsigned long long x) { unsigned: stdc_bit_width_ui, \ unsigned long: stdc_bit_width_ul, \ unsigned long long: stdc_bit_width_ull)(x) +#define stdc_bit_floor(x) \ + _Generic((x), \ + unsigned char: stdc_bit_floor_ui, \ + unsigned short: stdc_bit_floor_us, \ + 
unsigned: stdc_bit_floor_ui, \ + unsigned long: stdc_bit_floor_ul, \ + unsigned long long: stdc_bit_floor_ull)(x) #endif // __cplusplus #endif // __LLVM_LIBC_MACROS_STDBIT_MACROS_H diff --git a/libc/spec/llvm_libc_ext.td b/libc/spec/llvm_libc_ext.td index 3241ec0550376b..ca61d4ef371a2e 100644 --- a/libc/spec/llvm_libc_ext.td +++ b/libc/spec/llvm_libc_ext.td @@ -51,29 +51,9 @@ def LLVMLibcExt : StandardSpec<"llvm_libc_ext"> { ] >; - HeaderSpec StdFix = HeaderSpec< - "stdfix.h", - [], // macros - [], // types - [], // enums - [ // functions - GuardedFunctionSpec<"sqrtuhr", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"sqrtur", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"sqrtulr", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - - GuardedFunctionSpec<"sqrtuhk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"sqrtuk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"sqrtulk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - - GuardedFunctionSpec<"uhksqrtus", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"uksqrtui", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - ] - >; - let Headers = [ Assert, Sched, - StdFix, Strings, ]; } diff --git a/libc/spec/llvm_libc_stdfix_ext.td b/libc/spec/llvm_libc_stdfix_ext.td new file mode 100644 index 00000000000000..7bc7ec5464081b --- /dev/null +++ b/libc/spec/llvm_libc_stdfix_ext.td @@ -0,0 +1,27 @@ +def LLVMLibcStdfixExt : StandardSpec<"llvm_libc_stdfix_ext"> { + HeaderSpec StdFix = HeaderSpec< + "stdfix.h", + [], // macros + [], // types + [], // enums + [ // functions + GuardedFunctionSpec<"exphk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, + GuardedFunctionSpec<"expk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, + + GuardedFunctionSpec<"sqrtuhr", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, + 
GuardedFunctionSpec<"sqrtur", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, + GuardedFunctionSpec<"sqrtulr", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, + + GuardedFunctionSpec<"sqrtuhk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, + GuardedFunctionSpec<"sqrtuk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, + GuardedFunctionSpec<"sqrtulk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, + + GuardedFunctionSpec<"uhksqrtus", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, + GuardedFunctionSpec<"uksqrtui", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, + ] + >; + + let Headers = [ + StdFix, + ]; +} diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index cfebc60a0a9a8e..e09cce0efd9bcc 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -234,6 +234,11 @@ def StdC : StandardSpec<"stdc"> { RetValSpec, [ArgSpec, ArgSpec, ArgSpec] >, + FunctionSpec< + "memset_explicit", + RetValSpec, + [ArgSpec, ArgSpec, ArgSpec] + >, FunctionSpec< "strcpy", RetValSpec, @@ -801,7 +806,8 @@ def StdC : StandardSpec<"stdc"> { Macro<"stdc_count_zeros">, Macro<"stdc_count_ones">, Macro<"stdc_has_single_bit">, - Macro<"std_bit_width"> + Macro<"std_bit_width">, + Macro<"std_bit_floor"> ], // Macros [], // Types [], // Enumerations @@ -860,7 +866,12 @@ def StdC : StandardSpec<"stdc"> { FunctionSpec<"stdc_bit_width_us", RetValSpec, [ArgSpec]>, FunctionSpec<"stdc_bit_width_ui", RetValSpec, [ArgSpec]>, FunctionSpec<"stdc_bit_width_ul", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_bit_width_ull", RetValSpec, [ArgSpec]> + FunctionSpec<"stdc_bit_width_ull", RetValSpec, [ArgSpec]>, + FunctionSpec<"stdc_bit_floor_uc", RetValSpec, [ArgSpec]>, + FunctionSpec<"stdc_bit_floor_us", RetValSpec, [ArgSpec]>, + FunctionSpec<"stdc_bit_floor_ui", RetValSpec, [ArgSpec]>, + FunctionSpec<"stdc_bit_floor_ul", RetValSpec, [ArgSpec]>, + FunctionSpec<"stdc_bit_floor_ull", RetValSpec, [ArgSpec]> ] // Functions >; diff --git 
a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt index 1a4b3e9a2145c0..17c04aa57e6fd6 100644 --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -95,6 +95,7 @@ add_header_library( HDRS integer_to_string.h DEPENDS + .uint libc.src.__support.common libc.src.__support.CPP.algorithm libc.src.__support.CPP.limits diff --git a/libc/src/__support/CPP/bit.h b/libc/src/__support/CPP/bit.h index 7d11e7d5c497e0..9c74a346949f0c 100644 --- a/libc/src/__support/CPP/bit.h +++ b/libc/src/__support/CPP/bit.h @@ -27,13 +27,14 @@ namespace LIBC_NAMESPACE::cpp { // This implementation of bit_cast requires trivially-constructible To, to avoid // UB in the implementation. -template < - typename To, typename From, - typename = cpp::enable_if_t::value && - cpp::is_trivially_copyable::value && - cpp::is_trivially_copyable::value>> -LIBC_INLINE constexpr To bit_cast(const From &from) { +template +LIBC_INLINE constexpr cpp::enable_if_t< + (sizeof(To) == sizeof(From)) && + cpp::is_trivially_constructible::value && + cpp::is_trivially_copyable::value && + cpp::is_trivially_copyable::value, + To> +bit_cast(const From &from) { MSAN_UNPOISON(&from, sizeof(From)); #if LIBC_HAS_BUILTIN(__builtin_bit_cast) return __builtin_bit_cast(To, from); @@ -51,8 +52,10 @@ LIBC_INLINE constexpr To bit_cast(const From &from) { #endif // LIBC_HAS_BUILTIN(__builtin_bit_cast) } -template >> -[[nodiscard]] LIBC_INLINE constexpr bool has_single_bit(T value) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, + bool> +has_single_bit(T value) { return (value != 0) && ((value & (value - 1)) == 0); } @@ -70,8 +73,9 @@ template >> /// Only unsigned integral types are allowed. /// /// Returns cpp::numeric_limits::digits on an input of 0. 
-template >> -[[nodiscard]] LIBC_INLINE constexpr int countr_zero(T value) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +countr_zero(T value) { if (!value) return cpp::numeric_limits::digits; if (value & 0x1) @@ -103,8 +107,9 @@ ADD_SPECIALIZATION(countr_zero, unsigned long long, __builtin_ctzll) /// Only unsigned integral types are allowed. /// /// Returns cpp::numeric_limits::digits on an input of 0. -template >> -[[nodiscard]] LIBC_INLINE constexpr int countl_zero(T value) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +countl_zero(T value) { if (!value) return cpp::numeric_limits::digits; // Bisection method. @@ -135,8 +140,9 @@ ADD_SPECIALIZATION(countl_zero, unsigned long long, __builtin_clzll) /// Only unsigned integral types are allowed. /// /// Returns cpp::numeric_limits::digits on an input of all ones. -template >> -[[nodiscard]] LIBC_INLINE constexpr int countl_one(T value) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +countl_one(T value) { return cpp::countl_zero(~value); } @@ -147,8 +153,9 @@ template >> /// Only unsigned integral types are allowed. /// /// Returns cpp::numeric_limits::digits on an input of all ones. -template >> -[[nodiscard]] LIBC_INLINE constexpr int countr_one(T value) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +countr_one(T value) { return cpp::countr_zero(~value); } @@ -156,8 +163,9 @@ template >> /// Returns 0 otherwise. /// /// Ex. bit_width(5) == 3. -template >> -[[nodiscard]] LIBC_INLINE constexpr int bit_width(T value) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +bit_width(T value) { return cpp::numeric_limits::digits - cpp::countl_zero(value); } @@ -165,11 +173,12 @@ template >> /// nonzero. Returns 0 otherwise. /// /// Ex. bit_floor(5) == 4. 
-template >> -[[nodiscard]] LIBC_INLINE constexpr T bit_floor(T value) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> +bit_floor(T value) { if (!value) return 0; - return T(1) << (cpp::bit_width(value) - 1); + return static_cast(T(1) << (cpp::bit_width(value) - 1)); } /// Returns the smallest integral power of two no smaller than value if value is @@ -179,8 +188,9 @@ template >> /// /// The return value is undefined if the input is larger than the largest power /// of two representable in T. -template >> -[[nodiscard]] LIBC_INLINE constexpr T bit_ceil(T value) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> +bit_ceil(T value) { if (value < 2) return 1; return T(1) << cpp::bit_width(value - 1u); @@ -190,28 +200,31 @@ template >> // from https://blog.regehr.org/archives/1063. // Forward-declare rotr so that rotl can use it. -template >> -[[nodiscard]] LIBC_INLINE constexpr T rotr(T value, int rotate); +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> +rotr(T value, int rotate); -template >> -[[nodiscard]] LIBC_INLINE constexpr T rotl(T value, int rotate) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> +rotl(T value, int rotate) { constexpr unsigned N = cpp::numeric_limits::digits; rotate = rotate % N; if (!rotate) return value; if (rotate < 0) - return cpp::rotr(value, -rotate); + return cpp::rotr(value, -rotate); return (value << rotate) | (value >> (N - rotate)); } -template -[[nodiscard]] LIBC_INLINE constexpr T rotr(T value, int rotate) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> +rotr(T value, int rotate) { constexpr unsigned N = cpp::numeric_limits::digits; rotate = rotate % N; if (!rotate) return value; if (rotate < 0) - return cpp::rotl(value, -rotate); + return cpp::rotl(value, -rotate); return (value >> rotate) | (value << (N - rotate)); } @@ -226,33 +239,42 @@ LIBC_INLINE constexpr To bit_or_static_cast(const From &from) { } } -template >> 
-[[nodiscard]] LIBC_INLINE constexpr int first_leading_zero(T value) { +// TODO: remove from 'bit.h' as it is not a standard function. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +first_leading_zero(T value) { return value == cpp::numeric_limits::max() ? 0 : countl_one(value) + 1; } -template >> -[[nodiscard]] LIBC_INLINE constexpr int first_leading_one(T value) { +// TODO: remove from 'bit.h' as it is not a standard function. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +first_leading_one(T value) { return first_leading_zero(static_cast(~value)); } -template >> -[[nodiscard]] LIBC_INLINE constexpr int first_trailing_zero(T value) { +// TODO: remove from 'bit.h' as it is not a standard function. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +first_trailing_zero(T value) { return value == cpp::numeric_limits::max() ? 0 : countr_zero(static_cast(~value)) + 1; } -template >> -[[nodiscard]] LIBC_INLINE constexpr int first_trailing_one(T value) { +// TODO: remove from 'bit.h' as it is not a standard function. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +first_trailing_one(T value) { return value == cpp::numeric_limits::max() ? 0 : countr_zero(value) + 1; } -/// Count number of 1's aka population count or hamming weight. +/// Count number of 1's aka population count or Hamming weight. /// /// Only unsigned integral types are allowed. 
-template >> -[[nodiscard]] LIBC_INLINE constexpr int count_ones(T value) { +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +popcount(T value) { int count = 0; for (int i = 0; i != cpp::numeric_limits::digits; ++i) if ((value >> i) & 0x1) @@ -261,7 +283,7 @@ template >> } #define ADD_SPECIALIZATION(TYPE, BUILTIN) \ template <> \ - [[nodiscard]] LIBC_INLINE constexpr int count_ones(TYPE value) { \ + [[nodiscard]] LIBC_INLINE constexpr int popcount(TYPE value) { \ return BUILTIN(value); \ } ADD_SPECIALIZATION(unsigned char, __builtin_popcount) @@ -272,9 +294,11 @@ ADD_SPECIALIZATION(unsigned long long, __builtin_popcountll) // TODO: 128b specializations? #undef ADD_SPECIALIZATION -template >> -[[nodiscard]] LIBC_INLINE constexpr int count_zeros(T value) { - return count_ones(static_cast(~value)); +// TODO: remove from 'bit.h' as it is not a standard function. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +count_zeros(T value) { + return popcount(static_cast(~value)); } } // namespace LIBC_NAMESPACE::cpp diff --git a/libc/src/__support/UInt.h b/libc/src/__support/UInt.h index 5973e6fab1d7d5..e899a79684b739 100644 --- a/libc/src/__support/UInt.h +++ b/libc/src/__support/UInt.h @@ -43,6 +43,9 @@ struct BigInt { static_assert(is_integral_v && is_unsigned_v, "WordType must be unsigned integer."); + using word_type = WordType; + LIBC_INLINE_VAR static constexpr bool SIGNED = Signed; + LIBC_INLINE_VAR static constexpr size_t BITS = Bits; LIBC_INLINE_VAR static constexpr size_t WORD_SIZE = sizeof(WordType) * CHAR_BIT; @@ -50,6 +53,10 @@ struct BigInt { "Number of bits in BigInt should be a multiple of WORD_SIZE."); LIBC_INLINE_VAR static constexpr size_t WORD_COUNT = Bits / WORD_SIZE; + + using unsigned_type = BigInt; + using signed_type = BigInt; + cpp::array val{}; LIBC_INLINE constexpr BigInt() = default; @@ -579,19 +586,33 @@ struct BigInt { return *this; } - LIBC_INLINE constexpr uint64_t clz() { - uint64_t leading_zeroes = 
0; - for (size_t i = WORD_COUNT; i > 0; --i) { - if (val[i - 1] == 0) { - leading_zeroes += WORD_SIZE; - } else { - leading_zeroes += countl_zero(val[i - 1]); + // TODO: remove and use cpp::countl_zero below. + [[nodiscard]] LIBC_INLINE constexpr int clz() const { + constexpr int word_digits = cpp::numeric_limits::digits; + int leading_zeroes = 0; + for (auto i = val.size(); i > 0;) { + --i; + const int zeroes = countl_zero(val[i]); + leading_zeroes += zeroes; + if (zeroes != word_digits) break; - } } return leading_zeroes; } + // TODO: remove and use cpp::countr_zero below. + [[nodiscard]] LIBC_INLINE constexpr int ctz() const { + constexpr int word_digits = cpp::numeric_limits::digits; + int trailing_zeroes = 0; + for (auto word : val) { + const int zeroes = countr_zero(word); + trailing_zeroes += zeroes; + if (zeroes != word_digits) + break; + } + return trailing_zeroes; + } + LIBC_INLINE constexpr void shift_left(size_t s) { if constexpr (Bits == WORD_SIZE) { // Use native types if possible. @@ -916,66 +937,179 @@ template <> class numeric_limits> { LIBC_INLINE_VAR static constexpr int digits = 128; }; -// Provides is_integral of U/Int<128>, U/Int<192>, U/Int<256>. -template -struct is_integral> : cpp::true_type {}; - -// Provides is_unsigned of UInt<128>, UInt<192>, UInt<256>. -template -struct is_unsigned> : cpp::bool_constant {}; +// type traits to determine whether a T is a cpp::BigInt. +template struct is_big_int : cpp::false_type {}; template -struct make_unsigned> - : type_identity> {}; - -template -struct make_signed> - : type_identity> {}; - -namespace internal { -template struct is_custom_uint : cpp::false_type {}; - -template -struct is_custom_uint> : cpp::true_type {}; -} // namespace internal - -// bit_cast to UInt -// Note: The standard scheme for SFINAE selection is to have exactly one -// function instanciation valid at a time. This is usually done by having a -// predicate in one function and the negated predicate in the other one. -// e.g. 
-// template::value == true> ... -// template::value == false> ... -// -// Unfortunately this would make the default 'cpp::bit_cast' aware of -// 'is_custom_uint' (or any other customization). To prevent exposing all -// customizations in the original function, we create a different function with -// four 'typename's instead of three - otherwise it would be considered as a -// redeclaration of the same function leading to "error: template parameter -// redefines default argument". -template ::value && - cpp::is_trivially_copyable::value>, - typename = cpp::enable_if_t::value>> -LIBC_INLINE constexpr To bit_cast(const From &from) { +struct is_big_int> : cpp::true_type {}; + +template +LIBC_INLINE_VAR constexpr bool is_big_int_v = is_big_int::value; + +// Specialization of cpp::bit_cast ('bit.h') from T to BigInt. +template +LIBC_INLINE constexpr cpp::enable_if_t< + (sizeof(To) == sizeof(From)) && cpp::is_trivially_copyable::value && + cpp::is_trivially_copyable::value && is_big_int::value, + To> +bit_cast(const From &from) { To out; using Storage = decltype(out.val); out.val = cpp::bit_cast(from); return out; } -// bit_cast from UInt -template < - typename To, size_t Bits, - typename = cpp::enable_if_t) && - cpp::is_trivially_constructible::value && - cpp::is_trivially_copyable::value && - cpp::is_trivially_copyable>::value>> -LIBC_INLINE constexpr To bit_cast(const UInt &from) { +// Specialization of cpp::bit_cast ('bit.h') from BigInt to T. +template +LIBC_INLINE constexpr cpp::enable_if_t< + sizeof(To) == sizeof(UInt) && + cpp::is_trivially_constructible::value && + cpp::is_trivially_copyable::value && + cpp::is_trivially_copyable>::value, + To> +bit_cast(const UInt &from) { return cpp::bit_cast(from.val); } +// Specialization of cpp::has_single_bit ('bit.h') for BigInt. 
+template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, bool> +has_single_bit(T value) { + int bits = 0; + for (auto word : value.val) { + if (word == 0) + continue; + bits += popcount(word); + if (bits > 1) + return false; + } + return bits == 1; +} + +// Specialization of cpp::countr_zero ('bit.h') for BigInt. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +countr_zero(const T &value) { + return value.ctz(); +} + +// Specialization of cpp::countl_zero ('bit.h') for BigInt. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +countl_zero(const T &value) { + return value.clz(); +} + +// Specialization of cpp::countl_one ('bit.h') for BigInt. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +countl_one(T value) { + // TODO : Implement a faster version not involving operator~. + return cpp::countl_zero(~value); +} + +// Specialization of cpp::countr_one ('bit.h') for BigInt. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +countr_one(T value) { + // TODO : Implement a faster version not involving operator~. + return cpp::countr_zero(~value); +} + +// Specialization of cpp::bit_width ('bit.h') for BigInt. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +bit_width(T value) { + return cpp::numeric_limits::digits - cpp::countl_zero(value); +} + +// Forward-declare rotr so that rotl can use it. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> +rotr(T value, int rotate); + +// Specialization of cpp::rotl ('bit.h') for BigInt. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> +rotl(T value, int rotate) { + constexpr unsigned N = cpp::numeric_limits::digits; + rotate = rotate % N; + if (!rotate) + return value; + if (rotate < 0) + return cpp::rotr(value, -rotate); + return (value << rotate) | (value >> (N - rotate)); +} + +// Specialization of cpp::rotr ('bit.h') for BigInt. 
+template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> +rotr(T value, int rotate) { + constexpr unsigned N = cpp::numeric_limits::digits; + rotate = rotate % N; + if (!rotate) + return value; + if (rotate < 0) + return cpp::rotl(value, -rotate); + return (value >> rotate) | (value << (N - rotate)); +} + } // namespace LIBC_NAMESPACE::cpp +namespace LIBC_NAMESPACE { + +// Specialization of mask_trailing_ones ('math_extras.h') for BigInt. +template +LIBC_INLINE constexpr cpp::enable_if_t, T> +mask_trailing_ones() { + static_assert(!T::SIGNED); + if (count == 0) + return T(); + constexpr unsigned T_BITS = CHAR_BIT * sizeof(T); + static_assert(count <= T_BITS && "Invalid bit index"); + using word_type = typename T::word_type; + T out; + constexpr int CHUNK_INDEX_CONTAINING_BIT = + static_cast(count / T::WORD_SIZE); + int index = 0; + for (auto &word : out.val) { + if (index < CHUNK_INDEX_CONTAINING_BIT) + word = -1; + else if (index > CHUNK_INDEX_CONTAINING_BIT) + word = 0; + else + word = mask_trailing_ones(); + ++index; + } + return out; +} + +// Specialization of mask_leading_ones ('math_extras.h') for BigInt. 
+template +LIBC_INLINE constexpr cpp::enable_if_t, T> +mask_leading_ones() { + static_assert(!T::SIGNED); + if (count == 0) + return T(); + constexpr unsigned T_BITS = CHAR_BIT * sizeof(T); + static_assert(count <= T_BITS && "Invalid bit index"); + using word_type = typename T::word_type; + T out; + constexpr int CHUNK_INDEX_CONTAINING_BIT = + static_cast((T::BITS - count - 1ULL) / T::WORD_SIZE); + int index = 0; + for (auto &word : out.val) { + if (index < CHUNK_INDEX_CONTAINING_BIT) + word = 0; + else if (index > CHUNK_INDEX_CONTAINING_BIT) + word = -1; + else + word = mask_leading_ones(); + ++index; + } + return out; +} + +} // namespace LIBC_NAMESPACE + #endif // LLVM_LIBC_SRC___SUPPORT_UINT_H diff --git a/libc/src/__support/fixed_point/fx_rep.h b/libc/src/__support/fixed_point/fx_rep.h index 042cd2b20714c6..f13640a6c01918 100644 --- a/libc/src/__support/fixed_point/fx_rep.h +++ b/libc/src/__support/fixed_point/fx_rep.h @@ -45,7 +45,7 @@ template <> struct FXRep { SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN; LIBC_INLINE static constexpr Type MIN() { return SFRACT_MIN; } - LIBC_INLINE static constexpr Type MAX() { return SFRACT_MIN; } + LIBC_INLINE static constexpr Type MAX() { return SFRACT_MAX; } LIBC_INLINE static constexpr Type ZERO() { return 0.0HR; } LIBC_INLINE static constexpr Type EPS() { return SFRACT_EPSILON; } LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5HR; } @@ -65,7 +65,7 @@ template <> struct FXRep { SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN; LIBC_INLINE static constexpr Type MIN() { return USFRACT_MIN; } - LIBC_INLINE static constexpr Type MAX() { return USFRACT_MIN; } + LIBC_INLINE static constexpr Type MAX() { return USFRACT_MAX; } LIBC_INLINE static constexpr Type ZERO() { return 0.0UHR; } LIBC_INLINE static constexpr Type EPS() { return USFRACT_EPSILON; } LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5UHR; } @@ -85,7 +85,7 @@ template <> struct FXRep { SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN; LIBC_INLINE static constexpr Type 
MIN() { return FRACT_MIN; } - LIBC_INLINE static constexpr Type MAX() { return FRACT_MIN; } + LIBC_INLINE static constexpr Type MAX() { return FRACT_MAX; } LIBC_INLINE static constexpr Type ZERO() { return 0.0R; } LIBC_INLINE static constexpr Type EPS() { return FRACT_EPSILON; } LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5R; } @@ -105,7 +105,7 @@ template <> struct FXRep { SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN; LIBC_INLINE static constexpr Type MIN() { return UFRACT_MIN; } - LIBC_INLINE static constexpr Type MAX() { return UFRACT_MIN; } + LIBC_INLINE static constexpr Type MAX() { return UFRACT_MAX; } LIBC_INLINE static constexpr Type ZERO() { return 0.0UR; } LIBC_INLINE static constexpr Type EPS() { return UFRACT_EPSILON; } LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5UR; } @@ -125,7 +125,7 @@ template <> struct FXRep { SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN; LIBC_INLINE static constexpr Type MIN() { return LFRACT_MIN; } - LIBC_INLINE static constexpr Type MAX() { return LFRACT_MIN; } + LIBC_INLINE static constexpr Type MAX() { return LFRACT_MAX; } LIBC_INLINE static constexpr Type ZERO() { return 0.0LR; } LIBC_INLINE static constexpr Type EPS() { return LFRACT_EPSILON; } LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5LR; } @@ -145,7 +145,7 @@ template <> struct FXRep { SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN; LIBC_INLINE static constexpr Type MIN() { return ULFRACT_MIN; } - LIBC_INLINE static constexpr Type MAX() { return ULFRACT_MIN; } + LIBC_INLINE static constexpr Type MAX() { return ULFRACT_MAX; } LIBC_INLINE static constexpr Type ZERO() { return 0.0ULR; } LIBC_INLINE static constexpr Type EPS() { return ULFRACT_EPSILON; } LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5ULR; } @@ -165,7 +165,7 @@ template <> struct FXRep { SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN; LIBC_INLINE static constexpr Type MIN() { return SACCUM_MIN; } - LIBC_INLINE static constexpr Type MAX() { return SACCUM_MIN; } + LIBC_INLINE static 
constexpr Type MAX() { return SACCUM_MAX; } LIBC_INLINE static constexpr Type ZERO() { return 0.0HK; } LIBC_INLINE static constexpr Type EPS() { return SACCUM_EPSILON; } LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5HK; } @@ -185,7 +185,7 @@ template <> struct FXRep { SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN; LIBC_INLINE static constexpr Type MIN() { return USACCUM_MIN; } - LIBC_INLINE static constexpr Type MAX() { return USACCUM_MIN; } + LIBC_INLINE static constexpr Type MAX() { return USACCUM_MAX; } LIBC_INLINE static constexpr Type ZERO() { return 0.0UHK; } LIBC_INLINE static constexpr Type EPS() { return USACCUM_EPSILON; } LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5UHK; } @@ -205,7 +205,7 @@ template <> struct FXRep { SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN; LIBC_INLINE static constexpr Type MIN() { return ACCUM_MIN; } - LIBC_INLINE static constexpr Type MAX() { return ACCUM_MIN; } + LIBC_INLINE static constexpr Type MAX() { return ACCUM_MAX; } LIBC_INLINE static constexpr Type ZERO() { return 0.0K; } LIBC_INLINE static constexpr Type EPS() { return ACCUM_EPSILON; } LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5K; } @@ -225,7 +225,7 @@ template <> struct FXRep { SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN; LIBC_INLINE static constexpr Type MIN() { return UACCUM_MIN; } - LIBC_INLINE static constexpr Type MAX() { return UACCUM_MIN; } + LIBC_INLINE static constexpr Type MAX() { return UACCUM_MAX; } LIBC_INLINE static constexpr Type ZERO() { return 0.0UK; } LIBC_INLINE static constexpr Type EPS() { return UACCUM_EPSILON; } LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5UK; } @@ -245,7 +245,7 @@ template <> struct FXRep { SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN; LIBC_INLINE static constexpr Type MIN() { return LACCUM_MIN; } - LIBC_INLINE static constexpr Type MAX() { return LACCUM_MIN; } + LIBC_INLINE static constexpr Type MAX() { return LACCUM_MAX; } LIBC_INLINE static constexpr Type ZERO() { return 0.0LK; } LIBC_INLINE static 
constexpr Type EPS() { return LACCUM_EPSILON; } LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5LK; } @@ -265,7 +265,7 @@ template <> struct FXRep { SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN; LIBC_INLINE static constexpr Type MIN() { return ULACCUM_MIN; } - LIBC_INLINE static constexpr Type MAX() { return ULACCUM_MIN; } + LIBC_INLINE static constexpr Type MAX() { return ULACCUM_MAX; } LIBC_INLINE static constexpr Type ZERO() { return 0.0ULK; } LIBC_INLINE static constexpr Type EPS() { return ULACCUM_EPSILON; } LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5ULK; } diff --git a/libc/src/__support/float_to_string.h b/libc/src/__support/float_to_string.h index 744842ced8d772..27476433a94575 100644 --- a/libc/src/__support/float_to_string.h +++ b/libc/src/__support/float_to_string.h @@ -713,7 +713,7 @@ template <> class FloatToString { float_as_fixed.shift_left(SHIFT_AMOUNT); // If there are still digits above the decimal point, handle those. - if (float_as_fixed.clz() < EXTRA_INT_WIDTH) { + if (float_as_fixed.clz() < static_cast(EXTRA_INT_WIDTH)) { cpp::UInt above_decimal_point = float_as_fixed >> FLOAT_AS_INT_WIDTH; diff --git a/libc/src/__support/integer_to_string.h b/libc/src/__support/integer_to_string.h index 81ed21ccfca166..a5872dce652036 100644 --- a/libc/src/__support/integer_to_string.h +++ b/libc/src/__support/integer_to_string.h @@ -67,6 +67,7 @@ #include "src/__support/CPP/span.h" #include "src/__support/CPP/string_view.h" #include "src/__support/CPP/type_traits.h" +#include "src/__support/UInt.h" // is_big_int #include "src/__support/common.h" namespace LIBC_NAMESPACE { @@ -149,6 +150,18 @@ template class StringBufferWriterImpl { using StringBufferWriter = StringBufferWriterImpl; using BackwardStringBufferWriter = StringBufferWriterImpl; +template struct IntegerWriterUnsigned {}; + +template +struct IntegerWriterUnsigned>> { + using type = cpp::make_unsigned_t; +}; + +template +struct IntegerWriterUnsigned>> { + using type = typename 
T::unsigned_type; +}; + } // namespace details namespace radix { @@ -163,7 +176,7 @@ template using Custom = details::Fmt; // See file header for documentation. template class IntegerToString { - static_assert(cpp::is_integral_v); + static_assert(cpp::is_integral_v || cpp::is_big_int_v); LIBC_INLINE static constexpr size_t compute_buffer_size() { constexpr auto MAX_DIGITS = []() -> size_t { @@ -208,8 +221,8 @@ template class IntegerToString { // An internal stateless structure that handles the number formatting logic. struct IntegerWriter { - static_assert(cpp::is_integral_v); - using UNSIGNED_T = cpp::make_unsigned_t; + static_assert(cpp::is_integral_v || cpp::is_big_int_v); + using UNSIGNED_T = typename details::IntegerWriterUnsigned::type; LIBC_INLINE static char digit_char(uint8_t digit) { if (digit < 10) diff --git a/libc/src/__support/math_extras.h b/libc/src/__support/math_extras.h index 7a89fbb11b2a9e..c6b458ddecdabf 100644 --- a/libc/src/__support/math_extras.h +++ b/libc/src/__support/math_extras.h @@ -20,21 +20,18 @@ namespace LIBC_NAMESPACE { // Create a bitmask with the count right-most bits set to 1, and all other bits // set to 0. Only unsigned types are allowed. template -LIBC_INLINE constexpr T mask_trailing_ones() { - static_assert(cpp::is_unsigned_v); +LIBC_INLINE constexpr cpp::enable_if_t, T> +mask_trailing_ones() { constexpr unsigned T_BITS = CHAR_BIT * sizeof(T); static_assert(count <= T_BITS && "Invalid bit index"); - // It's important not to initialize T with -1, since T may be BigInt which - // will take -1 as a uint64_t and only initialize the low 64 bits. - constexpr T ALL_ZEROES(0); - constexpr T ALL_ONES(~ALL_ZEROES); // bitwise NOT performs integer promotion. - return count == 0 ? 0 : (ALL_ONES >> (T_BITS - count)); + return count == 0 ? 0 : (T(-1) >> (T_BITS - count)); } // Create a bitmask with the count left-most bits set to 1, and all other bits // set to 0. Only unsigned types are allowed. 
template -LIBC_INLINE constexpr T mask_leading_ones() { +LIBC_INLINE constexpr cpp::enable_if_t, T> +mask_leading_ones() { constexpr T MASK(mask_trailing_ones()); return T(~MASK); // bitwise NOT performs integer promotion. } diff --git a/libc/src/stdbit/CMakeLists.txt b/libc/src/stdbit/CMakeLists.txt index f077baeee6d275..7ab4fee4454a15 100644 --- a/libc/src/stdbit/CMakeLists.txt +++ b/libc/src/stdbit/CMakeLists.txt @@ -11,6 +11,7 @@ set(prefixes count_ones has_single_bit bit_width + bit_floor ) set(suffixes c s i l ll) foreach(prefix IN LISTS prefixes) diff --git a/libc/src/stdbit/stdc_bit_floor_uc.cpp b/libc/src/stdbit/stdc_bit_floor_uc.cpp new file mode 100644 index 00000000000000..6cb04c9eb43e62 --- /dev/null +++ b/libc/src/stdbit/stdc_bit_floor_uc.cpp @@ -0,0 +1,20 @@ +//===-- Implementation of stdc_bit_floor_uc -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdbit/stdc_bit_floor_uc.h" + +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(unsigned char, stdc_bit_floor_uc, (unsigned char value)) { + return cpp::bit_floor(value); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_bit_floor_uc.h b/libc/src/stdbit/stdc_bit_floor_uc.h new file mode 100644 index 00000000000000..d6f53c5f699797 --- /dev/null +++ b/libc/src/stdbit/stdc_bit_floor_uc.h @@ -0,0 +1,18 @@ +//===-- Implementation header for stdc_bit_floor_uc -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_UC_H +#define LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_UC_H + +namespace LIBC_NAMESPACE { + +unsigned char stdc_bit_floor_uc(unsigned char value); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_UC_H diff --git a/libc/src/stdbit/stdc_bit_floor_ui.cpp b/libc/src/stdbit/stdc_bit_floor_ui.cpp new file mode 100644 index 00000000000000..149b63f190cf37 --- /dev/null +++ b/libc/src/stdbit/stdc_bit_floor_ui.cpp @@ -0,0 +1,20 @@ +//===-- Implementation of stdc_bit_floor_ui -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdbit/stdc_bit_floor_ui.h" + +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(unsigned, stdc_bit_floor_ui, (unsigned value)) { + return cpp::bit_floor(value); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_bit_floor_ui.h b/libc/src/stdbit/stdc_bit_floor_ui.h new file mode 100644 index 00000000000000..fcc606386f86d3 --- /dev/null +++ b/libc/src/stdbit/stdc_bit_floor_ui.h @@ -0,0 +1,18 @@ +//===-- Implementation header for stdc_bit_floor_ui -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_UI_H +#define LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_UI_H + +namespace LIBC_NAMESPACE { + +unsigned stdc_bit_floor_ui(unsigned value); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_UI_H diff --git a/libc/src/stdbit/stdc_bit_floor_ul.cpp b/libc/src/stdbit/stdc_bit_floor_ul.cpp new file mode 100644 index 00000000000000..a29a044545684e --- /dev/null +++ b/libc/src/stdbit/stdc_bit_floor_ul.cpp @@ -0,0 +1,20 @@ +//===-- Implementation of stdc_bit_floor_ul -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdbit/stdc_bit_floor_ul.h" + +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(unsigned long, stdc_bit_floor_ul, (unsigned long value)) { + return cpp::bit_floor(value); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_bit_floor_ul.h b/libc/src/stdbit/stdc_bit_floor_ul.h new file mode 100644 index 00000000000000..08327aa60c9069 --- /dev/null +++ b/libc/src/stdbit/stdc_bit_floor_ul.h @@ -0,0 +1,18 @@ +//===-- Implementation header for stdc_bit_floor_ul -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_UL_H +#define LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_UL_H + +namespace LIBC_NAMESPACE { + +unsigned long stdc_bit_floor_ul(unsigned long value); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_UL_H diff --git a/libc/src/stdbit/stdc_bit_floor_ull.cpp b/libc/src/stdbit/stdc_bit_floor_ull.cpp new file mode 100644 index 00000000000000..d1084b63573227 --- /dev/null +++ b/libc/src/stdbit/stdc_bit_floor_ull.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of stdc_bit_floor_ull ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdbit/stdc_bit_floor_ull.h" + +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(unsigned long long, stdc_bit_floor_ull, + (unsigned long long value)) { + return cpp::bit_floor(value); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_bit_floor_ull.h b/libc/src/stdbit/stdc_bit_floor_ull.h new file mode 100644 index 00000000000000..8f360b23855ad6 --- /dev/null +++ b/libc/src/stdbit/stdc_bit_floor_ull.h @@ -0,0 +1,18 @@ +//===-- Implementation header for stdc_bit_floor_ull ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_ULL_H +#define LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_ULL_H + +namespace LIBC_NAMESPACE { + +unsigned long long stdc_bit_floor_ull(unsigned long long value); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_ULL_H diff --git a/libc/src/stdbit/stdc_bit_floor_us.cpp b/libc/src/stdbit/stdc_bit_floor_us.cpp new file mode 100644 index 00000000000000..d1357a980e3a8a --- /dev/null +++ b/libc/src/stdbit/stdc_bit_floor_us.cpp @@ -0,0 +1,20 @@ +//===-- Implementation of stdc_bit_floor_us -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdbit/stdc_bit_floor_us.h" + +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(unsigned short, stdc_bit_floor_us, (unsigned short value)) { + return cpp::bit_floor(value); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_bit_floor_us.h b/libc/src/stdbit/stdc_bit_floor_us.h new file mode 100644 index 00000000000000..fcd0b9e3c549a1 --- /dev/null +++ b/libc/src/stdbit/stdc_bit_floor_us.h @@ -0,0 +1,18 @@ +//===-- Implementation header for stdc_bit_floor_us -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_US_H +#define LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_US_H + +namespace LIBC_NAMESPACE { + +unsigned short stdc_bit_floor_us(unsigned short value); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDBIT_STDC_BIT_FLOOR_US_H diff --git a/libc/src/stdbit/stdc_count_ones_uc.cpp b/libc/src/stdbit/stdc_count_ones_uc.cpp index 5a7314caa3baa0..1e998ff521b7db 100644 --- a/libc/src/stdbit/stdc_count_ones_uc.cpp +++ b/libc/src/stdbit/stdc_count_ones_uc.cpp @@ -14,7 +14,7 @@ namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(unsigned, stdc_count_ones_uc, (unsigned char value)) { - return static_cast(cpp::count_ones(value)); + return static_cast(cpp::popcount(value)); } } // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_count_ones_ui.cpp b/libc/src/stdbit/stdc_count_ones_ui.cpp index 289f4bac31f7b8..e457dd793db33d 100644 --- a/libc/src/stdbit/stdc_count_ones_ui.cpp +++ b/libc/src/stdbit/stdc_count_ones_ui.cpp @@ -14,7 +14,7 @@ namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(unsigned, stdc_count_ones_ui, (unsigned value)) { - return static_cast(cpp::count_ones(value)); + return static_cast(cpp::popcount(value)); } } // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_count_ones_ul.cpp b/libc/src/stdbit/stdc_count_ones_ul.cpp index 83f3279d791937..ed86653fc7ee2e 100644 --- a/libc/src/stdbit/stdc_count_ones_ul.cpp +++ b/libc/src/stdbit/stdc_count_ones_ul.cpp @@ -14,7 +14,7 @@ namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(unsigned, stdc_count_ones_ul, (unsigned long value)) { - return static_cast(cpp::count_ones(value)); + return static_cast(cpp::popcount(value)); } } // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_count_ones_ull.cpp b/libc/src/stdbit/stdc_count_ones_ull.cpp index 104788aaf21265..c5ecc3cda6477a 100644 --- 
a/libc/src/stdbit/stdc_count_ones_ull.cpp +++ b/libc/src/stdbit/stdc_count_ones_ull.cpp @@ -14,7 +14,7 @@ namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(unsigned, stdc_count_ones_ull, (unsigned long long value)) { - return static_cast(cpp::count_ones(value)); + return static_cast(cpp::popcount(value)); } } // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_count_ones_us.cpp b/libc/src/stdbit/stdc_count_ones_us.cpp index 4b6ff0b94b626a..465c5c374e7c64 100644 --- a/libc/src/stdbit/stdc_count_ones_us.cpp +++ b/libc/src/stdbit/stdc_count_ones_us.cpp @@ -14,7 +14,7 @@ namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(unsigned, stdc_count_ones_us, (unsigned short value)) { - return static_cast(cpp::count_ones(value)); + return static_cast(cpp::popcount(value)); } } // namespace LIBC_NAMESPACE diff --git a/libc/src/stdfix/CMakeLists.txt b/libc/src/stdfix/CMakeLists.txt index 3a1cb66b7abcaf..10d76ae31349f9 100644 --- a/libc/src/stdfix/CMakeLists.txt +++ b/libc/src/stdfix/CMakeLists.txt @@ -67,3 +67,29 @@ add_entrypoint_object( DEPENDS libc.src.__support.fixed_point.sqrt ) + +add_entrypoint_object( + exphk + HDRS + exphk.h + SRCS + exphk.cpp + COMPILE_OPTIONS + -O3 + DEPENDS + libc.src.__support.fixed_point.fx_rep + libc.src.__support.CPP.bit +) + +add_entrypoint_object( + expk + HDRS + expk.h + SRCS + expk.cpp + COMPILE_OPTIONS + -O3 + DEPENDS + libc.src.__support.fixed_point.fx_rep + libc.src.__support.CPP.bit +) diff --git a/libc/src/stdfix/exphk.cpp b/libc/src/stdfix/exphk.cpp new file mode 100644 index 00000000000000..19a972b390c71b --- /dev/null +++ b/libc/src/stdfix/exphk.cpp @@ -0,0 +1,92 @@ +//===-- Implementation of exphk function ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "exphk.h" +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" +#include "src/__support/fixed_point/fx_bits.h" + +namespace LIBC_NAMESPACE { + +namespace { + +// Look up tables for exp(hi) and exp(mid). +// Generated with Sollya: +// > for i from 0 to 89 do { +// hi = floor(i/8) - 5; +// m = i/8 - floor(i/8) - 0.5; +// e_hi = nearestint(exp(hi) * 2^7) * 2^-7; +// e_mid = nearestint(exp(m) * 2^7) * 2^-7; +// print(hi, e_hi, m, e_mid); +// }; +// Notice that when i = 88 and 89, e_hi will overflow short accum range. +static constexpr short accum EXP_HI[12] = { + 0x1.0p-7hk, 0x1.0p-6hk, 0x1.8p-5hk, 0x1.1p-3hk, 0x1.78p-2hk, 0x1.0p0hk, + 0x1.5cp1hk, 0x1.d9p2hk, 0x1.416p4hk, 0x1.b4dp5hk, 0x1.28d4p7hk, SACCUM_MAX, +}; + +static constexpr short accum EXP_MID[8] = { + 0x1.38p-1hk, 0x1.6p-1hk, 0x1.9p-1hk, 0x1.c4p-1hk, + 0x1.0p0hk, 0x1.22p0hk, 0x1.48p0hk, 0x1.74p0hk, +}; + +} // anonymous namespace + +LLVM_LIBC_FUNCTION(short accum, exphk, (short accum x)) { + using FXRep = fixed_point::FXRep; + using StorageType = typename FXRep::StorageType; + // Output overflow + if (LIBC_UNLIKELY(x >= 0x1.64p2hk)) + return FXRep::MAX(); + // Lower bound where exp(x) -> 0: + // floor(log(2^-8) * 2^7) * 2^-7 + if (LIBC_UNLIKELY(x <= -0x1.63p2hk)) + return FXRep::ZERO(); + + // Current range of x: + // -0x1.628p2 <= x <= 0x1.638p2 + // Range reduction: + // x = hi + mid + lo, + // where: + // hi is an integer + // mid * 2^3 is an integer + // |lo| <= 2^-4. + // Then exp(x) = exp(hi + mid + lo) = exp(hi) * exp(mid) * exp(lo) + // ~ exp(hi) * exp(mid) * (1 + lo) + // with relative errors < |lo|^2 <= 2^-8. + // exp(hi) and exp(mid) are extracted from small lookup tables. + + // Round-to-nearest 1/8, tie-to-(+Int): + constexpr short accum ONE_SIXTEENTH = 0x1.0p-4hk; + // x_rounded = floor(x + 1/16). 
+ short accum x_rounded = ((x + ONE_SIXTEENTH) >> (FXRep::FRACTION_LEN - 3)) + << (FXRep::FRACTION_LEN - 3); + short accum lo = x - x_rounded; + + // Range of x_rounded: + // x_rounded >= floor((-0x1.628p2 + 0x1.0p-4) * 2^3) * 2^-3 + // = -0x1.6p2 = -5.5 + // To get the indices, we shift the values so that it start with 0. + // Range of indices: 0 <= indices <= 89 + StorageType indices = cpp::bit_cast((x_rounded + 0x1.6p2hk) >> + (FXRep::FRACTION_LEN - 3)); + // So we have the following relation: + // indices = (hi + mid + 44/8) * 8 + // That implies: + // hi + mid = indices/8 - 5.5 + // So for lookup tables, we can use the upper 4 bits to get: + // exp( floor(indices / 8) - 5 ) + // and lower 3 bits for: + // exp( (indices - floor(indices)) - 0.5 ) + short accum exp_hi = EXP_HI[indices >> 3]; + short accum exp_mid = EXP_MID[indices & 0x7]; + // exp(x) ~ exp(hi) * exp(mid) * (1 + lo); + return (exp_hi * (exp_mid * (0x1.0p0hk + lo))); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdfix/exphk.h b/libc/src/stdfix/exphk.h new file mode 100644 index 00000000000000..da03bb76d53f53 --- /dev/null +++ b/libc/src/stdfix/exphk.h @@ -0,0 +1,20 @@ +//===-- Implementation header for exphk -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDFIX_EXPHK_H +#define LLVM_LIBC_SRC_STDFIX_EXPHK_H + +#include "include/llvm-libc-macros/stdfix-macros.h" + +namespace LIBC_NAMESPACE { + +short accum exphk(short accum x); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDFIX_EXPHK_H diff --git a/libc/src/stdfix/expk.cpp b/libc/src/stdfix/expk.cpp new file mode 100644 index 00000000000000..57227fd27769cc --- /dev/null +++ b/libc/src/stdfix/expk.cpp @@ -0,0 +1,104 @@ +//===-- Implementation of expk function ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "expk.h" +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" +#include "src/__support/fixed_point/fx_bits.h" + +namespace LIBC_NAMESPACE { + +namespace { + +// Look up tables for exp(hi) and exp(mid). 
+// Generated with Sollya: +// > for i from 0 to 23 do { +// hi = i - 11; +// e_hi = nearestint(exp(hi) * 2^15) * 2^-15; +// print(e_hi, "k,"); +// }; +static constexpr accum EXP_HI[24] = { + 0x1p-15k, 0x1p-15k, 0x1p-13k, 0x1.6p-12k, + 0x1.ep-11k, 0x1.44p-9k, 0x1.bap-8k, 0x1.2cp-6k, + 0x1.97cp-5k, 0x1.153p-3k, 0x1.78b8p-2k, 0x1p0k, + 0x1.5bf1p1k, 0x1.d8e68p2k, 0x1.415e6p4k, 0x1.b4c9p5k, + 0x1.28d388p7k, 0x1.936dc6p8k, 0x1.1228858p10k, 0x1.749ea7cp11k, + 0x1.fa7157cp12k, 0x1.5829dcf8p14k, 0x1.d3c4489p15k, ACCUM_MAX, +}; + +// Generated with Sollya: +// > for i from 0 to 15 do { +// m = i/16 - 0.0625; +// e_m = nearestint(exp(m) * 2^15) * 2^-15; +// print(e_m, "k,"); +// }; +static constexpr accum EXP_MID[16] = { + 0x1.e0fcp-1k, 0x1p0k, 0x1.1082p0k, 0x1.2216p0k, + 0x1.34ccp0k, 0x1.48b6p0k, 0x1.5deap0k, 0x1.747ap0k, + 0x1.8c8p0k, 0x1.a612p0k, 0x1.c14cp0k, 0x1.de46p0k, + 0x1.fd1ep0k, 0x1.0efap1k, 0x1.2074p1k, 0x1.330ep1k, +}; + +} // anonymous namespace + +LLVM_LIBC_FUNCTION(accum, expk, (accum x)) { + using FXRep = fixed_point::FXRep; + using StorageType = typename FXRep::StorageType; + // Output overflow + // > floor(log(2^16) * 2^15) * 2^-15 + if (LIBC_UNLIKELY(x >= 0x1.62e4p3k)) + return FXRep::MAX(); + // Lower bound where exp(x) -> 0: + // floor(log(2^-16) * 2^15) * 2^-15 + if (LIBC_UNLIKELY(x <= -0x1.62e44p3k)) + return FXRep::ZERO(); + + // Current range of x: + // -0x1.62e4p3 <= x <= 0x1.62e3cp3 + // Range reduction: + // x = hi + mid + lo, + // where: + // hi is an integer + // mid * 2^4 is an integer + // |lo| <= 2^-5. + // Then exp(x) = exp(hi + mid + lo) = exp(hi) * exp(mid) * exp(lo) + // ~ exp(hi) * exp(mid) * (1 + lo + lo^2 / 2) + // with relative errors < |lo|^3/2 <= 2^-16. + // exp(hi) and exp(mid) are extracted from small lookup tables. + + // Round-to-nearest 1/16, tie-to-(+Int): + constexpr accum ONE_THIRTY_SECOND = 0x1.0p-5k; + // x_rounded = floor(x + 1/16). 
+ accum x_rounded = ((x + ONE_THIRTY_SECOND) >> (FXRep::FRACTION_LEN - 4)) + << (FXRep::FRACTION_LEN - 4); + accum lo = x - x_rounded; + + // Range of x_rounded: + // x_rounded >= floor((-0x1.62e4p3 + 0x1.0p-5) * 2^4) * 2^-4 + // = -0x1.62p3 = -11.0625 + // To get the indices, we shift the values so that it start with 0. + // Range of indices: 0 <= indices <= 355. + StorageType indices = cpp::bit_cast((x_rounded + 0x1.62p3k) >> + (FXRep::FRACTION_LEN - 4)); + // So we have the following relation: + // indices = (hi + mid + 177/16) * 16 + // That implies: + // hi + mid = indices/16 - 11.0625 + // So for lookup tables, we can use the upper 4 bits to get: + // exp( floor(indices / 16) - 11 ) + // and lower 4 bits for: + // exp( (indices - floor(indices)) - 0.0625 ) + accum exp_hi = EXP_HI[indices >> 4]; + accum exp_mid = EXP_MID[indices & 0xf]; + // exp(x) ~ exp(hi) * exp(mid) * (1 + lo); + accum l1 = 0x1.0p0k + (lo >> 1); // = 1 + lo / 2 + accum l2 = 0x1.0p0k + lo * l1; // = 1 + lo * (1 + lo / 2) = 1 + lo + lo^2/2 + return (exp_hi * (exp_mid * l2)); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdfix/expk.h b/libc/src/stdfix/expk.h new file mode 100644 index 00000000000000..4526686a200b47 --- /dev/null +++ b/libc/src/stdfix/expk.h @@ -0,0 +1,20 @@ +//===-- Implementation header for expk --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDFIX_EXPK_H +#define LLVM_LIBC_SRC_STDFIX_EXPK_H + +#include "include/llvm-libc-macros/stdfix-macros.h" + +namespace LIBC_NAMESPACE { + +accum expk(accum x); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDFIX_EXPK_H diff --git a/libc/src/stdio/printf_core/float_dec_converter.h b/libc/src/stdio/printf_core/float_dec_converter.h index a6c68329e66023..27d229a3e42cb5 100644 --- a/libc/src/stdio/printf_core/float_dec_converter.h +++ b/libc/src/stdio/printf_core/float_dec_converter.h @@ -12,6 +12,7 @@ #include "src/__support/CPP/string_view.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/rounding_mode.h" +#include "src/__support/UInt.h" // cpp::is_big_int_v #include "src/__support/float_to_string.h" #include "src/__support/integer_to_string.h" #include "src/__support/libc_assert.h" @@ -33,7 +34,8 @@ using ExponentString = // Returns true if value is divisible by 2^p. 
template -LIBC_INLINE constexpr cpp::enable_if_t, bool> +LIBC_INLINE constexpr cpp::enable_if_t< + cpp::is_integral_v || cpp::is_big_int_v, bool> multiple_of_power_of_2(T value, uint32_t p) { return (value & ((T(1) << p) - 1)) == 0; } @@ -76,7 +78,8 @@ LIBC_INLINE RoundDirection get_round_direction(int last_digit, bool truncated, } template -LIBC_INLINE constexpr cpp::enable_if_t, bool> +LIBC_INLINE constexpr cpp::enable_if_t< + cpp::is_integral_v || cpp::is_big_int_v, bool> zero_after_digits(int32_t base_2_exp, int32_t digits_after_point, T mantissa, const int32_t mant_width) { const int32_t required_twos = -base_2_exp - digits_after_point - 1; diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt index 1c893280e8a3c2..56588ffafb86f0 100644 --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -441,6 +441,17 @@ add_entrypoint_object( .memory_utils.inline_memcpy ) +add_entrypoint_object( + memset_explicit + SRCS + memset_explicit.cpp + HDRS + memset_explicit.h + DEPENDS + .string_utils + .memory_utils.inline_memset +) + # Helper to define a function with multiple implementations # - Computes flags to satisfy required/rejected features and arch, # - Declares an entry point, diff --git a/libc/src/string/memset_explicit.cpp b/libc/src/string/memset_explicit.cpp new file mode 100644 index 00000000000000..a8656d1e791e84 --- /dev/null +++ b/libc/src/string/memset_explicit.cpp @@ -0,0 +1,25 @@ +//===-- Implementation of memset_explicit ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/memset_explicit.h" +#include "src/__support/common.h" +#include "src/string/memory_utils/inline_memset.h" + +namespace LIBC_NAMESPACE { + +[[gnu::noinline]] LLVM_LIBC_FUNCTION(void *, memset_explicit, + (void *dst, int value, size_t count)) { + // Use the inline memset function to set the memory. + inline_memset(dst, static_cast(value), count); + // avoid dead store elimination + // The asm itself should also be sufficient to behave as a compiler barrier. + asm("" : : "r"(dst) : "memory"); + return dst; +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/string/memset_explicit.h b/libc/src/string/memset_explicit.h new file mode 100644 index 00000000000000..f6c189761a123c --- /dev/null +++ b/libc/src/string/memset_explicit.h @@ -0,0 +1,20 @@ +//===-- Implementation header for memset_explicit ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STRING_MEMSET_EXPLICIT_H +#define LLVM_LIBC_SRC_STRING_MEMSET_EXPLICIT_H + +#include // size_t + +namespace LIBC_NAMESPACE { + +[[gnu::noinline]] void *memset_explicit(void *ptr, int value, size_t count); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STRING_MEMSET_EXPLICIT_H diff --git a/libc/test/UnitTest/CMakeLists.txt b/libc/test/UnitTest/CMakeLists.txt index 4668f0061975f8..8a35f1204eb511 100644 --- a/libc/test/UnitTest/CMakeLists.txt +++ b/libc/test/UnitTest/CMakeLists.txt @@ -74,6 +74,7 @@ add_unittest_framework_library( libc.src.__support.CPP.type_traits libc.src.__support.fixed_point.fx_rep libc.src.__support.OSUtil.osutil + libc.src.__support.uint libc.src.__support.uint128 ) @@ -103,6 +104,7 @@ add_header_library( DEPENDS libc.src.__support.CPP.string libc.src.__support.CPP.type_traits + libc.src.__support.uint ) add_unittest_framework_library( diff --git a/libc/test/UnitTest/LibcTest.cpp b/libc/test/UnitTest/LibcTest.cpp index 7b0e4fca83683b..0340f7ed37100e 100644 --- a/libc/test/UnitTest/LibcTest.cpp +++ b/libc/test/UnitTest/LibcTest.cpp @@ -38,7 +38,8 @@ TestLogger &operator<<(TestLogger &logger, Location Loc) { // When the value is UInt128, __uint128_t or wider, show its hexadecimal // digits. template -cpp::enable_if_t && (sizeof(T) > sizeof(uint64_t)), +cpp::enable_if_t<(cpp::is_integral_v && (sizeof(T) > sizeof(uint64_t))) || + cpp::is_big_int_v, cpp::string> describeValue(T Value) { static_assert(sizeof(T) % 8 == 0, "Unsupported size of UInt"); @@ -47,11 +48,10 @@ describeValue(T Value) { } // When the value is of a standard integral type, just display it as normal. 
-template -cpp::enable_if_t && - sizeof(ValType) <= sizeof(uint64_t), +template +cpp::enable_if_t && (sizeof(T) <= sizeof(uint64_t)), cpp::string> -describeValue(ValType Value) { +describeValue(T Value) { return cpp::to_string(Value); } diff --git a/libc/test/UnitTest/LibcTest.h b/libc/test/UnitTest/LibcTest.h index 639f6005832576..d26d6490bcb572 100644 --- a/libc/test/UnitTest/LibcTest.h +++ b/libc/test/UnitTest/LibcTest.h @@ -127,6 +127,7 @@ class Test { // of type promotion. template || + cpp::is_big_int_v || cpp::is_fixed_point_v, int> = 0> bool test(TestCond Cond, ValType LHS, ValType RHS, const char *LHSStr, diff --git a/libc/test/UnitTest/StringUtils.h b/libc/test/UnitTest/StringUtils.h index 54cff97ceafb4e..1e3ba5715d23d6 100644 --- a/libc/test/UnitTest/StringUtils.h +++ b/libc/test/UnitTest/StringUtils.h @@ -11,12 +11,13 @@ #include "src/__support/CPP/string.h" #include "src/__support/CPP/type_traits.h" +#include "src/__support/UInt.h" namespace LIBC_NAMESPACE { // Return the first N hex digits of an integer as a string in upper case. 
template -cpp::enable_if_t, cpp::string> +cpp::enable_if_t || cpp::is_big_int_v, cpp::string> int_to_hex(T value, size_t length = sizeof(T) * 2) { cpp::string s(length, '0'); diff --git a/libc/test/UnitTest/TestLogger.cpp b/libc/test/UnitTest/TestLogger.cpp index 6bb0e17dc3888e..469b3a11d57d9b 100644 --- a/libc/test/UnitTest/TestLogger.cpp +++ b/libc/test/UnitTest/TestLogger.cpp @@ -2,6 +2,7 @@ #include "src/__support/CPP/string.h" #include "src/__support/CPP/string_view.h" #include "src/__support/OSUtil/io.h" // write_to_stderr +#include "src/__support/UInt.h" // is_big_int #include "src/__support/UInt128.h" #include @@ -47,8 +48,9 @@ template <> TestLogger &TestLogger::operator<<(void *addr) { } template TestLogger &TestLogger::operator<<(T t) { - if constexpr (cpp::is_integral_v && cpp::is_unsigned_v && - sizeof(T) > sizeof(uint64_t)) { + if constexpr (cpp::is_big_int_v || + (cpp::is_integral_v && cpp::is_unsigned_v && + (sizeof(T) > sizeof(uint64_t)))) { static_assert(sizeof(T) % 8 == 0, "Unsupported size of UInt"); const IntegerToString buffer(t); return *this << buffer.view(); @@ -68,7 +70,7 @@ template TestLogger &TestLogger::operator<< (unsigned short); template TestLogger &TestLogger::operator<< (unsigned int); template TestLogger &TestLogger::operator<< (unsigned long); template TestLogger & -TestLogger::operator<< (unsigned long long); + TestLogger::operator<< (unsigned long long); #ifdef __SIZEOF_INT128__ template TestLogger &TestLogger::operator<< <__uint128_t>(__uint128_t); diff --git a/libc/test/include/stdbit_test.cpp b/libc/test/include/stdbit_test.cpp index dfb7c97e3d9ee0..20820d52fbdede 100644 --- a/libc/test/include/stdbit_test.cpp +++ b/libc/test/include/stdbit_test.cpp @@ -91,6 +91,13 @@ unsigned stdc_bit_width_us(unsigned short) noexcept { return 0x4BU; } unsigned stdc_bit_width_ui(unsigned) noexcept { return 0x4CU; } unsigned stdc_bit_width_ul(unsigned long) noexcept { return 0x4DU; } unsigned stdc_bit_width_ull(unsigned long long) noexcept 
{ return 0x4EU; } +unsigned char stdc_bit_floor_uc(unsigned char) noexcept { return 0x5AU; } +unsigned short stdc_bit_floor_us(unsigned short) noexcept { return 0x5BU; } +unsigned stdc_bit_floor_ui(unsigned) noexcept { return 0x5CU; } +unsigned long stdc_bit_floor_ul(unsigned long) noexcept { return 0x5DU; } +unsigned long long stdc_bit_floor_ull(unsigned long long) noexcept { + return 0x5EU; +} } #include "include/llvm-libc-macros/stdbit-macros.h" @@ -190,3 +197,13 @@ TEST(LlvmLibcStdbitTest, TypeGenericMacroBitWidth) { EXPECT_EQ(stdc_bit_width(1UL), 0x4DU); EXPECT_EQ(stdc_bit_width(1ULL), 0x4EU); } + +TEST(LlvmLibcStdbitTest, TypeGenericMacroBitFloor) { + EXPECT_EQ(stdc_bit_floor(static_cast(0U)), + static_cast(0x5AU)); + EXPECT_EQ(stdc_bit_floor(static_cast(0U)), + static_cast(0x5BU)); + EXPECT_EQ(stdc_bit_floor(0U), 0x5CU); + EXPECT_EQ(stdc_bit_floor(0UL), 0x5DUL); + EXPECT_EQ(stdc_bit_floor(0ULL), 0x5EULL); +} diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt index 8c861b576f9b1b..adbacb9728ccd4 100644 --- a/libc/test/src/__support/CMakeLists.txt +++ b/libc/test/src/__support/CMakeLists.txt @@ -27,7 +27,9 @@ add_libc_test( SRCS math_extras_test.cpp DEPENDS + libc.src.__support.integer_literals libc.src.__support.math_extras + libc.src.__support.uint128 ) add_libc_test( diff --git a/libc/test/src/__support/CPP/bit_test.cpp b/libc/test/src/__support/CPP/bit_test.cpp index 115a5d505c4b7a..d3f56d5bad83d3 100644 --- a/libc/test/src/__support/CPP/bit_test.cpp +++ b/libc/test/src/__support/CPP/bit_test.cpp @@ -14,19 +14,40 @@ namespace LIBC_NAMESPACE::cpp { -using UnsignedTypes = - testing::TypeList>; + unsigned char, unsigned short, unsigned int, unsigned long, + unsigned long long>; + +using UnsignedTypes = testing::TypeList< +#if defined(__SIZEOF_INT128__) + __uint128_t, +#endif + unsigned char, unsigned short, unsigned int, unsigned long, + unsigned long long, cpp::UInt<128>>; TYPED_TEST(LlvmLibcBitTest, HasSingleBit, 
UnsignedTypes) { - EXPECT_FALSE(has_single_bit(T(0))); - EXPECT_FALSE(has_single_bit(~T(0))); + constexpr auto ZERO = T(0); + constexpr auto ALL_ONES = T(~ZERO); + EXPECT_FALSE(has_single_bit(ZERO)); + EXPECT_FALSE(has_single_bit(ALL_ONES)); + for (T value = 1; value; value <<= 1) EXPECT_TRUE(has_single_bit(value)); + + // We test that if two bits are set has_single_bit returns false. + // We do this by setting the highest or lowest bit depending or where the + // current bit is. This is a bit convoluted but it helps catch a bug on BigInt + // where we have to work on an element-by-element basis. + constexpr auto MIDPOINT = T(ALL_ONES / 2); + constexpr auto LSB = T(1); + constexpr auto MSB = T(~(ALL_ONES >> 1)); + for (T value = 1; value; value <<= 1) { + auto two_bits_value = value | ((value <= MIDPOINT) ? MSB : LSB); + EXPECT_FALSE(has_single_bit(two_bits_value)); + } } TYPED_TEST(LlvmLibcBitTest, CountLZero, UnsignedTypes) { @@ -206,42 +227,42 @@ TEST(LlvmLibcBitTest, Rotr) { rotr(0x12345678deadbeefULL, -19)); } -TYPED_TEST(LlvmLibcBitTest, FirstLeadingZero, UnsignedTypes) { +TYPED_TEST(LlvmLibcBitTest, FirstLeadingZero, UnsignedTypesNoBigInt) { EXPECT_EQ(first_leading_zero(cpp::numeric_limits::max()), 0); for (int i = 0U; i != cpp::numeric_limits::digits; ++i) EXPECT_EQ(first_leading_zero(~(T(1) << i)), cpp::numeric_limits::digits - i); } -TYPED_TEST(LlvmLibcBitTest, FirstLeadingOne, UnsignedTypes) { +TYPED_TEST(LlvmLibcBitTest, FirstLeadingOne, UnsignedTypesNoBigInt) { EXPECT_EQ(first_leading_one(static_cast(0)), 0); for (int i = 0U; i != cpp::numeric_limits::digits; ++i) EXPECT_EQ(first_leading_one(T(1) << i), cpp::numeric_limits::digits - i); } -TYPED_TEST(LlvmLibcBitTest, FirstTrailingZero, UnsignedTypes) { +TYPED_TEST(LlvmLibcBitTest, FirstTrailingZero, UnsignedTypesNoBigInt) { EXPECT_EQ(first_trailing_zero(cpp::numeric_limits::max()), 0); for (int i = 0U; i != cpp::numeric_limits::digits; ++i) EXPECT_EQ(first_trailing_zero(~(T(1) << i)), i + 1); } 
-TYPED_TEST(LlvmLibcBitTest, FirstTrailingOne, UnsignedTypes) { +TYPED_TEST(LlvmLibcBitTest, FirstTrailingOne, UnsignedTypesNoBigInt) { EXPECT_EQ(first_trailing_one(cpp::numeric_limits::max()), 0); for (int i = 0U; i != cpp::numeric_limits::digits; ++i) EXPECT_EQ(first_trailing_one(T(1) << i), i + 1); } -TYPED_TEST(LlvmLibcBitTest, CountZeros, UnsignedTypes) { +TYPED_TEST(LlvmLibcBitTest, CountZeros, UnsignedTypesNoBigInt) { EXPECT_EQ(count_zeros(T(0)), cpp::numeric_limits::digits); for (int i = 0; i != cpp::numeric_limits::digits; ++i) EXPECT_EQ(count_zeros(cpp::numeric_limits::max() >> i), i); } -TYPED_TEST(LlvmLibcBitTest, CountOnes, UnsignedTypes) { - EXPECT_EQ(count_ones(T(0)), 0); +TYPED_TEST(LlvmLibcBitTest, CountOnes, UnsignedTypesNoBigInt) { + EXPECT_EQ(popcount(T(0)), 0); for (int i = 0; i != cpp::numeric_limits::digits; ++i) - EXPECT_EQ(count_ones(cpp::numeric_limits::max() >> i), + EXPECT_EQ(popcount(cpp::numeric_limits::max() >> i), cpp::numeric_limits::digits - i); } diff --git a/libc/test/src/__support/FPUtil/fpbits_test.cpp b/libc/test/src/__support/FPUtil/fpbits_test.cpp index f5c27d4fc0302b..760031569c81f1 100644 --- a/libc/test/src/__support/FPUtil/fpbits_test.cpp +++ b/libc/test/src/__support/FPUtil/fpbits_test.cpp @@ -237,6 +237,8 @@ template constexpr auto make(Sign sign, FP fp) { return T::signaling_nan(sign); case FP::QUIET_NAN: return T::quiet_nan(sign); + default: + __builtin_unreachable(); } } diff --git a/libc/test/src/__support/math_extras_test.cpp b/libc/test/src/__support/math_extras_test.cpp index e55d995592cc1c..ed064363d446bb 100644 --- a/libc/test/src/__support/math_extras_test.cpp +++ b/libc/test/src/__support/math_extras_test.cpp @@ -6,34 +6,59 @@ // //===----------------------------------------------------------------------===// +#include "src/__support/UInt128.h" // UInt128 +#include "src/__support/integer_literals.h" #include "src/__support/math_extras.h" #include "test/UnitTest/Test.h" namespace LIBC_NAMESPACE { 
TEST(LlvmLibcBlockMathExtrasTest, mask_trailing_ones) { - EXPECT_EQ(uint8_t(0), (mask_leading_ones())); - EXPECT_EQ(uint8_t(0), (mask_trailing_ones())); - EXPECT_EQ(uint16_t(0), (mask_leading_ones())); - EXPECT_EQ(uint16_t(0), (mask_trailing_ones())); - EXPECT_EQ(uint32_t(0), (mask_leading_ones())); - EXPECT_EQ(uint32_t(0), (mask_trailing_ones())); - EXPECT_EQ(uint64_t(0), (mask_leading_ones())); - EXPECT_EQ(uint64_t(0), (mask_trailing_ones())); - - EXPECT_EQ(uint32_t(0x00000003), (mask_trailing_ones())); - EXPECT_EQ(uint32_t(0xC0000000), (mask_leading_ones())); - - EXPECT_EQ(uint32_t(0x000007FF), (mask_trailing_ones())); - EXPECT_EQ(uint32_t(0xFFE00000), (mask_leading_ones())); - - EXPECT_EQ(uint32_t(0xFFFFFFFF), (mask_trailing_ones())); - EXPECT_EQ(uint32_t(0xFFFFFFFF), (mask_leading_ones())); - EXPECT_EQ(uint64_t(0xFFFFFFFFFFFFFFFF), (mask_trailing_ones())); - EXPECT_EQ(uint64_t(0xFFFFFFFFFFFFFFFF), (mask_leading_ones())); - - EXPECT_EQ(uint64_t(0x0000FFFFFFFFFFFF), (mask_trailing_ones())); - EXPECT_EQ(uint64_t(0xFFFFFFFFFFFF0000), (mask_leading_ones())); + EXPECT_EQ(0_u8, (mask_leading_ones())); + EXPECT_EQ(0_u8, (mask_trailing_ones())); + EXPECT_EQ(0_u16, (mask_leading_ones())); + EXPECT_EQ(0_u16, (mask_trailing_ones())); + EXPECT_EQ(0_u32, (mask_leading_ones())); + EXPECT_EQ(0_u32, (mask_trailing_ones())); + EXPECT_EQ(0_u64, (mask_leading_ones())); + EXPECT_EQ(0_u64, (mask_trailing_ones())); + + EXPECT_EQ(0x00000003_u32, (mask_trailing_ones())); + EXPECT_EQ(0xC0000000_u32, (mask_leading_ones())); + + EXPECT_EQ(0x000007FF_u32, (mask_trailing_ones())); + EXPECT_EQ(0xFFE00000_u32, (mask_leading_ones())); + + EXPECT_EQ(0xFFFFFFFF_u32, (mask_trailing_ones())); + EXPECT_EQ(0xFFFFFFFF_u32, (mask_leading_ones())); + EXPECT_EQ(0xFFFFFFFFFFFFFFFF_u64, (mask_trailing_ones())); + EXPECT_EQ(0xFFFFFFFFFFFFFFFF_u64, (mask_leading_ones())); + + EXPECT_EQ(0x0000FFFFFFFFFFFF_u64, (mask_trailing_ones())); + EXPECT_EQ(0xFFFFFFFFFFFF0000_u64, (mask_leading_ones())); + + 
EXPECT_EQ(0_u128, (mask_trailing_ones())); + EXPECT_EQ(0_u128, (mask_leading_ones())); + + EXPECT_EQ(0x00000000000000007FFFFFFFFFFFFFFF_u128, + (mask_trailing_ones())); + EXPECT_EQ(0xFFFFFFFFFFFFFFFE0000000000000000_u128, + (mask_leading_ones())); + + EXPECT_EQ(0x0000000000000000FFFFFFFFFFFFFFFF_u128, + (mask_trailing_ones())); + EXPECT_EQ(0xFFFFFFFFFFFFFFFF0000000000000000_u128, + (mask_leading_ones())); + + EXPECT_EQ(0x0000000000000001FFFFFFFFFFFFFFFF_u128, + (mask_trailing_ones())); + EXPECT_EQ(0xFFFFFFFFFFFFFFFF8000000000000000_u128, + (mask_leading_ones())); + + EXPECT_EQ(0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF_u128, + (mask_trailing_ones())); + EXPECT_EQ(0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF_u128, + (mask_leading_ones())); } } // namespace LIBC_NAMESPACE diff --git a/libc/test/src/stdbit/CMakeLists.txt b/libc/test/src/stdbit/CMakeLists.txt index f7e17d73229935..3aed56c0e92380 100644 --- a/libc/test/src/stdbit/CMakeLists.txt +++ b/libc/test/src/stdbit/CMakeLists.txt @@ -13,6 +13,7 @@ set(prefixes count_ones has_single_bit bit_width + bit_floor ) set(suffixes c s i l ll) foreach(prefix IN LISTS prefixes) diff --git a/libc/test/src/stdbit/stdc_bit_floor_uc_test.cpp b/libc/test/src/stdbit/stdc_bit_floor_uc_test.cpp new file mode 100644 index 00000000000000..254abd043d6e0c --- /dev/null +++ b/libc/test/src/stdbit/stdc_bit_floor_uc_test.cpp @@ -0,0 +1,22 @@ +//===-- Unittests for stdc_bit_floor_uc -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/limits.h" +#include "src/stdbit/stdc_bit_floor_uc.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcStdcBitfloorUcTest, Zero) { + EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_floor_uc(0U), + static_cast(0)); +} + +TEST(LlvmLibcStdcBitfloorUcTest, Ones) { + for (unsigned i = 0U; i != UCHAR_WIDTH; ++i) + EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_floor_uc(UCHAR_MAX >> i), + static_cast(1 << (UCHAR_WIDTH - i - 1))); +} diff --git a/libc/test/src/stdbit/stdc_bit_floor_ui_test.cpp b/libc/test/src/stdbit/stdc_bit_floor_ui_test.cpp new file mode 100644 index 00000000000000..53790402a9bda9 --- /dev/null +++ b/libc/test/src/stdbit/stdc_bit_floor_ui_test.cpp @@ -0,0 +1,21 @@ +//===-- Unittests for stdc_bit_floor_ui -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/limits.h" +#include "src/stdbit/stdc_bit_floor_ui.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcStdcBitfloorUiTest, Zero) { + EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_floor_ui(0U), 0U); +} + +TEST(LlvmLibcStdcBitfloorUiTest, Ones) { + for (unsigned i = 0U; i != INT_WIDTH; ++i) + EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_floor_ui(UINT_MAX >> i), + 1U << (UINT_WIDTH - i - 1)); +} diff --git a/libc/test/src/stdbit/stdc_bit_floor_ul_test.cpp b/libc/test/src/stdbit/stdc_bit_floor_ul_test.cpp new file mode 100644 index 00000000000000..1c574437e02b79 --- /dev/null +++ b/libc/test/src/stdbit/stdc_bit_floor_ul_test.cpp @@ -0,0 +1,21 @@ +//===-- Unittests for stdc_bit_floor_ul -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/limits.h" +#include "src/stdbit/stdc_bit_floor_ul.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcStdcBitfloorUlTest, Zero) { + EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_floor_ul(0UL), 0UL); +} + +TEST(LlvmLibcStdcBitfloorUlTest, Ones) { + for (unsigned i = 0U; i != ULONG_WIDTH; ++i) + EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_floor_ul(ULONG_MAX >> i), + 1UL << (ULONG_WIDTH - i - 1)); +} diff --git a/libc/test/src/stdbit/stdc_bit_floor_ull_test.cpp b/libc/test/src/stdbit/stdc_bit_floor_ull_test.cpp new file mode 100644 index 00000000000000..4717d427a40a72 --- /dev/null +++ b/libc/test/src/stdbit/stdc_bit_floor_ull_test.cpp @@ -0,0 +1,21 @@ +//===-- Unittests for stdc_bit_floor_ull ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/limits.h" +#include "src/stdbit/stdc_bit_floor_ull.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcStdcBitfloorUllTest, Zero) { + EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_floor_ull(0ULL), 0ULL); +} + +TEST(LlvmLibcStdcBitfloorUllTest, Ones) { + for (unsigned i = 0U; i != ULLONG_WIDTH; ++i) + EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_floor_ull(ULLONG_MAX >> i), + 1ULL << (ULLONG_WIDTH - i - 1)); +} diff --git a/libc/test/src/stdbit/stdc_bit_floor_us_test.cpp b/libc/test/src/stdbit/stdc_bit_floor_us_test.cpp new file mode 100644 index 00000000000000..4df87fb079ba76 --- /dev/null +++ b/libc/test/src/stdbit/stdc_bit_floor_us_test.cpp @@ -0,0 +1,22 @@ +//===-- Unittests for stdc_bit_floor_us -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/limits.h" +#include "src/stdbit/stdc_bit_floor_us.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcStdcBitfloorUsTest, Zero) { + EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_floor_us(0U), + static_cast(0)); +} + +TEST(LlvmLibcStdcBitfloorUsTest, Ones) { + for (unsigned i = 0U; i != USHRT_WIDTH; ++i) + EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_floor_us(USHRT_MAX >> i), + static_cast(1 << (USHRT_WIDTH - i - 1))); +} diff --git a/libc/test/src/stdfix/CMakeLists.txt b/libc/test/src/stdfix/CMakeLists.txt index d3e122884eb40e..74a1fb13127cc3 100644 --- a/libc/test/src/stdfix/CMakeLists.txt +++ b/libc/test/src/stdfix/CMakeLists.txt @@ -96,3 +96,39 @@ add_libc_test( libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.sqrt ) + +add_libc_test( + exphk_test + SUITE + libc-stdfix-tests + HDRS + ExpTest.h + SRCS + exphk_test.cpp + COMPILE_OPTIONS + -O3 + DEPENDS + libc.src.stdfix.exphk + libc.src.math.exp + libc.src.__support.CPP.bit + libc.src.__support.fixed_point.fx_rep + libc.src.__support.FPUtil.basic_operations +) + +add_libc_test( + expk_test + SUITE + libc-stdfix-tests + HDRS + ExpTest.h + SRCS + expk_test.cpp + COMPILE_OPTIONS + -O3 + DEPENDS + libc.src.stdfix.expk + libc.src.math.exp + libc.src.__support.CPP.bit + libc.src.__support.fixed_point.fx_rep + libc.src.__support.FPUtil.basic_operations +) diff --git a/libc/test/src/stdfix/ExpTest.h b/libc/test/src/stdfix/ExpTest.h new file mode 100644 index 00000000000000..e588cebf621b90 --- /dev/null +++ b/libc/test/src/stdfix/ExpTest.h @@ -0,0 +1,77 @@ +//===-- Utility class to test integer sqrt ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" + +#include "src/__support/CPP/bit.h" +#include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/fixed_point/fx_rep.h" +#include "src/__support/fixed_point/sqrt.h" + +#include "src/math/exp.h" + +template class ExpTest : public LIBC_NAMESPACE::testing::Test { + + using FXRep = LIBC_NAMESPACE::fixed_point::FXRep; + static constexpr T zero = FXRep::ZERO(); + static constexpr T one = static_cast(1); + static constexpr T eps = FXRep::EPS(); + +public: + typedef T (*ExpFunc)(T); + + void test_special_numbers(ExpFunc func) { + EXPECT_EQ(one, func(T(0))); + EXPECT_EQ(FXRep::MAX(), func(T(30))); + EXPECT_EQ(zero, func(T(-30))); + } + + void test_range_with_step(ExpFunc func, T step, bool rel_error) { + constexpr int COUNT = 255; + constexpr double ERR = 3.0 * static_cast(eps); + double x_d = 0.0; + T x = step; + for (int i = 0; i < COUNT; ++i) { + x += step; + x_d = static_cast(x); + double y_d = static_cast(func(x)); + double result = LIBC_NAMESPACE::exp(x_d); + double errors = rel_error + ? LIBC_NAMESPACE::fputil::abs((y_d / result) - 1.0) + : LIBC_NAMESPACE::fputil::abs(y_d - result); + if (errors > ERR) { + // Print out the failure input and output. 
+ EXPECT_EQ(x, T(0)); + EXPECT_EQ(func(x), zero); + } + ASSERT_TRUE(errors <= ERR); + } + } + + void test_positive_range(ExpFunc func) { + test_range_with_step(func, T(0x1.0p-6), /*rel_error*/ true); + } + + void test_negative_range(ExpFunc func) { + test_range_with_step(func, T(-0x1.0p-6), /*rel_error*/ false); + } +}; + +#define LIST_EXP_TESTS(Name, T, func) \ + using LlvmLibcExp##Name##Test = ExpTest; \ + TEST_F(LlvmLibcExp##Name##Test, SpecialNumbers) { \ + test_special_numbers(&func); \ + } \ + TEST_F(LlvmLibcExp##Name##Test, PositiveRange) { \ + test_positive_range(&func); \ + } \ + TEST_F(LlvmLibcExp##Name##Test, NegativeRange) { \ + test_negative_range(&func); \ + } \ + static_assert(true, "Require semicolon.") diff --git a/libc/test/src/stdfix/RoundTest.h b/libc/test/src/stdfix/RoundTest.h index 06343addbef20e..d3ae04db9749ba 100644 --- a/libc/test/src/stdfix/RoundTest.h +++ b/libc/test/src/stdfix/RoundTest.h @@ -28,7 +28,7 @@ template class RoundTest : public LIBC_NAMESPACE::testing::Test { void testSpecialNumbers(RoundFunc func) { EXPECT_EQ(zero, func(zero, FXRep::FRACTION_LEN - 5)); - EXPECT_EQ(max, func(min, 0)); + EXPECT_EQ(min, func(min, 0)); EXPECT_EQ(max, func(max, FXRep::FRACTION_LEN)); EXPECT_EQ(one, func(half, 0)); diff --git a/libc/test/src/stdfix/exphk_test.cpp b/libc/test/src/stdfix/exphk_test.cpp new file mode 100644 index 00000000000000..24e92dc902faea --- /dev/null +++ b/libc/test/src/stdfix/exphk_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for exphk -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ExpTest.h" + +#include "src/stdfix/exphk.h" + +LIST_EXP_TESTS(hk, short accum, LIBC_NAMESPACE::exphk); diff --git a/libc/test/src/stdfix/expk_test.cpp b/libc/test/src/stdfix/expk_test.cpp new file mode 100644 index 00000000000000..bc322037af04a7 --- /dev/null +++ b/libc/test/src/stdfix/expk_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for expk ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ExpTest.h" + +#include "src/stdfix/expk.h" + +LIST_EXP_TESTS(k, accum, LIBC_NAMESPACE::expk); diff --git a/libc/test/src/string/CMakeLists.txt b/libc/test/src/string/CMakeLists.txt index 6088289532d771..c1caec5fd912c8 100644 --- a/libc/test/src/string/CMakeLists.txt +++ b/libc/test/src/string/CMakeLists.txt @@ -418,6 +418,16 @@ add_libc_test( libc.src.string.strxfrm ) +add_libc_test( + memset_explicit_test + SUITE + libc-string-tests + SRCS + memset_explicit_test.cpp + DEPENDS + libc.src.string.memset_explicit +) + # Tests all implementations that can run on the target CPU. function(add_libc_multi_impl_test name) get_property(fq_implementations GLOBAL PROPERTY ${name}_implementations) diff --git a/libc/test/src/string/memset_explicit_test.cpp b/libc/test/src/string/memset_explicit_test.cpp new file mode 100644 index 00000000000000..bb5111bd639e3a --- /dev/null +++ b/libc/test/src/string/memset_explicit_test.cpp @@ -0,0 +1,31 @@ +//===-- Unittests for memset_explicit -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "memory_utils/memory_check_utils.h" +#include "src/string/memset_explicit.h" +#include "test/UnitTest/Test.h" + +namespace LIBC_NAMESPACE { + +// Apply the same tests as memset + +static inline void Adaptor(cpp::span p1, uint8_t value, size_t size) { + LIBC_NAMESPACE::memset_explicit(p1.begin(), value, size); +} + +TEST(LlvmLibcmemsetExplicitTest, SizeSweep) { + static constexpr size_t kMaxSize = 400; + Buffer DstBuffer(kMaxSize); + for (size_t size = 0; size < kMaxSize; ++size) { + const char value = size % 10; + auto dst = DstBuffer.span().subspan(0, size); + ASSERT_TRUE((CheckMemset(dst, value, size))); + } +} + +} // namespace LIBC_NAMESPACE diff --git a/libcxx/include/__availability b/libcxx/include/__availability index 78438c55a3b7ba..bb3ed0a8da521b 100644 --- a/libcxx/include/__availability +++ b/libcxx/include/__availability @@ -72,11 +72,10 @@ # endif #endif -// Availability markup is disabled when building the library, or when the compiler +// Availability markup is disabled when building the library, or when a non-Clang +// compiler is used because only Clang supports the necessary attributes. // doesn't support the proper attributes. 
-#if defined(_LIBCPP_BUILDING_LIBRARY) || defined(_LIBCXXABI_BUILDING_LIBRARY) || \ - !__has_feature(attribute_availability_with_strict) || !__has_feature(attribute_availability_in_templates) || \ - !__has_extension(pragma_clang_attribute_external_declaration) +#if defined(_LIBCPP_BUILDING_LIBRARY) || defined(_LIBCXXABI_BUILDING_LIBRARY) || !defined(_LIBCPP_COMPILER_CLANG_BASED) # if !defined(_LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) # define _LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS # endif diff --git a/libcxx/include/__type_traits/is_convertible.h b/libcxx/include/__type_traits/is_convertible.h index bc91d8b234308a..414c2a6d6a0de0 100644 --- a/libcxx/include/__type_traits/is_convertible.h +++ b/libcxx/include/__type_traits/is_convertible.h @@ -11,12 +11,6 @@ #include <__config> #include <__type_traits/integral_constant.h> -#include <__type_traits/is_array.h> -#include <__type_traits/is_function.h> -#include <__type_traits/is_void.h> -#include <__type_traits/remove_reference.h> -#include <__utility/declval.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/libcxx/test/libcxx/vendor/apple/availability-with-pedantic-errors.compile.pass.cpp b/libcxx/test/libcxx/vendor/apple/availability-with-pedantic-errors.compile.pass.cpp new file mode 100644 index 00000000000000..c55a0a4d6e5d1b --- /dev/null +++ b/libcxx/test/libcxx/vendor/apple/availability-with-pedantic-errors.compile.pass.cpp @@ -0,0 +1,22 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: stdlib=apple-libc++ + +// Test that using -pedantic-errors doesn't turn off availability annotations. 
+// This used to be the case because we used __has_extension(...) to enable the +// availability annotations, and -pedantic-errors changes the behavior of +// __has_extension(...) in an incompatible way. + +// ADDITIONAL_COMPILE_FLAGS: -pedantic-errors + +#include <__availability> + +#if defined(_LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) +# error Availability annotations should be enabled on Apple platforms in the system configuration! +#endif diff --git a/libcxx/test/std/time/time.syn/formatter.file_time.pass.cpp b/libcxx/test/std/time/time.syn/formatter.file_time.pass.cpp index aec6e78d994da9..b07282593d759c 100644 --- a/libcxx/test/std/time/time.syn/formatter.file_time.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.file_time.pass.cpp @@ -821,7 +821,9 @@ static void test_valid_values_date_time() { // Use the global locale (fr_FR) check( // https://sourceware.org/bugzilla/show_bug.cgi?id=24054 -#if defined(__GLIBC__) && __GLIBC__ <= 2 && __GLIBC_MINOR__ < 29 +#if defined(__powerpc__) && defined(__linux__) + SV("%c='jeu. 01 janv. 1970 00:00:00 UTC'\t%Ec='jeu. 01 janv. 1970 00:00:00 UTC'\n"), +#elif defined(__GLIBC__) && __GLIBC__ <= 2 && __GLIBC_MINOR__ < 29 SV("%c='jeu. 01 janv. 1970 00:00:00 GMT'\t%Ec='jeu. 01 janv. 1970 00:00:00 GMT'\n"), #elif defined(_AIX) SV("%c=' 1 janvier 1970 à 00:00:00 UTC'\t%Ec=' 1 janvier 1970 à 00:00:00 UTC'\n"), @@ -839,7 +841,9 @@ static void test_valid_values_date_time() { check( // https://sourceware.org/bugzilla/show_bug.cgi?id=24054 -#if defined(__GLIBC__) && __GLIBC__ <= 2 && __GLIBC_MINOR__ < 29 +#if defined(__powerpc__) && defined(__linux__) + SV("%c='ven. 13 févr. 2009 23:31:30 UTC'\t%Ec='ven. 13 févr. 2009 23:31:30 UTC'\n"), +#elif defined(__GLIBC__) && __GLIBC__ <= 2 && __GLIBC_MINOR__ < 29 SV("%c='ven. 13 févr. 2009 23:31:30 GMT'\t%Ec='ven. 13 févr. 
2009 23:31:30 GMT'\n"), #elif defined(_AIX) SV("%c='13 février 2009 à 23:31:30 UTC'\t%Ec='13 février 2009 à 23:31:30 UTC'\n"), diff --git a/libcxx/test/std/time/time.syn/formatter.local_time.pass.cpp b/libcxx/test/std/time/time.syn/formatter.local_time.pass.cpp index 71dd7fba9fb701..45c3a12ea35cb2 100644 --- a/libcxx/test/std/time/time.syn/formatter.local_time.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.local_time.pass.cpp @@ -820,7 +820,9 @@ static void test_valid_values_date_time() { // Use the global locale (fr_FR) check( // https://sourceware.org/bugzilla/show_bug.cgi?id=24054 -#if defined(__GLIBC__) && __GLIBC__ <= 2 && __GLIBC_MINOR__ < 29 +#if defined(__powerpc__) && defined(__linux__) + SV("%c='jeu. 01 janv. 1970 00:00:00 UTC'\t%Ec='jeu. 01 janv. 1970 00:00:00 UTC'\n"), +#elif defined(__GLIBC__) && __GLIBC__ <= 2 && __GLIBC_MINOR__ < 29 SV("%c='jeu. 01 janv. 1970 00:00:00 GMT'\t%Ec='jeu. 01 janv. 1970 00:00:00 GMT'\n"), #elif defined(_AIX) SV("%c=' 1 janvier 1970 à 00:00:00 UTC'\t%Ec=' 1 janvier 1970 à 00:00:00 UTC'\n"), @@ -838,7 +840,9 @@ static void test_valid_values_date_time() { check( // https://sourceware.org/bugzilla/show_bug.cgi?id=24054 -#if defined(__GLIBC__) && __GLIBC__ <= 2 && __GLIBC_MINOR__ < 29 +#if defined(__powerpc__) && defined(__linux__) + SV("%c='ven. 13 févr. 2009 23:31:30 UTC'\t%Ec='ven. 13 févr. 2009 23:31:30 UTC'\n"), +#elif defined(__GLIBC__) && __GLIBC__ <= 2 && __GLIBC_MINOR__ < 29 SV("%c='ven. 13 févr. 2009 23:31:30 GMT'\t%Ec='ven. 13 févr. 
2009 23:31:30 GMT'\n"), #elif defined(_AIX) SV("%c='13 février 2009 à 23:31:30 UTC'\t%Ec='13 février 2009 à 23:31:30 UTC'\n"), diff --git a/libcxx/test/std/time/time.syn/formatter.sys_time.pass.cpp b/libcxx/test/std/time/time.syn/formatter.sys_time.pass.cpp index ebc45c7e87351f..2fed270cbade72 100644 --- a/libcxx/test/std/time/time.syn/formatter.sys_time.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.sys_time.pass.cpp @@ -817,7 +817,9 @@ static void test_valid_values_date_time() { // Use the global locale (fr_FR) check( // https://sourceware.org/bugzilla/show_bug.cgi?id=24054 -#if defined(__GLIBC__) && __GLIBC__ <= 2 && __GLIBC_MINOR__ < 29 +#if defined(__powerpc__) && defined(__linux__) + SV("%c='jeu. 01 janv. 1970 00:00:00 UTC'\t%Ec='jeu. 01 janv. 1970 00:00:00 UTC'\n"), +#elif defined(__GLIBC__) && __GLIBC__ <= 2 && __GLIBC_MINOR__ < 29 SV("%c='jeu. 01 janv. 1970 00:00:00 GMT'\t%Ec='jeu. 01 janv. 1970 00:00:00 GMT'\n"), #elif defined(_AIX) SV("%c=' 1 janvier 1970 à 00:00:00 UTC'\t%Ec=' 1 janvier 1970 à 00:00:00 UTC'\n"), @@ -835,7 +837,9 @@ static void test_valid_values_date_time() { check( // https://sourceware.org/bugzilla/show_bug.cgi?id=24054 -#if defined(__GLIBC__) && __GLIBC__ <= 2 && __GLIBC_MINOR__ < 29 +#if defined(__powerpc__) && defined(__linux__) + SV("%c='ven. 13 févr. 2009 23:31:30 UTC'\t%Ec='ven. 13 févr. 2009 23:31:30 UTC'\n"), +#elif defined(__GLIBC__) && __GLIBC__ <= 2 && __GLIBC_MINOR__ < 29 SV("%c='ven. 13 févr. 2009 23:31:30 GMT'\t%Ec='ven. 13 févr. 2009 23:31:30 GMT'\n"), #elif defined(_AIX) SV("%c='13 février 2009 à 23:31:30 UTC'\t%Ec='13 février 2009 à 23:31:30 UTC'\n"), diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake index 93c8ffe4b7d8a0..5d62213c3f5838 100644 --- a/lldb/cmake/modules/LLDBConfig.cmake +++ b/lldb/cmake/modules/LLDBConfig.cmake @@ -68,7 +68,7 @@ option(LLDB_SKIP_DSYM "Whether to skip generating a dSYM when installing lldb." 
option(LLDB_ENFORCE_STRICT_TEST_REQUIREMENTS "Fail to configure if certain requirements are not met for testing." OFF) option(LLDB_TEST_USE_VENDOR_PACKAGES - "Use packages from lldb/third_party/Python/module instead of system deps." ON) + "Use packages from lldb/third_party/Python/module instead of system deps." OFF) set(LLDB_GLOBAL_INIT_DIRECTORY "" CACHE STRING "Path to the global lldbinit directory. Relative paths are resolved relative to the diff --git a/lldb/include/lldb/Core/Progress.h b/lldb/include/lldb/Core/Progress.h index c6fc861fb71d86..c38f6dd0a140ed 100644 --- a/lldb/include/lldb/Core/Progress.h +++ b/lldb/include/lldb/Core/Progress.h @@ -148,9 +148,14 @@ class ProgressManager { static ProgressManager &Instance(); - static void ReportProgress(const Progress::ProgressData &); - private: + enum class EventType { + Begin, + End, + }; + static void ReportProgress(const Progress::ProgressData &progress_data, + EventType type); + llvm::StringMap> m_progress_category_map; std::mutex m_progress_map_mutex; diff --git a/lldb/include/lldb/Core/ValueObject.h b/lldb/include/lldb/Core/ValueObject.h index 4c0b0b2dae6cd4..b4d2c8098edc71 100644 --- a/lldb/include/lldb/Core/ValueObject.h +++ b/lldb/include/lldb/Core/ValueObject.h @@ -465,7 +465,7 @@ class ValueObject { /// Returns a unique id for this ValueObject. lldb::user_id_t GetID() const { return m_id.GetID(); } - virtual lldb::ValueObjectSP GetChildAtIndex(size_t idx, + virtual lldb::ValueObjectSP GetChildAtIndex(uint32_t idx, bool can_create = true); // The method always creates missing children in the path, if necessary. 
@@ -476,7 +476,7 @@ class ValueObject { virtual size_t GetIndexOfChildWithName(llvm::StringRef name); - size_t GetNumChildren(uint32_t max = UINT32_MAX); + uint32_t GetNumChildren(uint32_t max = UINT32_MAX); const Value &GetValue() const { return m_value; } @@ -791,7 +791,7 @@ class ValueObject { return (m_children.find(idx) != m_children.end()); } - ValueObject *GetChildAtIndex(size_t idx) { + ValueObject *GetChildAtIndex(uint32_t idx) { std::lock_guard guard(m_mutex); const auto iter = m_children.find(idx); return ((iter == m_children.end()) ? nullptr : iter->second); @@ -958,9 +958,9 @@ class ValueObject { int32_t synthetic_index); /// Should only be called by ValueObject::GetNumChildren(). - virtual size_t CalculateNumChildren(uint32_t max = UINT32_MAX) = 0; + virtual uint32_t CalculateNumChildren(uint32_t max = UINT32_MAX) = 0; - void SetNumChildren(size_t num_children); + void SetNumChildren(uint32_t num_children); void SetValueDidChange(bool value_changed) { m_flags.m_value_did_change = value_changed; diff --git a/lldb/include/lldb/Core/ValueObjectCast.h b/lldb/include/lldb/Core/ValueObjectCast.h index fe053c12d9c343..51c647680d5227 100644 --- a/lldb/include/lldb/Core/ValueObjectCast.h +++ b/lldb/include/lldb/Core/ValueObjectCast.h @@ -33,7 +33,7 @@ class ValueObjectCast : public ValueObject { std::optional GetByteSize() override; - size_t CalculateNumChildren(uint32_t max) override; + uint32_t CalculateNumChildren(uint32_t max) override; lldb::ValueType GetValueType() const override; diff --git a/lldb/include/lldb/Core/ValueObjectChild.h b/lldb/include/lldb/Core/ValueObjectChild.h index 46b14e6840f0dc..47a13be08bb83b 100644 --- a/lldb/include/lldb/Core/ValueObjectChild.h +++ b/lldb/include/lldb/Core/ValueObjectChild.h @@ -39,7 +39,7 @@ class ValueObjectChild : public ValueObject { lldb::ValueType GetValueType() const override; - size_t CalculateNumChildren(uint32_t max) override; + uint32_t CalculateNumChildren(uint32_t max) override; ConstString 
GetTypeName() override; diff --git a/lldb/include/lldb/Core/ValueObjectConstResult.h b/lldb/include/lldb/Core/ValueObjectConstResult.h index d61df859bebce4..9f1246cf2a7874 100644 --- a/lldb/include/lldb/Core/ValueObjectConstResult.h +++ b/lldb/include/lldb/Core/ValueObjectConstResult.h @@ -67,7 +67,7 @@ class ValueObjectConstResult : public ValueObject { lldb::ValueType GetValueType() const override; - size_t CalculateNumChildren(uint32_t max) override; + uint32_t CalculateNumChildren(uint32_t max) override; ConstString GetTypeName() override; diff --git a/lldb/include/lldb/Core/ValueObjectDynamicValue.h b/lldb/include/lldb/Core/ValueObjectDynamicValue.h index 2758b4e5bb564d..21a9b409fd5bd7 100644 --- a/lldb/include/lldb/Core/ValueObjectDynamicValue.h +++ b/lldb/include/lldb/Core/ValueObjectDynamicValue.h @@ -43,7 +43,7 @@ class ValueObjectDynamicValue : public ValueObject { ConstString GetDisplayTypeName() override; - size_t CalculateNumChildren(uint32_t max) override; + uint32_t CalculateNumChildren(uint32_t max) override; lldb::ValueType GetValueType() const override; diff --git a/lldb/include/lldb/Core/ValueObjectMemory.h b/lldb/include/lldb/Core/ValueObjectMemory.h index 3c01df388d2e6d..a74b325546b03c 100644 --- a/lldb/include/lldb/Core/ValueObjectMemory.h +++ b/lldb/include/lldb/Core/ValueObjectMemory.h @@ -47,7 +47,7 @@ class ValueObjectMemory : public ValueObject { ConstString GetDisplayTypeName() override; - size_t CalculateNumChildren(uint32_t max) override; + uint32_t CalculateNumChildren(uint32_t max) override; lldb::ValueType GetValueType() const override; diff --git a/lldb/include/lldb/Core/ValueObjectRegister.h b/lldb/include/lldb/Core/ValueObjectRegister.h index 2e47eee3d7f793..6c470c1a686503 100644 --- a/lldb/include/lldb/Core/ValueObjectRegister.h +++ b/lldb/include/lldb/Core/ValueObjectRegister.h @@ -47,7 +47,7 @@ class ValueObjectRegisterSet : public ValueObject { ConstString GetQualifiedTypeName() override; - size_t 
CalculateNumChildren(uint32_t max) override; + uint32_t CalculateNumChildren(uint32_t max) override; ValueObject *CreateChildAtIndex(size_t idx, bool synthetic_array_member, int32_t synthetic_index) override; @@ -95,7 +95,7 @@ class ValueObjectRegister : public ValueObject { ConstString GetTypeName() override; - size_t CalculateNumChildren(uint32_t max) override; + uint32_t CalculateNumChildren(uint32_t max) override; bool SetValueFromCString(const char *value_str, Status &error) override; diff --git a/lldb/include/lldb/Core/ValueObjectSyntheticFilter.h b/lldb/include/lldb/Core/ValueObjectSyntheticFilter.h index 67596232eafd1e..1e54babc94f395 100644 --- a/lldb/include/lldb/Core/ValueObjectSyntheticFilter.h +++ b/lldb/include/lldb/Core/ValueObjectSyntheticFilter.h @@ -47,11 +47,11 @@ class ValueObjectSynthetic : public ValueObject { bool MightHaveChildren() override; - size_t CalculateNumChildren(uint32_t max) override; + uint32_t CalculateNumChildren(uint32_t max) override; lldb::ValueType GetValueType() const override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx, + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx, bool can_create = true) override; lldb::ValueObjectSP GetChildMemberWithName(llvm::StringRef name, diff --git a/lldb/include/lldb/Core/ValueObjectVTable.h b/lldb/include/lldb/Core/ValueObjectVTable.h index 217ff8d0d334ce..e7e14fc83d7892 100644 --- a/lldb/include/lldb/Core/ValueObjectVTable.h +++ b/lldb/include/lldb/Core/ValueObjectVTable.h @@ -64,7 +64,7 @@ class ValueObjectVTable : public ValueObject { std::optional GetByteSize() override; - size_t CalculateNumChildren(uint32_t max) override; + uint32_t CalculateNumChildren(uint32_t max) override; ValueObject *CreateChildAtIndex(size_t idx, bool synthetic_array_member, int32_t synthetic_index) override; diff --git a/lldb/include/lldb/Core/ValueObjectVariable.h b/lldb/include/lldb/Core/ValueObjectVariable.h index bba28ce567b2a0..da270300df0b30 100644 --- 
a/lldb/include/lldb/Core/ValueObjectVariable.h +++ b/lldb/include/lldb/Core/ValueObjectVariable.h @@ -46,7 +46,7 @@ class ValueObjectVariable : public ValueObject { ConstString GetDisplayTypeName() override; - size_t CalculateNumChildren(uint32_t max) override; + uint32_t CalculateNumChildren(uint32_t max) override; lldb::ValueType GetValueType() const override; diff --git a/lldb/include/lldb/DataFormatters/TypeSynthetic.h b/lldb/include/lldb/DataFormatters/TypeSynthetic.h index 23cc054b399a67..38f3ce0fa5f011 100644 --- a/lldb/include/lldb/DataFormatters/TypeSynthetic.h +++ b/lldb/include/lldb/DataFormatters/TypeSynthetic.h @@ -38,14 +38,14 @@ class SyntheticChildrenFrontEnd { virtual ~SyntheticChildrenFrontEnd() = default; - virtual size_t CalculateNumChildren() = 0; + virtual uint32_t CalculateNumChildren() = 0; - virtual size_t CalculateNumChildren(uint32_t max) { + virtual uint32_t CalculateNumChildren(uint32_t max) { auto count = CalculateNumChildren(); return count <= max ? count : max; } - virtual lldb::ValueObjectSP GetChildAtIndex(size_t idx) = 0; + virtual lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) = 0; virtual size_t GetIndexOfChildWithName(ConstString name) = 0; @@ -109,9 +109,9 @@ class SyntheticValueProviderFrontEnd : public SyntheticChildrenFrontEnd { ~SyntheticValueProviderFrontEnd() override = default; - size_t CalculateNumChildren() override { return 0; } + uint32_t CalculateNumChildren() override { return 0; } - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override { return nullptr; } + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override { return nullptr; } size_t GetIndexOfChildWithName(ConstString name) override { return UINT32_MAX; @@ -322,9 +322,9 @@ class TypeFilterImpl : public SyntheticChildren { ~FrontEnd() override = default; - size_t CalculateNumChildren() override { return filter->GetCount(); } + uint32_t CalculateNumChildren() override { return filter->GetCount(); } - lldb::ValueObjectSP GetChildAtIndex(size_t idx) 
override { + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override { if (idx >= filter->GetCount()) return lldb::ValueObjectSP(); return m_backend.GetSyntheticExpressionPathChild( @@ -426,11 +426,11 @@ class ScriptedSyntheticChildren : public SyntheticChildren { bool IsValid(); - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - size_t CalculateNumChildren(uint32_t max) override; + uint32_t CalculateNumChildren(uint32_t max) override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; diff --git a/lldb/include/lldb/DataFormatters/VectorIterator.h b/lldb/include/lldb/DataFormatters/VectorIterator.h index 5f774bb72c3a3a..7711b9de95dba8 100644 --- a/lldb/include/lldb/DataFormatters/VectorIterator.h +++ b/lldb/include/lldb/DataFormatters/VectorIterator.h @@ -24,9 +24,9 @@ class VectorIteratorSyntheticFrontEnd : public SyntheticChildrenFrontEnd { VectorIteratorSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp, llvm::ArrayRef item_names); - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; diff --git a/lldb/include/lldb/Target/StackFrameRecognizer.h b/lldb/include/lldb/Target/StackFrameRecognizer.h index 419f0c0aac1f86..e111f4a4dc7029 100644 --- a/lldb/include/lldb/Target/StackFrameRecognizer.h +++ b/lldb/include/lldb/Target/StackFrameRecognizer.h @@ -164,7 +164,7 @@ class ValueObjectRecognizerSynthesizedValue : public ValueObject { m_value = m_parent->GetValue(); return true; } - size_t CalculateNumChildren(uint32_t max = UINT32_MAX) override { + uint32_t CalculateNumChildren(uint32_t max = UINT32_MAX) override { return m_parent->GetNumChildren(max); } CompilerType GetCompilerTypeImpl() override { diff --git 
a/lldb/packages/Python/lldbsuite/test/lldbplatformutil.py b/lldb/packages/Python/lldbsuite/test/lldbplatformutil.py index c4d063d3cc77ef..187d16aa1baa68 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbplatformutil.py +++ b/lldb/packages/Python/lldbsuite/test/lldbplatformutil.py @@ -3,6 +3,7 @@ # System modules import itertools +import json import re import subprocess import sys @@ -16,6 +17,7 @@ from . import lldbtest_config import lldbsuite.test.lldbplatform as lldbplatform from lldbsuite.test.builders import get_builder +from lldbsuite.test.lldbutil import is_exe def check_first_register_readable(test_case): @@ -333,3 +335,28 @@ def expectedCompiler(compilers): return True return False + + +# This is a helper function to determine if a specific version of Xcode's linker +# contains a TLS bug. We want to skip TLS tests if they contain this bug, but +# adding a linker/linker_version conditions to a decorator is challenging due to +# the number of ways linkers can enter the build process. +def xcode15LinkerBug(): + """Returns true iff a test is running on a darwin platform and the host linker is between versions 1000 and 1109.""" + darwin_platforms = lldbplatform.translate(lldbplatform.darwin_all) + if getPlatform() not in darwin_platforms: + return False + + try: + raw_version_details = subprocess.check_output( + ("xcrun", "ld", "-version_details") + ) + version_details = json.loads(raw_version_details) + version = version_details.get("version", "0") + version_tuple = tuple(int(x) for x in version.split(".")) + if (1000,) <= version_tuple <= (1109,): + return True + except: + pass + + return False diff --git a/lldb/source/Core/Progress.cpp b/lldb/source/Core/Progress.cpp index 9dcd7cf75ae057..b4b5e98b7ba493 100644 --- a/lldb/source/Core/Progress.cpp +++ b/lldb/source/Core/Progress.cpp @@ -97,7 +97,7 @@ void ProgressManager::Increment(const Progress::ProgressData &progress_data) { // initial progress report. 
if (!m_progress_category_map.contains(progress_data.title)) { m_progress_category_map[progress_data.title].second = progress_data; - ReportProgress(progress_data); + ReportProgress(progress_data, EventType::Begin); } m_progress_category_map[progress_data.title].first++; } @@ -110,7 +110,7 @@ void ProgressManager::Decrement(const Progress::ProgressData &progress_data) { return; if (pos->second.first <= 1) { - ReportProgress(pos->second.second); + ReportProgress(pos->second.second, EventType::End); m_progress_category_map.erase(progress_data.title); } else { --pos->second.first; @@ -118,12 +118,14 @@ void ProgressManager::Decrement(const Progress::ProgressData &progress_data) { } void ProgressManager::ReportProgress( - const Progress::ProgressData &progress_data) { + const Progress::ProgressData &progress_data, EventType type) { // The category bit only keeps track of when progress report categories have // started and ended, so clear the details and reset other fields when // broadcasting to it since that bit doesn't need that information. - Debugger::ReportProgress( - progress_data.progress_id, progress_data.title, "", - Progress::kNonDeterministicTotal, Progress::kNonDeterministicTotal, - progress_data.debugger_id, Debugger::eBroadcastBitProgressCategory); + const uint64_t completed = + (type == EventType::Begin) ? 
0 : Progress::kNonDeterministicTotal; + Debugger::ReportProgress(progress_data.progress_id, progress_data.title, "", + completed, Progress::kNonDeterministicTotal, + progress_data.debugger_id, + Debugger::eBroadcastBitProgressCategory); } diff --git a/lldb/source/Core/ValueObject.cpp b/lldb/source/Core/ValueObject.cpp index 840b100c70ddaa..0ed7f03be25c16 100644 --- a/lldb/source/Core/ValueObject.cpp +++ b/lldb/source/Core/ValueObject.cpp @@ -372,7 +372,7 @@ bool ValueObject::IsLogicalTrue(Status &error) { return ret; } -ValueObjectSP ValueObject::GetChildAtIndex(size_t idx, bool can_create) { +ValueObjectSP ValueObject::GetChildAtIndex(uint32_t idx, bool can_create) { ValueObjectSP child_sp; // We may need to update our value if we are dynamic if (IsPossibleDynamicType()) @@ -440,7 +440,7 @@ ValueObjectSP ValueObject::GetChildMemberWithName(llvm::StringRef name, return child_sp; } -size_t ValueObject::GetNumChildren(uint32_t max) { +uint32_t ValueObject::GetNumChildren(uint32_t max) { UpdateValueIfNeeded(); if (max < UINT32_MAX) { @@ -470,7 +470,7 @@ bool ValueObject::MightHaveChildren() { } // Should only be called by ValueObject::GetNumChildren() -void ValueObject::SetNumChildren(size_t num_children) { +void ValueObject::SetNumChildren(uint32_t num_children) { m_flags.m_children_count_valid = true; m_children.SetChildrenCount(num_children); } diff --git a/lldb/source/Core/ValueObjectCast.cpp b/lldb/source/Core/ValueObjectCast.cpp index 0882d4b3677619..a5c555f86b1372 100644 --- a/lldb/source/Core/ValueObjectCast.cpp +++ b/lldb/source/Core/ValueObjectCast.cpp @@ -41,7 +41,7 @@ ValueObjectCast::~ValueObjectCast() = default; CompilerType ValueObjectCast::GetCompilerTypeImpl() { return m_cast_type; } -size_t ValueObjectCast::CalculateNumChildren(uint32_t max) { +uint32_t ValueObjectCast::CalculateNumChildren(uint32_t max) { ExecutionContext exe_ctx(GetExecutionContextRef()); auto children_count = GetCompilerType().GetNumChildren( true, &exe_ctx); diff --git 
a/lldb/source/Core/ValueObjectChild.cpp b/lldb/source/Core/ValueObjectChild.cpp index 39067387dc9782..2e55dd7726bdd9 100644 --- a/lldb/source/Core/ValueObjectChild.cpp +++ b/lldb/source/Core/ValueObjectChild.cpp @@ -49,7 +49,7 @@ lldb::ValueType ValueObjectChild::GetValueType() const { return m_parent->GetValueType(); } -size_t ValueObjectChild::CalculateNumChildren(uint32_t max) { +uint32_t ValueObjectChild::CalculateNumChildren(uint32_t max) { ExecutionContext exe_ctx(GetExecutionContextRef()); auto children_count = GetCompilerType().GetNumChildren(true, &exe_ctx); return children_count <= max ? children_count : max; diff --git a/lldb/source/Core/ValueObjectConstResult.cpp b/lldb/source/Core/ValueObjectConstResult.cpp index 693da1a551f8eb..5c7aa4452b70db 100644 --- a/lldb/source/Core/ValueObjectConstResult.cpp +++ b/lldb/source/Core/ValueObjectConstResult.cpp @@ -216,7 +216,7 @@ std::optional ValueObjectConstResult::GetByteSize() { void ValueObjectConstResult::SetByteSize(size_t size) { m_byte_size = size; } -size_t ValueObjectConstResult::CalculateNumChildren(uint32_t max) { +uint32_t ValueObjectConstResult::CalculateNumChildren(uint32_t max) { ExecutionContext exe_ctx(GetExecutionContextRef()); auto children_count = GetCompilerType().GetNumChildren(true, &exe_ctx); return children_count <= max ? 
children_count : max; diff --git a/lldb/source/Core/ValueObjectDynamicValue.cpp b/lldb/source/Core/ValueObjectDynamicValue.cpp index e6e30dce9d1e4a..4e64760371ae52 100644 --- a/lldb/source/Core/ValueObjectDynamicValue.cpp +++ b/lldb/source/Core/ValueObjectDynamicValue.cpp @@ -85,7 +85,7 @@ ConstString ValueObjectDynamicValue::GetDisplayTypeName() { return m_parent->GetDisplayTypeName(); } -size_t ValueObjectDynamicValue::CalculateNumChildren(uint32_t max) { +uint32_t ValueObjectDynamicValue::CalculateNumChildren(uint32_t max) { const bool success = UpdateValueIfNeeded(false); if (success && m_dynamic_type_info.HasType()) { ExecutionContext exe_ctx(GetExecutionContextRef()); diff --git a/lldb/source/Core/ValueObjectMemory.cpp b/lldb/source/Core/ValueObjectMemory.cpp index 3f125a7bee8c77..7f68236c7884ec 100644 --- a/lldb/source/Core/ValueObjectMemory.cpp +++ b/lldb/source/Core/ValueObjectMemory.cpp @@ -126,7 +126,7 @@ ConstString ValueObjectMemory::GetDisplayTypeName() { return m_compiler_type.GetDisplayTypeName(); } -size_t ValueObjectMemory::CalculateNumChildren(uint32_t max) { +uint32_t ValueObjectMemory::CalculateNumChildren(uint32_t max) { if (m_type_sp) { auto child_count = m_type_sp->GetNumChildren(true); return child_count <= max ? 
child_count : max; diff --git a/lldb/source/Core/ValueObjectRegister.cpp b/lldb/source/Core/ValueObjectRegister.cpp index c2b84c11347359..d4c144cc7edb9a 100644 --- a/lldb/source/Core/ValueObjectRegister.cpp +++ b/lldb/source/Core/ValueObjectRegister.cpp @@ -74,7 +74,7 @@ ConstString ValueObjectRegisterSet::GetQualifiedTypeName() { return ConstString(); } -size_t ValueObjectRegisterSet::CalculateNumChildren(uint32_t max) { +uint32_t ValueObjectRegisterSet::CalculateNumChildren(uint32_t max) { const RegisterSet *reg_set = m_reg_ctx_sp->GetRegisterSet(m_reg_set_idx); if (reg_set) { auto reg_count = reg_set->num_registers; @@ -220,7 +220,7 @@ ConstString ValueObjectRegister::GetTypeName() { return m_type_name; } -size_t ValueObjectRegister::CalculateNumChildren(uint32_t max) { +uint32_t ValueObjectRegister::CalculateNumChildren(uint32_t max) { ExecutionContext exe_ctx(GetExecutionContextRef()); auto children_count = GetCompilerType().GetNumChildren(true, &exe_ctx); return children_count <= max ? 
children_count : max; diff --git a/lldb/source/Core/ValueObjectSyntheticFilter.cpp b/lldb/source/Core/ValueObjectSyntheticFilter.cpp index e8b4b02d11a0bb..7f8a9a34cb35df 100644 --- a/lldb/source/Core/ValueObjectSyntheticFilter.cpp +++ b/lldb/source/Core/ValueObjectSyntheticFilter.cpp @@ -31,9 +31,9 @@ class DummySyntheticFrontEnd : public SyntheticChildrenFrontEnd { DummySyntheticFrontEnd(ValueObject &backend) : SyntheticChildrenFrontEnd(backend) {} - size_t CalculateNumChildren() override { return m_backend.GetNumChildren(); } + uint32_t CalculateNumChildren() override { return m_backend.GetNumChildren(); } - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override { + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override { return m_backend.GetChildAtIndex(idx); } @@ -84,7 +84,7 @@ ConstString ValueObjectSynthetic::GetDisplayTypeName() { return m_parent->GetDisplayTypeName(); } -size_t ValueObjectSynthetic::CalculateNumChildren(uint32_t max) { +uint32_t ValueObjectSynthetic::CalculateNumChildren(uint32_t max) { Log *log = GetLog(LLDBLog::DataFormatters); UpdateValueIfNeeded(); @@ -236,13 +236,13 @@ bool ValueObjectSynthetic::UpdateValue() { return true; } -lldb::ValueObjectSP ValueObjectSynthetic::GetChildAtIndex(size_t idx, +lldb::ValueObjectSP ValueObjectSynthetic::GetChildAtIndex(uint32_t idx, bool can_create) { Log *log = GetLog(LLDBLog::DataFormatters); LLDB_LOGF(log, "[ValueObjectSynthetic::GetChildAtIndex] name=%s, retrieving " - "child at index %zu", + "child at index %u", GetName().AsCString(), idx); UpdateValueIfNeeded(); @@ -261,7 +261,7 @@ lldb::ValueObjectSP ValueObjectSynthetic::GetChildAtIndex(size_t idx, if (can_create && m_synth_filter_up != nullptr) { LLDB_LOGF(log, "[ValueObjectSynthetic::GetChildAtIndex] name=%s, child at " - "index %zu not cached and will be created", + "index %u not cached and will be created", GetName().AsCString(), idx); lldb::ValueObjectSP synth_guy = m_synth_filter_up->GetChildAtIndex(idx); @@ -269,7 +269,7 @@ 
lldb::ValueObjectSP ValueObjectSynthetic::GetChildAtIndex(size_t idx, LLDB_LOGF( log, "[ValueObjectSynthetic::GetChildAtIndex] name=%s, child at index " - "%zu created as %p (is " + "%u created as %p (is " "synthetic: %s)", GetName().AsCString(), idx, static_cast(synth_guy.get()), synth_guy.get() @@ -291,7 +291,7 @@ lldb::ValueObjectSP ValueObjectSynthetic::GetChildAtIndex(size_t idx, } else { LLDB_LOGF(log, "[ValueObjectSynthetic::GetChildAtIndex] name=%s, child at " - "index %zu not cached and cannot " + "index %u not cached and cannot " "be created (can_create = %s, synth_filter = %p)", GetName().AsCString(), idx, can_create ? "yes" : "no", static_cast(m_synth_filter_up.get())); @@ -301,7 +301,7 @@ lldb::ValueObjectSP ValueObjectSynthetic::GetChildAtIndex(size_t idx, } else { LLDB_LOGF(log, "[ValueObjectSynthetic::GetChildAtIndex] name=%s, child at " - "index %zu cached as %p", + "index %u cached as %p", GetName().AsCString(), idx, static_cast(valobj)); return valobj->GetSP(); diff --git a/lldb/source/Core/ValueObjectVTable.cpp b/lldb/source/Core/ValueObjectVTable.cpp index 177ae4167a1d45..4d1cbb8d2f6fc2 100644 --- a/lldb/source/Core/ValueObjectVTable.cpp +++ b/lldb/source/Core/ValueObjectVTable.cpp @@ -33,7 +33,7 @@ class ValueObjectVTableChild : public ValueObject { std::optional GetByteSize() override { return m_addr_size; }; - size_t CalculateNumChildren(uint32_t max) override { return 0; }; + uint32_t CalculateNumChildren(uint32_t max) override { return 0; }; ValueType GetValueType() const override { return eValueTypeVTableEntry; }; @@ -159,7 +159,7 @@ std::optional ValueObjectVTable::GetByteSize() { return std::nullopt; } -size_t ValueObjectVTable::CalculateNumChildren(uint32_t max) { +uint32_t ValueObjectVTable::CalculateNumChildren(uint32_t max) { if (UpdateValueIfNeeded(false)) return m_num_vtable_entries <= max ? 
m_num_vtable_entries : max; return 0; diff --git a/lldb/source/Core/ValueObjectVariable.cpp b/lldb/source/Core/ValueObjectVariable.cpp index 9f8df847f28a8e..dc62bb6358dc97 100644 --- a/lldb/source/Core/ValueObjectVariable.cpp +++ b/lldb/source/Core/ValueObjectVariable.cpp @@ -94,7 +94,7 @@ ConstString ValueObjectVariable::GetQualifiedTypeName() { return ConstString(); } -size_t ValueObjectVariable::CalculateNumChildren(uint32_t max) { +uint32_t ValueObjectVariable::CalculateNumChildren(uint32_t max) { CompilerType type(GetCompilerType()); if (!type.IsValid()) diff --git a/lldb/source/DataFormatters/TypeSynthetic.cpp b/lldb/source/DataFormatters/TypeSynthetic.cpp index 8a6f132a39577a..0ae38c4d31f26b 100644 --- a/lldb/source/DataFormatters/TypeSynthetic.cpp +++ b/lldb/source/DataFormatters/TypeSynthetic.cpp @@ -167,7 +167,7 @@ ScriptedSyntheticChildren::FrontEnd::FrontEnd(std::string pclass, ScriptedSyntheticChildren::FrontEnd::~FrontEnd() = default; lldb::ValueObjectSP -ScriptedSyntheticChildren::FrontEnd::GetChildAtIndex(size_t idx) { +ScriptedSyntheticChildren::FrontEnd::GetChildAtIndex(uint32_t idx) { if (!m_wrapper_sp || !m_interpreter) return lldb::ValueObjectSP(); @@ -178,13 +178,13 @@ bool ScriptedSyntheticChildren::FrontEnd::IsValid() { return (m_wrapper_sp && m_wrapper_sp->IsValid() && m_interpreter); } -size_t ScriptedSyntheticChildren::FrontEnd::CalculateNumChildren() { +uint32_t ScriptedSyntheticChildren::FrontEnd::CalculateNumChildren() { if (!m_wrapper_sp || m_interpreter == nullptr) return 0; return m_interpreter->CalculateNumChildren(m_wrapper_sp, UINT32_MAX); } -size_t ScriptedSyntheticChildren::FrontEnd::CalculateNumChildren(uint32_t max) { +uint32_t ScriptedSyntheticChildren::FrontEnd::CalculateNumChildren(uint32_t max) { if (!m_wrapper_sp || m_interpreter == nullptr) return 0; return m_interpreter->CalculateNumChildren(m_wrapper_sp, max); diff --git a/lldb/source/DataFormatters/VectorType.cpp b/lldb/source/DataFormatters/VectorType.cpp index 
c94ca68319ee2c..a0626a8cba7788 100644 --- a/lldb/source/DataFormatters/VectorType.cpp +++ b/lldb/source/DataFormatters/VectorType.cpp @@ -224,9 +224,9 @@ class VectorTypeSyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~VectorTypeSyntheticFrontEnd() override = default; - size_t CalculateNumChildren() override { return m_num_children; } + uint32_t CalculateNumChildren() override { return m_num_children; } - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override { + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override { if (idx >= CalculateNumChildren()) return {}; std::optional size = m_child_type.GetByteSize(nullptr); diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp index 2d778e410b0e73..024fc75a5dd590 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp @@ -67,12 +67,11 @@ class StoringDiagnosticConsumer : public clang::DiagnosticConsumer { std::vector m_diagnostics; /// The DiagnosticPrinter used for creating the full diagnostic messages /// that are stored in m_diagnostics. - std::shared_ptr m_diag_printer; + std::unique_ptr m_diag_printer; /// Output stream of m_diag_printer. - std::shared_ptr m_os; + std::unique_ptr m_os; /// Output string filled by m_os. Will be reused for different diagnostics. std::string m_output; - Log *m_log; /// A Progress with explicitly managed lifetime. 
std::unique_ptr m_current_progress_up; std::vector m_module_build_stack; @@ -134,12 +133,10 @@ class ClangModulesDeclVendorImpl : public ClangModulesDeclVendor { } // anonymous namespace StoringDiagnosticConsumer::StoringDiagnosticConsumer() { - m_log = GetLog(LLDBLog::Expressions); - - clang::DiagnosticOptions *m_options = new clang::DiagnosticOptions(); - m_os = std::make_shared(m_output); + auto *options = new clang::DiagnosticOptions(); + m_os = std::make_unique(m_output); m_diag_printer = - std::make_shared(*m_os, m_options); + std::make_unique(*m_os, options); } void StoringDiagnosticConsumer::HandleDiagnostic( diff --git a/lldb/source/Plugins/Language/CPlusPlus/BlockPointer.cpp b/lldb/source/Plugins/Language/CPlusPlus/BlockPointer.cpp index 2e43aa3fa1d8bf..ef0f67d1e9f9e7 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/BlockPointer.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/BlockPointer.cpp @@ -74,12 +74,12 @@ class BlockPointerSyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~BlockPointerSyntheticFrontEnd() override = default; - size_t CalculateNumChildren() override { + uint32_t CalculateNumChildren() override { const bool omit_empty_base_classes = false; return m_block_struct_type.GetNumChildren(omit_empty_base_classes, nullptr); } - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override { + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override { if (!m_block_struct_type.IsValid()) { return lldb::ValueObjectSP(); } diff --git a/lldb/source/Plugins/Language/CPlusPlus/Coroutines.cpp b/lldb/source/Plugins/Language/CPlusPlus/Coroutines.cpp index 742017438bcf4a..3827f9c21effab 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/Coroutines.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/Coroutines.cpp @@ -104,7 +104,7 @@ lldb_private::formatters::StdlibCoroutineHandleSyntheticFrontEnd:: lldb_private::formatters::StdlibCoroutineHandleSyntheticFrontEnd:: ~StdlibCoroutineHandleSyntheticFrontEnd() = default; -size_t 
lldb_private::formatters::StdlibCoroutineHandleSyntheticFrontEnd:: +uint32_t lldb_private::formatters::StdlibCoroutineHandleSyntheticFrontEnd:: CalculateNumChildren() { if (!m_resume_ptr_sp || !m_destroy_ptr_sp) return 0; @@ -113,7 +113,7 @@ size_t lldb_private::formatters::StdlibCoroutineHandleSyntheticFrontEnd:: } lldb::ValueObjectSP lldb_private::formatters:: - StdlibCoroutineHandleSyntheticFrontEnd::GetChildAtIndex(size_t idx) { + StdlibCoroutineHandleSyntheticFrontEnd::GetChildAtIndex(uint32_t idx) { switch (idx) { case 0: return m_resume_ptr_sp; diff --git a/lldb/source/Plugins/Language/CPlusPlus/Coroutines.h b/lldb/source/Plugins/Language/CPlusPlus/Coroutines.h index d38c7ecefa6e13..5c6a80b57ff424 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/Coroutines.h +++ b/lldb/source/Plugins/Language/CPlusPlus/Coroutines.h @@ -34,9 +34,9 @@ class StdlibCoroutineHandleSyntheticFrontEnd ~StdlibCoroutineHandleSyntheticFrontEnd() override; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; diff --git a/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp b/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp index ac316638523584..6a9da1d17c7620 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp @@ -34,8 +34,8 @@ class GenericBitsetFrontEnd : public SyntheticChildrenFrontEnd { bool MightHaveChildren() override { return true; } lldb::ChildCacheState Update() override; - size_t CalculateNumChildren() override { return m_elements.size(); } - ValueObjectSP GetChildAtIndex(size_t idx) override; + uint32_t CalculateNumChildren() override { return m_elements.size(); } + ValueObjectSP GetChildAtIndex(uint32_t idx) override; private: llvm::StringRef GetDataContainerMemberName(); @@ -97,7 +97,7 
@@ lldb::ChildCacheState GenericBitsetFrontEnd::Update() { return lldb::ChildCacheState::eRefetch; } -ValueObjectSP GenericBitsetFrontEnd::GetChildAtIndex(size_t idx) { +ValueObjectSP GenericBitsetFrontEnd::GetChildAtIndex(uint32_t idx) { if (idx >= m_elements.size() || !m_first) return ValueObjectSP(); diff --git a/lldb/source/Plugins/Language/CPlusPlus/GenericOptional.cpp b/lldb/source/Plugins/Language/CPlusPlus/GenericOptional.cpp index 57331eaa986890..c06afb53eb8aad 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/GenericOptional.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/GenericOptional.cpp @@ -41,9 +41,9 @@ class GenericOptionalFrontend : public SyntheticChildrenFrontEnd { } bool MightHaveChildren() override { return true; } - size_t CalculateNumChildren() override { return m_has_value ? 1U : 0U; } + uint32_t CalculateNumChildren() override { return m_has_value ? 1U : 0U; } - ValueObjectSP GetChildAtIndex(size_t idx) override; + ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; private: @@ -81,7 +81,7 @@ lldb::ChildCacheState GenericOptionalFrontend::Update() { return lldb::ChildCacheState::eRefetch; } -ValueObjectSP GenericOptionalFrontend::GetChildAtIndex(size_t _idx) { +ValueObjectSP GenericOptionalFrontend::GetChildAtIndex(uint32_t _idx) { if (!m_has_value) return ValueObjectSP(); diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp index 7893aa7cc1f9df..bba887fec3ac3f 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp @@ -351,14 +351,14 @@ lldb_private::formatters::LibCxxMapIteratorSyntheticFrontEnd::Update() { return lldb::ChildCacheState::eRefetch; } -size_t lldb_private::formatters::LibCxxMapIteratorSyntheticFrontEnd:: +uint32_t lldb_private::formatters::LibCxxMapIteratorSyntheticFrontEnd:: CalculateNumChildren() { return 2; } lldb::ValueObjectSP 
lldb_private::formatters::LibCxxMapIteratorSyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { if (m_pair_ptr) return m_pair_ptr->GetChildAtIndex(idx); if (m_pair_sp) @@ -509,13 +509,13 @@ lldb::ChildCacheState lldb_private::formatters:: return lldb::ChildCacheState::eRefetch; } -size_t lldb_private::formatters::LibCxxUnorderedMapIteratorSyntheticFrontEnd:: +uint32_t lldb_private::formatters::LibCxxUnorderedMapIteratorSyntheticFrontEnd:: CalculateNumChildren() { return 2; } lldb::ValueObjectSP lldb_private::formatters:: - LibCxxUnorderedMapIteratorSyntheticFrontEnd::GetChildAtIndex(size_t idx) { + LibCxxUnorderedMapIteratorSyntheticFrontEnd::GetChildAtIndex(uint32_t idx) { if (m_pair_sp) return m_pair_sp->GetChildAtIndex(idx); return lldb::ValueObjectSP(); @@ -566,14 +566,14 @@ lldb_private::formatters::LibcxxSharedPtrSyntheticFrontEnd:: Update(); } -size_t lldb_private::formatters::LibcxxSharedPtrSyntheticFrontEnd:: +uint32_t lldb_private::formatters::LibcxxSharedPtrSyntheticFrontEnd:: CalculateNumChildren() { return (m_cntrl ? 1 : 0); } lldb::ValueObjectSP lldb_private::formatters::LibcxxSharedPtrSyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { if (!m_cntrl) return lldb::ValueObjectSP(); @@ -661,7 +661,7 @@ lldb_private::formatters::LibcxxUniquePtrSyntheticFrontEndCreator( : nullptr); } -size_t lldb_private::formatters::LibcxxUniquePtrSyntheticFrontEnd:: +uint32_t lldb_private::formatters::LibcxxUniquePtrSyntheticFrontEnd:: CalculateNumChildren() { if (m_value_ptr_sp) return m_deleter_sp ? 
2 : 1; @@ -670,7 +670,7 @@ size_t lldb_private::formatters::LibcxxUniquePtrSyntheticFrontEnd:: lldb::ValueObjectSP lldb_private::formatters::LibcxxUniquePtrSyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { if (!m_value_ptr_sp) return lldb::ValueObjectSP(); diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.h b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.h index d823fbd76222db..ad2f58508ab7b6 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.h +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.h @@ -87,9 +87,9 @@ class LibCxxMapIteratorSyntheticFrontEnd : public SyntheticChildrenFrontEnd { public: LibCxxMapIteratorSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp); - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -135,9 +135,9 @@ class LibCxxUnorderedMapIteratorSyntheticFrontEnd ~LibCxxUnorderedMapIteratorSyntheticFrontEnd() override = default; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -166,9 +166,9 @@ class LibcxxSharedPtrSyntheticFrontEnd : public SyntheticChildrenFrontEnd { public: LibcxxSharedPtrSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp); - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -186,9 +186,9 @@ class LibcxxUniquePtrSyntheticFrontEnd : public SyntheticChildrenFrontEnd { public: LibcxxUniquePtrSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp); - size_t CalculateNumChildren() override; + uint32_t 
CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxAtomic.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxAtomic.cpp index c81b1e8012f6a9..8e4c36103a744d 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxAtomic.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxAtomic.cpp @@ -90,9 +90,9 @@ class LibcxxStdAtomicSyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~LibcxxStdAtomicSyntheticFrontEnd() override = default; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -124,14 +124,14 @@ bool lldb_private::formatters::LibcxxStdAtomicSyntheticFrontEnd:: return true; } -size_t lldb_private::formatters::LibcxxStdAtomicSyntheticFrontEnd:: +uint32_t lldb_private::formatters::LibcxxStdAtomicSyntheticFrontEnd:: CalculateNumChildren() { return m_real_child ? 
1 : 0; } lldb::ValueObjectSP lldb_private::formatters::LibcxxStdAtomicSyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { if (idx == 0) return m_real_child->GetSP()->Clone(ConstString("Value")); return nullptr; diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxInitializerList.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxInitializerList.cpp index 3c33f94f923734..00012dfc056e47 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxInitializerList.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxInitializerList.cpp @@ -26,9 +26,9 @@ class LibcxxInitializerListSyntheticFrontEnd ~LibcxxInitializerListSyntheticFrontEnd() override; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -59,7 +59,7 @@ lldb_private::formatters::LibcxxInitializerListSyntheticFrontEnd:: // delete m_start; } -size_t lldb_private::formatters::LibcxxInitializerListSyntheticFrontEnd:: +uint32_t lldb_private::formatters::LibcxxInitializerListSyntheticFrontEnd:: CalculateNumChildren() { m_num_elements = 0; ValueObjectSP size_sp(m_backend.GetChildMemberWithName("__size_")); @@ -69,7 +69,7 @@ size_t lldb_private::formatters::LibcxxInitializerListSyntheticFrontEnd:: } lldb::ValueObjectSP lldb_private::formatters:: - LibcxxInitializerListSyntheticFrontEnd::GetChildAtIndex(size_t idx) { + LibcxxInitializerListSyntheticFrontEnd::GetChildAtIndex(uint32_t idx) { if (!m_start) return lldb::ValueObjectSP(); diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxList.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxList.cpp index e28ef818b10faf..17f6b737d9f628 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxList.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxList.cpp @@ -136,8 +136,8 @@ class ForwardListFrontEnd : public AbstractListFrontEnd { public: 
ForwardListFrontEnd(ValueObject &valobj); - size_t CalculateNumChildren() override; - ValueObjectSP GetChildAtIndex(size_t idx) override; + uint32_t CalculateNumChildren() override; + ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; }; @@ -147,9 +147,9 @@ class ListFrontEnd : public AbstractListFrontEnd { ~ListFrontEnd() override = default; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -240,7 +240,7 @@ ForwardListFrontEnd::ForwardListFrontEnd(ValueObject &valobj) Update(); } -size_t ForwardListFrontEnd::CalculateNumChildren() { +uint32_t ForwardListFrontEnd::CalculateNumChildren() { if (m_count != UINT32_MAX) return m_count; @@ -253,7 +253,7 @@ size_t ForwardListFrontEnd::CalculateNumChildren() { return m_count; } -ValueObjectSP ForwardListFrontEnd::GetChildAtIndex(size_t idx) { +ValueObjectSP ForwardListFrontEnd::GetChildAtIndex(uint32_t idx) { if (idx >= CalculateNumChildren()) return nullptr; @@ -308,7 +308,7 @@ ListFrontEnd::ListFrontEnd(lldb::ValueObjectSP valobj_sp) Update(); } -size_t ListFrontEnd::CalculateNumChildren() { +uint32_t ListFrontEnd::CalculateNumChildren() { if (m_count != UINT32_MAX) return m_count; if (!m_head || !m_tail || m_node_address == 0) @@ -343,7 +343,7 @@ size_t ListFrontEnd::CalculateNumChildren() { } } -lldb::ValueObjectSP ListFrontEnd::GetChildAtIndex(size_t idx) { +lldb::ValueObjectSP ListFrontEnd::GetChildAtIndex(uint32_t idx) { static ConstString g_value("__value_"); static ConstString g_next("__next_"); diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp index d208acfc9da47e..6d24eb03779ca3 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp 
@@ -177,9 +177,9 @@ class LibcxxStdMapSyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~LibcxxStdMapSyntheticFrontEnd() override = default; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -209,7 +209,7 @@ lldb_private::formatters::LibcxxStdMapSyntheticFrontEnd:: Update(); } -size_t lldb_private::formatters::LibcxxStdMapSyntheticFrontEnd:: +uint32_t lldb_private::formatters::LibcxxStdMapSyntheticFrontEnd:: CalculateNumChildren() { if (m_count != UINT32_MAX) return m_count; @@ -308,7 +308,7 @@ void lldb_private::formatters::LibcxxStdMapSyntheticFrontEnd::GetValueOffset( lldb::ValueObjectSP lldb_private::formatters::LibcxxStdMapSyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { static ConstString g_cc_("__cc_"), g_cc("__cc"); static ConstString g_nc("__nc"); diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxQueue.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxQueue.cpp index 83f93b16fc9a2d..fbadee89b7b7f2 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxQueue.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxQueue.cpp @@ -28,11 +28,11 @@ class QueueFrontEnd : public SyntheticChildrenFrontEnd { bool MightHaveChildren() override { return true; } lldb::ChildCacheState Update() override; - size_t CalculateNumChildren() override { + uint32_t CalculateNumChildren() override { return m_container_sp ? m_container_sp->GetNumChildren() : 0; } - ValueObjectSP GetChildAtIndex(size_t idx) override { + ValueObjectSP GetChildAtIndex(uint32_t idx) override { return m_container_sp ? 
m_container_sp->GetChildAtIndex(idx) : nullptr; } diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxRangesRefView.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxRangesRefView.cpp index c032d67c66cb47..74f54f76735667 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxRangesRefView.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxRangesRefView.cpp @@ -27,12 +27,12 @@ class LibcxxStdRangesRefViewSyntheticFrontEnd ~LibcxxStdRangesRefViewSyntheticFrontEnd() override = default; - size_t CalculateNumChildren() override { + uint32_t CalculateNumChildren() override { // __range_ will be the sole child of this type return 1; } - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override { + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override { // Since we only have a single child, return it assert(idx == 0); return m_range_sp; diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxSpan.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxSpan.cpp index 4ddfaef9c0ad54..af2b51d2b54016 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxSpan.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxSpan.cpp @@ -27,9 +27,9 @@ class LibcxxStdSpanSyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~LibcxxStdSpanSyntheticFrontEnd() override = default; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; /// Determines properties of the std::span<> associated with this object // @@ -73,14 +73,14 @@ lldb_private::formatters::LibcxxStdSpanSyntheticFrontEnd:: Update(); } -size_t lldb_private::formatters::LibcxxStdSpanSyntheticFrontEnd:: +uint32_t lldb_private::formatters::LibcxxStdSpanSyntheticFrontEnd:: CalculateNumChildren() { return m_num_elements; } lldb::ValueObjectSP lldb_private::formatters::LibcxxStdSpanSyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { if (!m_start) 
return {}; diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxTuple.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxTuple.cpp index 546871012d2b38..62bb7d619267a3 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxTuple.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxTuple.cpp @@ -26,8 +26,8 @@ class TupleFrontEnd: public SyntheticChildrenFrontEnd { bool MightHaveChildren() override { return true; } lldb::ChildCacheState Update() override; - size_t CalculateNumChildren() override { return m_elements.size(); } - ValueObjectSP GetChildAtIndex(size_t idx) override; + uint32_t CalculateNumChildren() override { return m_elements.size(); } + ValueObjectSP GetChildAtIndex(uint32_t idx) override; private: // The lifetime of a ValueObject and all its derivative ValueObjects @@ -58,7 +58,7 @@ lldb::ChildCacheState TupleFrontEnd::Update() { return lldb::ChildCacheState::eRefetch; } -ValueObjectSP TupleFrontEnd::GetChildAtIndex(size_t idx) { +ValueObjectSP TupleFrontEnd::GetChildAtIndex(uint32_t idx) { if (idx >= m_elements.size()) return ValueObjectSP(); if (!m_base) diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp index 4cac52f235a19a..b3c36429433570 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp @@ -33,9 +33,9 @@ class LibcxxStdUnorderedMapSyntheticFrontEnd ~LibcxxStdUnorderedMapSyntheticFrontEnd() override = default; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -62,7 +62,7 @@ lldb_private::formatters::LibcxxStdUnorderedMapSyntheticFrontEnd:: Update(); } -size_t lldb_private::formatters::LibcxxStdUnorderedMapSyntheticFrontEnd:: +uint32_t 
lldb_private::formatters::LibcxxStdUnorderedMapSyntheticFrontEnd:: CalculateNumChildren() { return m_num_elements; } @@ -93,7 +93,7 @@ static bool isUnorderedMap(ConstString type_name) { } lldb::ValueObjectSP lldb_private::formatters:: - LibcxxStdUnorderedMapSyntheticFrontEnd::GetChildAtIndex(size_t idx) { + LibcxxStdUnorderedMapSyntheticFrontEnd::GetChildAtIndex(uint32_t idx) { if (idx >= CalculateNumChildren()) return lldb::ValueObjectSP(); if (m_tree == nullptr) diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxValarray.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxValarray.cpp index 7c8fd25fd9f281..463c7b8d7ce3bb 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxValarray.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxValarray.cpp @@ -24,9 +24,9 @@ class LibcxxStdValarraySyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~LibcxxStdValarraySyntheticFrontEnd() override; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -63,7 +63,7 @@ lldb_private::formatters::LibcxxStdValarraySyntheticFrontEnd:: // delete m_finish; } -size_t lldb_private::formatters::LibcxxStdValarraySyntheticFrontEnd:: +uint32_t lldb_private::formatters::LibcxxStdValarraySyntheticFrontEnd:: CalculateNumChildren() { if (!m_start || !m_finish) return 0; @@ -84,7 +84,7 @@ size_t lldb_private::formatters::LibcxxStdValarraySyntheticFrontEnd:: lldb::ValueObjectSP lldb_private::formatters::LibcxxStdValarraySyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { if (!m_start || !m_finish) return lldb::ValueObjectSP(); diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxVariant.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxVariant.cpp index ecbb7cf0ca2b46..1f62062f09be30 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxVariant.cpp +++ 
b/lldb/source/Plugins/Language/CPlusPlus/LibCxxVariant.cpp @@ -205,8 +205,8 @@ class VariantFrontEnd : public SyntheticChildrenFrontEnd { bool MightHaveChildren() override { return true; } lldb::ChildCacheState Update() override; - size_t CalculateNumChildren() override { return m_size; } - ValueObjectSP GetChildAtIndex(size_t idx) override; + uint32_t CalculateNumChildren() override { return m_size; } + ValueObjectSP GetChildAtIndex(uint32_t idx) override; private: size_t m_size = 0; @@ -233,7 +233,7 @@ lldb::ChildCacheState VariantFrontEnd::Update() { return lldb::ChildCacheState::eRefetch; } -ValueObjectSP VariantFrontEnd::GetChildAtIndex(size_t idx) { +ValueObjectSP VariantFrontEnd::GetChildAtIndex(uint32_t idx) { if (idx >= m_size) return {}; diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxVector.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxVector.cpp index 0c3c3f02b60c7b..fcf727ad2ea027 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxVector.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxVector.cpp @@ -25,9 +25,9 @@ class LibcxxStdVectorSyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~LibcxxStdVectorSyntheticFrontEnd() override; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -46,9 +46,9 @@ class LibcxxVectorBoolSyntheticFrontEnd : public SyntheticChildrenFrontEnd { public: LibcxxVectorBoolSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp); - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -82,7 +82,7 @@ lldb_private::formatters::LibcxxStdVectorSyntheticFrontEnd:: // delete m_finish; } -size_t 
lldb_private::formatters::LibcxxStdVectorSyntheticFrontEnd:: +uint32_t lldb_private::formatters::LibcxxStdVectorSyntheticFrontEnd:: CalculateNumChildren() { if (!m_start || !m_finish) return 0; @@ -103,7 +103,7 @@ size_t lldb_private::formatters::LibcxxStdVectorSyntheticFrontEnd:: lldb::ValueObjectSP lldb_private::formatters::LibcxxStdVectorSyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { if (!m_start || !m_finish) return lldb::ValueObjectSP(); @@ -165,14 +165,14 @@ lldb_private::formatters::LibcxxVectorBoolSyntheticFrontEnd:: } } -size_t lldb_private::formatters::LibcxxVectorBoolSyntheticFrontEnd:: +uint32_t lldb_private::formatters::LibcxxVectorBoolSyntheticFrontEnd:: CalculateNumChildren() { return m_count; } lldb::ValueObjectSP lldb_private::formatters::LibcxxVectorBoolSyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { auto iter = m_children.find(idx), end = m_children.end(); if (iter != end) return iter->second; diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.cpp index 411551839e1e61..5abb3d50674bc5 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.cpp @@ -43,9 +43,9 @@ class LibstdcppMapIteratorSyntheticFrontEnd : public SyntheticChildrenFrontEnd { public: explicit LibstdcppMapIteratorSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp); - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -64,9 +64,9 @@ class LibStdcppSharedPtrSyntheticFrontEnd : public SyntheticChildrenFrontEnd { public: explicit LibStdcppSharedPtrSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp); - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP 
GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -132,12 +132,12 @@ lldb::ChildCacheState LibstdcppMapIteratorSyntheticFrontEnd::Update() { return lldb::ChildCacheState::eReuse; } -size_t LibstdcppMapIteratorSyntheticFrontEnd::CalculateNumChildren() { +uint32_t LibstdcppMapIteratorSyntheticFrontEnd::CalculateNumChildren() { return 2; } lldb::ValueObjectSP -LibstdcppMapIteratorSyntheticFrontEnd::GetChildAtIndex(size_t idx) { +LibstdcppMapIteratorSyntheticFrontEnd::GetChildAtIndex(uint32_t idx) { if (m_pair_address != 0 && m_pair_type) { if (!m_pair_sp) m_pair_sp = CreateValueObjectFromAddress("pair", m_pair_address, @@ -219,10 +219,10 @@ lldb::ChildCacheState VectorIteratorSyntheticFrontEnd::Update() { return lldb::ChildCacheState::eRefetch; } -size_t VectorIteratorSyntheticFrontEnd::CalculateNumChildren() { return 1; } +uint32_t VectorIteratorSyntheticFrontEnd::CalculateNumChildren() { return 1; } lldb::ValueObjectSP -VectorIteratorSyntheticFrontEnd::GetChildAtIndex(size_t idx) { +VectorIteratorSyntheticFrontEnd::GetChildAtIndex(uint32_t idx) { if (idx == 0) return m_item_sp; return lldb::ValueObjectSP(); @@ -371,10 +371,10 @@ LibStdcppSharedPtrSyntheticFrontEnd::LibStdcppSharedPtrSyntheticFrontEnd( Update(); } -size_t LibStdcppSharedPtrSyntheticFrontEnd::CalculateNumChildren() { return 1; } +uint32_t LibStdcppSharedPtrSyntheticFrontEnd::CalculateNumChildren() { return 1; } lldb::ValueObjectSP -LibStdcppSharedPtrSyntheticFrontEnd::GetChildAtIndex(size_t idx) { +LibStdcppSharedPtrSyntheticFrontEnd::GetChildAtIndex(uint32_t idx) { if (idx == 0) return m_ptr_obj->GetSP(); if (idx == 1) { diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibStdcppTuple.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibStdcppTuple.cpp index 189f9561e52a1b..64d2ec9d943a2a 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibStdcppTuple.cpp +++ 
b/lldb/source/Plugins/Language/CPlusPlus/LibStdcppTuple.cpp @@ -26,9 +26,9 @@ class LibStdcppTupleSyntheticFrontEnd : public SyntheticChildrenFrontEnd { public: explicit LibStdcppTupleSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp); - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -89,13 +89,13 @@ lldb::ChildCacheState LibStdcppTupleSyntheticFrontEnd::Update() { bool LibStdcppTupleSyntheticFrontEnd::MightHaveChildren() { return true; } lldb::ValueObjectSP -LibStdcppTupleSyntheticFrontEnd::GetChildAtIndex(size_t idx) { +LibStdcppTupleSyntheticFrontEnd::GetChildAtIndex(uint32_t idx) { if (idx < m_members.size() && m_members[idx]) return m_members[idx]->GetSP(); return lldb::ValueObjectSP(); } -size_t LibStdcppTupleSyntheticFrontEnd::CalculateNumChildren() { +uint32_t LibStdcppTupleSyntheticFrontEnd::CalculateNumChildren() { return m_members.size(); } diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibStdcppUniquePointer.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibStdcppUniquePointer.cpp index 3b0f6329d0e3ff..3a48fe412e0721 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibStdcppUniquePointer.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibStdcppUniquePointer.cpp @@ -26,9 +26,9 @@ class LibStdcppUniquePtrSyntheticFrontEnd : public SyntheticChildrenFrontEnd { public: explicit LibStdcppUniquePtrSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp); - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -116,7 +116,7 @@ lldb::ChildCacheState LibStdcppUniquePtrSyntheticFrontEnd::Update() { bool LibStdcppUniquePtrSyntheticFrontEnd::MightHaveChildren() { return true; } 
lldb::ValueObjectSP -LibStdcppUniquePtrSyntheticFrontEnd::GetChildAtIndex(size_t idx) { +LibStdcppUniquePtrSyntheticFrontEnd::GetChildAtIndex(uint32_t idx) { if (idx == 0 && m_ptr_obj) return m_ptr_obj->GetSP(); if (idx == 1 && m_del_obj) @@ -135,7 +135,7 @@ LibStdcppUniquePtrSyntheticFrontEnd::GetChildAtIndex(size_t idx) { return lldb::ValueObjectSP(); } -size_t LibStdcppUniquePtrSyntheticFrontEnd::CalculateNumChildren() { +uint32_t LibStdcppUniquePtrSyntheticFrontEnd::CalculateNumChildren() { if (m_del_obj) return 2; return 1; diff --git a/lldb/source/Plugins/Language/ObjC/Cocoa.cpp b/lldb/source/Plugins/Language/ObjC/Cocoa.cpp index 64047dc53236bf..cb740f8e71e168 100644 --- a/lldb/source/Plugins/Language/ObjC/Cocoa.cpp +++ b/lldb/source/Plugins/Language/ObjC/Cocoa.cpp @@ -1038,9 +1038,9 @@ class ObjCClassSyntheticChildrenFrontEnd : public SyntheticChildrenFrontEnd { ~ObjCClassSyntheticChildrenFrontEnd() override = default; - size_t CalculateNumChildren() override { return 0; } + uint32_t CalculateNumChildren() override { return 0; } - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override { + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override { return lldb::ValueObjectSP(); } diff --git a/lldb/source/Plugins/Language/ObjC/NSArray.cpp b/lldb/source/Plugins/Language/ObjC/NSArray.cpp index 09bf7a23d6097e..7f060b2613d6f3 100644 --- a/lldb/source/Plugins/Language/ObjC/NSArray.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSArray.cpp @@ -50,9 +50,9 @@ class NSArrayMSyntheticFrontEndBase : public SyntheticChildrenFrontEnd { ~NSArrayMSyntheticFrontEndBase() override = default; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override = 0; @@ -214,9 +214,9 @@ class GenericNSArrayISyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~GenericNSArrayISyntheticFrontEnd() 
override; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -302,9 +302,9 @@ class NSArray0SyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~NSArray0SyntheticFrontEnd() override = default; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -319,9 +319,9 @@ class NSArray1SyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~NSArray1SyntheticFrontEnd() override = default; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -477,14 +477,14 @@ lldb_private::formatters:: : NSArrayMSyntheticFrontEndBase(valobj_sp), m_data_32(nullptr), m_data_64(nullptr) {} -size_t -lldb_private::formatters::NSArrayMSyntheticFrontEndBase::CalculateNumChildren() { +uint32_t lldb_private::formatters::NSArrayMSyntheticFrontEndBase:: + CalculateNumChildren() { return GetUsedCount(); } lldb::ValueObjectSP lldb_private::formatters::NSArrayMSyntheticFrontEndBase::GetChildAtIndex( - size_t idx) { + uint32_t idx) { if (idx >= CalculateNumChildren()) return lldb::ValueObjectSP(); lldb::addr_t object_at_idx = GetDataAddress(); @@ -634,7 +634,7 @@ lldb_private::formatters::GenericNSArrayISyntheticFrontEnd:: } template -size_t +uint32_t lldb_private::formatters::GenericNSArrayISyntheticFrontEnd:: CalculateNumChildren() { return m_data_32 ? 
m_data_32->used : m_data_64->used; @@ -684,7 +684,7 @@ lldb_private::formatters::GenericNSArrayISyntheticFrontEnd:: template lldb::ValueObjectSP lldb_private::formatters::GenericNSArrayISyntheticFrontEnd:: - GetChildAtIndex(size_t idx) { + GetChildAtIndex(uint32_t idx) { if (idx >= CalculateNumChildren()) return lldb::ValueObjectSP(); lldb::addr_t object_at_idx; @@ -719,7 +719,7 @@ lldb_private::formatters::NSArray0SyntheticFrontEnd::GetIndexOfChildWithName( return UINT32_MAX; } -size_t +uint32_t lldb_private::formatters::NSArray0SyntheticFrontEnd::CalculateNumChildren() { return 0; } @@ -735,7 +735,7 @@ bool lldb_private::formatters::NSArray0SyntheticFrontEnd::MightHaveChildren() { lldb::ValueObjectSP lldb_private::formatters::NSArray0SyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { return lldb::ValueObjectSP(); } @@ -754,7 +754,7 @@ lldb_private::formatters::NSArray1SyntheticFrontEnd::GetIndexOfChildWithName( return UINT32_MAX; } -size_t +uint32_t lldb_private::formatters::NSArray1SyntheticFrontEnd::CalculateNumChildren() { return 1; } @@ -770,7 +770,7 @@ bool lldb_private::formatters::NSArray1SyntheticFrontEnd::MightHaveChildren() { lldb::ValueObjectSP lldb_private::formatters::NSArray1SyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { static const ConstString g_zero("[0]"); if (idx == 0) { diff --git a/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp b/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp index 9c252a98de8357..da94eda1529ce1 100644 --- a/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp @@ -103,9 +103,9 @@ class NSDictionaryISyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~NSDictionaryISyntheticFrontEnd() override; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState 
Update() override; @@ -144,9 +144,9 @@ class NSConstantDictionarySyntheticFrontEnd : public SyntheticChildrenFrontEnd { public: NSConstantDictionarySyntheticFrontEnd(lldb::ValueObjectSP valobj_sp); - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -176,9 +176,9 @@ class NSCFDictionarySyntheticFrontEnd : public SyntheticChildrenFrontEnd { public: NSCFDictionarySyntheticFrontEnd(lldb::ValueObjectSP valobj_sp); - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -209,9 +209,9 @@ class NSDictionary1SyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~NSDictionary1SyntheticFrontEnd() override = default; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -230,9 +230,9 @@ class GenericNSDictionaryMSyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~GenericNSDictionaryMSyntheticFrontEnd() override; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -263,9 +263,9 @@ namespace Foundation1100 { ~NSDictionaryMSyntheticFrontEnd() override; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() 
override; @@ -606,7 +606,7 @@ size_t lldb_private::formatters::NSDictionaryISyntheticFrontEnd:: return idx; } -size_t lldb_private::formatters::NSDictionaryISyntheticFrontEnd:: +uint32_t lldb_private::formatters::NSDictionaryISyntheticFrontEnd:: CalculateNumChildren() { if (!m_data_32 && !m_data_64) return 0; @@ -655,7 +655,7 @@ bool lldb_private::formatters::NSDictionaryISyntheticFrontEnd:: lldb::ValueObjectSP lldb_private::formatters::NSDictionaryISyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { uint32_t num_children = CalculateNumChildren(); if (idx >= num_children) @@ -744,7 +744,7 @@ size_t lldb_private::formatters::NSCFDictionarySyntheticFrontEnd:: return idx; } -size_t lldb_private::formatters::NSCFDictionarySyntheticFrontEnd:: +uint32_t lldb_private::formatters::NSCFDictionarySyntheticFrontEnd:: CalculateNumChildren() { if (!m_hashtable.IsValid()) return 0; @@ -777,7 +777,7 @@ bool lldb_private::formatters::NSCFDictionarySyntheticFrontEnd:: lldb::ValueObjectSP lldb_private::formatters::NSCFDictionarySyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { lldb::addr_t m_keys_ptr = m_hashtable.GetKeyPointer(); lldb::addr_t m_values_ptr = m_hashtable.GetValuePointer(); @@ -880,7 +880,7 @@ size_t lldb_private::formatters::NSConstantDictionarySyntheticFrontEnd:: return idx; } -size_t lldb_private::formatters::NSConstantDictionarySyntheticFrontEnd:: +uint32_t lldb_private::formatters::NSConstantDictionarySyntheticFrontEnd:: CalculateNumChildren() { return m_size; } @@ -920,7 +920,7 @@ bool lldb_private::formatters::NSConstantDictionarySyntheticFrontEnd:: } lldb::ValueObjectSP lldb_private::formatters:: - NSConstantDictionarySyntheticFrontEnd::GetChildAtIndex(size_t idx) { + NSConstantDictionarySyntheticFrontEnd::GetChildAtIndex(uint32_t idx) { uint32_t num_children = CalculateNumChildren(); if (idx >= num_children) @@ -994,7 +994,7 @@ size_t lldb_private::formatters::NSDictionary1SyntheticFrontEnd:: return name == g_zero ? 
0 : UINT32_MAX; } -size_t lldb_private::formatters::NSDictionary1SyntheticFrontEnd:: +uint32_t lldb_private::formatters::NSDictionary1SyntheticFrontEnd:: CalculateNumChildren() { return 1; } @@ -1012,7 +1012,7 @@ bool lldb_private::formatters::NSDictionary1SyntheticFrontEnd:: lldb::ValueObjectSP lldb_private::formatters::NSDictionary1SyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { if (idx != 0) return lldb::ValueObjectSP(); @@ -1087,7 +1087,7 @@ size_t lldb_private::formatters::GenericNSDictionaryMSyntheticFrontEnd< } template -size_t +uint32_t lldb_private::formatters::GenericNSDictionaryMSyntheticFrontEnd::CalculateNumChildren() { if (!m_data_32 && !m_data_64) return 0; @@ -1140,7 +1140,7 @@ lldb_private::formatters::GenericNSDictionaryMSyntheticFrontEnd:: template lldb::ValueObjectSP lldb_private::formatters::GenericNSDictionaryMSyntheticFrontEnd< - D32, D64>::GetChildAtIndex(size_t idx) { + D32, D64>::GetChildAtIndex(uint32_t idx) { lldb::addr_t m_keys_ptr; lldb::addr_t m_values_ptr; if (m_data_32) { @@ -1250,7 +1250,7 @@ lldb_private::formatters::Foundation1100:: return idx; } -size_t +uint32_t lldb_private::formatters::Foundation1100:: NSDictionaryMSyntheticFrontEnd::CalculateNumChildren() { if (!m_data_32 && !m_data_64) @@ -1300,7 +1300,7 @@ lldb_private::formatters::Foundation1100:: lldb::ValueObjectSP lldb_private::formatters::Foundation1100:: - NSDictionaryMSyntheticFrontEnd::GetChildAtIndex(size_t idx) { + NSDictionaryMSyntheticFrontEnd::GetChildAtIndex(uint32_t idx) { lldb::addr_t m_keys_ptr = (m_data_32 ? 
m_data_32->_keys_addr : m_data_64->_keys_addr); lldb::addr_t m_values_ptr = diff --git a/lldb/source/Plugins/Language/ObjC/NSError.cpp b/lldb/source/Plugins/Language/ObjC/NSError.cpp index ce52ae542a50cb..b034e799b716e7 100644 --- a/lldb/source/Plugins/Language/ObjC/NSError.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSError.cpp @@ -116,7 +116,7 @@ class NSErrorSyntheticFrontEnd : public SyntheticChildrenFrontEnd { // no need to delete m_child_ptr - it's kept alive by the cluster manager on // our behalf - size_t CalculateNumChildren() override { + uint32_t CalculateNumChildren() override { if (m_child_ptr) return 1; if (m_child_sp) @@ -124,7 +124,7 @@ class NSErrorSyntheticFrontEnd : public SyntheticChildrenFrontEnd { return 0; } - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override { + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override { if (idx != 0) return lldb::ValueObjectSP(); diff --git a/lldb/source/Plugins/Language/ObjC/NSException.cpp b/lldb/source/Plugins/Language/ObjC/NSException.cpp index e8011e5d2ca0be..09d3a1b42b747f 100644 --- a/lldb/source/Plugins/Language/ObjC/NSException.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSException.cpp @@ -123,11 +123,11 @@ class NSExceptionSyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~NSExceptionSyntheticFrontEnd() override = default; - size_t CalculateNumChildren() override { + uint32_t CalculateNumChildren() override { return 4; } - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override { + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override { switch (idx) { case 0: return m_name_sp; case 1: return m_reason_sp; diff --git a/lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp b/lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp index 69e6ab1055d8c6..10bb907c58ed42 100644 --- a/lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp @@ -40,9 +40,9 @@ class NSIndexPathSyntheticFrontEnd : public SyntheticChildrenFrontEnd { 
~NSIndexPathSyntheticFrontEnd() override = default; - size_t CalculateNumChildren() override { return m_impl.GetNumIndexes(); } + uint32_t CalculateNumChildren() override { return m_impl.GetNumIndexes(); } - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override { + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override { return m_impl.GetIndexAtIndex(idx, m_uint_star_type); } diff --git a/lldb/source/Plugins/Language/ObjC/NSSet.cpp b/lldb/source/Plugins/Language/ObjC/NSSet.cpp index ede64852d9a879..c965a2a1340030 100644 --- a/lldb/source/Plugins/Language/ObjC/NSSet.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSSet.cpp @@ -46,9 +46,9 @@ class NSSetISyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~NSSetISyntheticFrontEnd() override; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -84,9 +84,9 @@ class NSCFSetSyntheticFrontEnd : public SyntheticChildrenFrontEnd { public: NSCFSetSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp); - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -117,9 +117,9 @@ class GenericNSSetMSyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~GenericNSSetMSyntheticFrontEnd() override; - size_t CalculateNumChildren() override; + uint32_t CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -233,9 +233,9 @@ class NSSetCodeRunningSyntheticFrontEnd : public SyntheticChildrenFrontEnd { ~NSSetCodeRunningSyntheticFrontEnd() override; - size_t CalculateNumChildren() override; + uint32_t 
CalculateNumChildren() override; - lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; lldb::ChildCacheState Update() override; @@ -419,7 +419,7 @@ lldb_private::formatters::NSSetISyntheticFrontEnd::GetIndexOfChildWithName( return idx; } -size_t +uint32_t lldb_private::formatters::NSSetISyntheticFrontEnd::CalculateNumChildren() { if (!m_data_32 && !m_data_64) return 0; @@ -466,7 +466,8 @@ bool lldb_private::formatters::NSSetISyntheticFrontEnd::MightHaveChildren() { } lldb::ValueObjectSP -lldb_private::formatters::NSSetISyntheticFrontEnd::GetChildAtIndex(size_t idx) { +lldb_private::formatters::NSSetISyntheticFrontEnd::GetChildAtIndex( + uint32_t idx) { uint32_t num_children = CalculateNumChildren(); if (idx >= num_children) @@ -555,7 +556,7 @@ lldb_private::formatters::NSCFSetSyntheticFrontEnd::GetIndexOfChildWithName( return idx; } -size_t +uint32_t lldb_private::formatters::NSCFSetSyntheticFrontEnd::CalculateNumChildren() { if (!m_hashtable.IsValid()) return 0; @@ -587,7 +588,7 @@ bool lldb_private::formatters::NSCFSetSyntheticFrontEnd::MightHaveChildren() { lldb::ValueObjectSP lldb_private::formatters::NSCFSetSyntheticFrontEnd::GetChildAtIndex( - size_t idx) { + uint32_t idx) { lldb::addr_t m_values_ptr = m_hashtable.GetValuePointer(); const uint32_t num_children = CalculateNumChildren(); @@ -696,7 +697,7 @@ lldb_private::formatters:: } template -size_t +uint32_t lldb_private::formatters:: GenericNSSetMSyntheticFrontEnd::CalculateNumChildren() { if (!m_data_32 && !m_data_64) @@ -748,7 +749,7 @@ lldb_private::formatters:: template lldb::ValueObjectSP lldb_private::formatters:: - GenericNSSetMSyntheticFrontEnd::GetChildAtIndex(size_t idx) { + GenericNSSetMSyntheticFrontEnd::GetChildAtIndex(uint32_t idx) { lldb::addr_t m_objs_addr = (m_data_32 ? 
m_data_32->_objs_addr : m_data_64->_objs_addr); diff --git a/lldb/test/API/commands/platform/connect/TestPlatformConnect.py b/lldb/test/API/commands/platform/connect/TestPlatformConnect.py index 6a0f036c007079..fc6c2ee98df44d 100644 --- a/lldb/test/API/commands/platform/connect/TestPlatformConnect.py +++ b/lldb/test/API/commands/platform/connect/TestPlatformConnect.py @@ -13,6 +13,13 @@ class TestPlatformProcessConnect(TestBase): @expectedFailureAll(hostoslist=["windows"], triple=".*-android") @skipIfDarwin # lldb-server not found correctly @expectedFailureAll(oslist=["windows"]) # process modules not loaded + # lldb-server platform times out waiting for the gdbserver port number to be + # written to the pipe, yet it seems the gdbserver already has written it. + @expectedFailureAll( + archs=["aarch64"], + oslist=["freebsd"], + bugnumber="https://github.com/llvm/llvm-project/issues/84327", + ) @add_test_categories(["lldb-server"]) def test_platform_process_connect(self): self.build() diff --git a/lldb/test/API/functionalities/fork/concurrent_vfork/main.cpp b/lldb/test/API/functionalities/fork/concurrent_vfork/main.cpp index 2f3a95dc5c6eef..d72051e4ee84d9 100644 --- a/lldb/test/API/functionalities/fork/concurrent_vfork/main.cpp +++ b/lldb/test/API/functionalities/fork/concurrent_vfork/main.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include diff --git a/lldb/test/API/lang/c/tls_globals/TestTlsGlobals.py b/lldb/test/API/lang/c/tls_globals/TestTlsGlobals.py index dfe29b451df0a6..2bffd2eea123a6 100644 --- a/lldb/test/API/lang/c/tls_globals/TestTlsGlobals.py +++ b/lldb/test/API/lang/c/tls_globals/TestTlsGlobals.py @@ -40,6 +40,7 @@ def setUp(self): @skipIfWindows @skipIf(oslist=["linux"], archs=["arm", "aarch64"]) @skipIf(oslist=no_match([lldbplatformutil.getDarwinOSTriples(), "linux"])) + @expectedFailureIf(lldbplatformutil.xcode15LinkerBug()) def test(self): """Test thread-local storage.""" self.build() diff --git 
a/lldb/test/CMakeLists.txt b/lldb/test/CMakeLists.txt index 950643a5b8cc8e..0ef2eb1c42ce06 100644 --- a/lldb/test/CMakeLists.txt +++ b/lldb/test/CMakeLists.txt @@ -34,7 +34,9 @@ endif() # The "pexpect" package should come from the system environment, not from the # LLDB tree. However, we delay the deletion of it from the tree in case # users/buildbots don't have the package yet and need some time to install it. -if (NOT LLDB_TEST_USE_VENDOR_PACKAGES) +# Windows is configured to skip all pexpect tests, and guards all +# "import pexpect" calls, so we do not need pexpect installed there. +if (NOT LLDB_TEST_USE_VENDOR_PACKAGES AND NOT WIN32) unset(PY_pexpect_FOUND CACHE) lldb_find_python_module(pexpect) if (NOT PY_pexpect_FOUND) diff --git a/lldb/test/Shell/Unwind/eh-frame-dwarf-unwind.test b/lldb/test/Shell/Unwind/eh-frame-dwarf-unwind.test index 3df9906394f432..7b5d6650fe2f75 100644 --- a/lldb/test/Shell/Unwind/eh-frame-dwarf-unwind.test +++ b/lldb/test/Shell/Unwind/eh-frame-dwarf-unwind.test @@ -1,7 +1,7 @@ # Test handing of dwarf expressions specifying the location of registers, if # those expressions refer to the frame's CFA value. -# UNSUPPORTED: system-windows +# UNSUPPORTED: system-windows, ld_new-bug # REQUIRES: target-x86_64, native # RUN: %clang_host %p/Inputs/call-asm.c %p/Inputs/eh-frame-dwarf-unwind.s -o %t diff --git a/lldb/test/Shell/Unwind/thread-step-out-ret-addr-check.test b/lldb/test/Shell/Unwind/thread-step-out-ret-addr-check.test index 682b0e5332b1c5..9bc7c78f79b26b 100644 --- a/lldb/test/Shell/Unwind/thread-step-out-ret-addr-check.test +++ b/lldb/test/Shell/Unwind/thread-step-out-ret-addr-check.test @@ -2,7 +2,7 @@ # points to non-executable memory. 
# REQUIRES: target-x86_64 -# UNSUPPORTED: system-windows +# UNSUPPORTED: system-windows, ld_new-bug # RUN: %clang_host %p/Inputs/call-asm.c -x assembler-with-cpp %p/Inputs/thread-step-out-ret-addr-check.s -o %t # RUN: not %lldb %t -s %s -b 2>&1 | FileCheck %s diff --git a/lldb/test/Shell/lit.cfg.py b/lldb/test/Shell/lit.cfg.py index d75c1f532e147f..31afe5151c0661 100644 --- a/lldb/test/Shell/lit.cfg.py +++ b/lldb/test/Shell/lit.cfg.py @@ -1,5 +1,6 @@ # -*- Python -*- +import json import os import platform import re @@ -179,3 +180,18 @@ def calculate_arch_features(arch_string): if "LD_PRELOAD" in os.environ: config.available_features.add("ld_preload-present") + +# Determine if a specific version of Xcode's linker contains a bug. We want to +# skip affected tests if they contain this bug. +if platform.system() == "Darwin": + try: + raw_version_details = subprocess.check_output( + ("xcrun", "ld", "-version_details") + ) + version_details = json.loads(raw_version_details) + version = version_details.get("version", "0") + version_tuple = tuple(int(x) for x in version.split(".")) + if (1000,) <= version_tuple <= (1109,): + config.available_features.add("ld_new-bug") + except: + pass diff --git a/lldb/unittests/Core/ProgressReportTest.cpp b/lldb/unittests/Core/ProgressReportTest.cpp index 98cbc475ce2835..1f993180fd8392 100644 --- a/lldb/unittests/Core/ProgressReportTest.cpp +++ b/lldb/unittests/Core/ProgressReportTest.cpp @@ -22,9 +22,29 @@ using namespace lldb; using namespace lldb_private; +static std::chrono::milliseconds TIMEOUT(100); + class ProgressReportTest : public ::testing::Test { - SubsystemRAII subsystems; +public: + ListenerSP CreateListenerFor(uint32_t bit) { + // Set up the debugger, make sure that was done properly. + ArchSpec arch("x86_64-apple-macosx-"); + Platform::SetHostPlatform( + PlatformRemoteMacOSX::CreateInstance(true, &arch)); + + m_debugger_sp = Debugger::CreateInstance(); + + // Get the debugger's broadcaster. 
+ Broadcaster &broadcaster = m_debugger_sp->GetBroadcaster(); + + // Create a listener, make sure it can receive events and that it's + // listening to the correct broadcast bit. + m_listener_sp = Listener::MakeListener("progress-listener"); + m_listener_sp->StartListeningForEvents(&broadcaster, bit); + return m_listener_sp; + } +protected: // The debugger's initialization function can't be called with no arguments // so calling it using SubsystemRAII will cause the test build to fail as // SubsystemRAII will call Initialize with no arguments. As such we set it up @@ -33,30 +53,14 @@ class ProgressReportTest : public ::testing::Test { std::call_once(TestUtilities::g_debugger_initialize_flag, []() { Debugger::Initialize(nullptr); }); }; + + DebuggerSP m_debugger_sp; + ListenerSP m_listener_sp; + SubsystemRAII subsystems; }; TEST_F(ProgressReportTest, TestReportCreation) { - std::chrono::milliseconds timeout(100); - - // Set up the debugger, make sure that was done properly. - ArchSpec arch("x86_64-apple-macosx-"); - Platform::SetHostPlatform(PlatformRemoteMacOSX::CreateInstance(true, &arch)); - - DebuggerSP debugger_sp = Debugger::CreateInstance(); - ASSERT_TRUE(debugger_sp); - - // Get the debugger's broadcaster. - Broadcaster &broadcaster = debugger_sp->GetBroadcaster(); - - // Create a listener, make sure it can receive events and that it's - // listening to the correct broadcast bit. 
- ListenerSP listener_sp = Listener::MakeListener("progress-listener"); - - listener_sp->StartListeningForEvents(&broadcaster, - Debugger::eBroadcastBitProgress); - EXPECT_TRUE( - broadcaster.EventTypeHasListeners(Debugger::eBroadcastBitProgress)); - + ListenerSP listener_sp = CreateListenerFor(Debugger::eBroadcastBitProgress); EventSP event_sp; const ProgressEventData *data; @@ -73,82 +77,64 @@ TEST_F(ProgressReportTest, TestReportCreation) { // in this order: // Starting progress: 1, 2, 3 // Ending progress: 3, 2, 1 - EXPECT_TRUE(listener_sp->GetEvent(event_sp, timeout)); + ASSERT_TRUE(listener_sp->GetEvent(event_sp, TIMEOUT)); data = ProgressEventData::GetEventDataFromEvent(event_sp.get()); - ASSERT_EQ(data->GetDetails(), "Starting report 1"); - ASSERT_FALSE(data->IsFinite()); - ASSERT_FALSE(data->GetCompleted()); - ASSERT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); - ASSERT_EQ(data->GetMessage(), "Progress report 1: Starting report 1"); + EXPECT_EQ(data->GetDetails(), "Starting report 1"); + EXPECT_FALSE(data->IsFinite()); + EXPECT_FALSE(data->GetCompleted()); + EXPECT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); + EXPECT_EQ(data->GetMessage(), "Progress report 1: Starting report 1"); - EXPECT_TRUE(listener_sp->GetEvent(event_sp, timeout)); + ASSERT_TRUE(listener_sp->GetEvent(event_sp, TIMEOUT)); data = ProgressEventData::GetEventDataFromEvent(event_sp.get()); - ASSERT_EQ(data->GetDetails(), "Starting report 2"); - ASSERT_FALSE(data->IsFinite()); - ASSERT_FALSE(data->GetCompleted()); - ASSERT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); - ASSERT_EQ(data->GetMessage(), "Progress report 2: Starting report 2"); + EXPECT_EQ(data->GetDetails(), "Starting report 2"); + EXPECT_FALSE(data->IsFinite()); + EXPECT_FALSE(data->GetCompleted()); + EXPECT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); + EXPECT_EQ(data->GetMessage(), "Progress report 2: Starting report 2"); - EXPECT_TRUE(listener_sp->GetEvent(event_sp, timeout)); + 
ASSERT_TRUE(listener_sp->GetEvent(event_sp, TIMEOUT)); data = ProgressEventData::GetEventDataFromEvent(event_sp.get()); - ASSERT_EQ(data->GetDetails(), "Starting report 3"); - ASSERT_FALSE(data->IsFinite()); - ASSERT_FALSE(data->GetCompleted()); - ASSERT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); - ASSERT_EQ(data->GetMessage(), "Progress report 3: Starting report 3"); + + EXPECT_EQ(data->GetDetails(), "Starting report 3"); + EXPECT_FALSE(data->IsFinite()); + EXPECT_FALSE(data->GetCompleted()); + EXPECT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); + EXPECT_EQ(data->GetMessage(), "Progress report 3: Starting report 3"); // Progress report objects should be destroyed at this point so // get each report from the queue and check that they've been // destroyed in reverse order. - EXPECT_TRUE(listener_sp->GetEvent(event_sp, timeout)); + ASSERT_TRUE(listener_sp->GetEvent(event_sp, TIMEOUT)); data = ProgressEventData::GetEventDataFromEvent(event_sp.get()); - ASSERT_EQ(data->GetTitle(), "Progress report 3"); - ASSERT_TRUE(data->GetCompleted()); - ASSERT_FALSE(data->IsFinite()); - ASSERT_EQ(data->GetMessage(), "Progress report 3: Starting report 3"); + EXPECT_EQ(data->GetTitle(), "Progress report 3"); + EXPECT_TRUE(data->GetCompleted()); + EXPECT_FALSE(data->IsFinite()); + EXPECT_EQ(data->GetMessage(), "Progress report 3: Starting report 3"); - EXPECT_TRUE(listener_sp->GetEvent(event_sp, timeout)); + ASSERT_TRUE(listener_sp->GetEvent(event_sp, TIMEOUT)); data = ProgressEventData::GetEventDataFromEvent(event_sp.get()); - ASSERT_EQ(data->GetTitle(), "Progress report 2"); - ASSERT_TRUE(data->GetCompleted()); - ASSERT_FALSE(data->IsFinite()); - ASSERT_EQ(data->GetMessage(), "Progress report 2: Starting report 2"); + EXPECT_EQ(data->GetTitle(), "Progress report 2"); + EXPECT_TRUE(data->GetCompleted()); + EXPECT_FALSE(data->IsFinite()); + EXPECT_EQ(data->GetMessage(), "Progress report 2: Starting report 2"); - EXPECT_TRUE(listener_sp->GetEvent(event_sp, 
timeout)); + ASSERT_TRUE(listener_sp->GetEvent(event_sp, TIMEOUT)); data = ProgressEventData::GetEventDataFromEvent(event_sp.get()); - ASSERT_EQ(data->GetTitle(), "Progress report 1"); - ASSERT_TRUE(data->GetCompleted()); - ASSERT_FALSE(data->IsFinite()); - ASSERT_EQ(data->GetMessage(), "Progress report 1: Starting report 1"); + EXPECT_EQ(data->GetTitle(), "Progress report 1"); + EXPECT_TRUE(data->GetCompleted()); + EXPECT_FALSE(data->IsFinite()); + EXPECT_EQ(data->GetMessage(), "Progress report 1: Starting report 1"); } TEST_F(ProgressReportTest, TestProgressManager) { - std::chrono::milliseconds timeout(100); - - // Set up the debugger, make sure that was done properly. - ArchSpec arch("x86_64-apple-macosx-"); - Platform::SetHostPlatform(PlatformRemoteMacOSX::CreateInstance(true, &arch)); - - DebuggerSP debugger_sp = Debugger::CreateInstance(); - ASSERT_TRUE(debugger_sp); - - // Get the debugger's broadcaster. - Broadcaster &broadcaster = debugger_sp->GetBroadcaster(); - - // Create a listener, make sure it can receive events and that it's - // listening to the correct broadcast bit. 
- ListenerSP listener_sp = Listener::MakeListener("progress-category-listener"); - - listener_sp->StartListeningForEvents(&broadcaster, - Debugger::eBroadcastBitProgressCategory); - EXPECT_TRUE(broadcaster.EventTypeHasListeners( - Debugger::eBroadcastBitProgressCategory)); - + ListenerSP listener_sp = + CreateListenerFor(Debugger::eBroadcastBitProgressCategory); EventSP event_sp; const ProgressEventData *data; @@ -160,28 +146,35 @@ TEST_F(ProgressReportTest, TestProgressManager) { Progress progress1("Progress report 1", "Starting report 1"); Progress progress2("Progress report 1", "Starting report 2"); Progress progress3("Progress report 1", "Starting report 3"); - EXPECT_TRUE(listener_sp->GetEvent(event_sp, timeout)); - EXPECT_FALSE(listener_sp->GetEvent(event_sp, timeout)); + ASSERT_TRUE(listener_sp->GetEvent(event_sp, TIMEOUT)); + ASSERT_FALSE(listener_sp->GetEvent(event_sp, TIMEOUT)); } data = ProgressEventData::GetEventDataFromEvent(event_sp.get()); - ASSERT_EQ(data->GetDetails(), ""); - ASSERT_FALSE(data->IsFinite()); - ASSERT_TRUE(data->GetCompleted()); - ASSERT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); - ASSERT_EQ(data->GetMessage(), "Progress report 1"); + EXPECT_EQ(data->GetDetails(), ""); + EXPECT_FALSE(data->IsFinite()); + EXPECT_FALSE(data->GetCompleted()); + EXPECT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); + EXPECT_EQ(data->GetMessage(), "Progress report 1"); // Pop another event from the queue, this should be the event for the final // report for this category. 
- EXPECT_TRUE(listener_sp->GetEvent(event_sp, timeout)); - + ASSERT_TRUE(listener_sp->GetEvent(event_sp, TIMEOUT)); data = ProgressEventData::GetEventDataFromEvent(event_sp.get()); - ASSERT_EQ(data->GetDetails(), ""); - ASSERT_FALSE(data->IsFinite()); - ASSERT_TRUE(data->GetCompleted()); - ASSERT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); - ASSERT_EQ(data->GetMessage(), "Progress report 1"); + + EXPECT_EQ(data->GetDetails(), ""); + EXPECT_FALSE(data->IsFinite()); + EXPECT_TRUE(data->GetCompleted()); + EXPECT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); + EXPECT_EQ(data->GetMessage(), "Progress report 1"); +} + +TEST_F(ProgressReportTest, TestOverlappingEvents) { + ListenerSP listener_sp = + CreateListenerFor(Debugger::eBroadcastBitProgressCategory); + EventSP event_sp; + const ProgressEventData *data; // Create two progress reports of the same category that overlap with each // other. Here we want to ensure that the ID broadcasted for the initial and @@ -192,28 +185,28 @@ TEST_F(ProgressReportTest, TestProgressManager) { std::make_unique("Overlapping report 1", "Starting report 2"); overlap_progress1.reset(); - EXPECT_TRUE(listener_sp->GetEvent(event_sp, timeout)); + ASSERT_TRUE(listener_sp->GetEvent(event_sp, TIMEOUT)); data = ProgressEventData::GetEventDataFromEvent(event_sp.get()); // Get the ID used in the first report for this category. 
uint64_t expected_progress_id = data->GetID(); - ASSERT_EQ(data->GetDetails(), ""); - ASSERT_FALSE(data->IsFinite()); - ASSERT_TRUE(data->GetCompleted()); - ASSERT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); - ASSERT_EQ(data->GetMessage(), "Overlapping report 1"); + EXPECT_EQ(data->GetDetails(), ""); + EXPECT_FALSE(data->IsFinite()); + EXPECT_FALSE(data->GetCompleted()); + EXPECT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); + EXPECT_EQ(data->GetMessage(), "Overlapping report 1"); overlap_progress2.reset(); - EXPECT_TRUE(listener_sp->GetEvent(event_sp, timeout)); + ASSERT_TRUE(listener_sp->GetEvent(event_sp, TIMEOUT)); data = ProgressEventData::GetEventDataFromEvent(event_sp.get()); - ASSERT_EQ(data->GetDetails(), ""); - ASSERT_FALSE(data->IsFinite()); - ASSERT_TRUE(data->GetCompleted()); - ASSERT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); - ASSERT_EQ(data->GetMessage(), "Overlapping report 1"); + EXPECT_EQ(data->GetDetails(), ""); + EXPECT_FALSE(data->IsFinite()); + EXPECT_TRUE(data->GetCompleted()); + EXPECT_EQ(data->GetTotal(), Progress::kNonDeterministicTotal); + EXPECT_EQ(data->GetMessage(), "Overlapping report 1"); // The progress ID for the final report should be the same as that for the // initial report. - ASSERT_EQ(data->GetID(), expected_progress_id); + EXPECT_EQ(data->GetID(), expected_progress_id); } diff --git a/llvm/docs/GettingInvolved.rst b/llvm/docs/GettingInvolved.rst index f89483904ab737..763aeb87c68805 100644 --- a/llvm/docs/GettingInvolved.rst +++ b/llvm/docs/GettingInvolved.rst @@ -215,7 +215,7 @@ what to add to your calendar invite. - `gcal `__ - `Minutes/docs `__ * - LLVM SPIR-V Backend Working Group - - Every week on Thursday + - Every week on Monday - - `Meeting details/agenda `__ * - SYCL Upstream Working Group @@ -305,11 +305,6 @@ The :doc:`CodeOfConduct` applies to all office hours. - Monthly, 2nd Wednesday of the month at 11:00am PT, for 30 minutes. 
- `Zoom `__ - English, Russian - * - Michal Paszkowski - - SPIR-V backend, IGC, OpenCL, and IR transformations - - Monthly, 3rd Thursday of the month at 21:00 Warsaw/Poland time, 1 hour slot. - - `MS Teams `__ - - English, Polish * - Quentin Colombet (he/him) - LLVM/MLIR; Codegen (Instruction selection (GlobalISel/SDISel), Machine IR, Register allocation, etc.); Optimizations; MCA diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst index 33b0152bd7b49c..dda367607d0432 100644 --- a/llvm/docs/GlobalISel/GenericOpcode.rst +++ b/llvm/docs/GlobalISel/GenericOpcode.rst @@ -639,6 +639,11 @@ Concatenate two vectors and shuffle the elements according to the mask operand. The mask operand should be an IR Constant which exactly matches the corresponding mask for the IR shufflevector instruction. +G_SPLAT_VECTOR +^^^^^^^^^^^^^^^^ + +Create a vector where all elements are the scalar from the source operand. + Vector Reduction Operations --------------------------- diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 71573b6b3b8f77..7f661bb4a1df20 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -1327,6 +1327,7 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { ArrayRef Mask = Shuffle->getShuffleMask(); int NumSubElts, SubIndex; + // TODO: move more of this inside improveShuffleKindFromMask. if (Shuffle->changesLength()) { // Treat a 'subvector widening' as a free shuffle. if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding()) @@ -1355,7 +1356,35 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { DemandedDstElts, CostKind); } - return CostKind == TTI::TCK_RecipThroughput ? 
-1 : 1; + bool IsUnary = isa(Operands[1]); + NumSubElts = VecSrcTy->getElementCount().getKnownMinValue(); + SmallVector AdjustMask(Mask.begin(), Mask.end()); + + // Widening shuffle - widening the source(s) to the new length + // (treated as free - see above), and then perform the adjusted + // shuffle at that width. + if (Shuffle->increasesLength()) { + for (int &M : AdjustMask) + M = M >= NumSubElts ? (M + (Mask.size() - NumSubElts)) : M; + + return TargetTTI->getShuffleCost( + IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, VecTy, + AdjustMask, CostKind, 0, nullptr); + } + + // Narrowing shuffle - perform shuffle at original wider width and + // then extract the lower elements. + AdjustMask.append(NumSubElts - Mask.size(), PoisonMaskElem); + + InstructionCost ShuffleCost = TargetTTI->getShuffleCost( + IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, + VecSrcTy, AdjustMask, CostKind, 0, nullptr); + + SmallVector ExtractMask(Mask.size()); + std::iota(ExtractMask.begin(), ExtractMask.end(), 0); + return ShuffleCost + TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, + VecSrcTy, ExtractMask, + CostKind, 0, VecTy); } if (Shuffle->isIdentity()) diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h index f07f4c61f9d649..e5e1ade8b38b36 100644 --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -178,6 +178,9 @@ namespace llvm { /// UpgradeDebuginfo so it can generate broken bitcode. 
bool UpgradeDebugInfo; + bool SeenNewDbgInfoFormat = false; + bool SeenOldDbgInfoFormat = false; + std::string SourceFileName; public: @@ -573,6 +576,7 @@ namespace llvm { bool parseMDNodeTail(MDNode *&N); bool parseMDNodeVector(SmallVectorImpl &Elts); bool parseMetadataAttachment(unsigned &Kind, MDNode *&MD); + bool parseDebugRecord(DbgRecord *&DR, PerFunctionState &PFS); bool parseInstructionMetadata(Instruction &Inst); bool parseGlobalObjectMetadataAttachment(GlobalObject &GO); bool parseOptionalFunctionMetadata(Function &F); diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h index 3c34706ee03e82..5863a8d6e8ee84 100644 --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -36,6 +36,7 @@ enum Kind { exclaim, // ! bar, // | colon, // : + hash, // # kw_vscale, kw_x, @@ -479,6 +480,7 @@ enum Kind { DISPFlag, // DISPFlagFoo DwarfMacinfo, // DW_MACINFO_foo ChecksumKind, // CSK_foo + DbgRecordType, // dbg_foo // Type valued tokens (TyVal). Type, diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 1387a0a37561c4..6762b1b360d5e8 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -1063,8 +1063,7 @@ class MachineIRBuilder { /// Build and insert \p Res = G_BUILD_VECTOR with \p Src replicated to fill /// the number of elements - MachineInstrBuilder buildSplatVector(const DstOp &Res, - const SrcOp &Src); + MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src); /// Build and insert \p Res = G_BUILD_VECTOR_TRUNC \p Op0, ... /// @@ -1099,6 +1098,15 @@ class MachineIRBuilder { MachineInstrBuilder buildShuffleVector(const DstOp &Res, const SrcOp &Src1, const SrcOp &Src2, ArrayRef Mask); + /// Build and insert \p Res = G_SPLAT_VECTOR \p Val + /// + /// \pre setBasicBlock or setMI must have been called. 
+ /// \pre \p Res must be a generic virtual register with vector type. + /// \pre \p Val must be a generic virtual register with scalar type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildSplatVector(const DstOp &Res, const SrcOp &Val); + /// Build and insert \p Res = G_CONCAT_VECTORS \p Op0, ... /// /// G_CONCAT_VECTORS creates a vector from the concatenation of 2 or more diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h index bc35f2ab988ed2..c7c558850a2805 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h @@ -454,8 +454,8 @@ class CFIProgram { /// where a problem occurred in case an error is returned. Error parse(DWARFDataExtractor Data, uint64_t *Offset, uint64_t EndOffset); - void dump(raw_ostream &OS, DIDumpOptions DumpOpts, - unsigned IndentLevel = 1) const; + void dump(raw_ostream &OS, DIDumpOptions DumpOpts, unsigned IndentLevel, + std::optional InitialLocation) const; void addInstruction(const Instruction &I) { Instructions.push_back(I); } @@ -524,7 +524,7 @@ class CFIProgram { /// Print \p Opcode's operand number \p OperandIdx which has value \p Operand. void printOperand(raw_ostream &OS, DIDumpOptions DumpOpts, const Instruction &Instr, unsigned OperandIdx, - uint64_t Operand) const; + uint64_t Operand, std::optional &Address) const; }; /// An entry in either debug_frame or eh_frame. 
This entry can be a CIE or an diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Debugging/VTuneSupportPlugin.h b/llvm/include/llvm/ExecutionEngine/Orc/Debugging/VTuneSupportPlugin.h new file mode 100644 index 00000000000000..9deb38a1a71fb1 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/Debugging/VTuneSupportPlugin.h @@ -0,0 +1,66 @@ +//===--- VTuneSupportPlugin.h -- Support for VTune profiler ---*- C++ -*---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Handles support for registering code with Intel VTune's Amplifier JIT API. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_DEBUGGING_VTUNESUPPORT_H +#define LLVM_EXECUTIONENGINE_ORC_DEBUGGING_VTUNESUPPORT_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" +#include "llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h" +#include "llvm/ExecutionEngine/Orc/Shared/VTuneSharedStructs.h" + +namespace llvm { + +namespace orc { + +class VTuneSupportPlugin : public ObjectLinkingLayer::Plugin { +public: + VTuneSupportPlugin(ExecutorProcessControl &EPC, ExecutorAddr RegisterImplAddr, + ExecutorAddr UnregisterImplAddr, bool EmitDebugInfo) + : EPC(EPC), RegisterVTuneImplAddr(RegisterImplAddr), + UnregisterVTuneImplAddr(UnregisterImplAddr), + EmitDebugInfo(EmitDebugInfo) {} + + void modifyPassConfig(MaterializationResponsibility &MR, + jitlink::LinkGraph &G, + jitlink::PassConfiguration &Config) override; + + Error notifyEmitted(MaterializationResponsibility &MR) override; + Error notifyFailed(MaterializationResponsibility &MR) override; + Error 
notifyRemovingResources(JITDylib &JD, ResourceKey K) override; + void notifyTransferringResources(JITDylib &JD, ResourceKey DstKey, + ResourceKey SrcKey) override; + + static Expected> + Create(ExecutorProcessControl &EPC, JITDylib &JD, bool EmitDebugInfo, + bool TestMode = false); + +private: + ExecutorProcessControl &EPC; + ExecutorAddr RegisterVTuneImplAddr; + ExecutorAddr UnregisterVTuneImplAddr; + std::mutex PluginMutex; + uint64_t NextMethodID = 0; + DenseMap> + PendingMethodIDs; + DenseMap>> + LoadedMethodIDs; + bool EmitDebugInfo; +}; + +} // end namespace orc + +} // end namespace llvm + +#endif diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h b/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h index 76d16e63df2815..810a38f4a6acb8 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h @@ -311,6 +311,8 @@ class LLJITBuilderState { using PlatformSetupFunction = unique_function(LLJIT &J)>; + using NotifyCreatedFunction = std::function; + std::unique_ptr EPC; std::unique_ptr ES; std::optional JTMB; @@ -321,6 +323,7 @@ class LLJITBuilderState { CompileFunctionCreator CreateCompileFunction; unique_function PrePlatformSetup; PlatformSetupFunction SetUpPlatform; + NotifyCreatedFunction NotifyCreated; unsigned NumCompileThreads = 0; /// Called prior to JIT class construcion to fix up defaults. @@ -441,6 +444,16 @@ class LLJITBuilderSetters { return impl(); } + /// Set up a callback after successful construction of the JIT. + /// + /// This is useful to attach generators to JITDylibs or inject initial symbol + /// definitions. + SetterImpl & + setNotifyCreatedCallback(LLJITBuilderState::NotifyCreatedFunction Callback) { + impl().NotifyCreated = std::move(Callback); + return impl(); + } + /// Set the number of compile threads to use. 
/// /// If set to zero, compilation will be performed on the execution thread when @@ -474,6 +487,11 @@ class LLJITBuilderSetters { std::unique_ptr J(new JITType(impl(), Err)); if (Err) return std::move(Err); + + if (impl().NotifyCreated) + if (Error Err = impl().NotifyCreated(*J)) + return std::move(Err); + return std::move(J); } diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/VTuneSharedStructs.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/VTuneSharedStructs.h new file mode 100644 index 00000000000000..667d3446faff74 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/VTuneSharedStructs.h @@ -0,0 +1,102 @@ +//===-------------------- VTuneSharedStructs.h ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Structs and serialization to share VTune-related information +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_VTUNESHAREDSTRUCTS_H +#define LLVM_EXECUTIONENGINE_ORC_SHARED_VTUNESHAREDSTRUCTS_H + +namespace llvm { +namespace orc { + +using VTuneLineTable = std::vector>; + +// SI = String Index, 1-indexed into the VTuneMethodBatch::Strings table. +// SI == 0 means replace with nullptr. + +// MI = Method Index, 1-indexed into the VTuneMethodBatch::Methods table. +// MI == 0 means this is a parent method and was not inlined. 
+ +struct VTuneMethodInfo { + VTuneLineTable LineTable; + ExecutorAddr LoadAddr; + uint64_t LoadSize; + uint64_t MethodID; + uint32_t NameSI; + uint32_t ClassFileSI; + uint32_t SourceFileSI; + uint32_t ParentMI; +}; + +using VTuneMethodTable = std::vector; +using VTuneStringTable = std::vector; + +struct VTuneMethodBatch { + VTuneMethodTable Methods; + VTuneStringTable Strings; +}; + +using VTuneUnloadedMethodIDs = SmallVector>; + +namespace shared { + +using SPSVTuneLineTable = SPSSequence>; +using SPSVTuneMethodInfo = + SPSTuple; +using SPSVTuneMethodTable = SPSSequence; +using SPSVTuneStringTable = SPSSequence; +using SPSVTuneMethodBatch = SPSTuple; +using SPSVTuneUnloadedMethodIDs = SPSSequence>; + +template <> class SPSSerializationTraits { +public: + static size_t size(const VTuneMethodInfo &MI) { + return SPSVTuneMethodInfo::AsArgList::size( + MI.LineTable, MI.LoadAddr, MI.LoadSize, MI.MethodID, MI.NameSI, + MI.ClassFileSI, MI.SourceFileSI, MI.ParentMI); + } + + static bool deserialize(SPSInputBuffer &IB, VTuneMethodInfo &MI) { + return SPSVTuneMethodInfo::AsArgList::deserialize( + IB, MI.LineTable, MI.LoadAddr, MI.LoadSize, MI.MethodID, MI.NameSI, + MI.ClassFileSI, MI.SourceFileSI, MI.ParentMI); + } + + static bool serialize(SPSOutputBuffer &OB, const VTuneMethodInfo &MI) { + return SPSVTuneMethodInfo::AsArgList::serialize( + OB, MI.LineTable, MI.LoadAddr, MI.LoadSize, MI.MethodID, MI.NameSI, + MI.ClassFileSI, MI.SourceFileSI, MI.ParentMI); + } +}; + +template <> +class SPSSerializationTraits { +public: + static size_t size(const VTuneMethodBatch &MB) { + return SPSVTuneMethodBatch::AsArgList::size(MB.Methods, MB.Strings); + } + + static bool deserialize(SPSInputBuffer &IB, VTuneMethodBatch &MB) { + return SPSVTuneMethodBatch::AsArgList::deserialize(IB, MB.Methods, + MB.Strings); + } + + static bool serialize(SPSOutputBuffer &OB, const VTuneMethodBatch &MB) { + return SPSVTuneMethodBatch::AsArgList::serialize(OB, MB.Methods, + MB.Strings); + } +}; + +} // 
end namespace shared +} // end namespace orc +} // end namespace llvm + +#endif diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderVTune.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderVTune.h new file mode 100644 index 00000000000000..afb7df592faf27 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderVTune.h @@ -0,0 +1,31 @@ + +//===------ JITLoaderVTune.h --- Register profiler objects ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Register objects for access by profilers via the perf JIT interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_JITLOADERVTUNE_H +#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_JITLOADERVTUNE_H + +#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h" +#include + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_registerVTuneImpl(const char *Data, uint64_t Size); + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_unregisterVTuneImpl(const char *Data, uint64_t Size); + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_test_registerVTuneImpl(const char *Data, uint64_t Size); + +#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_JITLOADERVTUNE_H + + diff --git a/llvm/include/llvm/IR/DebugProgramInstruction.h b/llvm/include/llvm/IR/DebugProgramInstruction.h index cf30b4d0b0aaf0..a8faf415a3ea87 100644 --- a/llvm/include/llvm/IR/DebugProgramInstruction.h +++ b/llvm/include/llvm/IR/DebugProgramInstruction.h @@ -223,9 +223,19 @@ inline raw_ostream &operator<<(raw_ostream &OS, const DbgRecord &R) { class DPLabel : public DbgRecord { DbgRecordParamRef Label; + /// 
This constructor intentionally left private, so that it is only called via + /// "createUnresolvedDPLabel", which clearly expresses that it is for parsing + /// only. + DPLabel(MDNode *Label, MDNode *DL); + public: DPLabel(DILabel *Label, DebugLoc DL); + /// For use during parsing; creates a DPLabel from as-of-yet unresolved + /// MDNodes. Trying to access the resulting DPLabel's fields before they are + /// resolved, or if they resolve to the wrong type, will result in a crash. + static DPLabel *createUnresolvedDPLabel(MDNode *Label, MDNode *DL); + DPLabel *clone() const; void print(raw_ostream &O, bool IsForDebug = false) const; void print(raw_ostream &ROS, ModuleSlotTracker &MST, bool IsForDebug) const; @@ -286,6 +296,29 @@ class DPValue : public DbgRecord, protected DebugValueUser { DIAssignID *AssignID, Metadata *Address, DIExpression *AddressExpression, const DILocation *DI); +private: + /// Private constructor for creating new instances during parsing only. Only + /// called through `createUnresolvedDPValue` below, which makes clear that + /// this is used for parsing only, and will later return a subclass depending + /// on which Type is passed. + DPValue(LocationType Type, Metadata *Val, MDNode *Variable, + MDNode *Expression, MDNode *AssignID, Metadata *Address, + MDNode *AddressExpression, MDNode *DI); + +public: + /// Used to create DPValues during parsing, where some metadata references may + /// still be unresolved. Although for some fields a generic `Metadata*` + /// argument is accepted for forward type-references, the verifier and + /// accessors will reject incorrect types later on. The function is used for + /// all types of DPValues for simplicity while parsing, but asserts if any + /// necessary fields are empty or unused fields are not empty, i.e. if the + /// #dbg_assign fields are used for a non-dbg-assign type. 
+ static DPValue *createUnresolvedDPValue(LocationType Type, Metadata *Val, + MDNode *Variable, MDNode *Expression, + MDNode *AssignID, Metadata *Address, + MDNode *AddressExpression, + MDNode *DI); + static DPValue *createDPVAssign(Value *Val, DILocalVariable *Variable, DIExpression *Expression, DIAssignID *AssignID, Value *Address, diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def index 6aded2ceebe13a..94fba491148b2e 100644 --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -736,6 +736,9 @@ HANDLE_TARGET_OPCODE(G_EXTRACT_VECTOR_ELT) /// Generic shufflevector. HANDLE_TARGET_OPCODE(G_SHUFFLE_VECTOR) +/// Generic splatvector. +HANDLE_TARGET_OPCODE(G_SPLAT_VECTOR) + /// Generic count trailing zeroes. HANDLE_TARGET_OPCODE(G_CTTZ) diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td index d2036e478d18f2..d967885aa2d758 100644 --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -1450,6 +1450,13 @@ def G_SHUFFLE_VECTOR: GenericInstruction { let hasSideEffects = false; } +// Generic splatvector. 
+def G_SPLAT_VECTOR: GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type1:$val); + let hasSideEffects = false; +} + //------------------------------------------------------------------------------ // Vector reductions //------------------------------------------------------------------------------ diff --git a/llvm/include/llvm/TextAPI/Record.h b/llvm/include/llvm/TextAPI/Record.h index 867d6a23588326..98639b064eaadd 100644 --- a/llvm/include/llvm/TextAPI/Record.h +++ b/llvm/include/llvm/TextAPI/Record.h @@ -103,8 +103,8 @@ class GlobalRecord : public Record { }; GlobalRecord(StringRef Name, RecordLinkage Linkage, SymbolFlags Flags, - Kind GV) - : Record({Name, Linkage, Flags}), GV(GV) {} + Kind GV, bool Inlined) + : Record({Name, Linkage, Flags}), GV(GV), Inlined(Inlined) {} bool isFunction() const { return GV == Kind::Function; } bool isVariable() const { return GV == Kind::Variable; } @@ -112,9 +112,11 @@ class GlobalRecord : public Record { if (GV == Kind::Unknown) GV = V; } + bool isInlined() const { return Inlined; } private: Kind GV; + bool Inlined = false; }; // Define Objective-C instance variable records. diff --git a/llvm/include/llvm/TextAPI/RecordsSlice.h b/llvm/include/llvm/TextAPI/RecordsSlice.h index 57b23e5ea29e71..f934cf7607f1fd 100644 --- a/llvm/include/llvm/TextAPI/RecordsSlice.h +++ b/llvm/include/llvm/TextAPI/RecordsSlice.h @@ -53,10 +53,13 @@ class RecordsSlice { /// \param Linkage The linkage of symbol. /// \param GV The kind of global. /// \param Flags The flags that describe attributes of the symbol. + /// \param Inlined Whether declaration is inlined, only applicable to + /// functions. /// \return The non-owning pointer to added record in slice. GlobalRecord *addGlobal(StringRef Name, RecordLinkage Linkage, GlobalRecord::Kind GV, - SymbolFlags Flags = SymbolFlags::None); + SymbolFlags Flags = SymbolFlags::None, + bool Inlined = false); /// Add ObjC Class record. 
/// diff --git a/llvm/include/llvm/Transforms/Instrumentation/RemoveTrapsPass.h b/llvm/include/llvm/Transforms/Instrumentation/RemoveTrapsPass.h new file mode 100644 index 00000000000000..58f6bbcec5dc9d --- /dev/null +++ b/llvm/include/llvm/Transforms/Instrumentation/RemoveTrapsPass.h @@ -0,0 +1,32 @@ +//===- RemoveTrapsPass.h ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file provides the interface for the pass responsible for removing +/// expensive ubsan checks. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_UBSANOPTIMIZATIONPASS_H +#define LLVM_TRANSFORMS_INSTRUMENTATION_UBSANOPTIMIZATIONPASS_H + +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" + +namespace llvm { + +// This pass is responsible for removing optional traps, like llvm.ubsantrap +// from the hot code. +class RemoveTrapsPass : public PassInfoMixin { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +} // namespace llvm + +#endif diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 52ae9f034e5d34..6d0e79e11eed43 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -1476,6 +1476,12 @@ static void computeKnownBitsFromOperator(const Operator *I, if (RV->getType() == I->getType()) { computeKnownBits(RV, Known2, Depth + 1, Q); Known = Known.unionWith(Known2); + // If the function doesn't return properly for all input values + // (e.g. unreachable exits) then there might be conflicts between the + // argument value and the range metadata. 
Simply discard the known bits + // in case of conflicts. + if (Known.hasConflict()) + Known.resetAll(); } } if (const IntrinsicInst *II = dyn_cast(I)) { diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 5d8a50eee13068..02f64fcfac4f0c 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -438,9 +438,12 @@ lltok::Kind LLLexer::LexCaret() { /// Lex all tokens that start with a # character. /// AttrGrpID ::= #[0-9]+ +/// Hash ::= # lltok::Kind LLLexer::LexHash() { // Handle AttrGrpID: #[0-9]+ - return LexUIntID(lltok::AttrGrpID); + if (isdigit(static_cast(CurPtr[0]))) + return LexUIntID(lltok::AttrGrpID); + return lltok::hash; } /// Lex a label, integer type, keyword, or hexadecimal integer constant. @@ -923,6 +926,21 @@ lltok::Kind LLLexer::LexIdentifier() { #undef DWKEYWORD +// Keywords for debug record types. +#define DBGRECORDTYPEKEYWORD(STR) \ + do { \ + if (Keyword == "dbg_" #STR) { \ + StrVal = #STR; \ + return lltok::DbgRecordType; \ + } \ + } while (false) + + DBGRECORDTYPEKEYWORD(value); + DBGRECORDTYPEKEYWORD(declare); + DBGRECORDTYPEKEYWORD(assign); + DBGRECORDTYPEKEYWORD(label); +#undef DBGRECORDTYPEKEYWORD + if (Keyword.starts_with("DIFlag")) { StrVal.assign(Keyword.begin(), Keyword.end()); return lltok::DIFlag; diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index a91e2f690999e0..e140c94195205a 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -62,6 +62,8 @@ static cl::opt AllowIncompleteIR( "Allow incomplete IR on a best effort basis (references to unknown " "metadata will be dropped)")); +extern llvm::cl::opt UseNewDbgInfoFormat; + static std::string getTypeString(Type *T) { std::string Result; raw_string_ostream Tmp(Result); @@ -69,6 +71,15 @@ static std::string getTypeString(Type *T) { return Tmp.str(); } +// Currently, we should always process modules in the old debug info format by +// default regardless of the module's 
format in IR; convert it to the old format +// here. +bool finalizeDebugInfoFormat(Module *M) { + if (M) + M->setIsNewDbgInfoFormat(false); + return false; +} + /// Run: module ::= toplevelentity* bool LLParser::Run(bool UpgradeDebugInfo, DataLayoutCallbackTy DataLayoutCallback) { @@ -86,7 +97,7 @@ bool LLParser::Run(bool UpgradeDebugInfo, } return parseTopLevelEntities() || validateEndOfModule(UpgradeDebugInfo) || - validateEndOfIndex(); + validateEndOfIndex() || finalizeDebugInfoFormat(M); } bool LLParser::parseStandaloneConstantValue(Constant *&C, @@ -6041,6 +6052,17 @@ bool LLParser::parseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc, return false; } +bool isOldDbgFormatIntrinsic(StringRef Name) { + // Exit early for the common (non-debug-intrinsic) case. + // We can make this the only check when we begin supporting all "llvm.dbg" + // intrinsics in the new debug info format. + if (!Name.starts_with("llvm.dbg.")) + return false; + Intrinsic::ID FnID = Function::lookupIntrinsicID(Name); + return FnID == Intrinsic::dbg_declare || FnID == Intrinsic::dbg_value || + FnID == Intrinsic::dbg_assign; +} + /// FunctionHeader /// ::= OptionalLinkage OptionalPreemptionSpecifier OptionalVisibility /// OptionalCallingConv OptRetAttrs OptUnnamedAddr Type GlobalName @@ -6390,9 +6412,31 @@ bool LLParser::parseBasicBlock(PerFunctionState &PFS) { std::string NameStr; - // parse the instructions in this block until we get a terminator. + // Parse the instructions and debug values in this block until we get a + // terminator. Instruction *Inst; + auto DeleteDbgRecord = [](DbgRecord *DR) { DR->deleteRecord(); }; + using DbgRecordPtr = std::unique_ptr; + SmallVector TrailingDbgRecord; do { + // Handle debug records first - there should always be an instruction + // following the debug records, i.e. they cannot appear after the block + // terminator. 
+ while (Lex.getKind() == lltok::hash) { + if (SeenOldDbgInfoFormat) + return error(Lex.getLoc(), "debug record should not appear in a module " + "containing debug info intrinsics"); + SeenNewDbgInfoFormat = true; + Lex.Lex(); + if (!M->IsNewDbgInfoFormat) + M->convertToNewDbgValues(); + + DbgRecord *DR; + if (parseDebugRecord(DR, PFS)) + return true; + TrailingDbgRecord.emplace_back(DR, DeleteDbgRecord); + } + // This instruction may have three possibilities for a name: a) none // specified, b) name specified "%foo =", c) number specified: "%4 =". LocTy NameLoc = Lex.getLoc(); @@ -6437,11 +6481,121 @@ bool LLParser::parseBasicBlock(PerFunctionState &PFS) { // Set the name on the instruction. if (PFS.setInstName(NameID, NameStr, NameLoc, Inst)) return true; + + // Attach any preceding debug values to this instruction. + for (DbgRecordPtr &DR : TrailingDbgRecord) + BB->insertDPValueBefore(DR.release(), Inst->getIterator()); + TrailingDbgRecord.clear(); } while (!Inst->isTerminator()); + assert(TrailingDbgRecord.empty() && + "All debug values should have been attached to an instruction."); + return false; } +/// parseDebugRecord +/// ::= #dbg_label '(' MDNode ')' +/// ::= #dbg_type '(' Metadata ',' MDNode ',' Metadata ',' +/// (MDNode ',' Metadata ',' Metadata ',')? MDNode ')' +bool LLParser::parseDebugRecord(DbgRecord *&DR, PerFunctionState &PFS) { + using RecordKind = DbgRecord::Kind; + using LocType = DPValue::LocationType; + LocTy DPVLoc = Lex.getLoc(); + if (Lex.getKind() != lltok::DbgRecordType) + return error(DPVLoc, "expected debug record type here"); + RecordKind RecordType = StringSwitch(Lex.getStrVal()) + .Case("declare", RecordKind::ValueKind) + .Case("value", RecordKind::ValueKind) + .Case("assign", RecordKind::ValueKind) + .Case("label", RecordKind::LabelKind); + + // Parsing labels is trivial; parse here and early exit, otherwise go into the + // full DPValue processing stage. 
+ if (RecordType == RecordKind::LabelKind) { + Lex.Lex(); + if (parseToken(lltok::lparen, "Expected '(' here")) + return true; + MDNode *Label; + if (parseMDNode(Label)) + return true; + if (parseToken(lltok::comma, "Expected ',' here")) + return true; + MDNode *DbgLoc; + if (parseMDNode(DbgLoc)) + return true; + if (parseToken(lltok::rparen, "Expected ')' here")) + return true; + DR = DPLabel::createUnresolvedDPLabel(Label, DbgLoc); + return false; + } + + LocType ValueType = StringSwitch(Lex.getStrVal()) + .Case("declare", LocType::Declare) + .Case("value", LocType::Value) + .Case("assign", LocType::Assign); + + Lex.Lex(); + if (parseToken(lltok::lparen, "Expected '(' here")) + return true; + + // Parse Value field. + Metadata *ValLocMD; + if (parseMetadata(ValLocMD, &PFS)) + return true; + if (parseToken(lltok::comma, "Expected ',' here")) + return true; + + // Parse Variable field. + MDNode *Variable; + if (parseMDNode(Variable)) + return true; + if (parseToken(lltok::comma, "Expected ',' here")) + return true; + + // Parse Expression field. + MDNode *Expression; + if (parseMDNode(Expression)) + return true; + if (parseToken(lltok::comma, "Expected ',' here")) + return true; + + // Parse additional fields for #dbg_assign. + MDNode *AssignID = nullptr; + Metadata *AddressLocation = nullptr; + MDNode *AddressExpression = nullptr; + if (ValueType == LocType::Assign) { + // Parse DIAssignID. + if (parseMDNode(AssignID)) + return true; + if (parseToken(lltok::comma, "Expected ',' here")) + return true; + + // Parse address ValueAsMetadata. + if (parseMetadata(AddressLocation, &PFS)) + return true; + if (parseToken(lltok::comma, "Expected ',' here")) + return true; + + // Parse address DIExpression. + if (parseMDNode(AddressExpression)) + return true; + if (parseToken(lltok::comma, "Expected ',' here")) + return true; + } + + /// Parse DILocation. 
+ MDNode *DebugLoc; + if (parseMDNode(DebugLoc)) + return true; + + if (parseToken(lltok::rparen, "Expected ')' here")) + return true; + DR = DPValue::createUnresolvedDPValue(ValueType, ValLocMD, Variable, + Expression, AssignID, AddressLocation, + AddressExpression, DebugLoc); + return false; +} //===----------------------------------------------------------------------===// // Instruction Parsing. //===----------------------------------------------------------------------===// @@ -7669,6 +7823,16 @@ bool LLParser::parseCall(Instruction *&Inst, PerFunctionState &PFS, } CI->setFastMathFlags(FMF); } + + if (CalleeID.Kind == ValID::t_GlobalName && + isOldDbgFormatIntrinsic(CalleeID.StrVal)) { + if (SeenNewDbgInfoFormat) { + CI->deleteValue(); + return error(CallLoc, "llvm.dbg intrinsic should not appear in a module " + "using non-intrinsic debug info"); + } + SeenOldDbgInfoFormat = true; + } CI->setAttributes(PAL); ForwardRefAttrGroups[CI] = FwdRefAttrGrps; Inst = CI; diff --git a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp index 64e2d517e3b9c4..1869e0d41a51f6 100644 --- a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp @@ -309,7 +309,7 @@ MachineInstrBuilder CSEMIRBuilder::buildConstant(const DstOp &Res, // For vectors, CSE the element only for now. LLT Ty = Res.getLLTTy(*getMRI()); if (Ty.isVector()) - return buildSplatVector(Res, buildConstant(Ty.getElementType(), Val)); + return buildSplatBuildVector(Res, buildConstant(Ty.getElementType(), Val)); FoldingSetNodeID ID; GISelInstProfileBuilder ProfBuilder(ID, *getMRI()); @@ -336,7 +336,7 @@ MachineInstrBuilder CSEMIRBuilder::buildFConstant(const DstOp &Res, // For vectors, CSE the element only for now. 
LLT Ty = Res.getLLTTy(*getMRI()); if (Ty.isVector()) - return buildSplatVector(Res, buildFConstant(Ty.getElementType(), Val)); + return buildSplatBuildVector(Res, buildFConstant(Ty.getElementType(), Val)); FoldingSetNodeID ID; GISelInstProfileBuilder ProfBuilder(ID, *getMRI()); diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 2f18a64ca285bd..ab055b723dbb1f 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -6369,6 +6369,9 @@ bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select, if (CondTy != LLT::scalar(1)) return false; + if (TrueTy.isPointer()) + return false; + // Both are scalars. std::optional TrueOpt = getIConstantVRegValWithLookThrough(True, MRI); @@ -6713,6 +6716,9 @@ bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic, LLT CmpTy = MRI.getType(Cmp1->getReg(0)); LLT CmpOperandTy = MRI.getType(R1); + if (CmpOperandTy.isPointer()) + return false; + // We build ands, adds, and constants of type CmpOperandTy. // They must be legal to build. if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) || diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 7c986dbbc2c7c8..365870f540daeb 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1598,10 +1598,10 @@ bool IRTranslator::translateGetElementPtr(const User &U, // We might need to splat the base pointer into a vector if the offsets // are vectors. 
if (WantSplatVector && !PtrTy.isVector()) { - BaseReg = - MIRBuilder - .buildSplatVector(LLT::fixed_vector(VectorWidth, PtrTy), BaseReg) - .getReg(0); + BaseReg = MIRBuilder + .buildSplatBuildVector(LLT::fixed_vector(VectorWidth, PtrTy), + BaseReg) + .getReg(0); PtrIRTy = FixedVectorType::get(PtrIRTy, VectorWidth); PtrTy = getLLTForType(*PtrIRTy, *DL); OffsetIRTy = DL->getIndexType(PtrIRTy); @@ -1639,8 +1639,10 @@ bool IRTranslator::translateGetElementPtr(const User &U, LLT IdxTy = MRI->getType(IdxReg); if (IdxTy != OffsetTy) { if (!IdxTy.isVector() && WantSplatVector) { - IdxReg = MIRBuilder.buildSplatVector( - OffsetTy.changeElementType(IdxTy), IdxReg).getReg(0); + IdxReg = MIRBuilder + .buildSplatBuildVector(OffsetTy.changeElementType(IdxTy), + IdxReg) + .getReg(0); } IdxReg = MIRBuilder.buildSExtOrTrunc(OffsetTy, IdxReg).getReg(0); @@ -2997,6 +2999,19 @@ bool IRTranslator::translateExtractElement(const User &U, bool IRTranslator::translateShuffleVector(const User &U, MachineIRBuilder &MIRBuilder) { + // A ShuffleVector that operates on scalable vectors is a splat vector + // where the value of the splat vector is the 0th element of the first + // operand, since the index mask operand is the zeroinitializer (undef and + // poison are treated as zeroinitializer here). 
+ if (U.getOperand(0)->getType()->isScalableTy()) { + Value *Op0 = U.getOperand(0); + auto SplatVal = MIRBuilder.buildExtractVectorElementConstant( + LLT::scalar(Op0->getType()->getScalarSizeInBits()), + getOrCreateVReg(*Op0), 0); + MIRBuilder.buildSplatVector(getOrCreateVReg(U), SplatVal); + return true; + } + ArrayRef Mask; if (auto *SVI = dyn_cast(&U)) Mask = SVI->getShuffleMask(); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 1d016e684c48f6..2ec47f72aca39a 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -8391,7 +8391,7 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) { // For vector types create a G_BUILD_VECTOR. if (Ty.isVector()) - Val = MIB.buildSplatVector(Ty, Val).getReg(0); + Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0); return Val; } diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index cdd605a5221ad8..28e5bf85ca9ce6 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -326,7 +326,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res, auto Const = buildInstr(TargetOpcode::G_CONSTANT) .addDef(getMRI()->createGenericVirtualRegister(EltTy)) .addCImm(&Val); - return buildSplatVector(Res, Const); + return buildSplatBuildVector(Res, Const); } auto Const = buildInstr(TargetOpcode::G_CONSTANT); @@ -363,7 +363,7 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res, .addDef(getMRI()->createGenericVirtualRegister(EltTy)) .addFPImm(&Val); - return buildSplatVector(Res, Const); + return buildSplatBuildVector(Res, Const); } auto Const = buildInstr(TargetOpcode::G_FCONSTANT); @@ -711,8 +711,8 @@ MachineIRBuilder::buildBuildVectorConstant(const DstOp &Res, return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec); } 
-MachineInstrBuilder MachineIRBuilder::buildSplatVector(const DstOp &Res, - const SrcOp &Src) { +MachineInstrBuilder MachineIRBuilder::buildSplatBuildVector(const DstOp &Res, + const SrcOp &Src) { SmallVector TmpVec(Res.getLLTTy(*getMRI()).getNumElements(), Src); return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec); } @@ -742,6 +742,13 @@ MachineInstrBuilder MachineIRBuilder::buildShuffleSplat(const DstOp &Res, return buildShuffleVector(DstTy, InsElt, UndefVec, ZeroMask); } +MachineInstrBuilder MachineIRBuilder::buildSplatVector(const DstOp &Res, + const SrcOp &Src) { + assert(Src.getLLTTy(*getMRI()) == Res.getLLTTy(*getMRI()).getElementType() && + "Expected Src to match Dst elt ty"); + return buildInstr(TargetOpcode::G_SPLAT_VECTOR, Res, Src); +} + MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res, const SrcOp &Src1, const SrcOp &Src2, diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index 1d0757c5d7f5f5..ecb3bd33bdfd49 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -1640,6 +1640,24 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { break; } + + case TargetOpcode::G_SPLAT_VECTOR: { + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT SrcTy = MRI->getType(MI->getOperand(1).getReg()); + + if (!DstTy.isScalableVector()) + report("Destination type must be a scalable vector", MI); + + if (!SrcTy.isScalar()) + report("Source type must be a scalar", MI); + + if (DstTy.getScalarType() != SrcTy) + report("Element type of the destination must be the same type as the " + "source type", + MI); + + break; + } case TargetOpcode::G_DYN_STACKALLOC: { const MachineOperand &DstOp = MI->getOperand(0); const MachineOperand &AllocOp = MI->getOperand(1); diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp index 07fa92889d8853..61a668907be77d 100644 --- 
a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -6,10 +6,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/ReachingDefAnalysis.h" -#include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SmallSet.h" -#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/ReachingDefAnalysis.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/Debug.h" @@ -421,9 +421,9 @@ void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB, return; VisitedBBs.insert(MBB); - LiveRegUnits LiveRegs(*TRI); + LivePhysRegs LiveRegs(*TRI); LiveRegs.addLiveOuts(*MBB); - if (LiveRegs.available(PhysReg)) + if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg)) return; if (auto *Def = getLocalLiveOutMIDef(MBB, PhysReg)) @@ -469,11 +469,11 @@ MachineInstr *ReachingDefAnalysis::getMIOperand(MachineInstr *MI, bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, MCRegister PhysReg) const { MachineBasicBlock *MBB = MI->getParent(); - LiveRegUnits LiveRegs(*TRI); + LivePhysRegs LiveRegs(*TRI); LiveRegs.addLiveOuts(*MBB); // Yes if the register is live out of the basic block. 
- if (!LiveRegs.available(PhysReg)) + if (!LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg)) return true; // Walk backwards through the block to see if the register is live at some @@ -481,7 +481,7 @@ bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, for (MachineInstr &Last : instructionsWithoutDebug(MBB->instr_rbegin(), MBB->instr_rend())) { LiveRegs.stepBackward(Last); - if (!LiveRegs.available(PhysReg)) + if (!LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg)) return InstIds.lookup(&Last) > InstIds.lookup(MI); } return false; @@ -504,9 +504,9 @@ bool ReachingDefAnalysis::isRegDefinedAfter(MachineInstr *MI, bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, MCRegister PhysReg) const { MachineBasicBlock *MBB = MI->getParent(); - LiveRegUnits LiveRegs(*TRI); + LivePhysRegs LiveRegs(*TRI); LiveRegs.addLiveOuts(*MBB); - if (LiveRegs.available(PhysReg)) + if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg)) return false; auto Last = MBB->getLastNonDebugInstr(); @@ -525,9 +525,9 @@ bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, MachineInstr * ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB, MCRegister PhysReg) const { - LiveRegUnits LiveRegs(*TRI); + LivePhysRegs LiveRegs(*TRI); LiveRegs.addLiveOuts(*MBB); - if (LiveRegs.available(PhysReg)) + if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg)) return nullptr; auto Last = MBB->getLastNonDebugInstr(); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index cdcb7114640471..5476ef87971436 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5562,6 +5562,10 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; + // reassociate minmax + if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags())) + return RMINMAX; + // Is sign bits 
are zero, flip between UMIN/UMAX and SMIN/SMAX. // Only do this if the current op isn't legal and the flipped is. if (!TLI.isOperationLegal(Opcode, VT) && diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp index aae1668c1639c4..aff26824dda104 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp @@ -630,6 +630,8 @@ Error UnwindTable::parseRows(const CFIProgram &CFIP, UnwindRow &Row, if (LRLoc->getLocation() == UnwindLocation::Constant) { // Toggle the constant value from 0 to 1 or 1 to 0. LRLoc->setConstant(LRLoc->getConstant() ^ 1); + Row.getRegisterLocations().setRegisterLocation( + AArch64DWARFPAuthRaState, *LRLoc); } else { return createStringError( errc::invalid_argument, @@ -858,7 +860,8 @@ CFIProgram::getOperandTypes() { /// Print \p Opcode's operand number \p OperandIdx which has value \p Operand. void CFIProgram::printOperand(raw_ostream &OS, DIDumpOptions DumpOpts, const Instruction &Instr, unsigned OperandIdx, - uint64_t Operand) const { + uint64_t Operand, + std::optional &Address) const { assert(OperandIdx < MaxOperands); uint8_t Opcode = Instr.Opcode; OperandType Type = getOperandTypes()[Opcode][OperandIdx]; @@ -877,6 +880,7 @@ void CFIProgram::printOperand(raw_ostream &OS, DIDumpOptions DumpOpts, break; case OT_Address: OS << format(" %" PRIx64, Operand); + Address = Operand; break; case OT_Offset: // The offsets are all encoded in a unsigned form, but in practice @@ -888,7 +892,11 @@ void CFIProgram::printOperand(raw_ostream &OS, DIDumpOptions DumpOpts, if (CodeAlignmentFactor) OS << format(" %" PRId64, Operand * CodeAlignmentFactor); else - OS << format(" %" PRId64 "*code_alignment_factor" , Operand); + OS << format(" %" PRId64 "*code_alignment_factor", Operand); + if (Address && CodeAlignmentFactor) { + *Address += Operand * CodeAlignmentFactor; + OS << format(" to 0x%" PRIx64, *Address); + } break; case OT_SignedFactDataOffset: if 
(DataAlignmentFactor) @@ -918,13 +926,14 @@ void CFIProgram::printOperand(raw_ostream &OS, DIDumpOptions DumpOpts, } void CFIProgram::dump(raw_ostream &OS, DIDumpOptions DumpOpts, - unsigned IndentLevel) const { + unsigned IndentLevel, + std::optional Address) const { for (const auto &Instr : Instructions) { uint8_t Opcode = Instr.Opcode; OS.indent(2 * IndentLevel); OS << callFrameString(Opcode) << ":"; for (unsigned i = 0; i < Instr.Ops.size(); ++i) - printOperand(OS, DumpOpts, Instr, i, Instr.Ops[i]); + printOperand(OS, DumpOpts, Instr, i, Instr.Ops[i], Address); OS << '\n'; } } @@ -975,7 +984,7 @@ void CIE::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const { OS << "\n"; } OS << "\n"; - CFIs.dump(OS, DumpOpts); + CFIs.dump(OS, DumpOpts, /*IndentLevel=*/1, /*InitialLocation=*/{}); OS << "\n"; if (Expected RowsOrErr = UnwindTable::create(this)) @@ -1003,7 +1012,7 @@ void FDE::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const { OS << " Format: " << FormatString(IsDWARF64) << "\n"; if (LSDAAddress) OS << format(" LSDA Address: %016" PRIx64 "\n", *LSDAAddress); - CFIs.dump(OS, DumpOpts); + CFIs.dump(OS, DumpOpts, /*IndentLevel=*/1, InitialLocation); OS << "\n"; if (Expected RowsOrErr = UnwindTable::create(this)) diff --git a/llvm/lib/ExecutionEngine/Orc/Debugging/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/Debugging/CMakeLists.txt index 5bf23a7ec0bc89..ed52692662a8a3 100644 --- a/llvm/lib/ExecutionEngine/Orc/Debugging/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/Orc/Debugging/CMakeLists.txt @@ -8,6 +8,7 @@ add_llvm_component_library(LLVMOrcDebugging DebuggerSupportPlugin.cpp LLJITUtilsCBindings.cpp PerfSupportPlugin.cpp + VTuneSupportPlugin.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/ExecutionEngine/Orc/Debugging/ diff --git a/llvm/lib/ExecutionEngine/Orc/Debugging/VTuneSupportPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/Debugging/VTuneSupportPlugin.cpp new file mode 100644 index 00000000000000..30a9728c8c20e3 --- /dev/null +++ 
b/llvm/lib/ExecutionEngine/Orc/Debugging/VTuneSupportPlugin.cpp @@ -0,0 +1,185 @@ +//===--- VTuneSupportPlugin.cpp -- Support for VTune profiler --*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Handles support for registering code with VIntel Tune's Amplfiier JIT API. +// +//===----------------------------------------------------------------------===// +#include "llvm/ExecutionEngine/Orc/Debugging/VTuneSupportPlugin.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/ExecutionEngine/Orc/Debugging/DebugInfoSupport.h" + +using namespace llvm; +using namespace llvm::orc; +using namespace llvm::jitlink; + +static constexpr StringRef RegisterVTuneImplName = "llvm_orc_registerVTuneImpl"; +static constexpr StringRef UnregisterVTuneImplName = + "llvm_orc_unregisterVTuneImpl"; +static constexpr StringRef RegisterTestVTuneImplName = + "llvm_orc_test_registerVTuneImpl"; + +static VTuneMethodBatch getMethodBatch(LinkGraph &G, bool EmitDebugInfo) { + VTuneMethodBatch Batch; + std::unique_ptr DC; + StringMap> DCBacking; + if (EmitDebugInfo) { + auto EDC = createDWARFContext(G); + if (!EDC) { + EmitDebugInfo = false; + } else { + DC = std::move(EDC->first); + DCBacking = std::move(EDC->second); + } + } + + auto GetStringIdx = [Deduplicator = StringMap(), + &Batch](StringRef S) mutable { + auto I = Deduplicator.find(S); + if (I != Deduplicator.end()) + return I->second; + + Batch.Strings.push_back(S.str()); + return Deduplicator[S] = Batch.Strings.size(); + }; + for (auto Sym : G.defined_symbols()) { + if (!Sym->isCallable()) + continue; + + Batch.Methods.push_back(VTuneMethodInfo()); + auto &Method = Batch.Methods.back(); + Method.MethodID = 0; + Method.ParentMI = 0; + Method.LoadAddr = 
Sym->getAddress(); + Method.LoadSize = Sym->getSize(); + Method.NameSI = GetStringIdx(Sym->getName()); + Method.ClassFileSI = 0; + Method.SourceFileSI = 0; + + if (!EmitDebugInfo) + continue; + + auto &Section = Sym->getBlock().getSection(); + auto Addr = Sym->getAddress(); + auto SAddr = + object::SectionedAddress{Addr.getValue(), Section.getOrdinal()}; + DILineInfoTable LinesInfo = DC->getLineInfoForAddressRange( + SAddr, Sym->getSize(), + DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath); + Method.SourceFileSI = Batch.Strings.size(); + Batch.Strings.push_back(DC->getLineInfoForAddress(SAddr).FileName); + for (auto &LInfo : LinesInfo) { + Method.LineTable.push_back( + std::pair{/*unsigned*/ Sym->getOffset(), + /*DILineInfo*/ LInfo.second.Line}); + } + } + return Batch; +} + +void VTuneSupportPlugin::modifyPassConfig(MaterializationResponsibility &MR, + LinkGraph &G, + PassConfiguration &Config) { + Config.PostFixupPasses.push_back([this, MR = &MR](LinkGraph &G) { + // the object file is generated but not linked yet + auto Batch = getMethodBatch(G, EmitDebugInfo); + if (Batch.Methods.empty()) { + return Error::success(); + } + { + std::lock_guard Lock(PluginMutex); + uint64_t Allocated = Batch.Methods.size(); + uint64_t Start = NextMethodID; + NextMethodID += Allocated; + for (size_t i = Start; i < NextMethodID; ++i) { + Batch.Methods[i - Start].MethodID = i; + } + this->PendingMethodIDs[MR] = {Start, Allocated}; + } + G.allocActions().push_back( + {cantFail(shared::WrapperFunctionCall::Create< + shared::SPSArgList>( + RegisterVTuneImplAddr, Batch)), + {}}); + return Error::success(); + }); +} + +Error VTuneSupportPlugin::notifyEmitted(MaterializationResponsibility &MR) { + if (auto Err = MR.withResourceKeyDo([this, MR = &MR](ResourceKey K) { + std::lock_guard Lock(PluginMutex); + auto I = PendingMethodIDs.find(MR); + if (I == PendingMethodIDs.end()) + return; + + LoadedMethodIDs[K].push_back(I->second); + PendingMethodIDs.erase(I); + })) { + return Err; + 
} + return Error::success(); +} + +Error VTuneSupportPlugin::notifyFailed(MaterializationResponsibility &MR) { + std::lock_guard Lock(PluginMutex); + PendingMethodIDs.erase(&MR); + return Error::success(); +} + +Error VTuneSupportPlugin::notifyRemovingResources(JITDylib &JD, ResourceKey K) { + // Unregistration not required if not provided + if (!UnregisterVTuneImplAddr) { + return Error::success(); + } + VTuneUnloadedMethodIDs UnloadedIDs; + { + std::lock_guard Lock(PluginMutex); + auto I = LoadedMethodIDs.find(K); + if (I == LoadedMethodIDs.end()) + return Error::success(); + + UnloadedIDs = std::move(I->second); + LoadedMethodIDs.erase(I); + } + if (auto Err = EPC.callSPSWrapper( + UnregisterVTuneImplAddr, UnloadedIDs)) + return Err; + + return Error::success(); +} + +void VTuneSupportPlugin::notifyTransferringResources(JITDylib &JD, + ResourceKey DstKey, + ResourceKey SrcKey) { + std::lock_guard Lock(PluginMutex); + auto I = LoadedMethodIDs.find(SrcKey); + if (I == LoadedMethodIDs.end()) + return; + + auto &Dest = LoadedMethodIDs[DstKey]; + Dest.insert(Dest.end(), I->second.begin(), I->second.end()); + LoadedMethodIDs.erase(SrcKey); +} + +Expected> +VTuneSupportPlugin::Create(ExecutorProcessControl &EPC, JITDylib &JD, + bool EmitDebugInfo, bool TestMode) { + auto &ES = EPC.getExecutionSession(); + auto RegisterImplName = + ES.intern(TestMode ? 
RegisterTestVTuneImplName : RegisterVTuneImplName); + auto UnregisterImplName = ES.intern(UnregisterVTuneImplName); + SymbolLookupSet SLS{RegisterImplName, UnregisterImplName}; + auto Res = ES.lookup(makeJITDylibSearchOrder({&JD}), std::move(SLS)); + if (!Res) + return Res.takeError(); + ExecutorAddr RegisterImplAddr( + Res->find(RegisterImplName)->second.getAddress()); + ExecutorAddr UnregisterImplAddr( + Res->find(UnregisterImplName)->second.getAddress()); + return std::make_unique( + EPC, RegisterImplAddr, UnregisterImplAddr, EmitDebugInfo); +} diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp index fffa95ee72b719..131728fd7e7e4c 100644 --- a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp @@ -455,9 +455,10 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { ProcessSymbol(Sym); // Attempt to claim all weak defs that we're not already responsible for. - // This cannot fail -- any clashes will just result in rejection of our - // claim, at which point we'll externalize that symbol. - cantFail(MR->defineMaterializing(std::move(NewSymbolsToClaim))); + // This may fail if the resource tracker has become defunct, but should + // always succeed otherwise. + if (auto Err = MR->defineMaterializing(std::move(NewSymbolsToClaim))) + return Err; // Walk the list of symbols that we just tried to claim. Symbols that we're // responsible for are marked live. 
Symbols that we're not responsible for @@ -719,14 +720,22 @@ Error ObjectLinkingLayer::notifyEmitted(MaterializationResponsibility &MR, for (auto &P : Plugins) Err = joinErrors(std::move(Err), P->notifyEmitted(MR)); - if (Err) + if (Err) { + if (FA) + Err = joinErrors(std::move(Err), MemMgr.deallocate(std::move(FA))); return Err; + } if (!FA) return Error::success(); - return MR.withResourceKeyDo( + Err = MR.withResourceKeyDo( [&](ResourceKey K) { Allocs[K].push_back(std::move(FA)); }); + + if (Err) + Err = joinErrors(std::move(Err), MemMgr.deallocate(std::move(FA))); + + return Err; } Error ObjectLinkingLayer::handleRemoveResources(JITDylib &JD, ResourceKey K) { diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt index f2005dc1775e3c..3d1dfe758c79dd 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt @@ -2,10 +2,18 @@ if( CMAKE_HOST_UNIX AND HAVE_LIBRT ) set(rt_lib rt) endif() +set(intel_jit_profiling ) +if( LLVM_USE_INTEL_JITEVENTS ) + set(intel_jit_profiling IntelJITProfiling) + include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../IntelJITProfiling) + include_directories(${PROJECT_BINARY_DIR}/ittapi/include/ ) +endif() + add_llvm_component_library(LLVMOrcTargetProcess ExecutorSharedMemoryMapperService.cpp JITLoaderGDB.cpp JITLoaderPerf.cpp + JITLoaderVTune.cpp OrcRTBootstrap.cpp RegisterEHFrames.cpp SimpleExecutorDylibManager.cpp @@ -21,6 +29,7 @@ add_llvm_component_library(LLVMOrcTargetProcess ${rt_lib} LINK_COMPONENTS + ${intel_jit_profiling} OrcShared Support TargetParser diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderVTune.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderVTune.cpp new file mode 100644 index 00000000000000..d346214d3ae291 --- /dev/null +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderVTune.cpp @@ -0,0 +1,224 @@ +//===------- JITLoaderVTune.cpp - 
Register profiler objects -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Register objects for access by profilers via the VTune JIT interface. +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderVTune.h" +#include "llvm/ExecutionEngine/Orc/Shared/VTuneSharedStructs.h" +#include + +#if LLVM_USE_INTEL_JITEVENTS +#include "IntelJITEventsWrapper.h" +#include "ittnotify.h" + +using namespace llvm; +using namespace llvm::orc; + +namespace { +class JITEventWrapper { +public: + static std::unique_ptr Wrapper; +}; +std::unique_ptr JITEventWrapper::Wrapper; +} // namespace + +static Error registerJITLoaderVTuneRegisterImpl(const VTuneMethodBatch &MB) { + const size_t StringsSize = MB.Strings.size(); + + for (const auto &MethodInfo : MB.Methods) { + iJIT_Method_Load MethodMessage; + memset(&MethodMessage, 0, sizeof(iJIT_Method_Load)); + + MethodMessage.method_id = MethodInfo.MethodID; + if (MethodInfo.NameSI != 0 && MethodInfo.NameSI < StringsSize) { + MethodMessage.method_name = + const_cast(MB.Strings.at(MethodInfo.NameSI).data()); + } else { + MethodMessage.method_name = NULL; + } + if (MethodInfo.ClassFileSI != 0 && MethodInfo.ClassFileSI < StringsSize) { + MethodMessage.class_file_name = + const_cast(MB.Strings.at(MethodInfo.ClassFileSI).data()); + } else { + MethodMessage.class_file_name = NULL; + } + if (MethodInfo.SourceFileSI != 0 && MethodInfo.SourceFileSI < StringsSize) { + MethodMessage.source_file_name = + const_cast(MB.Strings.at(MethodInfo.SourceFileSI).data()); + } else { + MethodMessage.source_file_name = NULL; + } + + MethodMessage.method_load_address = MethodInfo.LoadAddr.toPtr(); + 
MethodMessage.method_size = MethodInfo.LoadSize; + MethodMessage.class_id = 0; + + MethodMessage.user_data = NULL; + MethodMessage.user_data_size = 0; + MethodMessage.env = iJDE_JittingAPI; + + std::vector LineInfo; + for (const auto &LInfo : MethodInfo.LineTable) { + LineInfo.push_back(LineNumberInfo{LInfo.first, LInfo.second}); + } + + if (LineInfo.size() == 0) { + MethodMessage.line_number_size = 0; + MethodMessage.line_number_table = 0; + } else { + MethodMessage.line_number_size = LineInfo.size(); + MethodMessage.line_number_table = &*LineInfo.begin(); + } + JITEventWrapper::Wrapper->iJIT_NotifyEvent( + iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, &MethodMessage); + } + + return Error::success(); +} + +static void registerJITLoaderVTuneUnregisterImpl( + const std::vector> &UM) { + for (auto &Method : UM) { + JITEventWrapper::Wrapper->iJIT_NotifyEvent( + iJVM_EVENT_TYPE_METHOD_UNLOAD_START, + const_cast(&Method.first)); + } +} + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_registerVTuneImpl(const char *Data, uint64_t Size) { + using namespace orc::shared; + if (!JITEventWrapper::Wrapper) + JITEventWrapper::Wrapper.reset(new IntelJITEventsWrapper); + + return WrapperFunction::handle( + Data, Size, registerJITLoaderVTuneRegisterImpl) + .release(); +} + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_unregisterVTuneImpl(const char *Data, uint64_t Size) { + using namespace orc::shared; + return WrapperFunction::handle( + Data, Size, registerJITLoaderVTuneUnregisterImpl) + .release(); +} + +// For Testing: following code comes from llvm-jitlistener.cpp in llvm tools +namespace { +using SourceLocations = std::vector>; +using NativeCodeMap = std::map; +NativeCodeMap ReportedDebugFuncs; +} // namespace + +static int NotifyEvent(iJIT_JVM_EVENT EventType, void *EventSpecificData) { + switch (EventType) { + case iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED: { + if (!EventSpecificData) { + errs() << "Error: The JIT event listener did not provide a event 
data."; + return -1; + } + iJIT_Method_Load *msg = static_cast(EventSpecificData); + + ReportedDebugFuncs[msg->method_id]; + + outs() << "Method load [" << msg->method_id << "]: " << msg->method_name + << ", Size = " << msg->method_size << "\n"; + + for (unsigned int i = 0; i < msg->line_number_size; ++i) { + if (!msg->line_number_table) { + errs() << "A function with a non-zero line count had no line table."; + return -1; + } + std::pair loc( + std::string(msg->source_file_name), + msg->line_number_table[i].LineNumber); + ReportedDebugFuncs[msg->method_id].push_back(loc); + outs() << " Line info @ " << msg->line_number_table[i].Offset << ": " + << msg->source_file_name << ", line " + << msg->line_number_table[i].LineNumber << "\n"; + } + outs() << "\n"; + } break; + case iJVM_EVENT_TYPE_METHOD_UNLOAD_START: { + if (!EventSpecificData) { + errs() << "Error: The JIT event listener did not provide a event data."; + return -1; + } + unsigned int UnloadId = + *reinterpret_cast(EventSpecificData); + assert(1 == ReportedDebugFuncs.erase(UnloadId)); + outs() << "Method unload [" << UnloadId << "]\n"; + } break; + default: + break; + } + return 0; +} + +static iJIT_IsProfilingActiveFlags IsProfilingActive(void) { + // for testing, pretend we have an Intel Parallel Amplifier XE 2011 + // instance attached + return iJIT_SAMPLING_ON; +} + +static unsigned int GetNewMethodID(void) { + static unsigned int id = 0; + return ++id; +} + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_test_registerVTuneImpl(const char *Data, uint64_t Size) { + using namespace orc::shared; + JITEventWrapper::Wrapper.reset(new IntelJITEventsWrapper( + NotifyEvent, NULL, NULL, IsProfilingActive, 0, 0, GetNewMethodID)); + return WrapperFunction::handle( + Data, Size, registerJITLoaderVTuneRegisterImpl) + .release(); +} + +#else + +using namespace llvm; +using namespace llvm::orc; + +static Error unsupportedBatch(const VTuneMethodBatch &MB) { + return llvm::make_error("unsupported for 
Intel VTune", + inconvertibleErrorCode()); +} + +static void unsuppported(const std::vector> &UM) { + +} + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_registerVTuneImpl(const char *Data, uint64_t Size) { + using namespace orc::shared; + return WrapperFunction::handle( + Data, Size, unsupportedBatch) + .release(); +} + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_unregisterVTuneImpl(const char *Data, uint64_t Size) { + using namespace orc::shared; + return WrapperFunction::handle(Data, Size, + unsuppported) + .release(); +} + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_test_registerVTuneImpl(const char *Data, uint64_t Size) { + using namespace orc::shared; + return WrapperFunction::handle( + Data, Size, unsupportedBatch) + .release(); +} + +#endif diff --git a/llvm/lib/IR/DebugProgramInstruction.cpp b/llvm/lib/IR/DebugProgramInstruction.cpp index a8d64024e1797b..5ff1e8c19db68b 100644 --- a/llvm/lib/IR/DebugProgramInstruction.cpp +++ b/llvm/lib/IR/DebugProgramInstruction.cpp @@ -138,11 +138,38 @@ DbgRecord::createDebugIntrinsic(Module *M, Instruction *InsertBefore) const { llvm_unreachable("unsupported DbgRecord kind"); } +DPLabel::DPLabel(MDNode *Label, MDNode *DL) + : DbgRecord(LabelKind, DebugLoc(DL)), Label(Label) { + assert(Label && "Unexpected nullptr"); + assert((isa(Label) || Label->isTemporary()) && + "Label type must be or resolve to a DILabel"); +} DPLabel::DPLabel(DILabel *Label, DebugLoc DL) : DbgRecord(LabelKind, DL), Label(Label) { assert(Label && "Unexpected nullptr"); } +DPLabel *DPLabel::createUnresolvedDPLabel(MDNode *Label, MDNode *DL) { + return new DPLabel(Label, DL); +} + +DPValue::DPValue(DPValue::LocationType Type, Metadata *Val, MDNode *Variable, + MDNode *Expression, MDNode *AssignID, Metadata *Address, + MDNode *AddressExpression, MDNode *DI) + : DbgRecord(ValueKind, DebugLoc(DI)), + DebugValueUser({Val, Address, AssignID}), Type(Type), Variable(Variable), + Expression(Expression), 
AddressExpression(AddressExpression) {} + +DPValue *DPValue::createUnresolvedDPValue(DPValue::LocationType Type, + Metadata *Val, MDNode *Variable, + MDNode *Expression, MDNode *AssignID, + Metadata *Address, + MDNode *AddressExpression, + MDNode *DI) { + return new DPValue(Type, Val, Variable, Expression, AssignID, Address, + AddressExpression, DI); +} + DPValue *DPValue::createDPValue(Value *Location, DILocalVariable *DV, DIExpression *Expr, const DILocation *DI) { return new DPValue(ValueAsMetadata::get(Location), DV, Expr, DI, diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 74c7354e7bf1bb..fd5f7d57c258d4 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -6291,7 +6291,7 @@ void Verifier::visit(DPValue &DPV) { Var->getRawType()); auto *DLNode = DPV.getDebugLoc().getAsMDNode(); - CheckDI(isa_and_nonnull(DLNode), "invalid #dbg record location", + CheckDI(isa_and_nonnull(DLNode), "invalid #dbg record DILocation", &DPV, DLNode); DILocation *Loc = DPV.getDebugLoc(); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index e0bc57f8bf72f0..4d1eb10d2d41c6 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -177,6 +177,7 @@ #include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h" #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" #include "llvm/Transforms/Instrumentation/PoisonChecking.h" +#include "llvm/Transforms/Instrumentation/RemoveTrapsPass.h" #include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h" #include "llvm/Transforms/Instrumentation/SanitizerCoverage.h" #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index a345e8d72d9399..41f16d0915bf23 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -422,6 +422,7 @@ FUNCTION_PASS("print", UniformityInfoPrinterPass(dbgs())) FUNCTION_PASS("reassociate", 
ReassociatePass()) FUNCTION_PASS("redundant-dbg-inst-elim", RedundantDbgInstEliminationPass()) FUNCTION_PASS("reg2mem", RegToMemPass()) +FUNCTION_PASS("remove-traps", RemoveTrapsPass()) FUNCTION_PASS("safe-stack", SafeStackPass(TM)) FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass()) FUNCTION_PASS("scalarizer", ScalarizerPass()) diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 11cf42bbc80e85..5cc612e89162af 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -197,7 +197,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LivePhysRegs.h" -#include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -989,7 +988,7 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero, } } -static void getLiveRegsForEntryMBB(LiveRegUnits &LiveRegs, +static void getLiveRegsForEntryMBB(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB) { const MachineFunction *MF = MBB.getParent(); LiveRegs.addLiveIns(MBB); @@ -1019,15 +1018,16 @@ static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) { const AArch64Subtarget &Subtarget = MF->getSubtarget(); const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo(); - LiveRegUnits LiveRegs(TRI); + LivePhysRegs LiveRegs(TRI); getLiveRegsForEntryMBB(LiveRegs, *MBB); // Prefer X9 since it was historically used for the prologue scratch reg. 
- if (LiveRegs.available(AArch64::X9)) + const MachineRegisterInfo &MRI = MF->getRegInfo(); + if (LiveRegs.available(MRI, AArch64::X9)) return AArch64::X9; - for (Register Reg : AArch64::GPR64RegClass) { - if (LiveRegs.available(Reg)) + for (unsigned Reg : AArch64::GPR64RegClass) { + if (LiveRegs.available(MRI, Reg)) return Reg; } return AArch64::NoRegister; @@ -1044,11 +1044,13 @@ bool AArch64FrameLowering::canUseAsPrologue( if (AFI->hasSwiftAsyncContext()) { const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo(); - LiveRegUnits LiveRegs(TRI); + const MachineRegisterInfo &MRI = MF->getRegInfo(); + LivePhysRegs LiveRegs(TRI); getLiveRegsForEntryMBB(LiveRegs, MBB); // The StoreSwiftAsyncContext clobbers X16 and X17. Make sure they are // available. - if (!LiveRegs.available(AArch64::X16) || !LiveRegs.available(AArch64::X17)) + if (!LiveRegs.available(MRI, AArch64::X16) || + !LiveRegs.available(MRI, AArch64::X17)) return false; } @@ -1601,7 +1603,7 @@ static void emitDefineCFAWithFP(MachineFunction &MF, MachineBasicBlock &MBB, /// Collect live registers from the end of \p MI's parent up to (including) \p /// MI in \p LiveRegs. static void getLivePhysRegsUpTo(MachineInstr &MI, const TargetRegisterInfo &TRI, - LiveRegUnits &LiveRegs) { + LivePhysRegs &LiveRegs) { MachineBasicBlock &MBB = *MI.getParent(); LiveRegs.addLiveOuts(MBB); @@ -1639,7 +1641,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, NonFrameStart->getFlag(MachineInstr::FrameSetup)) ++NonFrameStart; - LiveRegUnits LiveRegs(*TRI); + LivePhysRegs LiveRegs(*TRI); if (NonFrameStart != MBB.end()) { getLivePhysRegsUpTo(*NonFrameStart, *TRI, LiveRegs); // Ignore registers used for stack management for now. 
@@ -1657,7 +1659,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, make_range(MBB.instr_begin(), NonFrameStart->getIterator())) { for (auto &Op : MI.operands()) if (Op.isReg() && Op.isDef()) - assert(LiveRegs.available(Op.getReg()) && + assert(!LiveRegs.contains(Op.getReg()) && "live register clobbered by inserted prologue instructions"); } }); @@ -4012,7 +4014,7 @@ MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II, // FIXME : This approach of bailing out from merge is conservative in // some ways like even if stg loops are not present after merge the // insert list, this liveness check is done (which is not needed). - LiveRegUnits LiveRegs(*(MBB->getParent()->getSubtarget().getRegisterInfo())); + LivePhysRegs LiveRegs(*(MBB->getParent()->getSubtarget().getRegisterInfo())); LiveRegs.addLiveOuts(*MBB); for (auto I = MBB->rbegin();; ++I) { MachineInstr &MI = *I; @@ -4021,7 +4023,7 @@ MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II, LiveRegs.stepBackward(*I); } InsertI++; - if (!LiveRegs.available(AArch64::NZCV)) + if (LiveRegs.contains(AArch64::NZCV)) return InsertI; llvm::stable_sort(Instrs, diff --git a/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp b/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp index ce3bc0b1837558..41bbc003fd9bf7 100644 --- a/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp +++ b/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp @@ -220,7 +220,20 @@ void SLSBLRThunkInserter::populateThunk(MachineFunction &MF) { const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); - assert (MF.size() == 1); + + // Depending on whether this pass is in the same FunctionPassManager as the + // IR->MIR conversion, the thunk may be completely empty, or contain a single + // basic block with a single return instruction. Normalise it to contain a + // single empty basic block. 
+ if (MF.size() == 1) { + assert(MF.front().size() == 1); + assert(MF.front().front().getOpcode() == AArch64::RET); + MF.front().erase(MF.front().begin()); + } else { + assert(MF.size() == 0); + MF.push_back(MF.CreateMachineBasicBlock()); + } + MachineBasicBlock *Entry = &MF.front(); Entry->clear(); diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td index e50a401f8b2aec..c7dfd64b2fb24e 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td @@ -1372,18 +1372,18 @@ def : InstRW<[V1Write_3c_2M0], (instregex "^PTRUES_[BHSD]$")>; // Arithmetic, basic // Logical def : InstRW<[V1Write_2c_1V01], - (instregex "^(ABS|CNOT|NEG)_ZPmZ_[BHSD]$", - "^(ADD|SUB)_Z(I|P[mZ]Z|ZZ)_[BHSD]$", + (instregex "^(ABS|CNOT|NEG)_ZPmZ_[BHSD]", + "^(ADD|SUB)_Z(I|P[mZ]Z|ZZ)_[BHSD]", "^ADR_[SU]XTW_ZZZ_D_[0123]$", "^ADR_LSL_ZZZ_[SD]_[0123]$", - "^[SU]ABD_ZP[mZ]Z_[BHSD]$", - "^[SU](MAX|MIN)_Z(I|P[mZ]Z)_[BHSD]$", + "^[SU]ABD_ZP[mZ]Z_[BHSD]", + "^[SU](MAX|MIN)_Z(I|P[mZ]Z)_[BHSD]", "^[SU]Q(ADD|SUB)_Z(I|ZZ)_[BHSD]$", - "^SUBR_Z(I|P[mZ]Z)_[BHSD]$", + "^SUBR_Z(I|P[mZ]Z)_[BHSD]", "^(AND|EOR|ORR)_ZI$", - "^(AND|BIC|EOR|EOR(BT|TB)?|ORR)_ZZZ$", + "^(AND|BIC|EOR|EOR(BT|TB)?|ORR)_ZP?ZZ", "^EOR(BT|TB)_ZZZ_[BHSD]$", - "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]$")>; + "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]")>; // Arithmetic, shift def : InstRW<[V1Write_2c_1V1], @@ -1394,10 +1394,10 @@ def : InstRW<[V1Write_2c_1V1], "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>; // Arithmetic, shift right for divide -def : InstRW<[V1Write_4c_1V1], (instregex "^ASRD_ZP[mZ]I_[BHSD]$")>; +def : InstRW<[V1Write_4c_1V1], (instregex "^ASRD_(ZPmI|ZPZI)_[BHSD]")>; // Count/reverse bits -def : InstRW<[V1Write_2c_1V01], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]$")>; +def : InstRW<[V1Write_2c_1V01], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]")>; // Broadcast logical bitmask immediate to vector def : InstRW<[V1Write_2c_1V01], (instrs 
DUPM_ZI)>; @@ -1420,10 +1420,10 @@ def : InstRW<[V1Write_3c_1V0], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]", "^[SU]CVTF_ZPmZ_StoD")>; // Convert to floating point, 32b to single or half -def : InstRW<[V1Write_4c_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]$")>; +def : InstRW<[V1Write_4c_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>; // Convert to floating point, 16b to half -def : InstRW<[V1Write_6c_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH$")>; +def : InstRW<[V1Write_6c_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH")>; // Copy, scalar def : InstRW<[V1Write_5c_1M0_1V01], (instregex "^CPY_ZPmR_[BHSD]$")>; @@ -1432,10 +1432,12 @@ def : InstRW<[V1Write_5c_1M0_1V01], (instregex "^CPY_ZPmR_[BHSD]$")>; def : InstRW<[V1Write_2c_1V01], (instregex "^CPY_ZP([mz]I|mV)_[BHSD]$")>; // Divides, 32 bit -def : InstRW<[V1Write_12c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_S$")>; +def : InstRW<[V1Write_12c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_S", + "^[SU]DIV_ZPZZ_S")>; // Divides, 64 bit -def : InstRW<[V1Write_20c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_D$")>; +def : InstRW<[V1Write_20c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_D", + "^[SU]DIV_ZPZZ_D")>; // Dot product, 8 bit def : InstRW<[V1Write_3c_1V01], (instregex "^[SU]DOT_ZZZI?_S$")>; @@ -1454,9 +1456,9 @@ def : InstRW<[V1Write_2c_1V01], (instregex "^DUP_ZI_[BHSD]$", def : InstRW<[V1Write_3c_1M0], (instregex "^DUP_ZR_[BHSD]$")>; // Extend, sign or zero -def : InstRW<[V1Write_2c_1V1], (instregex "^[SU]XTB_ZPmZ_[HSD]$", - "^[SU]XTH_ZPmZ_[SD]$", - "^[SU]XTW_ZPmZ_[D]$")>; +def : InstRW<[V1Write_2c_1V1], (instregex "^[SU]XTB_ZPmZ_[HSD]", + "^[SU]XTH_ZPmZ_[SD]", + "^[SU]XTW_ZPmZ_[D]")>; // Extract def : InstRW<[V1Write_2c_1V01], (instrs EXT_ZZI)>; @@ -1489,18 +1491,22 @@ def : InstRW<[V1Write_2c_1V01], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$", def : InstRW<[V1Write_3c_1V01], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>; // Multiply, B, H, S element size -def : InstRW<[V1Write_4c_1V0], (instregex "^MUL_(ZI|ZPmZ)_[BHS]$", - "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]$")>; +def : 
InstRW<[V1Write_4c_1V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]", + "^MUL_ZPZZ_[BHS]", + "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]", + "^[SU]MULH_ZPZZ_[BHS]")>; // Multiply, D element size // Multiply accumulate, D element size -def : InstRW<[V1Write_5c_2V0], (instregex "^MUL_(ZI|ZPmZ)_D$", - "^[SU]MULH_ZPmZ_D$", - "^(MLA|MLS|MAD|MSB)_ZPmZZ_D$")>; +def : InstRW<[V1Write_5c_2V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D", + "^MUL_ZPZZ_D", + "^[SU]MULH_(ZPmZ|ZZZ)_D", + "^[SU]MULH_ZPZZ_D", + "^(MLA|MLS|MAD|MSB)_(ZPmZZ|ZPZZZ)_D")>; // Multiply accumulate, B, H, S element size // NOTE: This is not specified in the SOG. -def : InstRW<[V1Write_4c_1V0], (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>; +def : InstRW<[V1Write_4c_1V0], (instregex "^(ML[AS]|MAD|MSB)_(ZPmZZ|ZPZZZ)_[BHS]")>; // Predicate counting vector def : InstRW<[V1Write_2c_1V0], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI$")>; @@ -1547,12 +1553,17 @@ def : InstRW<[V1Write_2c_1V01], (instregex "^SEL_ZPZZ_[BHSD]$", // ----------------------------------------------------------------------------- // Floating point absolute value/difference +def : InstRW<[V1Write_2c_1V01], (instregex "^FAB[SD]_ZPmZ_[HSD]", + "^FABD_ZPZZ_[HSD]", + "^FABS_ZPmZ_[HSD]")>; + // Floating point arithmetic -def : InstRW<[V1Write_2c_1V01], (instregex "^FAB[SD]_ZPmZ_[HSD]$", - "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]$", - "^FADDP_ZPmZZ_[HSD]$", - "^FNEG_ZPmZ_[HSD]$", - "^FSUBR_ZPm[IZ]_[HSD]$")>; +def : InstRW<[V1Write_2c_1V01], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]", + "^F(ADD|SUB)_ZPZ[IZ]_[HSD]", + "^FADDP_ZPmZZ_[HSD]", + "^FNEG_ZPmZ_[HSD]", + "^FSUBR_ZPm[IZ]_[HSD]", + "^FSUBR_(ZPZI|ZPZZ)_[HSD]")>; // Floating point associative add, F16 def : InstRW<[V1Write_19c_18V0], (instrs FADDA_VPZ_H)>; @@ -1577,40 +1588,44 @@ def : InstRW<[V1Write_5c_1V01], (instregex "^FCMLA_ZPmZZ_[HSD]$", // Floating point convert, long or narrow (F16 to F32 or F32 to F16) // Floating point convert to integer, F32 -def : InstRW<[V1Write_4c_2V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)$", - 
"^FCVTZ[SU]_ZPmZ_(HtoS|StoS)$")>; +def : InstRW<[V1Write_4c_2V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)", + "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>; // Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) // Floating point convert to integer, F64 -def : InstRW<[V1Write_3c_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)$", - "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)$")>; +def : InstRW<[V1Write_3c_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)", + "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>; // Floating point convert to integer, F16 -def : InstRW<[V1Write_6c_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH$")>; +def : InstRW<[V1Write_6c_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>; // Floating point copy def : InstRW<[V1Write_2c_1V01], (instregex "^FCPY_ZPmI_[HSD]$", "^FDUP_ZI_[HSD]$")>; // Floating point divide, F16 -def : InstRW<[V1Write_13c10_1V0], (instregex "^FDIVR?_ZPmZ_H$")>; +def : InstRW<[V1Write_13c10_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>; // Floating point divide, F32 -def : InstRW<[V1Write_10c7_1V0], (instregex "^FDIVR?_ZPmZ_S$")>; +def : InstRW<[V1Write_10c7_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>; // Floating point divide, F64 -def : InstRW<[V1Write_15c7_1V0], (instregex "^FDIVR?_ZPmZ_D$")>; +def : InstRW<[V1Write_15c7_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>; // Floating point min/max -def : InstRW<[V1Write_2c_1V01], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]$")>; +def : InstRW<[V1Write_2c_1V01], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]", + "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]")>; // Floating point multiply -def : InstRW<[V1Write_3c_1V01], (instregex "^F(SCALE|MULX)_ZPmZ_[HSD]$", - "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]$")>; +def : InstRW<[V1Write_3c_1V01], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]", + "^FMULX_ZPZZ_[HSD]", + "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]", + "^FMUL_ZPZ[IZ]_[HSD]")>; // Floating point multiply accumulate // Floating point reciprocal step def : InstRW<[V1Write_4c_1V01], (instregex "^F(N?M(AD|SB)|N?ML[AS])_ZPmZZ_[HSD]$", + 
"^FN?ML[AS]_ZPZZZ_[HSD]", "^FML[AS]_ZZZI_[HSD]$", "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>; @@ -1624,7 +1639,7 @@ def : InstRW<[V1Write_4c_2V0], (instrs FRECPE_ZZ_S, FRSQRTE_ZZ_S)>; def : InstRW<[V1Write_3c_1V0], (instrs FRECPE_ZZ_D, FRSQRTE_ZZ_D)>; // Floating point reciprocal exponent -def : InstRW<[V1Write_3c_1V0], (instregex "^FRECPX_ZPmZ_[HSD]$")>; +def : InstRW<[V1Write_3c_1V0], (instregex "^FRECPX_ZPmZ_[HSD]")>; // Floating point reduction, F16 def : InstRW<[V1Write_13c_6V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_H$")>; @@ -1636,22 +1651,22 @@ def : InstRW<[V1Write_11c_1V_5V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_S$ def : InstRW<[V1Write_9c_1V_4V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_D$")>; // Floating point round to integral, F16 -def : InstRW<[V1Write_6c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H$")>; +def : InstRW<[V1Write_6c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>; // Floating point round to integral, F32 -def : InstRW<[V1Write_4c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S$")>; +def : InstRW<[V1Write_4c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>; // Floating point round to integral, F64 -def : InstRW<[V1Write_3c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D$")>; +def : InstRW<[V1Write_3c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>; // Floating point square root, F16 -def : InstRW<[V1Write_13c10_1V0], (instrs FSQRT_ZPmZ_H)>; +def : InstRW<[V1Write_13c10_1V0], (instregex "^FSQRT_ZPmZ_H")>; // Floating point square root, F32 -def : InstRW<[V1Write_10c7_1V0], (instrs FSQRT_ZPmZ_S)>; +def : InstRW<[V1Write_10c7_1V0], (instregex "^FSQRT_ZPmZ_S")>; // Floating point square root, F64 -def : InstRW<[V1Write_16c7_1V0], (instrs FSQRT_ZPmZ_D)>; +def : InstRW<[V1Write_16c7_1V0], (instregex "^FSQRT_ZPmZ_D")>; // Floating point trigonometric def : InstRW<[V1Write_3c_1V01], (instregex "^FEXPA_ZZ_[HSD]$", diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td index 807ce40bc5eac1..f10b94523d2e03 100644 
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td @@ -2567,13 +2567,13 @@ def : InstRW<[V2Write_4cyc_2V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>; def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>; // Floating point square root, F16 -def : InstRW<[V2Write_13cyc_1V0_12rc], (instregex "^FSQRT_ZPmZ_H", "^FSQRT_ZPmZ_H")>; +def : InstRW<[V2Write_13cyc_1V0_12rc], (instregex "^FSQRT_ZPmZ_H")>; // Floating point square root, F32 -def : InstRW<[V2Write_10cyc_1V0_9rc], (instregex "^FSQRT_ZPmZ_S", "^FSQRT_ZPmZ_S")>; +def : InstRW<[V2Write_10cyc_1V0_9rc], (instregex "^FSQRT_ZPmZ_S")>; // Floating point square root, F64 -def : InstRW<[V2Write_16cyc_1V0_14rc], (instregex "^FSQRT_ZPmZ_D", "^FSQRT_ZPmZ_D")>; +def : InstRW<[V2Write_16cyc_1V0_14rc], (instregex "^FSQRT_ZPmZ_D")>; // Floating point trigonometric exponentiation def : InstRW<[V2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]")>; diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 64c4ecd1fd6d51..e5e60459e8148a 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -820,9 +820,6 @@ void AArch64PassConfig::addPreSched2() { // info. 
addPass(createAArch64SpeculationHardeningPass()); - addPass(createAArch64IndirectThunks()); - addPass(createAArch64SLSHardeningPass()); - if (TM->getOptLevel() != CodeGenOptLevel::None) { if (EnableFalkorHWPFFix) addPass(createFalkorHWPFFixPass()); @@ -855,6 +852,8 @@ void AArch64PassConfig::addPreEmitPass() { } void AArch64PassConfig::addPostBBSections() { + addPass(createAArch64IndirectThunks()); + addPass(createAArch64SLSHardeningPass()); addPass(createAArch64PointerAuthPass()); if (EnableBranchTargets) addPass(createAArch64BranchTargetsPass()); diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index ad389cfc75aa94..36adada2796531 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -884,6 +884,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT) .legalIf(typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64})) + .moreElementsToNextPow2(0) .widenVectorEltsToVectorMinSize(0, 64); getActionDefinitionsBuilder(G_BUILD_VECTOR) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 37a36b26b947c6..d9970a200804ae 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -868,8 +868,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, ProgInfo.SGPRBlocks = IsaInfo::getNumSGPRBlocks( &STM, ProgInfo.NumSGPRsForWavesPerEU); - ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks( - &STM, ProgInfo.NumVGPRsForWavesPerEU); + ProgInfo.VGPRBlocks = + IsaInfo::getEncodedNumVGPRBlocks(&STM, ProgInfo.NumVGPRsForWavesPerEU); const SIModeRegisterDefaults Mode = MFI->getMode(); diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index cb4eddfe5320fa..d5efd441556252 100644 
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -5344,8 +5344,8 @@ bool AMDGPUAsmParser::calculateGPRBlocks( NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; } - VGPRBlocks = - IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); + VGPRBlocks = IsaInfo::getEncodedNumVGPRBlocks(&getSTI(), NumVGPRs, + EnableWavefrontSize32); SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); return false; diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index b984126d844722..9c6934865bfa55 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -2902,7 +2902,7 @@ multiclass MTBUF_Real_AllAddr_gfx11_gfx12 op> multiclass MTBUF_Real_AllAddr_gfx11_gfx12_Renamed op, string real_name> : MTBUF_Real_AllAddr_gfx11_gfx12_Renamed_Impl { defvar ps = get_BUF_ps; - def : MnemonicAlias, Requires<[isGFX11Plus]>; + def : Mnem_gfx11_gfx12; } defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x008, "tbuffer_load_d16_format_x">; diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index 5b94102770cd56..a84227ebf506fe 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -1210,33 +1210,24 @@ class Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12 op, DS_Pseudo ps, int ef, // GFX12. 
//===----------------------------------------------------------------------===// -let AssemblerPredicate = isGFX12Plus, DecoderNamespace = "GFX12" in { - multiclass DS_Real_gfx12 op> { - defvar ps = !cast(NAME); +multiclass DS_Real_gfx12 op, string name = !tolower(NAME)> { + defvar ps = !cast(NAME); + let AssemblerPredicate = isGFX12Plus, DecoderNamespace = "GFX12" in def _gfx12 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12; - } - - multiclass DS_Real_Renamed_gfx12 op, string name> { - defvar ps = !cast(NAME); - def _gfx12 : - Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12, - MnemonicAlias, - Requires<[isGFX12Plus]>; - } -} // End AssemblerPredicate = isGFX12Plus, DecoderNamespace = "GFX12" - -defm DS_MIN_F32 : DS_Real_Renamed_gfx12<0x012, "ds_min_num_f32">; -defm DS_MAX_F32 : DS_Real_Renamed_gfx12<0x013, "ds_max_num_f32">; -defm DS_MIN_RTN_F32 : DS_Real_Renamed_gfx12<0x032, "ds_min_num_rtn_f32">; -defm DS_MAX_RTN_F32 : DS_Real_Renamed_gfx12<0x033, "ds_max_num_rtn_f32">; -defm DS_MIN_F64 : DS_Real_Renamed_gfx12<0x052, "ds_min_num_f64">; -defm DS_MAX_F64 : DS_Real_Renamed_gfx12<0x053, "ds_max_num_f64">; -defm DS_MIN_RTN_F64 : DS_Real_Renamed_gfx12<0x072, "ds_min_num_rtn_f64">; -defm DS_MAX_RTN_F64 : DS_Real_Renamed_gfx12<0x073, "ds_max_num_rtn_f64">; + name, /*hasGDS=*/false>; + if !ne(ps.Mnemonic, name) then + def : MnemonicAlias, Requires<[isGFX12Plus]>; +} + +defm DS_MIN_F32 : DS_Real_gfx12<0x012, "ds_min_num_f32">; +defm DS_MAX_F32 : DS_Real_gfx12<0x013, "ds_max_num_f32">; +defm DS_MIN_RTN_F32 : DS_Real_gfx12<0x032, "ds_min_num_rtn_f32">; +defm DS_MAX_RTN_F32 : DS_Real_gfx12<0x033, "ds_max_num_rtn_f32">; +defm DS_MIN_F64 : DS_Real_gfx12<0x052, "ds_min_num_f64">; +defm DS_MAX_F64 : DS_Real_gfx12<0x053, "ds_max_num_f64">; +defm DS_MIN_RTN_F64 : DS_Real_gfx12<0x072, "ds_min_num_rtn_f64">; +defm DS_MAX_RTN_F64 : DS_Real_gfx12<0x073, "ds_max_num_rtn_f64">; defm DS_COND_SUB_U32 : DS_Real_gfx12<0x098>; defm DS_SUB_CLAMP_U32 : DS_Real_gfx12<0x099>; defm DS_COND_SUB_RTN_U32 : 
DS_Real_gfx12<0x0a8>; @@ -1256,65 +1247,57 @@ def : MnemonicAlias<"ds_subrev_rtn_u64", "ds_rsub_rtn_u64">, Requires<[isGFX12Pl // GFX11. //===----------------------------------------------------------------------===// -let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in { - multiclass DS_Real_gfx11 op> { +multiclass DS_Real_gfx11 op, string name = !tolower(NAME)> { + defvar ps = !cast(NAME); + let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in def _gfx11 : - Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12(NAME), - SIEncodingFamily.GFX11>; - } - - multiclass DS_Real_Renamed_gfx11 op, string name> { - defvar ps = !cast(NAME); - def _gfx11 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12, - MnemonicAlias, Requires<[isGFX11Only]>; - } -} // End AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" - -multiclass DS_Real_gfx11_gfx12 op> - : DS_Real_gfx11, DS_Real_gfx12; - -multiclass DS_Real_Renamed_gfx11_gfx12 op, string name> - : DS_Real_Renamed_gfx11, - DS_Real_Renamed_gfx12; - -defm DS_WRITE_B32 : DS_Real_Renamed_gfx11_gfx12<0x00d, "ds_store_b32">; -defm DS_WRITE2_B32 : DS_Real_Renamed_gfx11_gfx12<0x00e, "ds_store_2addr_b32">; -defm DS_WRITE2ST64_B32 : DS_Real_Renamed_gfx11_gfx12<0x00f, "ds_store_2addr_stride64_b32">; -defm DS_WRITE_B8 : DS_Real_Renamed_gfx11_gfx12<0x01e, "ds_store_b8">; -defm DS_WRITE_B16 : DS_Real_Renamed_gfx11_gfx12<0x01f, "ds_store_b16">; -defm DS_WRXCHG_RTN_B32 : DS_Real_Renamed_gfx11_gfx12<0x02d, "ds_storexchg_rtn_b32">; -defm DS_WRXCHG2_RTN_B32 : DS_Real_Renamed_gfx11_gfx12<0x02e, "ds_storexchg_2addr_rtn_b32">; -defm DS_WRXCHG2ST64_RTN_B32 : DS_Real_Renamed_gfx11_gfx12<0x02f, "ds_storexchg_2addr_stride64_rtn_b32">; -defm DS_READ_B32 : DS_Real_Renamed_gfx11_gfx12<0x036, "ds_load_b32">; -defm DS_READ2_B32 : DS_Real_Renamed_gfx11_gfx12<0x037, "ds_load_2addr_b32">; -defm DS_READ2ST64_B32 : DS_Real_Renamed_gfx11_gfx12<0x038, "ds_load_2addr_stride64_b32">; -defm DS_READ_I8 : DS_Real_Renamed_gfx11_gfx12<0x039, 
"ds_load_i8">; -defm DS_READ_U8 : DS_Real_Renamed_gfx11_gfx12<0x03a, "ds_load_u8">; -defm DS_READ_I16 : DS_Real_Renamed_gfx11_gfx12<0x03b, "ds_load_i16">; -defm DS_READ_U16 : DS_Real_Renamed_gfx11_gfx12<0x03c, "ds_load_u16">; -defm DS_WRITE_B64 : DS_Real_Renamed_gfx11_gfx12<0x04d, "ds_store_b64">; -defm DS_WRITE2_B64 : DS_Real_Renamed_gfx11_gfx12<0x04e, "ds_store_2addr_b64">; -defm DS_WRITE2ST64_B64 : DS_Real_Renamed_gfx11_gfx12<0x04f, "ds_store_2addr_stride64_b64">; -defm DS_WRXCHG_RTN_B64 : DS_Real_Renamed_gfx11_gfx12<0x06d, "ds_storexchg_rtn_b64">; -defm DS_WRXCHG2_RTN_B64 : DS_Real_Renamed_gfx11_gfx12<0x06e, "ds_storexchg_2addr_rtn_b64">; -defm DS_WRXCHG2ST64_RTN_B64 : DS_Real_Renamed_gfx11_gfx12<0x06f, "ds_storexchg_2addr_stride64_rtn_b64">; -defm DS_READ_B64 : DS_Real_Renamed_gfx11_gfx12<0x076, "ds_load_b64">; -defm DS_READ2_B64 : DS_Real_Renamed_gfx11_gfx12<0x077, "ds_load_2addr_b64">; -defm DS_READ2ST64_B64 : DS_Real_Renamed_gfx11_gfx12<0x078, "ds_load_2addr_stride64_b64">; -defm DS_WRITE_B8_D16_HI : DS_Real_Renamed_gfx11_gfx12<0x0a0, "ds_store_b8_d16_hi">; -defm DS_WRITE_B16_D16_HI : DS_Real_Renamed_gfx11_gfx12<0x0a1, "ds_store_b16_d16_hi">; -defm DS_READ_U8_D16 : DS_Real_Renamed_gfx11_gfx12<0x0a2, "ds_load_u8_d16">; -defm DS_READ_U8_D16_HI : DS_Real_Renamed_gfx11_gfx12<0x0a3, "ds_load_u8_d16_hi">; -defm DS_READ_I8_D16 : DS_Real_Renamed_gfx11_gfx12<0x0a4, "ds_load_i8_d16">; -defm DS_READ_I8_D16_HI : DS_Real_Renamed_gfx11_gfx12<0x0a5, "ds_load_i8_d16_hi">; -defm DS_READ_U16_D16 : DS_Real_Renamed_gfx11_gfx12<0x0a6, "ds_load_u16_d16">; -defm DS_READ_U16_D16_HI : DS_Real_Renamed_gfx11_gfx12<0x0a7, "ds_load_u16_d16_hi">; -defm DS_WRITE_ADDTID_B32 : DS_Real_Renamed_gfx11_gfx12<0x0b0, "ds_store_addtid_b32">; -defm DS_READ_ADDTID_B32 : DS_Real_Renamed_gfx11_gfx12<0x0b1, "ds_load_addtid_b32">; -defm DS_WRITE_B96 : DS_Real_Renamed_gfx11_gfx12<0x0de, "ds_store_b96">; -defm DS_WRITE_B128 : DS_Real_Renamed_gfx11_gfx12<0x0df, "ds_store_b128">; -defm DS_READ_B96 : 
DS_Real_Renamed_gfx11_gfx12<0x0fe, "ds_load_b96">; -defm DS_READ_B128 : DS_Real_Renamed_gfx11_gfx12<0x0ff, "ds_load_b128">; + Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12; + if !ne(ps.Mnemonic, name) then + def : MnemonicAlias, Requires<[isGFX11Only]>; +} + +multiclass DS_Real_gfx11_gfx12 op, string name = !tolower(NAME)> + : DS_Real_gfx11, DS_Real_gfx12; + +defm DS_WRITE_B32 : DS_Real_gfx11_gfx12<0x00d, "ds_store_b32">; +defm DS_WRITE2_B32 : DS_Real_gfx11_gfx12<0x00e, "ds_store_2addr_b32">; +defm DS_WRITE2ST64_B32 : DS_Real_gfx11_gfx12<0x00f, "ds_store_2addr_stride64_b32">; +defm DS_WRITE_B8 : DS_Real_gfx11_gfx12<0x01e, "ds_store_b8">; +defm DS_WRITE_B16 : DS_Real_gfx11_gfx12<0x01f, "ds_store_b16">; +defm DS_WRXCHG_RTN_B32 : DS_Real_gfx11_gfx12<0x02d, "ds_storexchg_rtn_b32">; +defm DS_WRXCHG2_RTN_B32 : DS_Real_gfx11_gfx12<0x02e, "ds_storexchg_2addr_rtn_b32">; +defm DS_WRXCHG2ST64_RTN_B32 : DS_Real_gfx11_gfx12<0x02f, "ds_storexchg_2addr_stride64_rtn_b32">; +defm DS_READ_B32 : DS_Real_gfx11_gfx12<0x036, "ds_load_b32">; +defm DS_READ2_B32 : DS_Real_gfx11_gfx12<0x037, "ds_load_2addr_b32">; +defm DS_READ2ST64_B32 : DS_Real_gfx11_gfx12<0x038, "ds_load_2addr_stride64_b32">; +defm DS_READ_I8 : DS_Real_gfx11_gfx12<0x039, "ds_load_i8">; +defm DS_READ_U8 : DS_Real_gfx11_gfx12<0x03a, "ds_load_u8">; +defm DS_READ_I16 : DS_Real_gfx11_gfx12<0x03b, "ds_load_i16">; +defm DS_READ_U16 : DS_Real_gfx11_gfx12<0x03c, "ds_load_u16">; +defm DS_WRITE_B64 : DS_Real_gfx11_gfx12<0x04d, "ds_store_b64">; +defm DS_WRITE2_B64 : DS_Real_gfx11_gfx12<0x04e, "ds_store_2addr_b64">; +defm DS_WRITE2ST64_B64 : DS_Real_gfx11_gfx12<0x04f, "ds_store_2addr_stride64_b64">; +defm DS_WRXCHG_RTN_B64 : DS_Real_gfx11_gfx12<0x06d, "ds_storexchg_rtn_b64">; +defm DS_WRXCHG2_RTN_B64 : DS_Real_gfx11_gfx12<0x06e, "ds_storexchg_2addr_rtn_b64">; +defm DS_WRXCHG2ST64_RTN_B64 : DS_Real_gfx11_gfx12<0x06f, "ds_storexchg_2addr_stride64_rtn_b64">; +defm DS_READ_B64 : DS_Real_gfx11_gfx12<0x076, "ds_load_b64">; +defm DS_READ2_B64 : 
DS_Real_gfx11_gfx12<0x077, "ds_load_2addr_b64">; +defm DS_READ2ST64_B64 : DS_Real_gfx11_gfx12<0x078, "ds_load_2addr_stride64_b64">; +defm DS_WRITE_B8_D16_HI : DS_Real_gfx11_gfx12<0x0a0, "ds_store_b8_d16_hi">; +defm DS_WRITE_B16_D16_HI : DS_Real_gfx11_gfx12<0x0a1, "ds_store_b16_d16_hi">; +defm DS_READ_U8_D16 : DS_Real_gfx11_gfx12<0x0a2, "ds_load_u8_d16">; +defm DS_READ_U8_D16_HI : DS_Real_gfx11_gfx12<0x0a3, "ds_load_u8_d16_hi">; +defm DS_READ_I8_D16 : DS_Real_gfx11_gfx12<0x0a4, "ds_load_i8_d16">; +defm DS_READ_I8_D16_HI : DS_Real_gfx11_gfx12<0x0a5, "ds_load_i8_d16_hi">; +defm DS_READ_U16_D16 : DS_Real_gfx11_gfx12<0x0a6, "ds_load_u16_d16">; +defm DS_READ_U16_D16_HI : DS_Real_gfx11_gfx12<0x0a7, "ds_load_u16_d16_hi">; +defm DS_WRITE_ADDTID_B32 : DS_Real_gfx11_gfx12<0x0b0, "ds_store_addtid_b32">; +defm DS_READ_ADDTID_B32 : DS_Real_gfx11_gfx12<0x0b1, "ds_load_addtid_b32">; +defm DS_WRITE_B96 : DS_Real_gfx11_gfx12<0x0de, "ds_store_b96">; +defm DS_WRITE_B128 : DS_Real_gfx11_gfx12<0x0df, "ds_store_b128">; +defm DS_READ_B96 : DS_Real_gfx11_gfx12<0x0fe, "ds_load_b96">; +defm DS_READ_B128 : DS_Real_gfx11_gfx12<0x0ff, "ds_load_b128">; // DS_CMPST_* are renamed to DS_CMPSTORE_* in GFX11, but also the data operands (src and cmp) are swapped // comparing to pre-GFX11. diff --git a/llvm/lib/Target/AMDGPU/EXPInstructions.td b/llvm/lib/Target/AMDGPU/EXPInstructions.td index cce8734b72d4a9..b73b83031af0d6 100644 --- a/llvm/lib/Target/AMDGPU/EXPInstructions.td +++ b/llvm/lib/Target/AMDGPU/EXPInstructions.td @@ -37,18 +37,18 @@ class EXP_Pseudo } // Real instruction with optional asm operands "compr" and "vm". -class EXP_Real_ComprVM(pseudo)> +class EXP_Real_ComprVM : EXPCommon<0, ps.done, "exp$tgt $src0, $src1, $src2, $src3" #!if(ps.done, " done", "")#"$compr$vm">, - SIMCInstr { + SIMCInstr { let AsmMatchConverter = "cvtExp"; } // Real instruction with optional asm operand "row_en". 
-class EXP_Real_Row(pseudo)> +class EXP_Real_Row : EXPCommon, - SIMCInstr { + SIMCInstr { let AsmMatchConverter = "cvtExp"; } @@ -71,7 +71,7 @@ def EXP_ROW_DONE : EXP_Pseudo<1, 1>; multiclass EXP_Real_si { defvar ps = !cast(NAME); - def _si : EXP_Real_ComprVM, EXPe_ComprVM { + def _si : EXP_Real_ComprVM, EXPe_ComprVM { let AssemblerPredicate = isGFX6GFX7; let DecoderNamespace = "GFX6GFX7"; let done = ps.done; @@ -80,7 +80,7 @@ multiclass EXP_Real_si { multiclass EXP_Real_vi { defvar ps = !cast(NAME); - def _vi : EXP_Real_ComprVM, EXPe_vi { + def _vi : EXP_Real_ComprVM, EXPe_vi { let AssemblerPredicate = isGFX8GFX9; let SubtargetPredicate = isNotGFX90APlus; let DecoderNamespace = "GFX8"; @@ -90,7 +90,7 @@ multiclass EXP_Real_vi { multiclass EXP_Real_gfx10 { defvar ps = !cast(NAME); - def _gfx10 : EXP_Real_ComprVM, EXPe_ComprVM { + def _gfx10 : EXP_Real_ComprVM, EXPe_ComprVM { let AssemblerPredicate = isGFX10Only; let DecoderNamespace = "GFX10"; let done = ps.done; @@ -106,7 +106,7 @@ defm EXP_DONE : EXP_Real_si, EXP_Real_vi, EXP_Real_gfx10; multiclass EXP_Real_gfx11 { defvar ps = !cast(NAME); - def _gfx11 : EXP_Real_Row, EXPe_Row { + def _gfx11 : EXP_Real_Row, EXPe_Row { let AssemblerPredicate = isGFX11Only; let DecoderNamespace = "GFX11"; let row = ps.row; @@ -116,7 +116,7 @@ multiclass EXP_Real_gfx11 { multiclass VEXPORT_Real_gfx12 { defvar ps = !cast(NAME); - def _gfx12 : EXP_Real_Row, + def _gfx12 : EXP_Real_Row, EXPe_Row, MnemonicAlias<"exp", "export">, Requires<[isGFX12Plus, HasExportInsts]> { let AssemblerPredicate = isGFX12Only; let DecoderNamespace = "GFX12"; diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 7bed0d8ef0d670..e515b729e7d7e8 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -2136,6 +2136,41 @@ int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) { return WaitStatesNeeded; } +static int 
+GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses) { + // 2 pass -> 3 + // 4 pass -> 5 + // 8 pass -> 9 + // 16 pass -> 17 + return NumPasses + 1; +} + +static int +GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses) { + // 2 pass -> 2 + // 4 pass -> 4 + // 8 pass -> 8 + // 16 pass -> 16 + return NumPasses; +} + +static int +GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses) { + // 2 pass -> 4 + // 4 pass -> 6 + // 8 pass -> 10 + // 16 pass -> 18 + return NumPasses + 2; +} + +static int GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses) { + // 2 pass -> 5 + // 4 pass -> 7 + // 8 pass -> 11 + // 16 pass -> 19 + return NumPasses + 3; +} + int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { int WaitStatesNeeded = 0; unsigned Opc = MI->getOpcode(); @@ -2164,13 +2199,6 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { for (const MachineOperand &Use : MI->explicit_uses()) { const int LegacyVALUNotDotWritesVGPRWaitStates = 2; const int SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates = 2; - const int GFX940_XDL2PassWritesVGPROverlappedSMFMASrcCWaitStates = 3; - const int GFX940_XDL4PassWritesVGPROverlappedSMFMASrcCWaitStates = 5; - const int GFX940_SMFMA4PassWritesVGPROverlappedSMFMASrcCWaitStates = 4; - const int GFX940_XDL8PassWritesVGPROverlappedSMFMASrcCWaitStates = 9; - const int GFX940_SMFMA8PassWritesVGPROverlappedSMFMASrcCWaitStates = 8; - const int GFX940_XDL16PassWritesVGPROverlappedSMFMASrcCWaitStates = 17; - const int GFX940_SMFMA16PassWritesVGPROverlappedSMFMASrcCWaitStates = 16; const int SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates = 8; const int SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates = 16; const int SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates = 3; @@ -2181,14 +2209,6 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { const int SMFMA4x4WritesVGPROverlappedSrcABWaitStates = 5; const int 
SMFMA16x16WritesVGPROverlappedSrcABWaitStates = 11; const int SMFMA32x32WritesVGPROverlappedSrcABWaitStates = 19; - const int GFX940_SMFMA2PassWritesVGPROverlappedSrcABWaitStates = 4; - const int GFX940_SMFMA4PassWritesVGPROverlappedSrcABWaitStates = 6; - const int GFX940_SMFMA8PassWritesVGPROverlappedSrcABWaitStates = 10; - const int GFX940_SMFMA16PassWritesVGPROverlappedSrcABWaitStates = 18; - const int GFX940_XDL2PassWritesVGPROverlappedSrcABWaitStates = 5; - const int GFX940_XDL4PassWritesVGPROverlappedSrcABWaitStates = 7; - const int GFX940_XDL8PassWritesVGPROverlappedSrcABWaitStates = 11; - const int GFX940_XDL16PassWritesVGPROverlappedSrcABWaitStates = 19; const int DMFMA4x4WritesVGPROverlappedMFMASrcABWaitStates = 6; const int DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 11; const int DMFMA4x4WritesVGPRFullSrcCWaitStates = 4; @@ -2250,42 +2270,40 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { NeedWaitStates = DMFMA4x4WritesVGPROverlappedSrcCWaitStates; break; default: - if (ST.hasGFX940Insts() && isXDL(ST, *MI) && !isXDL(ST, *MI1)) + int NumPasses = TSchedModel.computeInstrLatency(MI1); + if (ST.hasGFX940Insts()) { + if (isXDL(ST, *MI) && !isXDL(ST, *MI1)) + break; + + NeedWaitStates = + isXDL(ST, *MI1) + ? GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates( + NumPasses) + : GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates( + NumPasses); break; - switch (TSchedModel.computeInstrLatency(MI1)) { + } + + switch (NumPasses) { case 2: - NeedWaitStates = ST.hasGFX940Insts() - ? isXDL(ST, *MI1) - ? GFX940_XDL2PassWritesVGPROverlappedSMFMASrcCWaitStates - : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates - : isDGEMM(Opc) - ? SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates - : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates; - break; - case 4: - assert(ST.hasGFX940Insts()); - NeedWaitStates = isXDL(ST, *MI1) - ? 
GFX940_XDL4PassWritesVGPROverlappedSMFMASrcCWaitStates - : GFX940_SMFMA4PassWritesVGPROverlappedSMFMASrcCWaitStates; + NeedWaitStates = + isDGEMM(Opc) ? SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates + : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates; break; case 8: - NeedWaitStates = ST.hasGFX940Insts() - ? isXDL(ST, *MI1) - ? GFX940_XDL8PassWritesVGPROverlappedSMFMASrcCWaitStates - : GFX940_SMFMA8PassWritesVGPROverlappedSMFMASrcCWaitStates - : isDGEMM(Opc) - ? SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates - : SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates; + NeedWaitStates = + isDGEMM(Opc) + ? SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates + : SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates; + break; + case 16: + NeedWaitStates = + isDGEMM(Opc) + ? SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates + : SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates; break; - case 16: [[fallthrough]]; default: - NeedWaitStates = ST.hasGFX940Insts() - ? isXDL(ST, *MI1) - ? GFX940_XDL16PassWritesVGPROverlappedSMFMASrcCWaitStates - : GFX940_SMFMA16PassWritesVGPROverlappedSMFMASrcCWaitStates - : isDGEMM(Opc) - ? SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates - : SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates; + llvm_unreachable("unexpected number of passes"); } } } @@ -2302,34 +2320,30 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { NeedWaitStates = DMFMA4x4WritesVGPROverlappedMFMASrcABWaitStates; break; default: - switch (TSchedModel.computeInstrLatency(MI1)) { + int NumPasses = TSchedModel.computeInstrLatency(MI1); + + if (ST.hasGFX940Insts()) { + NeedWaitStates = + isXDL(ST, *MI1) + ? GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates( + NumPasses) + : GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates( + NumPasses); + break; + } + + switch (NumPasses) { case 2: - NeedWaitStates = ST.hasGFX940Insts() - ? isXDL(ST, *MI1) - ? 
GFX940_XDL2PassWritesVGPROverlappedSrcABWaitStates - : GFX940_SMFMA2PassWritesVGPROverlappedSrcABWaitStates - : SMFMA4x4WritesVGPROverlappedSrcABWaitStates; + NeedWaitStates = SMFMA4x4WritesVGPROverlappedSrcABWaitStates; break; case 4: - assert(ST.hasGFX940Insts()); - NeedWaitStates = isXDL(ST, *MI1) - ? GFX940_XDL4PassWritesVGPROverlappedSrcABWaitStates - : GFX940_SMFMA4PassWritesVGPROverlappedSrcABWaitStates; - break; + llvm_unreachable("unexpected number of passes for mfma"); case 8: - NeedWaitStates = ST.hasGFX940Insts() - ? isXDL(ST, *MI1) - ? GFX940_XDL8PassWritesVGPROverlappedSrcABWaitStates - : GFX940_SMFMA8PassWritesVGPROverlappedSrcABWaitStates - : SMFMA16x16WritesVGPROverlappedSrcABWaitStates; + NeedWaitStates = SMFMA16x16WritesVGPROverlappedSrcABWaitStates; break; case 16: [[fallthrough]]; default: - NeedWaitStates = ST.hasGFX940Insts() - ? isXDL(ST, *MI1) - ? GFX940_XDL16PassWritesVGPROverlappedSrcABWaitStates - : GFX940_SMFMA16PassWritesVGPROverlappedSrcABWaitStates - : SMFMA32x32WritesVGPROverlappedSrcABWaitStates; + NeedWaitStates = SMFMA32x32WritesVGPROverlappedSrcABWaitStates; } } } @@ -2393,6 +2407,38 @@ int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) { return WaitStatesNeeded; } +static int GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(int NumPasses) { + // 2 pass -> 4 + // 4 pass -> 6 + // 8 pass -> 10 + // 16 pass -> 18 + return NumPasses + 2; +} + +static int GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses) { + // 2 pass -> 5 + // 4 pass -> 7 + // 8 pass -> 11 + // 16 pass -> 19 + return NumPasses + 3; +} + +static int GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses) { + // 2 pass -> 5 + // 4 pass -> 7 + // 8 pass -> 11 + // 16 pass -> 19 + return NumPasses + 3; +} + +static int GFX940_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses) { + // 2 pass -> 4 + // 4 pass -> 6 + // 8 pass -> 10 + // 16 pass -> 18 + return NumPasses + 2; +} + int 
GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { if (!ST.hasGFX90AInsts()) return 0; @@ -2455,14 +2501,6 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { const int SMFMA4x4WriteVgprVALUMemExpReadWaitStates = 5; const int SMFMA16x16WriteVgprVALUMemExpReadWaitStates = 11; const int SMFMA32x32WriteVgprVALUMemExpReadWaitStates = 19; - const int GFX940_SMFMA2PassWriteVgprVALUMemExpReadWaitStates = 4; - const int GFX940_SMFMA4PassWriteVgprVALUMemExpReadWaitStates = 6; - const int GFX940_SMFMA8PassWriteVgprVALUMemExpReadWaitStates = 10; - const int GFX940_SMFMA16PassWriteVgprVALUMemExpReadWaitStates = 18; - const int GFX940_XDL2PassWriteVgprVALUMemExpReadWaitStates = 5; - const int GFX940_XDL4PassWriteVgprVALUMemExpReadWaitStates = 7; - const int GFX940_XDL8PassWriteVgprVALUMemExpReadWaitStates = 11; - const int GFX940_XDL16PassWriteVgprVALUMemExpReadWaitStates = 19; const int DMFMA4x4WriteVgprMemExpReadWaitStates = 9; const int DMFMA16x16WriteVgprMemExpReadWaitStates = 18; const int DMFMA4x4WriteVgprVALUReadWaitStates = 6; @@ -2516,47 +2554,44 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { continue; unsigned HazardDefLatency = TSchedModel.computeInstrLatency(MFMA); + int NumPasses = HazardDefLatency; int NeedWaitStates = MaxWaitStates; - switch (HazardDefLatency) { - case 2: - NeedWaitStates = - ST.hasGFX940Insts() - ? isXDL(ST, *MFMA) - ? GFX940_XDL2PassWriteVgprVALUMemExpReadWaitStates - : GFX940_SMFMA2PassWriteVgprVALUMemExpReadWaitStates - : SMFMA4x4WriteVgprVALUMemExpReadWaitStates; - break; - case 4: - assert(isDGEMM(MFMA->getOpcode()) || ST.hasGFX940Insts()); - NeedWaitStates = - isDGEMM(MFMA->getOpcode()) - ? IsMemOrExport ? DMFMA4x4WriteVgprMemExpReadWaitStates - : DMFMA4x4WriteVgprVALUReadWaitStates - : isXDL(ST, *MFMA) - ? GFX940_XDL4PassWriteVgprVALUMemExpReadWaitStates - : GFX940_SMFMA4PassWriteVgprVALUMemExpReadWaitStates; - break; - case 8: - NeedWaitStates = - isDGEMM(MFMA->getOpcode()) - ? 
IsMemOrExport ? DMFMA16x16WriteVgprMemExpReadWaitStates - : DMFMA16x16WriteVgprVALUReadWaitStates - : ST.hasGFX940Insts() - ? isXDL(ST, *MFMA) - ? GFX940_XDL8PassWriteVgprVALUMemExpReadWaitStates - : GFX940_SMFMA8PassWriteVgprVALUMemExpReadWaitStates - : SMFMA16x16WriteVgprVALUMemExpReadWaitStates; - break; - case 16: [[fallthrough]]; - default: - assert(!isDGEMM(MFMA->getOpcode())); + + if (isDGEMM(MFMA->getOpcode())) { + switch (HazardDefLatency) { + case 4: + NeedWaitStates = IsMemOrExport ? DMFMA4x4WriteVgprMemExpReadWaitStates + : DMFMA4x4WriteVgprVALUReadWaitStates; + break; + case 8: + case 16: + NeedWaitStates = IsMemOrExport + ? DMFMA16x16WriteVgprMemExpReadWaitStates + : DMFMA16x16WriteVgprVALUReadWaitStates; + break; + default: + llvm_unreachable("unexpected dgemm"); + } + } else if (ST.hasGFX940Insts()) { NeedWaitStates = - ST.hasGFX940Insts() - ? isXDL(ST, *MFMA) - ? GFX940_XDL16PassWriteVgprVALUMemExpReadWaitStates - : GFX940_SMFMA16PassWriteVgprVALUMemExpReadWaitStates - : SMFMA32x32WriteVgprVALUMemExpReadWaitStates; - break; + isXDL(ST, *MFMA) + ? 
GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(NumPasses) + : GFX940_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates( + NumPasses); + } else { + switch (HazardDefLatency) { + case 2: + NeedWaitStates = SMFMA4x4WriteVgprVALUMemExpReadWaitStates; + break; + case 8: + NeedWaitStates = SMFMA16x16WriteVgprVALUMemExpReadWaitStates; + break; + case 16: + NeedWaitStates = SMFMA32x32WriteVgprVALUMemExpReadWaitStates; + break; + default: + llvm_unreachable("unexpected number of passes for mfma"); + } } int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef; @@ -2585,14 +2620,6 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { const int SMFMA4x4WriteVgprVALUWawWaitStates = 5; const int SMFMA16x16WriteVgprVALUWawWaitStates = 11; const int SMFMA32x32WriteVgprVALUWawWaitStates = 19; - const int GFX940_SMFMA2PassWriteVgprVALUWawWaitStates = 4; - const int GFX940_SMFMA4PassWriteVgprVALUWawWaitStates = 6; - const int GFX940_SMFMA8PassWriteVgprVALUWawWaitStates = 10; - const int GFX940_SMFMA16PassWriteVgprVALUWawWaitStates = 18; - const int GFX940_XDL2PassWriteVgprVALUWawWaitStates = 5; - const int GFX940_XDL4PassWriteVgprVALUWawWaitStates = 7; - const int GFX940_XDL8PassWriteVgprVALUWawWaitStates = 11; - const int GFX940_XDL16PassWriteVgprVALUWawWaitStates = 19; const int SMFMA4x4ReadVgprVALUWarWaitStates = 1; const int GFX940_XDL4PassReadVgprVALUWarWaitStates = 3; const int SMFMA16x16ReadVgprVALUWarWaitStates = 7; @@ -2617,42 +2644,39 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { getWaitStatesSinceDef(Reg, IsMFMAWriteFn, MaxWaitStates); if (MFMA) { int NeedWaitStates = MaxWaitStates; - switch (TSchedModel.computeInstrLatency(MFMA)) { - case 2: - NeedWaitStates = ST.hasGFX940Insts() - ? isXDL(ST, *MFMA) - ? 
GFX940_XDL2PassWriteVgprVALUWawWaitStates - : GFX940_SMFMA2PassWriteVgprVALUWawWaitStates - : SMFMA4x4WriteVgprVALUWawWaitStates; - break; - case 4: - assert(isDGEMM(MFMA->getOpcode()) || ST.hasGFX940Insts()); - NeedWaitStates = isDGEMM(MFMA->getOpcode()) - ? DMFMA4x4WriteVgprVALUWriteWaitStates - : isXDL(ST, *MFMA) - ? GFX940_XDL4PassWriteVgprVALUWawWaitStates - : GFX940_SMFMA4PassWriteVgprVALUWawWaitStates; - break; - case 8: - NeedWaitStates = - isDGEMM(MFMA->getOpcode()) ? DMFMA16x16WriteVgprVALUWriteWaitStates - : + int NumPasses = TSchedModel.computeInstrLatency(MFMA); - ST.hasGFX940Insts() - ? isXDL(ST, *MFMA) ? GFX940_XDL8PassWriteVgprVALUWawWaitStates - : GFX940_SMFMA8PassWriteVgprVALUWawWaitStates - : SMFMA16x16WriteVgprVALUWawWaitStates; - break; - case 16: [[fallthrough]]; - default: - assert(!isDGEMM(MFMA->getOpcode())); + if (isDGEMM(MFMA->getOpcode())) { + switch (NumPasses) { + case 4: + NeedWaitStates = DMFMA4x4WriteVgprVALUWriteWaitStates; + break; + case 8: + case 16: + NeedWaitStates = DMFMA16x16WriteVgprVALUWriteWaitStates; + break; + default: + llvm_unreachable("unexpected number of cycles for dgemm"); + } + } else if (ST.hasGFX940Insts()) { NeedWaitStates = - ST.hasGFX940Insts() - ? isXDL(ST, *MFMA) - ? GFX940_XDL16PassWriteVgprVALUWawWaitStates - : GFX940_SMFMA16PassWriteVgprVALUWawWaitStates - : SMFMA32x32WriteVgprVALUWawWaitStates; - break; + isXDL(ST, *MFMA) + ? 
GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(NumPasses) + : GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(NumPasses); + } else { + switch (NumPasses) { + case 2: + NeedWaitStates = SMFMA4x4WriteVgprVALUWawWaitStates; + break; + case 8: + NeedWaitStates = SMFMA16x16WriteVgprVALUWawWaitStates; + break; + case 16: + NeedWaitStates = SMFMA32x32WriteVgprVALUWawWaitStates; + break; + default: + llvm_unreachable("Unexpected number of passes for mfma"); + } } int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef; diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp index d510e729512571..e3f54d01eb22a2 100644 --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp @@ -11,7 +11,7 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIRegisterInfo.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -313,7 +313,7 @@ MachineBasicBlock::reverse_iterator SIOptimizeExecMasking::findExecCopy( return E; } -// XXX - Seems LiveRegUnits doesn't work correctly since it will incorrectly +// XXX - Seems LivePhysRegs doesn't work correctly since it will incorrectly // report the register as unavailable because a super-register with a lane mask // is unavailable. 
static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg) { @@ -383,7 +383,7 @@ bool SIOptimizeExecMasking::isRegisterInUseBetween(MachineInstr &Stop, MCRegister Reg, bool UseLiveOuts, bool IgnoreStart) const { - LiveRegUnits LR(*TRI); + LivePhysRegs LR(*TRI); if (UseLiveOuts) LR.addLiveOuts(*Stop.getParent()); @@ -396,7 +396,7 @@ bool SIOptimizeExecMasking::isRegisterInUseBetween(MachineInstr &Stop, LR.stepBackward(*A); } - return !LR.available(Reg); + return !LR.available(*MRI, Reg); } // Determine if a register Reg is not re-defined and still in use diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 0eab7acc9ebce9..62903a244dc892 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1060,10 +1060,15 @@ unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, STI->getFeatureBits().test(AMDGPU::FeatureXNACK)); } +static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs, + unsigned Granule) { + return divideCeil(std::max(1u, NumRegs), Granule); +} + unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) { - NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI)); // SGPRBlocks is actual number of SGPR blocks minus 1. - return NumSGPRs / getSGPREncodingGranule(STI) - 1; + return getGranulatedNumRegisterBlocks(NumSGPRs, getSGPREncodingGranule(STI)) - + 1; } unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, @@ -1158,14 +1163,19 @@ unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { return std::min(MaxNumVGPRs, AddressableNumVGPRs); } -unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, - std::optional EnableWavefrontSize32) { - NumVGPRs = alignTo(std::max(1u, NumVGPRs), - getVGPREncodingGranule(STI, EnableWavefrontSize32)); - // VGPRBlocks is actual number of VGPR blocks minus 1. 
- return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1; +unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, + std::optional EnableWavefrontSize32) { + return getGranulatedNumRegisterBlocks( + NumVGPRs, getVGPREncodingGranule(STI, EnableWavefrontSize32)) - + 1; } +unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI, + unsigned NumVGPRs, + std::optional EnableWavefrontSize32) { + return getGranulatedNumRegisterBlocks( + NumVGPRs, getVGPRAllocGranule(STI, EnableWavefrontSize32)); +} } // end namespace IsaInfo void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 6edf01d1217f2d..bb307cb67c9b79 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -316,13 +316,20 @@ unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, unsigned NumVGPRs); /// \returns Number of VGPR blocks needed for given subtarget \p STI when -/// \p NumVGPRs are used. +/// \p NumVGPRs are used. We actually return the number of blocks -1, since +/// that's what we encode. /// /// For subtargets which support it, \p EnableWavefrontSize32 should match the /// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field. -unsigned -getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs, - std::optional EnableWavefrontSize32 = std::nullopt); +unsigned getEncodedNumVGPRBlocks( + const MCSubtargetInfo *STI, unsigned NumVGPRs, + std::optional EnableWavefrontSize32 = std::nullopt); + +/// \returns Number of VGPR blocks that need to be allocated for the given +/// subtarget \p STI when \p NumVGPRs are used. 
+unsigned getAllocatedNumVGPRBlocks( + const MCSubtargetInfo *STI, unsigned NumVGPRs, + std::optional EnableWavefrontSize32 = std::nullopt); } // end namespace IsaInfo diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 13fe79b4759608..53578682e00246 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -112,7 +112,7 @@ class VOP2_Real : VOP2_Real { let AssemblerPredicate = Gen.AssemblerPredicate; - let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []); + let True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate); let DecoderNamespace = Gen.DecoderNamespace# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); } @@ -1272,7 +1272,7 @@ class VOP2_DPP16_Gen op, VOP2_DPP_Pseudo ps, GFXGen Gen, string opName = ps.OpName, VOPProfile p = ps.Pfl> : VOP2_DPP16 { let AssemblerPredicate = Gen.AssemblerPredicate; - let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []); + let True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate); let DecoderNamespace = Gen.DecoderNamespace# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); } @@ -1301,7 +1301,7 @@ class VOP2_DPP8_Gen op, VOP2_Pseudo ps, GFXGen Gen, VOPProfile p = ps.Pfl> : VOP2_DPP8 { let AssemblerPredicate = Gen.AssemblerPredicate; - let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []); + let True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate); let DecoderNamespace = Gen.DecoderNamespace# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); } diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 334cfad478f151..3340ded9d36000 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -893,7 +893,7 @@ let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in { defm V_MINIMUMMAXIMUM_F16 : 
VOP3Inst<"v_minimummaximum_f16", VOP3_Profile>; } // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 -let SubtargetPredicate = HasDot9Insts, IsDOT=1 in { +let OtherPredicates = [HasDot9Insts], IsDOT=1 in { defm V_DOT2_F16_F16 : VOP3Inst<"v_dot2_f16_f16", VOP3_DOT_Profile, int_amdgcn_fdot2_f16_f16>; defm V_DOT2_BF16_BF16 : VOP3Inst<"v_dot2_bf16_bf16", VOP3_DOT_Profile, int_amdgcn_fdot2_bf16_bf16>; } diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 9bcf0007974485..6121055eb02176 100644 --- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -31,7 +31,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -109,7 +109,7 @@ namespace { const ARMSubtarget *STI; const TargetLowering *TL; ARMFunctionInfo *AFI; - LiveRegUnits LiveRegs; + LivePhysRegs LiveRegs; RegisterClassInfo RegClassInfo; MachineBasicBlock::const_iterator LiveRegPos; bool LiveRegsValid; @@ -589,7 +589,7 @@ unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) { } for (unsigned Reg : RegClassInfo.getOrder(&RegClass)) - if (LiveRegs.available(Reg)) + if (LiveRegs.available(MF->getRegInfo(), Reg)) return Reg; return 0; } diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index a8cf036f363cdd..0f4ece64bff532 100644 --- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -612,11 +612,11 @@ bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const { static void findTemporariesForLR(const BitVector &GPRsNoLRSP, const BitVector &PopFriendly, - const LiveRegUnits &UsedRegs, unsigned 
&PopReg, + const LivePhysRegs &UsedRegs, unsigned &PopReg, unsigned &TmpReg, MachineRegisterInfo &MRI) { PopReg = TmpReg = 0; for (auto Reg : GPRsNoLRSP.set_bits()) { - if (UsedRegs.available(Reg)) { + if (UsedRegs.available(MRI, Reg)) { // Remember the first pop-friendly register and exit. if (PopFriendly.test(Reg)) { PopReg = Reg; @@ -684,7 +684,7 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, // Look for a temporary register to use. // First, compute the liveness information. const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); - LiveRegUnits UsedRegs(TRI); + LivePhysRegs UsedRegs(TRI); UsedRegs.addLiveOuts(MBB); // The semantic of pristines changed recently and now, // the callee-saved registers that are touched in the function diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 4c3dc63afd878d..9b748cdcf74511 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -7246,25 +7246,25 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, // (select c, -1, y) -> -c | y if (isAllOnesConstant(TrueV)) { SDValue Neg = DAG.getNegative(CondV, DL, VT); - return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV); + return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV)); } // (select c, y, -1) -> (c-1) | y if (isAllOnesConstant(FalseV)) { SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT)); - return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV); + return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV)); } // (select c, 0, y) -> (c-1) & y if (isNullConstant(TrueV)) { SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT)); - return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV); + return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV)); } // (select c, y, 0) -> -c & y if (isNullConstant(FalseV)) { SDValue Neg = DAG.getNegative(CondV, DL, VT); - return 
DAG.getNode(ISD::AND, DL, VT, Neg, TrueV); + return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV)); } } @@ -7290,13 +7290,13 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, // (select !x, x, y) -> x & y if (std::optional MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) { return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV, - FalseV); + DAG.getFreeze(FalseV)); } // (select x, y, x) -> x & y // (select !x, y, x) -> x | y if (std::optional MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) { - return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, TrueV, - FalseV); + return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, + DAG.getFreeze(TrueV), FalseV); } } @@ -15283,13 +15283,62 @@ static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); } +// Recursively split up concat_vectors with more than 2 operands: +// +// concat_vector op1, op2, op3, op4 +// -> +// concat_vector (concat_vector op1, op2), (concat_vector op3, op4) +// +// This reduces the length of the chain of vslideups and allows us to perform +// the vslideups at a smaller LMUL, limited to MF2. +// +// We do this as a DAG combine rather than during lowering so that any undef +// operands can get combined away. +static SDValue +performCONCAT_VECTORSSplitCombine(SDNode *N, SelectionDAG &DAG, + const RISCVTargetLowering &TLI) { + SDLoc DL(N); + + if (N->getNumOperands() <= 2) + return SDValue(); + + if (!TLI.isTypeLegal(N->getValueType(0))) + return SDValue(); + MVT VT = N->getSimpleValueType(0); + + // Don't split any further than MF2. 
+ MVT ContainerVT = VT; + if (VT.isFixedLengthVector()) + ContainerVT = getContainerForFixedLengthVector(DAG, VT, TLI.getSubtarget()); + if (ContainerVT.bitsLT(getLMUL1VT(ContainerVT))) + return SDValue(); + + MVT HalfVT = VT.getHalfNumVectorElementsVT(); + assert(isPowerOf2_32(N->getNumOperands())); + size_t HalfNumOps = N->getNumOperands() / 2; + SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT, + N->ops().take_front(HalfNumOps)); + SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT, + N->ops().drop_front(HalfNumOps)); + + // Lower to an insert_subvector directly so the concat_vectors don't get + // recombined. + SDValue Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Lo, + DAG.getVectorIdxConstant(0, DL)); + Vec = DAG.getNode( + ISD::INSERT_SUBVECTOR, DL, VT, Vec, Hi, + DAG.getVectorIdxConstant(HalfVT.getVectorMinNumElements(), DL)); + return Vec; +} + // If we're concatenating a series of vector loads like // concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ... 
// Then we can turn this into a strided load by widening the vector elements // vlse32 p, stride=n -static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget, - const RISCVTargetLowering &TLI) { +static SDValue +performCONCAT_VECTORSStridedLoadCombine(SDNode *N, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget, + const RISCVTargetLowering &TLI) { SDLoc DL(N); EVT VT = N->getValueType(0); @@ -16394,7 +16443,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return V; break; case ISD::CONCAT_VECTORS: - if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this)) + if (SDValue V = + performCONCAT_VECTORSStridedLoadCombine(N, DAG, Subtarget, *this)) + return V; + if (SDValue V = performCONCAT_VECTORSSplitCombine(N, DAG, *this)) return V; break; case ISD::INSERT_VECTOR_ELT: @@ -20868,7 +20920,8 @@ bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const { unsigned Op = Inst.getOpcode(); if (Op == Instruction::Add || Op == Instruction::Sub || Op == Instruction::And || Op == Instruction::Or || - Op == Instruction::Xor || Op == Instruction::InsertElement) + Op == Instruction::Xor || Op == Instruction::InsertElement || + Op == Instruction::Xor || Op == Instruction::ShuffleVector) return false; if (Inst.getType()->isScalableTy()) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td index 6b43d4393f7670..8ea1560e5b372e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td @@ -41,9 +41,9 @@ def DIV : ALU_rr<0b0000001, 0b100, "div">, def DIVU : ALU_rr<0b0000001, 0b101, "divu">, Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>; def REM : ALU_rr<0b0000001, 0b110, "rem">, - Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>; + Sched<[WriteIRem, ReadIRem, ReadIRem]>; def REMU : ALU_rr<0b0000001, 0b111, "remu">, - Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>; + Sched<[WriteIRem, ReadIRem, ReadIRem]>; } // Predicates = 
[HasStdExtM] let Predicates = [HasStdExtMOrZmmul, IsRV64], IsSignExtendingOpW = 1 in { @@ -57,9 +57,9 @@ def DIVW : ALUW_rr<0b0000001, 0b100, "divw">, def DIVUW : ALUW_rr<0b0000001, 0b101, "divuw">, Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>; def REMW : ALUW_rr<0b0000001, 0b110, "remw">, - Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>; + Sched<[WriteIRem32, ReadIRem32, ReadIRem32]>; def REMUW : ALUW_rr<0b0000001, 0b111, "remuw">, - Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>; + Sched<[WriteIRem32, ReadIRem32, ReadIRem32]>; } // Predicates = [HasStdExtM, IsRV64] //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td index 60fa1a848306d8..e74c7aab7474da 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td +++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td @@ -77,6 +77,16 @@ def : WriteRes { let ReleaseAtCycles = [33]; } +// Integer remainder +def : WriteRes { + let Latency = 34; + let ReleaseAtCycles = [34]; +} +def : WriteRes { + let Latency = 33; + let ReleaseAtCycles = [33]; +} + // Memory def : WriteRes; def : WriteRes; @@ -189,6 +199,8 @@ def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index 0430d603620b6a..b21a56bdcdd20a 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -189,6 +189,7 @@ class SiFive7AnyToGPRBypass WriteREV8, WriteORCB, WriteSFB, WriteIMul, WriteIMul32, WriteIDiv, WriteIDiv32, + WriteIRem, WriteIRem32, WriteLDB, WriteLDH, WriteLDW, WriteLDD]>; // SiFive7 machine model for scheduling and other instruction cost heuristics. 
@@ -273,6 +274,16 @@ def : WriteRes { let ReleaseAtCycles = [1, 33]; } +// Integer remainder +def : WriteRes { + let Latency = 66; + let ReleaseAtCycles = [1, 65]; +} +def : WriteRes { + let Latency = 34; + let ReleaseAtCycles = [1, 33]; +} + // Bitmanip let Latency = 3 in { // Rotates are in the late-B ALU. @@ -946,6 +957,8 @@ def : SiFive7AnyToGPRBypass; def : SiFive7AnyToGPRBypass; def : ReadAdvance; def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td index 01398dea14a3b9..d02d34a0fb9c58 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td @@ -86,6 +86,16 @@ def : WriteRes { let ReleaseAtCycles = [1, 19]; } +// Integer remainder +def : WriteRes { + let Latency = 35; + let ReleaseAtCycles = [1, 34]; +} +def : WriteRes { + let Latency = 20; + let ReleaseAtCycles = [1, 19]; +} + let Latency = 1 in { // Bitmanip def : WriteRes; @@ -258,6 +268,8 @@ def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; diff --git a/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td index f2c07810867bd2..9625d17e0b2600 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td @@ -54,10 +54,12 @@ def : WriteRes; def : WriteRes; def : WriteRes; -// Integer division: latency 33, inverse throughput 33 +// Integer division/remainder: latency 33, inverse throughput 33 let Latency = 33, ReleaseAtCycles = [33] in { def : WriteRes; def : WriteRes; +def : WriteRes; +def : WriteRes; } // Load/store instructions on SCR1 have latency 2 and inverse throughput 2 @@ -147,6 +149,8 @@ def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : 
ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; diff --git a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td index 667b5983cb401c..ef491edf3671f8 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td +++ b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td @@ -64,11 +64,13 @@ def : WriteRes; def : WriteRes; } -// Integer division +// Integer division/remainder // SRT16 algorithm let Latency = 20, ReleaseAtCycles = [20] in { def : WriteRes; def : WriteRes; +def : WriteRes; +def : WriteRes; } // Zb* @@ -221,6 +223,8 @@ def : XS2LoadToALUBypass; def : XS2LoadToALUBypass; def : ReadAdvance; def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td index 593921bfcc67ab..1d19624342d2bb 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedule.td +++ b/llvm/lib/Target/RISCV/RISCVSchedule.td @@ -13,8 +13,10 @@ def WriteShiftImm : SchedWrite; // 32 or 64-bit shift by immediate operatio def WriteShiftImm32 : SchedWrite; // 32-bit shift by immediate operations on RV64Ix def WriteShiftReg : SchedWrite; // 32 or 64-bit shift by immediate operations def WriteShiftReg32 : SchedWrite; // 32-bit shift by immediate operations on RV64Ix -def WriteIDiv : SchedWrite; // 32-bit or 64-bit divide and remainder -def WriteIDiv32 : SchedWrite; // 32-bit divide and remainder on RV64I +def WriteIDiv : SchedWrite; // 32-bit or 64-bit divide +def WriteIDiv32 : SchedWrite; // 32-bit divide on RV64I +def WriteIRem : SchedWrite; // 32-bit or 64-bit remainder +def WriteIRem32 : SchedWrite; // 32-bit remainder on RV64I def WriteIMul : SchedWrite; // 32-bit or 64-bit multiply def WriteIMul32 : SchedWrite; // 32-bit multiply on RV64I def WriteJmp : SchedWrite; // Jump @@ -135,6 +137,8 @@ def ReadShiftReg : SchedRead; def ReadShiftReg32 : 
SchedRead; // 32-bit shift by register operations on RV64Ix def ReadIDiv : SchedRead; def ReadIDiv32 : SchedRead; +def ReadIRem : SchedRead; +def ReadIRem32 : SchedRead; def ReadIMul : SchedRead; def ReadIMul32 : SchedRead; def ReadAtomicBA : SchedRead; diff --git a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp index e58f50e471fc0e..7423ed429ffb68 100644 --- a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp +++ b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp @@ -18,7 +18,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" -#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -690,9 +690,9 @@ bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) { // Walk backwards through the block looking for comparisons, recording // all CC users as we go. The subroutines can delete Compare and // instructions before it. 
- LiveRegUnits LiveRegs(*TRI); + LivePhysRegs LiveRegs(*TRI); LiveRegs.addLiveOuts(MBB); - bool CompleteCCUsers = LiveRegs.available(SystemZ::CC); + bool CompleteCCUsers = !LiveRegs.contains(SystemZ::CC); SmallVector CCUsers; MachineBasicBlock::iterator MBBI = MBB.end(); while (MBBI != MBB.begin()) { diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index 53e9bf9a9d1bb0..046a12208467b4 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -18,7 +18,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervals.h" -#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -1874,9 +1874,9 @@ prepareCompareSwapOperands(MachineBasicBlock::iterator const MBBI) const { } } if (CCLive) { - LiveRegUnits LiveRegs(*MBB->getParent()->getSubtarget().getRegisterInfo()); + LivePhysRegs LiveRegs(*MBB->getParent()->getSubtarget().getRegisterInfo()); LiveRegs.addLiveOuts(*MBB); - if (!LiveRegs.available(SystemZ::CC)) + if (LiveRegs.contains(SystemZ::CC)) return false; } diff --git a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp index c0adfdbf120bdf..30b22fa1ce92de 100644 --- a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp +++ b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -13,7 +13,7 @@ //===----------------------------------------------------------------------===// #include "SystemZTargetMachine.h" -#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -46,7 +46,7 @@ class SystemZShortenInst : public MachineFunctionPass { const SystemZInstrInfo *TII; const 
TargetRegisterInfo *TRI; - LiveRegUnits LiveRegs; + LivePhysRegs LiveRegs; }; char SystemZShortenInst::ID = 0; @@ -88,7 +88,7 @@ bool SystemZShortenInst::shortenIIF(MachineInstr &MI, unsigned LLIxL, unsigned GR64BitReg = TRI->getMatchingSuperReg(Reg, thisSubRegIdx, &SystemZ::GR64BitRegClass); Register OtherReg = TRI->getSubReg(GR64BitReg, otherSubRegIdx); - if (!LiveRegs.available(OtherReg)) + if (LiveRegs.contains(OtherReg)) return false; uint64_t Imm = MI.getOperand(1).getImm(); @@ -143,7 +143,7 @@ bool SystemZShortenInst::shortenOn001(MachineInstr &MI, unsigned Opcode) { // Calls shortenOn001 if CCLive is false. CC def operand is added in // case of success. bool SystemZShortenInst::shortenOn001AddCC(MachineInstr &MI, unsigned Opcode) { - if (LiveRegs.available(SystemZ::CC) && shortenOn001(MI, Opcode)) { + if (!LiveRegs.contains(SystemZ::CC) && shortenOn001(MI, Opcode)) { MachineInstrBuilder(*MI.getParent()->getParent(), &MI) .addReg(SystemZ::CC, RegState::ImplicitDefine | RegState::Dead); return true; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp index 90e81991284710..abcb1d0f16286e 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp @@ -136,6 +136,7 @@ bool WebAssemblyAddMissingPrototypes::runOnModule(Module &M) { Function::Create(NewType, F.getLinkage(), F.getName() + ".fixed_sig"); NewF->setAttributes(F.getAttributes()); NewF->removeFnAttr("no-prototype"); + NewF->IsNewDbgInfoFormat = F.IsNewDbgInfoFormat; Replacements.emplace_back(&F, NewF); } diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp index 26932ba2c8e242..8e0f61a855661b 100644 --- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp +++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp @@ -1778,12 +1778,9 @@ bool 
X86InstructionSelector::selectMulDivRem(MachineInstr &I, .addImm(8); // Now reference the 8-bit subreg of the result. - BuildMI(*I.getParent(), I, I.getDebugLoc(), - TII.get(TargetOpcode::SUBREG_TO_REG)) - .addDef(DstReg) - .addImm(0) - .addReg(ResultSuperReg) - .addImm(X86::sub_8bit); + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY), + DstReg) + .addReg(ResultSuperReg, 0, X86::sub_8bit); } else { BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY), DstReg) diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp index 2e33adaed7a847..06389842ebb1ed 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp +++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp @@ -213,6 +213,7 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, return typeInSet(0, {s8, s16, s32})(Query) || (Is64Bit && typeInSet(0, {s64})(Query)); }) + .libcallFor({s64}) .clampScalar(0, s8, sMaxScalar); // integer shifts diff --git a/llvm/lib/Target/X86/X86FloatingPoint.cpp b/llvm/lib/Target/X86/X86FloatingPoint.cpp index 260879ffaa4f12..ca4d03913d093e 100644 --- a/llvm/lib/Target/X86/X86FloatingPoint.cpp +++ b/llvm/lib/Target/X86/X86FloatingPoint.cpp @@ -30,7 +30,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/EdgeBundles.h" -#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -1751,7 +1751,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) { void FPS::setKillFlags(MachineBasicBlock &MBB) const { const TargetRegisterInfo &TRI = *MBB.getParent()->getSubtarget().getRegisterInfo(); - LiveRegUnits LPR(TRI); + LivePhysRegs LPR(TRI); LPR.addLiveOuts(MBB); @@ -1773,14 +1773,14 @@ void FPS::setKillFlags(MachineBasicBlock &MBB) const { if (MO.isDef()) { Defs.set(Reg); - if 
(LPR.available(MO.getReg())) + if (!LPR.contains(MO.getReg())) MO.setIsDead(); } else Uses.push_back(&MO); } for (auto *MO : Uses) - if (Defs.test(getFPReg(*MO)) || LPR.available(MO->getReg())) + if (Defs.test(getFPReg(*MO)) || !LPR.contains(MO->getReg())) MO->setIsKill(); LPR.stepBackward(MI); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 94c4bbc4a09993..e1e6c22eb8cca5 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -53441,6 +53441,69 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG, truncateAVX512SetCCNoBWI(VT, OpVT, LHS, RHS, CC, DL, DAG, Subtarget)) return R; + // In the middle end transforms: + // `(or (icmp eq X, C), (icmp eq X, C+1))` + // -> `(icmp ult (add x, -C), 2)` + // Likewise inverted cases with `ugt`. + // + // Since x86, pre avx512, doesn't have unsigned vector compares, this results + // in worse codegen. So, undo the middle-end transform and go back to `(or + // (icmp eq), (icmp eq))` form. + // Also skip AVX1 with ymm vectors, as the umin approach combines better than + // the xmm approach. + // + // NB: We don't handle the similiar simplication of `(and (icmp ne), (icmp + // ne))` as it doesn't end up instruction positive. + // TODO: We might want to do this for avx512 as well if we `sext` the result. + if (VT.isVector() && OpVT.isVector() && OpVT.isInteger() && + ISD::isUnsignedIntSetCC(CC) && LHS.getOpcode() == ISD::ADD && + !Subtarget.hasAVX512() && + (OpVT.getSizeInBits() <= 128 || !Subtarget.hasAVX() || + Subtarget.hasAVX2()) && + LHS.hasOneUse()) { + + APInt CmpC; + SDValue AddC = LHS.getOperand(1); + if (ISD::isConstantSplatVector(RHS.getNode(), CmpC) && + DAG.isConstantIntBuildVectorOrConstantInt(AddC)) { + // See which form we have depending on the constant/condition. 
+ SDValue C0 = SDValue(); + SDValue C1 = SDValue(); + + // If we had `(add x, -1)` and can lower with `umin`, don't transform as + // we will end up generating an additional constant. Keeping in the + // current form has a slight latency cost, but it probably worth saving a + // constant. + if (ISD::isConstantSplatVectorAllOnes(AddC.getNode()) && + DAG.getTargetLoweringInfo().isOperationLegal(ISD::UMIN, OpVT)) { + // Pass + } + // Normal Cases + else if ((CC == ISD::SETULT && CmpC == 2) || + (CC == ISD::SETULE && CmpC == 1)) { + // These will constant fold. + C0 = DAG.getNegative(AddC, DL, OpVT); + C1 = DAG.getNode(ISD::SUB, DL, OpVT, C0, + DAG.getAllOnesConstant(DL, OpVT)); + } + // Inverted Cases + else if ((CC == ISD::SETUGT && (-CmpC) == 3) || + (CC == ISD::SETUGE && (-CmpC) == 2)) { + // These will constant fold. + C0 = DAG.getNOT(DL, AddC, OpVT); + C1 = DAG.getNode(ISD::ADD, DL, OpVT, C0, + DAG.getAllOnesConstant(DL, OpVT)); + } + if (C0 && C1) { + SDValue NewLHS = + DAG.getSetCC(DL, VT, LHS.getOperand(0), C0, ISD::SETEQ); + SDValue NewRHS = + DAG.getSetCC(DL, VT, LHS.getOperand(0), C1, ISD::SETEQ); + return DAG.getNode(ISD::OR, DL, VT, NewLHS, NewRHS); + } + } + } + // For an SSE1-only target, lower a comparison of v4f32 to X86ISD::CMPP early // to avoid scalarization via legalization because v4i32 is not a legal type. 
if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32 && diff --git a/llvm/lib/TextAPI/RecordsSlice.cpp b/llvm/lib/TextAPI/RecordsSlice.cpp index db52a2cdd85c9c..111a1fa6eaf43b 100644 --- a/llvm/lib/TextAPI/RecordsSlice.cpp +++ b/llvm/lib/TextAPI/RecordsSlice.cpp @@ -171,8 +171,8 @@ ObjCIVarRecord *RecordsSlice::findObjCIVar(bool IsScopedName, } GlobalRecord *RecordsSlice::addGlobal(StringRef Name, RecordLinkage Linkage, - GlobalRecord::Kind GV, - SymbolFlags Flags) { + GlobalRecord::Kind GV, SymbolFlags Flags, + bool Inlined) { if (GV == GlobalRecord::Kind::Function) Flags |= SymbolFlags::Text; else if (GV == GlobalRecord::Kind::Variable) @@ -182,7 +182,7 @@ GlobalRecord *RecordsSlice::addGlobal(StringRef Name, RecordLinkage Linkage, auto Result = Globals.insert({Name, nullptr}); if (Result.second) Result.first->second = - std::make_unique(Name, Linkage, Flags, GV); + std::make_unique(Name, Linkage, Flags, GV, Inlined); else { updateLinkage(Result.first->second.get(), Linkage); updateFlags(Result.first->second.get(), Flags); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 1b963a7de4a8ae..c691c8b1c55b30 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -997,6 +997,44 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, I, 1, (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(MaskWidth))) return I; + // Combine: + // (ptrmask (getelementptr i8, ptr p, imm i), imm mask) + // -> (ptrmask (getelementptr i8, ptr p, imm (i & mask)), imm mask) + // where only the low bits known to be zero in the pointer are changed + Value *InnerPtr; + uint64_t GEPIndex; + uint64_t PtrMaskImmediate; + if (match(I, m_Intrinsic( + m_PtrAdd(m_Value(InnerPtr), m_ConstantInt(GEPIndex)), + m_ConstantInt(PtrMaskImmediate)))) { + + LHSKnown = 
computeKnownBits(InnerPtr, Depth + 1, I); + if (!LHSKnown.isZero()) { + const unsigned trailingZeros = LHSKnown.countMinTrailingZeros(); + uint64_t PointerAlignBits = (uint64_t(1) << trailingZeros) - 1; + + uint64_t HighBitsGEPIndex = GEPIndex & ~PointerAlignBits; + uint64_t MaskedLowBitsGEPIndex = + GEPIndex & PointerAlignBits & PtrMaskImmediate; + + uint64_t MaskedGEPIndex = HighBitsGEPIndex | MaskedLowBitsGEPIndex; + + if (MaskedGEPIndex != GEPIndex) { + auto *GEP = cast(II->getArgOperand(0)); + Builder.SetInsertPoint(I); + Type *GEPIndexType = + DL.getIndexType(GEP->getPointerOperand()->getType()); + Value *MaskedGEP = Builder.CreateGEP( + GEP->getSourceElementType(), InnerPtr, + ConstantInt::get(GEPIndexType, MaskedGEPIndex), + GEP->getName(), GEP->isInBounds()); + + replaceOperand(*I, 0, MaskedGEP); + return I; + } + } + } + break; } diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index f22f53b8cd8fc6..db75eec21a3745 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -642,6 +642,24 @@ static uint64_t GetCtorAndDtorPriority(Triple &TargetTriple) { } namespace { +/// Helper RAII class to post-process inserted asan runtime calls during a +/// pass on a single Function. This is a no-op implementation, for a first NFC +/// commit. Coming up: detect and add "funclet" opBundle to function calls that +/// need them. +class RuntimeCallInserter { + Function *OwnerFn = nullptr; + +public: + RuntimeCallInserter(Function &Fn) : OwnerFn(&Fn) {} + + CallInst *createRuntimeCall(IRBuilder<> &IRB, FunctionCallee Callee, + ArrayRef Args = {}, + const Twine &Name = "") { + assert(IRB.GetInsertBlock()->getParent() == OwnerFn); + (void)OwnerFn; + return IRB.CreateCall(Callee, Args, Name, nullptr); + } +}; /// AddressSanitizer: instrument the code in module to find memory bugs. 
struct AddressSanitizer { @@ -691,12 +709,14 @@ struct AddressSanitizer { void instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, InterestingMemoryOperand &O, bool UseCalls, - const DataLayout &DL); - void instrumentPointerComparisonOrSubtraction(Instruction *I); + const DataLayout &DL, RuntimeCallInserter &RTCI); + void instrumentPointerComparisonOrSubtraction(Instruction *I, + RuntimeCallInserter &RTCI); void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore, Value *Addr, MaybeAlign Alignment, uint32_t TypeStoreSize, bool IsWrite, - Value *SizeArgument, bool UseCalls, uint32_t Exp); + Value *SizeArgument, bool UseCalls, uint32_t Exp, + RuntimeCallInserter &RTCI); Instruction *instrumentAMDGPUAddress(Instruction *OrigIns, Instruction *InsertBefore, Value *Addr, uint32_t TypeStoreSize, bool IsWrite, @@ -707,20 +727,22 @@ struct AddressSanitizer { Instruction *InsertBefore, Value *Addr, TypeSize TypeStoreSize, bool IsWrite, Value *SizeArgument, bool UseCalls, - uint32_t Exp); + uint32_t Exp, + RuntimeCallInserter &RTCI); void instrumentMaskedLoadOrStore(AddressSanitizer *Pass, const DataLayout &DL, Type *IntptrTy, Value *Mask, Value *EVL, Value *Stride, Instruction *I, Value *Addr, MaybeAlign Alignment, unsigned Granularity, Type *OpType, bool IsWrite, Value *SizeArgument, bool UseCalls, - uint32_t Exp); + uint32_t Exp, RuntimeCallInserter &RTCI); Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, Value *ShadowValue, uint32_t TypeStoreSize); Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr, bool IsWrite, size_t AccessSizeIndex, - Value *SizeArgument, uint32_t Exp); - void instrumentMemIntrinsic(MemIntrinsic *MI); + Value *SizeArgument, uint32_t Exp, + RuntimeCallInserter &RTCI); + void instrumentMemIntrinsic(MemIntrinsic *MI, RuntimeCallInserter &RTCI); Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); bool suppressInstrumentationSiteForDebug(int &Instrumented); bool instrumentFunction(Function &F, const 
TargetLibraryInfo *TLI); @@ -912,6 +934,7 @@ class ModuleAddressSanitizer { struct FunctionStackPoisoner : public InstVisitor { Function &F; AddressSanitizer &ASan; + RuntimeCallInserter &RTCI; DIBuilder DIB; LLVMContext *C; Type *IntptrTy; @@ -948,10 +971,12 @@ struct FunctionStackPoisoner : public InstVisitor { bool HasReturnsTwiceCall = false; bool PoisonStack; - FunctionStackPoisoner(Function &F, AddressSanitizer &ASan) - : F(F), ASan(ASan), DIB(*F.getParent(), /*AllowUnresolved*/ false), - C(ASan.C), IntptrTy(ASan.IntptrTy), - IntptrPtrTy(PointerType::get(IntptrTy, 0)), Mapping(ASan.Mapping), + FunctionStackPoisoner(Function &F, AddressSanitizer &ASan, + RuntimeCallInserter &RTCI) + : F(F), ASan(ASan), RTCI(RTCI), + DIB(*F.getParent(), /*AllowUnresolved*/ false), C(ASan.C), + IntptrTy(ASan.IntptrTy), IntptrPtrTy(PointerType::get(IntptrTy, 0)), + Mapping(ASan.Mapping), PoisonStack(ClStack && !Triple(F.getParent()->getTargetTriple()).isAMDGPU()) {} @@ -1034,8 +1059,8 @@ struct FunctionStackPoisoner : public InstVisitor { DynamicAreaOffset); } - IRB.CreateCall( - AsanAllocasUnpoisonFunc, + RTCI.createRuntimeCall( + IRB, AsanAllocasUnpoisonFunc, {IRB.CreateLoad(IntptrTy, DynamicAllocaLayout), DynamicAreaPtr}); } @@ -1251,16 +1276,18 @@ Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) { } // Instrument memset/memmove/memcpy -void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { +void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI, + RuntimeCallInserter &RTCI) { InstrumentationIRBuilder IRB(MI); if (isa(MI)) { - IRB.CreateCall(isa(MI) ? AsanMemmove : AsanMemcpy, - {IRB.CreateAddrSpaceCast(MI->getOperand(0), PtrTy), - IRB.CreateAddrSpaceCast(MI->getOperand(1), PtrTy), - IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)}); + RTCI.createRuntimeCall( + IRB, isa(MI) ? 
AsanMemmove : AsanMemcpy, + {IRB.CreateAddrSpaceCast(MI->getOperand(0), PtrTy), + IRB.CreateAddrSpaceCast(MI->getOperand(1), PtrTy), + IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)}); } else if (isa(MI)) { - IRB.CreateCall( - AsanMemset, + RTCI.createRuntimeCall( + IRB, AsanMemset, {IRB.CreateAddrSpaceCast(MI->getOperand(0), PtrTy), IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false), IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)}); @@ -1498,7 +1525,7 @@ bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) { } void AddressSanitizer::instrumentPointerComparisonOrSubtraction( - Instruction *I) { + Instruction *I, RuntimeCallInserter &RTCI) { IRBuilder<> IRB(I); FunctionCallee F = isa(I) ? AsanPtrCmpFunction : AsanPtrSubFunction; Value *Param[2] = {I->getOperand(0), I->getOperand(1)}; @@ -1506,7 +1533,7 @@ void AddressSanitizer::instrumentPointerComparisonOrSubtraction( if (i->getType()->isPointerTy()) i = IRB.CreatePointerCast(i, IntptrTy); } - IRB.CreateCall(F, Param); + RTCI.createRuntimeCall(IRB, F, Param); } static void doInstrumentAddress(AddressSanitizer *Pass, Instruction *I, @@ -1514,7 +1541,7 @@ static void doInstrumentAddress(AddressSanitizer *Pass, Instruction *I, MaybeAlign Alignment, unsigned Granularity, TypeSize TypeStoreSize, bool IsWrite, Value *SizeArgument, bool UseCalls, - uint32_t Exp) { + uint32_t Exp, RuntimeCallInserter &RTCI) { // Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check // if the data is properly aligned. 
if (!TypeStoreSize.isScalable()) { @@ -1529,18 +1556,19 @@ static void doInstrumentAddress(AddressSanitizer *Pass, Instruction *I, *Alignment >= FixedSize / 8) return Pass->instrumentAddress(I, InsertBefore, Addr, Alignment, FixedSize, IsWrite, nullptr, UseCalls, - Exp); + Exp, RTCI); } } Pass->instrumentUnusualSizeOrAlignment(I, InsertBefore, Addr, TypeStoreSize, - IsWrite, nullptr, UseCalls, Exp); + IsWrite, nullptr, UseCalls, Exp, RTCI); } void AddressSanitizer::instrumentMaskedLoadOrStore( AddressSanitizer *Pass, const DataLayout &DL, Type *IntptrTy, Value *Mask, Value *EVL, Value *Stride, Instruction *I, Value *Addr, MaybeAlign Alignment, unsigned Granularity, Type *OpType, bool IsWrite, - Value *SizeArgument, bool UseCalls, uint32_t Exp) { + Value *SizeArgument, bool UseCalls, uint32_t Exp, + RuntimeCallInserter &RTCI) { auto *VTy = cast(OpType); TypeSize ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType()); auto Zero = ConstantInt::get(IntptrTy, 0); @@ -1595,15 +1623,16 @@ void AddressSanitizer::instrumentMaskedLoadOrStore( } else { InstrumentedAddress = IRB.CreateGEP(VTy, Addr, {Zero, Index}); } - doInstrumentAddress(Pass, I, &*IRB.GetInsertPoint(), - InstrumentedAddress, Alignment, Granularity, - ElemTypeSize, IsWrite, SizeArgument, UseCalls, Exp); + doInstrumentAddress(Pass, I, &*IRB.GetInsertPoint(), InstrumentedAddress, + Alignment, Granularity, ElemTypeSize, IsWrite, + SizeArgument, UseCalls, Exp, RTCI); }); } void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, InterestingMemoryOperand &O, bool UseCalls, - const DataLayout &DL) { + const DataLayout &DL, + RuntimeCallInserter &RTCI) { Value *Addr = O.getPtr(); // Optimization experiments. 
@@ -1649,11 +1678,11 @@ void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, instrumentMaskedLoadOrStore(this, DL, IntptrTy, O.MaybeMask, O.MaybeEVL, O.MaybeStride, O.getInsn(), Addr, O.Alignment, Granularity, O.OpType, O.IsWrite, nullptr, - UseCalls, Exp); + UseCalls, Exp, RTCI); } else { doInstrumentAddress(this, O.getInsn(), O.getInsn(), Addr, O.Alignment, - Granularity, O.TypeStoreSize, O.IsWrite, nullptr, UseCalls, - Exp); + Granularity, O.TypeStoreSize, O.IsWrite, nullptr, + UseCalls, Exp, RTCI); } } @@ -1661,24 +1690,25 @@ Instruction *AddressSanitizer::generateCrashCode(Instruction *InsertBefore, Value *Addr, bool IsWrite, size_t AccessSizeIndex, Value *SizeArgument, - uint32_t Exp) { + uint32_t Exp, + RuntimeCallInserter &RTCI) { InstrumentationIRBuilder IRB(InsertBefore); Value *ExpVal = Exp == 0 ? nullptr : ConstantInt::get(IRB.getInt32Ty(), Exp); CallInst *Call = nullptr; if (SizeArgument) { if (Exp == 0) - Call = IRB.CreateCall(AsanErrorCallbackSized[IsWrite][0], - {Addr, SizeArgument}); + Call = RTCI.createRuntimeCall(IRB, AsanErrorCallbackSized[IsWrite][0], + {Addr, SizeArgument}); else - Call = IRB.CreateCall(AsanErrorCallbackSized[IsWrite][1], - {Addr, SizeArgument, ExpVal}); + Call = RTCI.createRuntimeCall(IRB, AsanErrorCallbackSized[IsWrite][1], + {Addr, SizeArgument, ExpVal}); } else { if (Exp == 0) - Call = - IRB.CreateCall(AsanErrorCallback[IsWrite][0][AccessSizeIndex], Addr); + Call = RTCI.createRuntimeCall( + IRB, AsanErrorCallback[IsWrite][0][AccessSizeIndex], Addr); else - Call = IRB.CreateCall(AsanErrorCallback[IsWrite][1][AccessSizeIndex], - {Addr, ExpVal}); + Call = RTCI.createRuntimeCall( + IRB, AsanErrorCallback[IsWrite][1][AccessSizeIndex], {Addr, ExpVal}); } Call->setCannotMerge(); @@ -1754,7 +1784,8 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, MaybeAlign Alignment, uint32_t TypeStoreSize, bool IsWrite, Value *SizeArgument, bool UseCalls, - uint32_t Exp) { + uint32_t Exp, + 
RuntimeCallInserter &RTCI) { if (TargetTriple.isAMDGPU()) { InsertBefore = instrumentAMDGPUAddress(OrigIns, InsertBefore, Addr, TypeStoreSize, IsWrite, SizeArgument); @@ -1779,11 +1810,12 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); if (UseCalls) { if (Exp == 0) - IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][0][AccessSizeIndex], - AddrLong); + RTCI.createRuntimeCall( + IRB, AsanMemoryAccessCallback[IsWrite][0][AccessSizeIndex], AddrLong); else - IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][1][AccessSizeIndex], - {AddrLong, ConstantInt::get(IRB.getInt32Ty(), Exp)}); + RTCI.createRuntimeCall( + IRB, AsanMemoryAccessCallback[IsWrite][1][AccessSizeIndex], + {AddrLong, ConstantInt::get(IRB.getInt32Ty(), Exp)}); return; } @@ -1830,8 +1862,8 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, CrashTerm = SplitBlockAndInsertIfThen(Cmp, InsertBefore, !Recover); } - Instruction *Crash = generateCrashCode(CrashTerm, AddrLong, IsWrite, - AccessSizeIndex, SizeArgument, Exp); + Instruction *Crash = generateCrashCode( + CrashTerm, AddrLong, IsWrite, AccessSizeIndex, SizeArgument, Exp, RTCI); if (OrigIns->getDebugLoc()) Crash->setDebugLoc(OrigIns->getDebugLoc()); } @@ -1841,8 +1873,9 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, // and the last bytes. We call __asan_report_*_n(addr, real_size) to be able // to report the actual access size. 
void AddressSanitizer::instrumentUnusualSizeOrAlignment( - Instruction *I, Instruction *InsertBefore, Value *Addr, TypeSize TypeStoreSize, - bool IsWrite, Value *SizeArgument, bool UseCalls, uint32_t Exp) { + Instruction *I, Instruction *InsertBefore, Value *Addr, + TypeSize TypeStoreSize, bool IsWrite, Value *SizeArgument, bool UseCalls, + uint32_t Exp, RuntimeCallInserter &RTCI) { InstrumentationIRBuilder IRB(InsertBefore); Value *NumBits = IRB.CreateTypeSize(IntptrTy, TypeStoreSize); Value *Size = IRB.CreateLShr(NumBits, ConstantInt::get(IntptrTy, 3)); @@ -1850,19 +1883,21 @@ void AddressSanitizer::instrumentUnusualSizeOrAlignment( Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); if (UseCalls) { if (Exp == 0) - IRB.CreateCall(AsanMemoryAccessCallbackSized[IsWrite][0], - {AddrLong, Size}); + RTCI.createRuntimeCall(IRB, AsanMemoryAccessCallbackSized[IsWrite][0], + {AddrLong, Size}); else - IRB.CreateCall(AsanMemoryAccessCallbackSized[IsWrite][1], - {AddrLong, Size, ConstantInt::get(IRB.getInt32Ty(), Exp)}); + RTCI.createRuntimeCall( + IRB, AsanMemoryAccessCallbackSized[IsWrite][1], + {AddrLong, Size, ConstantInt::get(IRB.getInt32Ty(), Exp)}); } else { Value *SizeMinusOne = IRB.CreateSub(Size, ConstantInt::get(IntptrTy, 1)); Value *LastByte = IRB.CreateIntToPtr( IRB.CreateAdd(AddrLong, SizeMinusOne), Addr->getType()); - instrumentAddress(I, InsertBefore, Addr, {}, 8, IsWrite, Size, false, Exp); + instrumentAddress(I, InsertBefore, Addr, {}, 8, IsWrite, Size, false, Exp, + RTCI); instrumentAddress(I, InsertBefore, LastByte, {}, 8, IsWrite, Size, false, - Exp); + Exp, RTCI); } } @@ -2881,6 +2916,8 @@ bool AddressSanitizer::instrumentFunction(Function &F, FunctionStateRAII CleanupObj(this); + RuntimeCallInserter RTCI(F); + FunctionModified |= maybeInsertDynamicShadowAtFunctionEntry(F); // We can't instrument allocas used with llvm.localescape. 
Only static allocas @@ -2963,27 +3000,27 @@ bool AddressSanitizer::instrumentFunction(Function &F, for (auto &Operand : OperandsToInstrument) { if (!suppressInstrumentationSiteForDebug(NumInstrumented)) instrumentMop(ObjSizeVis, Operand, UseCalls, - F.getParent()->getDataLayout()); + F.getParent()->getDataLayout(), RTCI); FunctionModified = true; } for (auto *Inst : IntrinToInstrument) { if (!suppressInstrumentationSiteForDebug(NumInstrumented)) - instrumentMemIntrinsic(Inst); + instrumentMemIntrinsic(Inst, RTCI); FunctionModified = true; } - FunctionStackPoisoner FSP(F, *this); + FunctionStackPoisoner FSP(F, *this, RTCI); bool ChangedStack = FSP.runOnFunction(); // We must unpoison the stack before NoReturn calls (throw, _exit, etc). // See e.g. https://github.com/google/sanitizers/issues/37 for (auto *CI : NoReturnCalls) { IRBuilder<> IRB(CI); - IRB.CreateCall(AsanHandleNoReturnFunc, {}); + RTCI.createRuntimeCall(IRB, AsanHandleNoReturnFunc, {}); } for (auto *Inst : PointerComparisonsOrSubtracts) { - instrumentPointerComparisonOrSubtraction(Inst); + instrumentPointerComparisonOrSubtraction(Inst, RTCI); FunctionModified = true; } @@ -3128,9 +3165,10 @@ void FunctionStackPoisoner::copyToShadow(ArrayRef ShadowMask, if (j - i >= ASan.MaxInlinePoisoningSize) { copyToShadowInline(ShadowMask, ShadowBytes, Done, i, IRB, ShadowBase); - IRB.CreateCall(AsanSetShadowFunc[Val], - {IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i)), - ConstantInt::get(IntptrTy, j - i)}); + RTCI.createRuntimeCall( + IRB, AsanSetShadowFunc[Val], + {IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i)), + ConstantInt::get(IntptrTy, j - i)}); Done = j; } } @@ -3417,8 +3455,8 @@ void FunctionStackPoisoner::processStaticAllocas() { StackMallocIdx = StackMallocSizeClass(LocalStackSize); assert(StackMallocIdx <= kMaxAsanStackMallocSizeClass); Value *FakeStackValue = - IRBIf.CreateCall(AsanStackMallocFunc[StackMallocIdx], - ConstantInt::get(IntptrTy, LocalStackSize)); + 
RTCI.createRuntimeCall(IRBIf, AsanStackMallocFunc[StackMallocIdx], + ConstantInt::get(IntptrTy, LocalStackSize)); IRB.SetInsertPoint(InsBefore); FakeStack = createPHI(IRB, UseAfterReturnIsEnabled, FakeStackValue, Term, ConstantInt::get(IntptrTy, 0)); @@ -3428,7 +3466,8 @@ void FunctionStackPoisoner::processStaticAllocas() { // void *LocalStackBase = (FakeStack) ? FakeStack : // alloca(LocalStackSize); StackMallocIdx = StackMallocSizeClass(LocalStackSize); - FakeStack = IRB.CreateCall(AsanStackMallocFunc[StackMallocIdx], + FakeStack = + RTCI.createRuntimeCall(IRB, AsanStackMallocFunc[StackMallocIdx], ConstantInt::get(IntptrTy, LocalStackSize)); } Value *NoFakeStack = @@ -3563,8 +3602,8 @@ void FunctionStackPoisoner::processStaticAllocas() { IRBPoison.CreateIntToPtr(SavedFlagPtr, IRBPoison.getPtrTy())); } else { // For larger frames call __asan_stack_free_*. - IRBPoison.CreateCall( - AsanStackFreeFunc[StackMallocIdx], + RTCI.createRuntimeCall( + IRBPoison, AsanStackFreeFunc[StackMallocIdx], {FakeStack, ConstantInt::get(IntptrTy, LocalStackSize)}); } @@ -3585,8 +3624,8 @@ void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size, // For now just insert the call to ASan runtime. Value *AddrArg = IRB.CreatePointerCast(V, IntptrTy); Value *SizeArg = ConstantInt::get(IntptrTy, Size); - IRB.CreateCall( - DoPoison ? AsanPoisonStackMemoryFunc : AsanUnpoisonStackMemoryFunc, + RTCI.createRuntimeCall( + IRB, DoPoison ? AsanPoisonStackMemoryFunc : AsanUnpoisonStackMemoryFunc, {AddrArg, SizeArg}); } @@ -3647,7 +3686,7 @@ void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) { ConstantInt::get(IntptrTy, Alignment.value())); // Insert __asan_alloca_poison call for new created alloca. - IRB.CreateCall(AsanAllocaPoisonFunc, {NewAddress, OldSize}); + RTCI.createRuntimeCall(IRB, AsanAllocaPoisonFunc, {NewAddress, OldSize}); // Store the last alloca's address to DynamicAllocaLayout. We'll need this // for unpoisoning stuff. 
diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt index ee9aa73ff03403..b23a6ed1f08415 100644 --- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt @@ -17,6 +17,7 @@ add_llvm_component_library(LLVMInstrumentation PGOInstrumentation.cpp PGOMemOPSizeOpt.cpp PoisonChecking.cpp + RemoveTrapsPass.cpp SanitizerCoverage.cpp SanitizerBinaryMetadata.cpp ValueProfileCollector.cpp diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index 236ee8910d46ab..6bae679e11be23 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -187,15 +187,15 @@ static cl::opt cl::desc("Use selective instrumentation"), cl::Hidden, cl::init(false)); -static cl::opt HotPercentileCutoff( +static cl::opt ClHotPercentileCutoff( "hwasan-percentile-cutoff-hot", cl::init(0), cl::desc("Alternative hot percentile cuttoff." "By default `-profile-summary-cutoff-hot` is used.")); static cl::opt - RandomSkipRate("hwasan-random-skip-rate", cl::init(0), - cl::desc("Probability value in the range [0.0, 1.0] " - "to skip instrumentation of a function.")); + ClRandomSkipRate("hwasan-random-skip-rate", cl::init(0), + cl::desc("Probability value in the range [0.0, 1.0] " + "to skip instrumentation of a function.")); STATISTIC(NumTotalFuncs, "Number of total funcs"); STATISTIC(NumInstrumentedFuncs, "Number of instrumented funcs"); @@ -301,7 +301,7 @@ class HWAddressSanitizer { ? ClEnableKhwasan : CompileKernel; this->Rng = - RandomSkipRate.getNumOccurrences() ? M.createRNG("hwasan") : nullptr; + ClRandomSkipRate.getNumOccurrences() ? 
M.createRNG("hwasan") : nullptr; initializeModule(); } @@ -412,8 +412,8 @@ class HWAddressSanitizer { Type *VoidTy = Type::getVoidTy(M.getContext()); Type *IntptrTy; PointerType *PtrTy; - Type *Int8Ty; - Type *Int32Ty; + Type *Int8Ty = Type::getInt8Ty(M.getContext()); + Type *Int32Ty = Type::getInt32Ty(M.getContext()); Type *Int64Ty = Type::getInt64Ty(M.getContext()); bool CompileKernel; @@ -615,8 +615,6 @@ void HWAddressSanitizer::initializeModule() { IRBuilder<> IRB(*C); IntptrTy = IRB.getIntPtrTy(DL); PtrTy = IRB.getPtrTy(); - Int8Ty = IRB.getInt8Ty(); - Int32Ty = IRB.getInt32Ty(); HwasanCtorFunction = nullptr; @@ -1539,8 +1537,8 @@ void HWAddressSanitizer::sanitizeFunction(Function &F, NumTotalFuncs++; if (CSelectiveInstrumentation) { - if (RandomSkipRate.getNumOccurrences()) { - std::bernoulli_distribution D(RandomSkipRate); + if (ClRandomSkipRate.getNumOccurrences()) { + std::bernoulli_distribution D(ClRandomSkipRate); if (D(*Rng)) return; } else { @@ -1549,10 +1547,10 @@ void HWAddressSanitizer::sanitizeFunction(Function &F, MAMProxy.getCachedResult(*F.getParent()); if (PSI && PSI->hasProfileSummary()) { auto &BFI = FAM.getResult(F); - if ((HotPercentileCutoff.getNumOccurrences() && - HotPercentileCutoff >= 0) + if ((ClHotPercentileCutoff.getNumOccurrences() && + ClHotPercentileCutoff >= 0) ? PSI->isFunctionHotInCallGraphNthPercentile( - HotPercentileCutoff, &F, BFI) + ClHotPercentileCutoff, &F, BFI) : PSI->isFunctionHotInCallGraph(&F, BFI)) return; } else { diff --git a/llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp b/llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp new file mode 100644 index 00000000000000..d87f7482a21d25 --- /dev/null +++ b/llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp @@ -0,0 +1,104 @@ +//===- RemoveTrapsPass.cpp --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Instrumentation/RemoveTrapsPass.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/Support/RandomNumberGenerator.h" +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "remove-traps" + +static cl::opt HotPercentileCutoff( + "remove-traps-percentile-cutoff-hot", cl::init(0), + cl::desc("Alternative hot percentile cuttoff. By default " + "`-profile-summary-cutoff-hot` is used.")); + +static cl::opt + RandomRate("remove-traps-random-rate", cl::init(0.0), + cl::desc("Probability value in the range [0.0, 1.0] of " + "unconditional pseudo-random checks removal.")); + +STATISTIC(NumChecksTotal, "Number of checks"); +STATISTIC(NumChecksRemoved, "Number of removed checks"); + +static bool removeUbsanTraps(Function &F, const BlockFrequencyInfo &BFI, + const ProfileSummaryInfo *PSI) { + SmallVector Remove; + std::unique_ptr Rng; + + auto ShouldRemove = [&](bool IsHot) { + if (!RandomRate.getNumOccurrences()) + return IsHot; + if (!Rng) + Rng = F.getParent()->createRNG(F.getName()); + std::bernoulli_distribution D(RandomRate); + return D(*Rng); + }; + + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + IntrinsicInst *II = dyn_cast(&I); + if (!II) + continue; + auto ID = II->getIntrinsicID(); + switch (ID) { + case Intrinsic::ubsantrap: { + ++NumChecksTotal; + + bool IsHot = false; + if (PSI) { + uint64_t Count = 0; + for (const auto *PR : predecessors(&BB)) + Count += BFI.getBlockProfileCount(PR).value_or(0); + + IsHot = + HotPercentileCutoff.getNumOccurrences() + ? 
(HotPercentileCutoff > 0 && + PSI->isHotCountNthPercentile(HotPercentileCutoff, Count)) + : PSI->isHotCount(Count); + } + + if (ShouldRemove(IsHot)) { + Remove.push_back(II); + ++NumChecksRemoved; + } + break; + } + default: + break; + } + } + } + + for (IntrinsicInst *I : Remove) + I->eraseFromParent(); + + return !Remove.empty(); +} + +PreservedAnalyses RemoveTrapsPass::run(Function &F, + FunctionAnalysisManager &AM) { + if (F.isDeclaration()) + return PreservedAnalyses::all(); + auto &MAMProxy = AM.getResult(F); + ProfileSummaryInfo *PSI = + MAMProxy.getCachedResult(*F.getParent()); + BlockFrequencyInfo &BFI = AM.getResult(F); + + return removeUbsanTraps(F, BFI, PSI) ? PreservedAnalyses::none() + : PreservedAnalyses::all(); +} diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 7a2011888ab008..de3bfb57b538d3 100644 --- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -805,9 +805,12 @@ static bool expandUDivOrURem(BinaryOperator *Instr, const ConstantRange &XCR, Value *FrozenX = X; if (!isGuaranteedNotToBeUndef(X)) FrozenX = B.CreateFreeze(X, X->getName() + ".frozen"); - auto *AdjX = B.CreateNUWSub(FrozenX, Y, Instr->getName() + ".urem"); - auto *Cmp = - B.CreateICmp(ICmpInst::ICMP_ULT, FrozenX, Y, Instr->getName() + ".cmp"); + Value *FrozenY = Y; + if (!isGuaranteedNotToBeUndef(Y)) + FrozenY = B.CreateFreeze(Y, Y->getName() + ".frozen"); + auto *AdjX = B.CreateNUWSub(FrozenX, FrozenY, Instr->getName() + ".urem"); + auto *Cmp = B.CreateICmp(ICmpInst::ICMP_ULT, FrozenX, FrozenY, + Instr->getName() + ".cmp"); ExpandedOp = B.CreateSelect(Cmp, FrozenX, AdjX); } else { auto *Cmp = diff --git a/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp b/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp index 2ffe89a2458405..bfe474d8204578 100644 --- a/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp +++ 
b/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp @@ -12,6 +12,7 @@ #include "llvm/Transforms/Utils/MemoryTaggingSupport.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/StackSafetyAnalysis.h" @@ -69,14 +70,12 @@ bool forAllReachableExits(const DominatorTree &DT, const PostDominatorTree &PDT, ++NumCoveredExits; } } - // If there's a mix of covered and non-covered exits, just put the untag - // on exits, so we avoid the redundancy of untagging twice. if (NumCoveredExits == ReachableRetVec.size()) { - for (auto *End : Ends) - Callback(End); + for_each(Ends, Callback); } else { - for (auto *RI : ReachableRetVec) - Callback(RI); + // If there's a mix of covered and non-covered exits, just put the untag + // on exits, so we avoid the redundancy of untagging twice. + for_each(ReachableRetVec, Callback); // We may have inserted untag outside of the lifetime interval. // Signal the caller to remove the lifetime end call for this alloca. 
return false; diff --git a/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll b/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll index 58f9dd3633e2c4..be5cca0765edf1 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll @@ -25,27 +25,27 @@ define amdgpu_kernel void @shufflevector_i16() { ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf32 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: 
Found an estimated cost of -1 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> 
zeroinitializer +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf222 = shufflevector <2 x i16> 
undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; VI-LABEL: 'shufflevector_i16' @@ -65,27 +65,27 @@ define amdgpu_kernel void @shufflevector_i16() { ; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> ; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> ; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf32 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for 
instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf122 = shufflevector 
<2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, 
<3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> ; VI-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX9-10-SIZE-LABEL: 'shufflevector_i16' @@ -105,27 +105,27 @@ define amdgpu_kernel void @shufflevector_i16() { ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf32 = shufflevector <2 x 
i16> undef, <2 x i16> undef, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; 
GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf211 = shufflevector 
<2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; VI-SIZE-LABEL: 'shufflevector_i16' @@ -145,27 +145,27 @@ define amdgpu_kernel void @shufflevector_i16() { ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf32 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; 
VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> 
undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf220 = 
shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %shuf00 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer @@ -227,27 +227,27 @@ define amdgpu_kernel void @shufflevector_i8() { ; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf31 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf32 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf000 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> zeroinitializer -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf001 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 
x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf010 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf011 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf100 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf101 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf110 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf111 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf002 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf020 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf022 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf200 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf202 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf220 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf222 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf112 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated 
cost of -1 for instruction: %shuf121 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf122 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf211 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf212 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf221 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf000 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> zeroinitializer +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf001 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf010 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf011 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf100 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf101 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf110 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf111 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf002 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf020 = 
shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf022 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf200 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf202 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf220 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf222 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf112 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf121 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf122 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf211 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf212 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf221 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; ALL-SIZE-LABEL: 'shufflevector_i8' @@ -267,27 +267,27 @@ define amdgpu_kernel void @shufflevector_i8() { ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf31 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = 
shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf32 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> zeroinitializer -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf001 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf010 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf101 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %shuf202 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf121 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf211 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf221 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf000 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> zeroinitializer +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf001 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf010 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf011 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf100 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an 
estimated cost of 6 for instruction: %shuf101 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf110 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf111 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf002 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf020 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf022 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf200 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf202 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf220 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf222 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf112 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf121 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf122 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf211 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found 
an estimated cost of 6 for instruction: %shuf212 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuf221 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %shuf00 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer @@ -348,27 +348,27 @@ define amdgpu_kernel void @shufflevector_i32() { ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf31 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf000 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> zeroinitializer -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf001 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf010 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf011 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf100 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf101 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf110 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf111 = 
shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf002 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf020 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf022 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf200 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf202 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf220 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf222 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf112 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf121 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf122 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf211 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf212 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf221 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf000 = shufflevector <2 x i32> 
undef, <2 x i32> undef, <3 x i32> zeroinitializer +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf001 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf010 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf011 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf100 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf101 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf110 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf111 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf002 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf020 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf022 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf200 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf202 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf220 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf222 = shufflevector <2 x i32> undef, <2 x i32> undef, 
<3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf112 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf121 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf122 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf211 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf212 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf221 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; ALL-SIZE-LABEL: 'shufflevector_i32' @@ -388,27 +388,27 @@ define amdgpu_kernel void @shufflevector_i32() { ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf31 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> zeroinitializer -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf001 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf010 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%shuf011 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf101 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf121 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %shuf122 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf211 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf221 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf000 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> zeroinitializer +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf001 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf010 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf011 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf100 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf101 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf110 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf111 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf002 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf020 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 
x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf022 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf200 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf202 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf220 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf222 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf112 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf121 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf122 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf211 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf212 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf221 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %shuf00 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer @@ -455,11 +455,11 @@ define amdgpu_kernel void @shufflevector_i32() { define void @shuffle() { ; GFX9-10-LABEL: 'shuffle' ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_2 = shufflevector <2 x 
i8> undef, <2 x i8> undef, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v2i8_4 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i8_4 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_4 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v2i8_8 = shufflevector <2 x i8> undef, <2 x i8> undef, <8 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4i8_8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v6i8_8 = shufflevector <6 x i8> undef, <6 x i8> undef, <8 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i8_8 = shufflevector <2 x i8> undef, <2 x i8> undef, <8 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i8_8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v6i8_8 = shufflevector <6 x i8> undef, <6 x i8> undef, <8 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i8_8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i8_16 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16_2 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> @@ -475,11 +475,11 @@ define void @shuffle() { ; ; VI-LABEL: 'shuffle' ; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_2 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 
-1 for instruction: %v2i8_4 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i8_4 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> ; VI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_4 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v2i8_8 = shufflevector <2 x i8> undef, <2 x i8> undef, <8 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4i8_8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v6i8_8 = shufflevector <6 x i8> undef, <6 x i8> undef, <8 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i8_8 = shufflevector <2 x i8> undef, <2 x i8> undef, <8 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i8_8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v6i8_8 = shufflevector <6 x i8> undef, <6 x i8> undef, <8 x i32> ; VI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i8_8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> ; VI-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i8_16 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> ; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16_2 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> @@ -495,11 +495,11 @@ define void @shuffle() { ; ; GFX9-10-SIZE-LABEL: 'shuffle' ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_2 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_4 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: 
Found an estimated cost of 8 for instruction: %v2i8_4 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_4 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_8 = shufflevector <2 x i8> undef, <2 x i8> undef, <8 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6i8_8 = shufflevector <6 x i8> undef, <6 x i8> undef, <8 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i8_8 = shufflevector <2 x i8> undef, <2 x i8> undef, <8 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i8_8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v6i8_8 = shufflevector <6 x i8> undef, <6 x i8> undef, <8 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i8_8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i8_16 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16_2 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> @@ -515,11 +515,11 @@ define void @shuffle() { ; ; VI-SIZE-LABEL: 'shuffle' ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_2 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_4 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for 
instruction: %v2i8_4 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_4 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_8 = shufflevector <2 x i8> undef, <2 x i8> undef, <8 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6i8_8 = shufflevector <6 x i8> undef, <6 x i8> undef, <8 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i8_8 = shufflevector <2 x i8> undef, <2 x i8> undef, <8 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i8_8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v6i8_8 = shufflevector <6 x i8> undef, <6 x i8> undef, <8 x i32> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i8_8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i8_16 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16_2 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll index 3ac2b7e26650ab..b84f22907cc71f 100644 --- a/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll @@ -19,7 +19,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = 
shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; VLEN128-LABEL: 'test_vXf64' @@ -32,7 +32,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; VLEN128-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; VLEN128-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; VLEN128-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; VLEN128-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; VLEN128-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; VLEN128-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll index afcf600e13ef64..dd67772042cbd3 100644 --- 
a/llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll @@ -56,8 +56,8 @@ define <8 x i64> @interleave2_v8i64(<4 x i64> %v0, <4 x i64> %v1) { ; TODO: getInstructionCost doesn't call getShuffleCost here because the shuffle changes length define {<4 x i8>, <4 x i8>} @deinterleave_2(<8 x i8> %v) { ; CHECK-LABEL: 'deinterleave_2' -; CHECK-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v0 = shufflevector <8 x i8> %v, <8 x i8> poison, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v1 = shufflevector <8 x i8> %v, <8 x i8> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v0 = shufflevector <8 x i8> %v, <8 x i8> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v1 = shufflevector <8 x i8> %v, <8 x i8> poison, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %res0 = insertvalue { <4 x i8>, <4 x i8> } poison, <4 x i8> %v0, 0 ; CHECK-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %res1 = insertvalue { <4 x i8>, <4 x i8> } %res0, <4 x i8> %v1, 1 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret { <4 x i8>, <4 x i8> } %res1 diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-codesize.ll index 0c1c085f5afc19..61d99c20fa9668 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-codesize.ll @@ -2,15 +2,15 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse2 | FileCheck %s -check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE ; 
RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s -check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s -check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=slm | FileCheck 
%s --check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=goldmont | FileCheck %s --check-prefixes=SSE -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=btver2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 ; ; Verify the cost model for concat_subvector style shuffles. @@ -19,14 +19,14 @@ define void @test_vXf64(<2 x double> %a128, <4 x double> %a256, <8 x double> %a512, <2 x double> %b128, <4 x double> %b256, <8 x double> %b512) { ; SSE-LABEL: 'test_vXf64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <4 x double> %a256, <4 x double> %b256, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <8 x double> %a512, <8 x double> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXf64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <2 x 
double> %a128, <2 x double> %b128, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <4 x double> %a256, <4 x double> %b256, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <8 x double> %a512, <8 x double> %b512, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -48,14 +48,14 @@ define void @test_vXf64(<2 x double> %a128, <4 x double> %a256, <8 x double> %a5 define void @test_vXi64(<2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSE-LABEL: 'test_vXi64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXi64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = 
shufflevector <4 x i64> %a256, <4 x i64> %b256, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -77,17 +77,24 @@ define void @test_vXi64(<2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, <2 x define void @test_vXf32(<4 x float> %a128, <8 x float> %a256, <16 x float> %a512, <4 x float> %b128, <8 x float> %b256, <16 x float> %b512) { ; SSE-LABEL: 'test_vXf32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXf32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> -; AVX-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> @@ -106,17 +113,24 @@ define void @test_vXf32(<4 x float> %a128, <8 x float> %a256, <16 x float> %a512 define void @test_vXi32(<4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSE-LABEL: 'test_vXi32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x 
i32> %a128, <4 x i32> %b128, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXi32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> +; AVX1-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> @@ -133,26 +147,40 @@ define void @test_vXi32(<4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, <4 x } define void @test_vXi16(<8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { -; SSE-LABEL: 'test_vXi16' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX-LABEL: 'test_vXi16' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x 
i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512-LABEL: 'test_vXi16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi16' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> @@ -162,26 +190,40 @@ define void @test_vXi16(<8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, <8 } define void @test_vXi8(<16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { -; SSE-LABEL: 'test_vXi8' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX-LABEL: 'test_vXi8' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; AVX-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512-LABEL: 'test_vXi8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> 
%a128, <16 x i8> %b128, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi8' 
+; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-latency.ll index 7244b7b4f00f5d..0f8503cd1c3ccf 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-latency.ll @@ -2,15 +2,15 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s -check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX -; RUN: opt < %s 
-mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=slm | FileCheck %s --check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 ; ; 
Verify the cost model for concat_subvector style shuffles. @@ -19,21 +19,21 @@ define void @test_vXf64(<2 x double> %a128, <4 x double> %a256, <8 x double> %a512, <2 x double> %b128, <4 x double> %b256, <8 x double> %b512) { ; SSE-LABEL: 'test_vXf64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <4 x double> %a256, <4 x double> %b256, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <8 x double> %a512, <8 x double> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXf64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <4 x double> %a256, <4 x double> %b256, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <8 x double> %a512, <8 x double> %b512, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V256_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <4 x double> %a256, <4 x double> %b256, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <8 x double> %a512, <8 x double> %b512, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -48,21 +48,21 @@ define void @test_vXf64(<2 x double> %a128, <4 x double> %a256, <8 x double> %a5 define void @test_vXi64(<2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSE-LABEL: 'test_vXi64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXi64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <4 x i32> -; AVX-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -77,21 +77,28 @@ define void @test_vXi64(<2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, <2 x define void @test_vXf32(<4 x float> %a128, <8 x float> %a256, <16 x float> %a512, <4 x float> %b128, <8 x float> %b256, <16 x float> %b512) { ; SSE-LABEL: 'test_vXf32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> 
%b128, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXf32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf32' +; AVX2-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -106,21 +113,28 @@ define void @test_vXf32(<4 x float> %a128, <8 x float> %a256, <16 x float> %a512 define void @test_vXi32(<4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSE-LABEL: 'test_vXi32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x i32> 
%a128, <4 x i32> %b128, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXi32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -133,26 +147,40 @@ define void @test_vXi32(<4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, <4 x } define void @test_vXi16(<8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { -; SSE-LABEL: 'test_vXi16' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; SSE-NEXT: Cost 
Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX-LABEL: 'test_vXi16' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512-LABEL: 'test_vXi16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512_128 = 
shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, 
<16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi16' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> @@ -162,26 +190,40 @@ define void @test_vXi16(<8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, <8 } define void @test_vXi8(<16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { -; SSE-LABEL: 'test_vXi8' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; SSE-NEXT: Cost Model: Found an estimated 
cost of 2 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX-LABEL: 'test_vXi8' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512-LABEL: 'test_vXi8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> 
%b128, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an 
estimated cost of 19 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi8' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-sizelatency.ll index b1fa00b5a71351..8c4f55eb8adcb2 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-sizelatency.ll @@ -2,15 +2,15 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output 
-cost-kind=size-latency -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s -check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI ; ; RUN: opt < %s 
-mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=slm | FileCheck %s --check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 ; ; Verify the cost model for concat_subvector style shuffles. @@ -19,14 +19,14 @@ define void @test_vXf64(<2 x double> %a128, <4 x double> %a256, <8 x double> %a512, <2 x double> %b128, <4 x double> %b256, <8 x double> %b512) { ; SSE-LABEL: 'test_vXf64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <4 x double> %a256, <4 x double> %b256, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <8 x double> %a512, <8 x double> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXf64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x 
double> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <4 x double> %a256, <4 x double> %b256, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <8 x double> %a512, <8 x double> %b512, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -48,14 +48,14 @@ define void @test_vXf64(<2 x double> %a128, <4 x double> %a256, <8 x double> %a5 define void @test_vXi64(<2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSE-LABEL: 'test_vXi64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXi64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <2 x 
i64> %a128, <2 x i64> %b128, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -77,17 +77,24 @@ define void @test_vXi64(<2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, <2 x define void @test_vXf32(<4 x float> %a128, <8 x float> %a256, <16 x float> %a512, <4 x float> %b128, <8 x float> %b256, <16 x float> %b512) { ; SSE-LABEL: 'test_vXf32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXf32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 
for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> @@ -106,17 +113,24 @@ define void @test_vXf32(<4 x float> %a128, <8 x float> %a256, <16 x float> %a512 define void @test_vXi32(<4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; 
SSE-LABEL: 'test_vXi32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXi32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost 
of 2 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> @@ -133,26 +147,40 @@ define void @test_vXi32(<4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, <4 x } define void @test_vXi16(<8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { -; SSE-LABEL: 'test_vXi16' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX-LABEL: 'test_vXi16' -; AVX-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512-LABEL: 'test_vXi16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 
1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = 
shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi16' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> @@ -162,26 +190,40 @@ define void @test_vXi16(<8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, <8 } define void @test_vXi8(<16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { -; SSE-LABEL: 'test_vXi8' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX-LABEL: 'test_vXi8' -; AVX-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512-LABEL: 'test_vXi8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 
'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; 
AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi8' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector.ll b/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector.ll index 60cb8cffd1a595..ffc470d5f3448c 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector.ll @@ -2,15 +2,15 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+sse2 | FileCheck %s -check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx | FileCheck %s -check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx2 | FileCheck %s -check-prefixes=AVX -; RUN: opt < %s 
-mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mcpu=slm | FileCheck %s --check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mcpu=goldmont | FileCheck %s --check-prefixes=SSE -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mcpu=btver2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 ; ; Verify the cost model for concat_subvector style shuffles. 
@@ -19,21 +19,21 @@ define void @test_vXf64(<2 x double> %a128, <4 x double> %a256, <8 x double> %a512, <2 x double> %b128, <4 x double> %b256, <8 x double> %b512) { ; SSE-LABEL: 'test_vXf64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <4 x double> %a256, <4 x double> %b256, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <8 x double> %a512, <8 x double> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_vXf64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <4 x double> %a256, <4 x double> %b256, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <8 x double> %a512, <8 x double> %b512, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x double> %a128, 
<2 x double> %b128, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x double> %a128, <2 x double> %b128, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <4 x double> %a256, <4 x double> %b256, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <8 x double> %a512, <8 x double> %b512, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -48,21 +48,21 @@ define void @test_vXf64(<2 x double> %a128, <4 x double> %a256, <8 x double> %a5 define void @test_vXi64(<2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSE-LABEL: 'test_vXi64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_vXi64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: 
%V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -77,21 +77,28 @@ define void @test_vXi64(<2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, <2 x define void @test_vXf32(<4 x float> %a128, <8 x float> %a256, <16 x float> %a512, <4 x float> %b128, <8 x float> %b256, <16 x float> %b512) { ; SSE-LABEL: 'test_vXf32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; SSE-NEXT: Cost Model: Found an 
estimated cost of 56 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX-LABEL: 'test_vXf32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX1-LABEL: 'test_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = 
shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x float> %a128, <4 x float> %b128, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x float> %a256, <8 x float> %b256, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <16 x float> %a512, <16 x float> %b512, <32 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -106,21 +113,28 @@ define void @test_vXf32(<4 x float> %a128, <8 x float> %a256, <16 x float> %a512 define void @test_vXi32(<4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSE-LABEL: 'test_vXi32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; 
SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX-LABEL: 'test_vXi32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX1-LABEL: 'test_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 
x i32> %a128, <4 x i32> %b128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <32 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -133,26 +147,40 @@ define void @test_vXi32(<4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, <4 x } define void @test_vXi16(<8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { -; SSE-LABEL: 'test_vXi16' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; AVX-LABEL: 'test_vXi16' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; AVX512-LABEL: 'test_vXi16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> 
%b128, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX512BW-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi16' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V256_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <16 x i32> %V512_128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <32 x i32> @@ -162,26 +190,40 @@ define void @test_vXi16(<8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, <8 } define void @test_vXi8(<16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { -; SSE-LABEL: 'test_vXi8' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_256 = 
shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; AVX-LABEL: 'test_vXi8' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; AVX512-LABEL: 'test_vXi8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX1-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: 
%V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi8' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1024_512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <128 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V256_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <32 x i32> %V512_128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <64 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-codesize.ll index 9a86fbe105ec29..91314d39690520 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-codesize.ll @@ -27,21 +27,34 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_2345 = 
shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXf64' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXf64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX2-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-latency.ll index 393dec82428b34..33431083c68bb3 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-latency.ll @@ -27,21 +27,34 @@ define void @test_vXf64(<4 x 
double> %src256, <8 x double> %src512) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXf64' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXf64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> 
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> @@ -53,7 +66,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; AVX512-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-sizelatency.ll index 63bb07bf4fd894..039758e0b6e384 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-sizelatency.ll @@ -27,21 +27,34 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXf64' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXf64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x 
i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll index b521a759484670..6a82a4a7432ef8 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll @@ -27,21 +27,34 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x 
double> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX-LABEL: 'test_vXf64' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret 
void +; AVX1-LABEL: 'test_vXf64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> @@ -53,7 +66,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost 
of -1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> diff --git a/llvm/test/Analysis/LoopAccessAnalysis/underlying-object-loop-varying-phi.ll b/llvm/test/Analysis/LoopAccessAnalysis/underlying-object-loop-varying-phi.ll new file mode 100644 index 00000000000000..1a5a6ac08d4045 --- /dev/null +++ b/llvm/test/Analysis/LoopAccessAnalysis/underlying-object-loop-varying-phi.ll @@ -0,0 +1,175 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -passes='print' -disable-output %s 2>&1 | FileCheck %s + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + +; Test case for https://github.com/llvm/llvm-project/issues/82665. +define void @indirect_ptr_recurrences_read_write(ptr %A, ptr %B) { +; CHECK-LABEL: 'indirect_ptr_recurrences_read_write' +; CHECK-NEXT: loop: +; CHECK-NEXT: Memory dependences are safe +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ] + %ptr.recur = phi ptr [ %A, %entry ], [ %ptr.next, %loop ] + %gep.B = getelementptr inbounds ptr, ptr %B, i64 %iv + %ptr.next = load ptr, ptr %gep.B, align 8, !tbaa !6 + %l = load i32, ptr %ptr.recur, align 4, !tbaa !10 + %xor = xor i32 %l, 1 + store i32 %xor, ptr %ptr.recur, align 4, !tbaa !10 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 5 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define i32 @indirect_ptr_recurrences_read_only_loop(ptr %A, ptr %B) { +; CHECK-LABEL: 'indirect_ptr_recurrences_read_only_loop' +; CHECK-NEXT: loop: +; CHECK-NEXT: Memory dependences are safe +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ] + %ptr.recur = phi ptr [ %A, %entry ], [ %ptr.next, %loop ] + %red = phi i32 [ 0, %entry ], [ %xor, %loop ] + %gep.B = getelementptr inbounds ptr, ptr %B, i64 %iv + %ptr.next = load ptr, ptr %gep.B, align 8, !tbaa !6 + %l = load i32, ptr %ptr.recur, align 4, !tbaa !10 + %xor = xor i32 %l, 1 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 5 + br i1 %ec, label %exit, label %loop + +exit: + ret i32 %xor +} + +define void @indirect_ptr_recurrences_read_write_may_alias_no_tbaa(ptr %A, ptr %B) { +; CHECK-LABEL: 'indirect_ptr_recurrences_read_write_may_alias_no_tbaa' +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: cannot identify array bounds +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant 
address were not found in loop. +; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ] + %ptr.recur = phi ptr [ %A, %entry ], [ %ptr.next, %loop ] + %gep.B = getelementptr inbounds ptr, ptr %B, i64 %iv + %ptr.next = load ptr, ptr %gep.B, align 8, !tbaa !6 + %l = load i32, ptr %ptr.recur, align 4 + %xor = xor i32 %l, 1 + store i32 %xor, ptr %ptr.recur, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 5 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @indirect_ptr_recurrences_read_write_may_alias_different_obj(ptr %A, ptr %B, ptr %C) { +; CHECK-LABEL: 'indirect_ptr_recurrences_read_write_may_alias_different_obj' +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: cannot identify array bounds +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ] + %ptr.recur = phi ptr [ %A, %entry ], [ %ptr.next, %loop ] + %gep.B = getelementptr inbounds ptr, ptr %B, i64 %iv + %ptr.next = load ptr, ptr %gep.B, align 8, !tbaa !6 + %l = load i32, ptr %ptr.recur, align 4 + %xor = xor i32 %l, 1 + %gep.C = getelementptr inbounds ptr, ptr %C, i64 %iv + store i32 %xor, ptr %gep.C, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 5 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @indirect_ptr_recurrences_read_write_may_noalias_different_obj(ptr %A, ptr %B, ptr noalias %C) { +; CHECK-LABEL: 'indirect_ptr_recurrences_read_write_may_noalias_different_obj' +; CHECK-NEXT: loop: +; CHECK-NEXT: Memory dependences are safe +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ] + %ptr.recur = phi ptr [ %A, %entry ], [ %ptr.next, %loop ] + %gep.B = getelementptr inbounds ptr, ptr %B, i64 %iv + %ptr.next = load ptr, ptr %gep.B, align 8, !tbaa !6 + %l = load i32, ptr %ptr.recur, align 4 + %xor = xor i32 %l, 1 + %gep.C = getelementptr inbounds ptr, ptr %C, i64 %iv + store i32 %xor, ptr %gep.C, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 5 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + + +!6 = !{!7, !7, i64 0} +!7 = !{!"any pointer", !8, i64 0} +!8 = !{!"omnipotent char", !9, i64 0} +!9 = !{!"Simple C/C++ TBAA"} +!10 = !{!11, !11, i64 0} +!11 = !{!"int", !8, i64 0} diff --git a/llvm/test/Assembler/dbg-record-invalid-0.ll b/llvm/test/Assembler/dbg-record-invalid-0.ll new file mode 100644 index 00000000000000..feb513a405f9ec --- /dev/null +++ b/llvm/test/Assembler/dbg-record-invalid-0.ll @@ -0,0 +1,38 @@ +;; Test that we get a parser error when a debug record appears post-terminator. +;; Note: From the parser's perspective, the error is that the debug record is +;; appearing at the start of a new unnamed basic block which contains no actual +;; instructions. 
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s +; ModuleID = '' +source_filename = "" + +define dso_local i32 @f(i32 %a) !dbg !7 { +entry: + ret i32 %a, !dbg !18 + #dbg_value(!DIArgList(i32 %a), !12, !DIExpression(), !14) +; CHECK: :[[@LINE+1]]:1: error: expected instruction opcode +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 18.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "print.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 5} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 18.0.0"} +!7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !11) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !10} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !{!12, !13} +!12 = !DILocalVariable(name: "a", arg: 1, scope: !7, file: !1, line: 3, type: !10) +!13 = !DILocalVariable(name: "b", scope: !7, file: !1, line: 3, type: !10) +!14 = !DILocation(line: 3, column: 15, scope: !7) +!15 = distinct !DIAssignID() +!16 = !DILocation(line: 3, column: 20, scope: !7) +!17 = !DILocation(line: 3, column: 25, scope: !7) +!18 = !DILocation(line: 3, column: 30, scope: !7) diff --git a/llvm/test/Assembler/dbg-record-invalid-1.ll b/llvm/test/Assembler/dbg-record-invalid-1.ll new file mode 100644 index 00000000000000..7ab5751777e8cf --- /dev/null +++ b/llvm/test/Assembler/dbg-record-invalid-1.ll @@ -0,0 +1,39 @@ +;; Test that we get a parser error when a debug intrinsic appears in the same +;; module as a debug record. 
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s +; ModuleID = '' +source_filename = "" + +define dso_local i32 @f(i32 %a) !dbg !7 { +entry: + #dbg_value(!DIArgList(i32 %a), !12, !DIExpression(), !14) +; CHECK: :[[@LINE+1]]:8: error: llvm.dbg intrinsic should not appear in a module using non-intrinsic debug info + call void @llvm.dbg.value(metadata i32 %a, metadata !12, metadata !DIExpression()), !dbg !14 + ret i32 %a, !dbg !18 +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 18.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "print.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 5} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 18.0.0"} +!7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !11) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !10} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !{!12, !13} +!12 = !DILocalVariable(name: "a", arg: 1, scope: !7, file: !1, line: 3, type: !10) +!13 = !DILocalVariable(name: "b", scope: !7, file: !1, line: 3, type: !10) +!14 = !DILocation(line: 3, column: 15, scope: !7) +!15 = distinct !DIAssignID() +!16 = !DILocation(line: 3, column: 20, scope: !7) +!17 = !DILocation(line: 3, column: 25, scope: !7) +!18 = !DILocation(line: 3, column: 30, scope: !7) diff --git a/llvm/test/Assembler/dbg-record-invalid-2.ll b/llvm/test/Assembler/dbg-record-invalid-2.ll new file mode 100644 index 00000000000000..a019f73feab9c5 --- /dev/null +++ b/llvm/test/Assembler/dbg-record-invalid-2.ll @@ -0,0 +1,36 @@ +;; Test that we get a parser error 
when we have a debug record with an +;; incorrect number of arguments. +; RUN: not llvm-as < %s 2>&1 | FileCheck %s +; ModuleID = '' +source_filename = "" + +define dso_local i32 @f(i32 %a) !dbg !7 { +entry: +; CHECK: :[[@LINE+1]]:24: error: expected '!' here + #dbg_value(i32 %a, i32 0, !DIExpression(), !14) + ret i32 %a, !dbg !18 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 18.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "print.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 5} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 18.0.0"} +!7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !11) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !10} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !{!12, !13} +!12 = !DILocalVariable(name: "a", arg: 1, scope: !7, file: !1, line: 3, type: !10) +!13 = !DILocalVariable(name: "b", scope: !7, file: !1, line: 3, type: !10) +!14 = !DILocation(line: 3, column: 15, scope: !7) +!15 = distinct !DIAssignID() +!16 = !DILocation(line: 3, column: 20, scope: !7) +!17 = !DILocation(line: 3, column: 25, scope: !7) +!18 = !DILocation(line: 3, column: 30, scope: !7) diff --git a/llvm/test/Assembler/dbg-record-invalid-3.ll b/llvm/test/Assembler/dbg-record-invalid-3.ll new file mode 100644 index 00000000000000..e6f072373f54d5 --- /dev/null +++ b/llvm/test/Assembler/dbg-record-invalid-3.ll @@ -0,0 +1,39 @@ +;; Test that we get a parser error when a debug record appears in the same +;; module as a debug intrinsic. 
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s +; ModuleID = '' +source_filename = "" + +define dso_local i32 @f(i32 %a) !dbg !7 { +entry: + call void @llvm.dbg.value(metadata i32 %a, metadata !12, metadata !DIExpression()), !dbg !14 +; CHECK: :[[@LINE+1]]:5: error: debug record should not appear in a module containing debug info intrinsics + #dbg_value(!DIArgList(i32 %a), !12, !DIExpression(), !14) + ret i32 %a, !dbg !18 +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 18.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "print.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 5} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 18.0.0"} +!7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !11) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !10} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !{!12, !13} +!12 = !DILocalVariable(name: "a", arg: 1, scope: !7, file: !1, line: 3, type: !10) +!13 = !DILocalVariable(name: "b", scope: !7, file: !1, line: 3, type: !10) +!14 = !DILocation(line: 3, column: 15, scope: !7) +!15 = distinct !DIAssignID() +!16 = !DILocation(line: 3, column: 20, scope: !7) +!17 = !DILocation(line: 3, column: 25, scope: !7) +!18 = !DILocation(line: 3, column: 30, scope: !7) diff --git a/llvm/test/Assembler/dbg-record-invalid-4.ll b/llvm/test/Assembler/dbg-record-invalid-4.ll new file mode 100644 index 00000000000000..f898477603c8e2 --- /dev/null +++ b/llvm/test/Assembler/dbg-record-invalid-4.ll @@ -0,0 +1,36 @@ +;; Test that we get a parser error when 
we have a debug record with an invalid +;; type. +; RUN: not llvm-as < %s 2>&1 | FileCheck %s +; ModuleID = '' +source_filename = "" + +define dso_local i32 @f(i32 %a) !dbg !7 { +entry: +; CHECK: :[[@LINE+1]]:6: error: expected debug record type here + #dbg_invalid(!DIArgList(i32 %a), !12, !DIExpression(), !14) + ret i32 %a, !dbg !18 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 18.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "print.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 5} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 18.0.0"} +!7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !11) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !10} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !{!12, !13} +!12 = !DILocalVariable(name: "a", arg: 1, scope: !7, file: !1, line: 3, type: !10) +!13 = !DILocalVariable(name: "b", scope: !7, file: !1, line: 3, type: !10) +!14 = !DILocation(line: 3, column: 15, scope: !7) +!15 = distinct !DIAssignID() +!16 = !DILocation(line: 3, column: 20, scope: !7) +!17 = !DILocation(line: 3, column: 25, scope: !7) +!18 = !DILocation(line: 3, column: 30, scope: !7) diff --git a/llvm/test/Assembler/dbg-record-invalid-5.ll b/llvm/test/Assembler/dbg-record-invalid-5.ll new file mode 100644 index 00000000000000..5ea588b87668c4 --- /dev/null +++ b/llvm/test/Assembler/dbg-record-invalid-5.ll @@ -0,0 +1,35 @@ +;; Test that we get a parser error when a basic block contains only a debug +;; record. 
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s +; ModuleID = '' +source_filename = "" + +define dso_local i32 @f(i32 %a) !dbg !7 { +entry: + #dbg_value(!DIArgList(i32 %a), !12, !DIExpression(), !14) +; CHECK: :[[@LINE+1]]:1: error: expected instruction opcode +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 18.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "print.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 5} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 18.0.0"} +!7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !11) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !10} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !{!12, !13} +!12 = !DILocalVariable(name: "a", arg: 1, scope: !7, file: !1, line: 3, type: !10) +!13 = !DILocalVariable(name: "b", scope: !7, file: !1, line: 3, type: !10) +!14 = !DILocation(line: 3, column: 15, scope: !7) +!15 = distinct !DIAssignID() +!16 = !DILocation(line: 3, column: 20, scope: !7) +!17 = !DILocation(line: 3, column: 25, scope: !7) +!18 = !DILocation(line: 3, column: 30, scope: !7) diff --git a/llvm/test/Assembler/dbg-record-invalid-6.ll b/llvm/test/Assembler/dbg-record-invalid-6.ll new file mode 100644 index 00000000000000..72dafcdb97fce4 --- /dev/null +++ b/llvm/test/Assembler/dbg-record-invalid-6.ll @@ -0,0 +1,36 @@ +;; Test that we get a parser error when we have a debug record with an +;; incorrect number of arguments. 
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s +; ModuleID = '' +source_filename = "" + +define dso_local i32 @f(i32 %a) !dbg !7 { +entry: +; CHECK: :[[@LINE+1]]:46: error: expected '!' here + #dbg_value(i32 %a, !12, !DIExpression(), i32 0) + ret i32 %a, !dbg !18 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 18.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "print.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 5} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 18.0.0"} +!7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !11) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !10} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !{!12, !13} +!12 = !DILocalVariable(name: "a", arg: 1, scope: !7, file: !1, line: 3, type: !10) +!13 = !DILocalVariable(name: "b", scope: !7, file: !1, line: 3, type: !10) +!14 = !DILocation(line: 3, column: 15, scope: !7) +!15 = distinct !DIAssignID() +!16 = !DILocation(line: 3, column: 20, scope: !7) +!17 = !DILocation(line: 3, column: 25, scope: !7) +!18 = !DILocation(line: 3, column: 30, scope: !7) diff --git a/llvm/test/Assembler/dbg-record-invalid-7.ll b/llvm/test/Assembler/dbg-record-invalid-7.ll new file mode 100644 index 00000000000000..036a85a2977fc7 --- /dev/null +++ b/llvm/test/Assembler/dbg-record-invalid-7.ll @@ -0,0 +1,36 @@ +;; Test that we get a parser error when we have a debug record with an incorrect +;; number of arguments. 
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s +; ModuleID = '' +source_filename = "" + +define dso_local i32 @f(i32 %a) !dbg !7 { +entry: +; CHECK: :[[@LINE+1]]:44: error: Expected ',' here + #dbg_value(i32 %a, !12, !DIExpression()) + ret i32 %a, !dbg !18 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 18.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "print.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 5} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 18.0.0"} +!7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !11) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !10} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !{!12, !13} +!12 = !DILocalVariable(name: "a", arg: 1, scope: !7, file: !1, line: 3, type: !10) +!13 = !DILocalVariable(name: "b", scope: !7, file: !1, line: 3, type: !10) +!14 = !DILocation(line: 3, column: 15, scope: !7) +!15 = distinct !DIAssignID() +!16 = !DILocation(line: 3, column: 20, scope: !7) +!17 = !DILocation(line: 3, column: 25, scope: !7) +!18 = !DILocation(line: 3, column: 30, scope: !7) diff --git a/llvm/test/Assembler/dbg-record-invalid-8.ll b/llvm/test/Assembler/dbg-record-invalid-8.ll new file mode 100644 index 00000000000000..d0b8f36d7895c5 --- /dev/null +++ b/llvm/test/Assembler/dbg-record-invalid-8.ll @@ -0,0 +1,36 @@ +;; Test that we get a parser error when we have a debug assign record with an +;; incorrect number of arguments. 
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s +; ModuleID = '' +source_filename = "" + +define dso_local i32 @f(i32 %a) !dbg !7 { +entry: +; CHECK: :[[@LINE+1]]:50: error: Expected ',' here + #dbg_assign(i32 %a, !12, !DIExpression(), !14) + ret i32 %a, !dbg !18 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 18.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "print.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 5} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 18.0.0"} +!7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !11) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !10} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !{!12, !13} +!12 = !DILocalVariable(name: "a", arg: 1, scope: !7, file: !1, line: 3, type: !10) +!13 = !DILocalVariable(name: "b", scope: !7, file: !1, line: 3, type: !10) +!14 = !DILocation(line: 3, column: 15, scope: !7) +!15 = distinct !DIAssignID() +!16 = !DILocation(line: 3, column: 20, scope: !7) +!17 = !DILocation(line: 3, column: 25, scope: !7) +!18 = !DILocation(line: 3, column: 30, scope: !7) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir index d050823e3b9494..1eb445c03efcd6 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir @@ -406,3 +406,66 @@ body: | %zext:_(<2 x s64>) = G_ZEXT %and(<2 x s1>) $q0 = COPY %zext ... 
+--- +name: test_dont_combine_pointers +body: | + ; CHECK-LABEL: name: test_dont_combine_pointers + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -8 + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[C1]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16 + ; CHECK-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p0) = G_INTTOPTR [[C2]](s64) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 false + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x60000000), %bb.3(0x20000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[C]](p0) :: (load (p0)) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](p0), [[INTTOPTR]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](p0), [[INTTOPTR1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: G_BRCOND [[AND]](s1), %bb.3 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.1(0x55555555), %bb.3(0x2aaaaaab) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: G_BRCOND [[C3]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: G_BR %bb.1 + bb.1: + %1:_(p0) = G_CONSTANT i64 0 + %3:_(s64) = G_CONSTANT i64 -8 + %2:_(p0) = G_INTTOPTR %3(s64) + %6:_(s64) = G_CONSTANT i64 -16 + %5:_(p0) = G_INTTOPTR %6(s64) + %10:_(s1) = G_CONSTANT i1 false + + bb.2: + successors: %bb.4(0x60000000), %bb.3(0x20000000) + + %0:_(p0) = G_LOAD %1(p0) :: (load (p0)) + %4:_(s1) = G_ICMP intpred(eq), %0(p0), %2 + %7:_(s1) = G_ICMP intpred(eq), %0(p0), %5 + %8:_(s1) = G_OR %4, %7 + %9:_(s1) = G_SELECT %8(s1), %10, %10 + G_BRCOND %8(s1), %bb.4 + G_BR %bb.3 + + bb.4: + successors: %bb.2(0x55555555), %bb.3(0x2aaaaaab) + + G_BRCOND %10(s1), 
%bb.2 + G_BR %bb.3 + + bb.3: + G_BR %bb.2 + +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir index 7b73c8cec47746..2bf7e84a379ba0 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir @@ -859,3 +859,40 @@ body: | RET_ReallyLR implicit $x0 ... +--- +name: dont_combine_pointer_type_select_of_constant +alignment: 4 +liveins: + - { reg: '$w0' } + - { reg: '$x1' } +body: | + bb.1: + liveins: $w0, $x1 + + ; CHECK-LABEL: name: dont_combine_pointer_type_select_of_constant + ; CHECK: liveins: $w0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[TRUNC]], 1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s8) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[C1]](s64) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[TRUNC1]](s1), [[C]], [[INTTOPTR]] + ; CHECK-NEXT: G_STORE [[SELECT]](p0), [[COPY1]](p0) :: (store (p0)) + ; CHECK-NEXT: RET_ReallyLR + %3:_(s32) = COPY $w0 + %2:_(s8) = G_TRUNC %3(s32) + %1:_(p0) = COPY $x1 + %4:_(s8) = G_ASSERT_ZEXT %2, 1 + %0:_(s1) = G_TRUNC %4(s8) + %6:_(p0) = G_CONSTANT i64 0 + %8:_(s64) = G_CONSTANT i64 -1 + %7:_(p0) = G_INTTOPTR %8(s64) + %5:_(p0) = G_SELECT %0(s1), %6, %7 + G_STORE %5(p0), %1(p0) :: (store (p0)) + RET_ReallyLR + +... 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir index 6f6cf2cc165b9f..e12353c7ef5bec 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir @@ -216,3 +216,73 @@ body: | $q0 = COPY %2(<2 x s64>) RET_ReallyLR ... +--- +name: v3s8_crash +body: | + ; CHECK-LABEL: name: v3s8_crash + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $w1, $w2, $w3, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[DEF2]](s16) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s16>) = G_INSERT_VECTOR_ELT [[BUILD_VECTOR]], [[C2]](s16), [[C1]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[IVEC]](<4 x s16>) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s16) + ; 
CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s16) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8) + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s8>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR1]](<16 x s8>), [[BUILD_VECTOR2]], shufflemask(0, 16, 16, 16, 1, 16, 16, 16, 2, 16, 16, 16, undef, undef, undef, undef) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[SHUF]](<16 x s8>) + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(<4 x s32>) = G_UITOFP [[BITCAST]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UITOFP]](<4 x s32>) + ; CHECK-NEXT: G_STORE [[UV4]](s32), [[COPY]](p0) :: (store (s32), align 16) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CHECK-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD]](p0) :: (store (s32) into unknown-address + 4) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; CHECK-NEXT: G_STORE [[UV6]](s32), [[PTR_ADD1]](p0) :: (store (s32) into unknown-address + 8, align 8) + ; CHECK-NEXT: G_BR %bb.1 + bb.1: + liveins: $w1, $w2, $w3, $x0 + + %0:_(p0) = COPY $x0 + %2:_(s32) = COPY $w1 + %3:_(s32) = COPY $w2 + %4:_(s32) = COPY $w3 + %5:_(<3 x s32>) = G_BUILD_VECTOR %2(s32), %3(s32), %4(s32) + %1:_(<3 x s8>) = G_TRUNC %5(<3 x s32>) + %8:_(s64) = G_CONSTANT 
i64 0 + %11:_(s8) = G_IMPLICIT_DEF + %7:_(s8) = G_CONSTANT i8 0 + %10:_(<3 x s8>) = G_BUILD_VECTOR %7(s8), %11(s8), %11(s8) + + bb.2: + %14:_(s64) = G_CONSTANT i64 0 + %15:_(s8) = G_CONSTANT i8 0 + %6:_(<3 x s8>) = G_INSERT_VECTOR_ELT %1, %15(s8), %14(s64) + %9:_(<12 x s8>) = G_SHUFFLE_VECTOR %6(<3 x s8>), %10, shufflemask(0, 3, 3, 3, 1, 3, 3, 3, 2, 3, 3, 3) + %12:_(<3 x s32>) = G_BITCAST %9(<12 x s8>) + %13:_(<3 x s32>) = G_UITOFP %12(<3 x s32>) + G_STORE %13(<3 x s32>), %0(p0) :: (store (<3 x s32>)) + G_BR %bb.2 + +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index d87704cf45d5d5..ecad3f11513487 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -625,6 +625,9 @@ # DEBUG-NEXT: G_SHUFFLE_VECTOR (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: G_SPLAT_VECTOR (opcode {{[0-9]+}}): 2 type indices, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_CTTZ (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. 
imm index coverage check SKIPPED: user-defined predicate detected diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll index 4f87bb2a3ee811..d1e38b85fa9c36 100644 --- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll @@ -64,8 +64,6 @@ ; CHECK-NEXT: AArch64 pseudo instruction expansion pass ; CHECK-NEXT: Insert KCFI indirect call checks ; CHECK-NEXT: AArch64 speculation hardening pass -; CHECK-NEXT: AArch64 Indirect Thunks -; CHECK-NEXT: AArch64 sls hardening pass ; CHECK-NEXT: Analyze Machine Code For Garbage Collection ; CHECK-NEXT: Insert fentry calls ; CHECK-NEXT: Insert XRay ops @@ -75,6 +73,8 @@ ; CHECK-NEXT: StackMap Liveness Analysis ; CHECK-NEXT: Live DEBUG_VALUE analysis ; CHECK-NEXT: Machine Sanitizer Binary Metadata +; CHECK-NEXT: AArch64 Indirect Thunks +; CHECK-NEXT: AArch64 sls hardening pass ; CHECK-NEXT: AArch64 Pointer Authentication ; CHECK-NEXT: AArch64 Branch Targets ; CHECK-NEXT: Branch relaxation pass diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll index ae0dbed09979b4..eee9a27c90c19e 100644 --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -205,8 +205,6 @@ ; CHECK-NEXT: AArch64 load / store optimization pass ; CHECK-NEXT: Insert KCFI indirect call checks ; CHECK-NEXT: AArch64 speculation hardening pass -; CHECK-NEXT: AArch64 Indirect Thunks -; CHECK-NEXT: AArch64 sls hardening pass ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: Machine Natural Loop Construction ; CHECK-NEXT: Falkor HW Prefetch Fix Late Phase @@ -227,6 +225,8 @@ ; CHECK-NEXT: Machine Sanitizer Binary Metadata ; CHECK-NEXT: Machine Outliner ; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: AArch64 Indirect Thunks +; CHECK-NEXT: AArch64 sls hardening pass ; CHECK-NEXT: AArch64 Pointer Authentication ; CHECK-NEXT: AArch64 Branch Targets ; CHECK-NEXT: Branch relaxation pass diff --git 
a/llvm/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll b/llvm/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll index 580886520789e3..3ffaf962425b38 100644 --- a/llvm/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll +++ b/llvm/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll @@ -32,8 +32,16 @@ ; HOTNESS: Freeing Pass 'Machine Outliner' ; HOTNESS-NEXT: Executing Pass 'Function Pass Manager' -; HOTNESS-NEXT: Executing Pass 'Verify generated machine code' -; HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' +; HOTNESS-NEXT: Executing Pass 'Verify generated machine code' on Function 'empty_func'... +; HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' on Function 'empty_func'... +; HOTNESS-NEXT: Executing Pass 'AArch64 Indirect Thunks' on Function 'empty_func'... +; HOTNESS-NEXT: Freeing Pass 'AArch64 Indirect Thunks' on Function 'empty_func'... +; HOTNESS-NEXT: Executing Pass 'Verify generated machine code' on Function 'empty_func'... +; HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' on Function 'empty_func'... +; HOTNESS-NEXT: Executing Pass 'AArch64 sls hardening pass' on Function 'empty_func'... +; HOTNESS-NEXT: Freeing Pass 'AArch64 sls hardening pass' on Function 'empty_func'... +; HOTNESS-NEXT: Executing Pass 'Verify generated machine code' on Function 'empty_func'... +; HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' on Function 'empty_func'... ; HOTNESS-NEXT: Executing Pass 'AArch64 Pointer Authentication' on Function 'empty_func'... ; HOTNESS-NEXT: Freeing Pass 'AArch64 Pointer Authentication' on Function 'empty_func'... ; HOTNESS-NEXT: Executing Pass 'Verify generated machine code' on Function 'empty_func'... 
@@ -73,8 +81,16 @@ ; NO_HOTNESS: Freeing Pass 'Machine Outliner' ; NO_HOTNESS-NEXT: Executing Pass 'Function Pass Manager' -; NO_HOTNESS-NEXT: Executing Pass 'Verify generated machine code' -; NO_HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' +; NO_HOTNESS-NEXT: Executing Pass 'Verify generated machine code' on Function 'empty_func'... +; NO_HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' on Function 'empty_func'... +; NO_HOTNESS-NEXT: Executing Pass 'AArch64 Indirect Thunks' on Function 'empty_func'... +; NO_HOTNESS-NEXT: Freeing Pass 'AArch64 Indirect Thunks' on Function 'empty_func'... +; NO_HOTNESS-NEXT: Executing Pass 'Verify generated machine code' on Function 'empty_func'... +; NO_HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' on Function 'empty_func'... +; NO_HOTNESS-NEXT: Executing Pass 'AArch64 sls hardening pass' on Function 'empty_func'... +; NO_HOTNESS-NEXT: Freeing Pass 'AArch64 sls hardening pass' on Function 'empty_func'... +; NO_HOTNESS-NEXT: Executing Pass 'Verify generated machine code' on Function 'empty_func'... +; NO_HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' on Function 'empty_func'... ; NO_HOTNESS-NEXT: Executing Pass 'AArch64 Pointer Authentication' on Function 'empty_func'... ; NO_HOTNESS-NEXT: Freeing Pass 'AArch64 Pointer Authentication' on Function 'empty_func'... ; NO_HOTNESS-NEXT: Executing Pass 'Verify generated machine code' on Function 'empty_func'... diff --git a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll index a5fcbf764b64fb..5806bcf0dacf16 100644 --- a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll +++ b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll @@ -1028,22 +1028,14 @@ false: ret i32 %tmp.0 } -; Re-aligned stack pointer with all caller-save regs live. +; Re-aligned stack pointer with all caller-save regs live. See bug +; 26642. 
In this case we currently avoid shrink wrapping because +; ensuring we have a scratch register to re-align the stack pointer is +; too complicated. Output should be the same for both enabled and +; disabled shrink wrapping. define void @stack_realign2(i32 %a, i32 %b, ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4, ptr %ptr5, ptr %ptr6) { ; ENABLE-LABEL: stack_realign2: ; ENABLE: ; %bb.0: -; ENABLE-NEXT: lsl w8, w1, w0 -; ENABLE-NEXT: lsr w9, w0, w1 -; ENABLE-NEXT: lsl w14, w0, w1 -; ENABLE-NEXT: lsr w11, w1, w0 -; ENABLE-NEXT: add w15, w1, w0 -; ENABLE-NEXT: sub w10, w8, w9 -; ENABLE-NEXT: subs w17, w1, w0 -; ENABLE-NEXT: add w16, w14, w8 -; ENABLE-NEXT: add w12, w9, w11 -; ENABLE-NEXT: add w13, w11, w15 -; ENABLE-NEXT: b.le LBB14_2 -; ENABLE-NEXT: ; %bb.1: ; %true ; ENABLE-NEXT: stp x28, x27, [sp, #-96]! ; 16-byte Folded Spill ; ENABLE-NEXT: stp x26, x25, [sp, #16] ; 16-byte Folded Spill ; ENABLE-NEXT: stp x24, x23, [sp, #32] ; 16-byte Folded Spill @@ -1051,8 +1043,8 @@ define void @stack_realign2(i32 %a, i32 %b, ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr ; ENABLE-NEXT: stp x20, x19, [sp, #64] ; 16-byte Folded Spill ; ENABLE-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill ; ENABLE-NEXT: add x29, sp, #80 -; ENABLE-NEXT: sub x18, sp, #32 -; ENABLE-NEXT: and sp, x18, #0xffffffffffffffe0 +; ENABLE-NEXT: sub x9, sp, #32 +; ENABLE-NEXT: and sp, x9, #0xffffffffffffffe0 ; ENABLE-NEXT: .cfi_def_cfa w29, 16 ; ENABLE-NEXT: .cfi_offset w30, -8 ; ENABLE-NEXT: .cfi_offset w29, -16 @@ -1066,17 +1058,22 @@ define void @stack_realign2(i32 %a, i32 %b, ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr ; ENABLE-NEXT: .cfi_offset w26, -80 ; ENABLE-NEXT: .cfi_offset w27, -88 ; ENABLE-NEXT: .cfi_offset w28, -96 +; ENABLE-NEXT: lsl w8, w1, w0 +; ENABLE-NEXT: lsr w9, w0, w1 +; ENABLE-NEXT: lsl w14, w0, w1 +; ENABLE-NEXT: lsr w11, w1, w0 +; ENABLE-NEXT: add w15, w1, w0 +; ENABLE-NEXT: sub w10, w8, w9 +; ENABLE-NEXT: subs w17, w1, w0 +; ENABLE-NEXT: add w16, w14, w8 +; ENABLE-NEXT: add w12, w9, w11 +; 
ENABLE-NEXT: add w13, w11, w15 +; ENABLE-NEXT: b.le LBB14_2 +; ENABLE-NEXT: ; %bb.1: ; %true ; ENABLE-NEXT: str w0, [sp] ; ENABLE-NEXT: ; InlineAsm Start ; ENABLE-NEXT: nop ; ENABLE-NEXT: ; InlineAsm End -; ENABLE-NEXT: sub sp, x29, #80 -; ENABLE-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload -; ENABLE-NEXT: ldp x20, x19, [sp, #64] ; 16-byte Folded Reload -; ENABLE-NEXT: ldp x22, x21, [sp, #48] ; 16-byte Folded Reload -; ENABLE-NEXT: ldp x24, x23, [sp, #32] ; 16-byte Folded Reload -; ENABLE-NEXT: ldp x26, x25, [sp, #16] ; 16-byte Folded Reload -; ENABLE-NEXT: ldp x28, x27, [sp], #96 ; 16-byte Folded Reload ; ENABLE-NEXT: LBB14_2: ; %false ; ENABLE-NEXT: str w14, [x2] ; ENABLE-NEXT: str w8, [x3] @@ -1087,6 +1084,13 @@ define void @stack_realign2(i32 %a, i32 %b, ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr ; ENABLE-NEXT: stp w0, w1, [x2, #4] ; ENABLE-NEXT: stp w16, w10, [x2, #12] ; ENABLE-NEXT: stp w12, w13, [x2, #20] +; ENABLE-NEXT: sub sp, x29, #80 +; ENABLE-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x20, x19, [sp, #64] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x22, x21, [sp, #48] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x24, x23, [sp, #32] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x26, x25, [sp, #16] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x28, x27, [sp], #96 ; 16-byte Folded Reload ; ENABLE-NEXT: ret ; ; DISABLE-LABEL: stack_realign2: diff --git a/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll b/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll index da2c2985acf971..9464e3447993b3 100644 --- a/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll +++ b/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll @@ -213,6 +213,10 @@ attributes #0 = { "sign-return-address"="all" } ; CHECK-DUMP-NOT: DW_CFA_remember_state ; CHECK-DUMP-NOT: DW_CFA_restore_state +; CHECK-DUMP: CFA=WSP{{$}} +; CHECK-DUMP: reg34=1 +; CHECK-DUMP-NOT: reg34=0 + ; baz_async ; 
CHECK-DUMP-LABEL: FDE ; CHECK-DUMP: Format: DWARF32 @@ -222,9 +226,24 @@ attributes #0 = { "sign-return-address"="all" } ; CHECK-DUMP: DW_CFA_restore_state: ; CHECK-DUMP: DW_CFA_AARCH64_negate_ra_state: +; CHECK-DUMP: CFA=WSP{{$}} +;; First DW_CFA_AARCH64_negate_ra_state: +; CHECK-DUMP: reg34=1 +;; Second DW_CFA_AARCH64_negate_ra_state: +; CHECK-DUMP: reg34=0 +;; DW_CFA_restore_state: +; CHECK-DUMP: reg34=1 +;; Third DW_CFA_AARCH64_negate_ra_state: +; CHECK-DUMP: reg34=0 +; CHECK-DUMP-NOT: reg34= + ; baz_sync ; CHECK-DUMP-LABEL: FDE ; CHECK-DUMP: DW_CFA_AARCH64_negate_ra_state: ; CHECK-DUMP-NOT: DW_CFA_AARCH64_negate_ra_state ; CHECK-DUMP-NOT: DW_CFA_remember_state ; CHECK-DUMP-NOT: DW_CFA_restore_state + +; CHECK-DUMP: CFA=WSP{{$}} +; CHECK-DUMP: reg34=1 +; CHECK-DUMP-NOT: reg34=0 diff --git a/llvm/test/CodeGen/AArch64/sls-crash.ll b/llvm/test/CodeGen/AArch64/sls-crash.ll new file mode 100644 index 00000000000000..5dfc3c7824a8b6 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sls-crash.ll @@ -0,0 +1,6 @@ +; RUN: llc -mtriple aarch64 -O0 < %s + +define hidden void @foo() "target-features"="+harden-sls-blr" { +entry: + ret void +} diff --git a/llvm/test/CodeGen/AArch64/sls-stackprotector-outliner.ll b/llvm/test/CodeGen/AArch64/sls-stackprotector-outliner.ll index 5f3b1503b46b32..b281204a66e46a 100644 --- a/llvm/test/CodeGen/AArch64/sls-stackprotector-outliner.ll +++ b/llvm/test/CodeGen/AArch64/sls-stackprotector-outliner.ll @@ -18,7 +18,8 @@ define hidden void @_ZTv0_n24_N2C6D1Ev(ptr %this) minsize sspreq "target-feature ; CHECK-NEXT: b.ne .LBB0_2 ; CHECK-NEXT: // %bb.1: // %entry ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: bl OUTLINED_FUNCTION_1 +; CHECK-NEXT: add x0, x0, x8 +; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: b _ZN2C6D1Ev ; CHECK-NEXT: dsb sy ; CHECK-NEXT: isb @@ -45,7 +46,8 @@ define hidden void @_ZTv0_n24_N2C6D0Ev(ptr %this) minsize sspreq "target-feature ; CHECK-NEXT: b.ne .LBB1_2 ; CHECK-NEXT: // %bb.1: // %entry ; 
CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: bl OUTLINED_FUNCTION_1 +; CHECK-NEXT: add x0, x0, x8 +; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: b _ZN2C6D0Ev ; CHECK-NEXT: dsb sy ; CHECK-NEXT: isb @@ -71,7 +73,8 @@ define hidden void @_ZTv0_n24_N3C10D1Ev(ptr %this) minsize sspreq "target-featur ; CHECK-NEXT: b.ne .LBB2_2 ; CHECK-NEXT: // %bb.1: // %entry ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: bl OUTLINED_FUNCTION_1 +; CHECK-NEXT: add x0, x0, x8 +; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: b _ZN3C10D1Ev ; CHECK-NEXT: dsb sy ; CHECK-NEXT: isb @@ -97,7 +100,8 @@ define hidden void @_ZTv0_n24_N3C10D0Ev(ptr %this) minsize sspreq "target-featur ; CHECK-NEXT: b.ne .LBB3_2 ; CHECK-NEXT: // %bb.1: // %entry ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: bl OUTLINED_FUNCTION_1 +; CHECK-NEXT: add x0, x0, x8 +; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: b _ZN3C10D0Ev ; CHECK-NEXT: dsb sy ; CHECK-NEXT: isb diff --git a/llvm/test/CodeGen/AArch64/stack-probing-no-scratch-reg.mir b/llvm/test/CodeGen/AArch64/stack-probing-no-scratch-reg.mir index 078d8a5bf6b66e..f2d79bd7206908 100644 --- a/llvm/test/CodeGen/AArch64/stack-probing-no-scratch-reg.mir +++ b/llvm/test/CodeGen/AArch64/stack-probing-no-scratch-reg.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 -# RUN: llc %s --start-before=shrink-wrap -stop-after=prologepilog -o - | FileCheck %s +# RUN: llc %s --start-before=shrink-wrap -stop-after=prologepilog -verify-machineinstrs -o - | FileCheck %s --- | target triple = "aarch64-linux" @@ -43,43 +43,43 @@ machineFunctionInfo: {} body: | ; CHECK-LABEL: name: f ; CHECK: bb.0.entry: - ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK-NEXT: liveins: $w0, $lr - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $x9 = IMPLICIT_DEF - ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w0, 1, 0, implicit-def $nzcv - ; CHECK-NEXT: Bcc 
12, %bb.2, implicit $nzcv - ; CHECK-NEXT: B %bb.1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1.if.then1: ; CHECK-NEXT: successors: %bb.3(0x80000000) - ; CHECK-NEXT: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x23, $x25, $x25, $x27, $x28, $lr + ; CHECK-NEXT: liveins: $w0, $lr ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2 :: (store (s64) into %stack.2), (store (s64) into %stack.1) - ; CHECK-NEXT: $xzr = frame-setup SUBXri $sp, 36, 12 + ; CHECK-NEXT: $x9 = frame-setup SUBXri $sp, 36, 12 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.3.if.then1: + ; CHECK-NEXT: bb.3.entry: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x25, $x27, $x28 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1, 12 ; CHECK-NEXT: frame-setup STRXui $xzr, $sp, 0 - ; CHECK-NEXT: $xzr = frame-setup SUBSXrx64 $sp, $xzr, 24, implicit-def $nzcv + ; CHECK-NEXT: $xzr = frame-setup SUBSXrx64 $sp, $x9, 24, implicit-def $nzcv ; CHECK-NEXT: frame-setup Bcc 1, %bb.3, implicit $nzcv ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.4.if.then1: - ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: bb.4.entry: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x25, $x27, $x28 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 2544, 0 ; CHECK-NEXT: frame-setup STRXui $xzr, $sp, 0 + ; CHECK-NEXT: $x9 = IMPLICIT_DEF + ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w0, 1, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 12, %bb.2, implicit $nzcv + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; 
CHECK-NEXT: bb.1.if.then1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x23, $x25, $x25, $x27, $x28 + ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $x0 = ADDXri $sp, 0, 0 ; CHECK-NEXT: BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.exit: ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 36, 12 ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 2544, 0 ; CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2.exit: - ; CHECK-NEXT: liveins: $lr - ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: RET_ReallyLR bb.0.entry: successors: %bb.1(0x40000000), %bb.2(0x40000000) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fpneg-one-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fpneg-one-fneg.mir index 8ec2778992e23c..bdfc7c2b25c28b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fpneg-one-fneg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fpneg-one-fneg.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 -# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK --- name: test_neg_one_f16_sgpr diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll index b29ae366ca1ae5..e500aae7e0f3c0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_negA(<8 x half> %A, <8 x half> %B, <8 x float> %C, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_f32_16x16x16_f16_negA: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-imm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-imm.ll index 6251dfdc392ebc..3037c1ec2829e5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-imm.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-imm.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_imm(<8 x half> %A, <8 x half> %B, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_f32_16x16x16_f16_imm: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-iu-modifiers.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-iu-modifiers.ll index fe6d16bd8b5ead..086144873a042f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-iu-modifiers.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-iu-modifiers.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < 
%s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8_zext_src0(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_i32_16x16x16_iu8_zext_src0: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-swmmac-index_key.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-swmmac-index_key.ll index c80d7a6d9a836e..a6e1f5ef12b4bb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-swmmac-index_key.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-swmmac-index_key.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_swmmac_f32_16x16x32_f16_index_key(<8 x half> %A, <16 x half> %B, <8 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1) { ; GFX12-LABEL: test_swmmac_f32_16x16x32_f16_index_key: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32.ll index c4edc5b72b2fbb..3aa81da317d67d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<8 x half> %A, <8 x half> %B, <8 x float> %C, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_f32_16x16x16_f16: diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll index e2831afe68e74b..6c232b680ebf56 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_negA(<4 x half> %A, <4 x half> %B, <4 x float> %C, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_f32_16x16x16_f16_negA: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-imm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-imm.ll index c4d70fd5f0637f..717a4fc823d518 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-imm.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-imm.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_imm(<4 x half> %A, <4 x half> %B, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_f32_16x16x16_f16_imm: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-iu-modifiers.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-iu-modifiers.ll index 
7e1d09805df3f6..1ef50cbd0fc7ea 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-iu-modifiers.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-iu-modifiers.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8_zext_src0(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_i32_16x16x16_iu8_zext_src0: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-swmmac-index_key.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-swmmac-index_key.ll index b6f1828dce2576..0bd255e5e1af49 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-swmmac-index_key.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-swmmac-index_key.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_swmmac_f32_16x16x32_f16_index_key(<4 x half> %A, <8 x half> %B, <4 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, ptr addrspace(1) %out3) { ; GFX12-LABEL: test_swmmac_f32_16x16x32_f16_index_key: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64.ll index 0d1871a18d4055..7399fa0a341e2a 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<4 x half> %A, <4 x half> %B, <4 x float> %C, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_f32_16x16x16_f16: diff --git a/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll b/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll index 0772f9d0199f22..2c69ae58f0e611 100644 --- a/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll +++ b/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-GISEL %s declare i32 @llvm.amdgcn.atomic.cond.sub.u32.p3(ptr addrspace(3), i32) declare i32 @llvm.amdgcn.atomic.cond.sub.u32.p1(ptr addrspace(1), i32) diff --git a/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll b/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll index 482f61624ec7df..15a696bb3af09f 100644 --- a/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll +++ b/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll @@ -1,12 +1,12 @@ -; RUN: not 
llc -march=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX9-V5 %s -; RUN: not llc -march=amdgcn -mcpu=gfx10-1-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX101-V5 %s -; RUN: not llc -march=amdgcn -mcpu=gfx10-3-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX103-V5 %s -; RUN: not llc -march=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX11-V5 %s +; RUN: not llc -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX9-V5 %s +; RUN: not llc -mtriple=amdgcn -mcpu=gfx10-1-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX101-V5 %s +; RUN: not llc -mtriple=amdgcn -mcpu=gfx10-3-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX103-V5 %s +; RUN: not llc -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX11-V5 %s -; RUN: llc -march=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -o - %s -; RUN: llc -march=amdgcn -mcpu=gfx10-1-generic --amdhsa-code-object-version=6 -o - %s -; RUN: llc -march=amdgcn -mcpu=gfx10-3-generic --amdhsa-code-object-version=6 -o - %s -; RUN: llc -march=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -o - %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -o - %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx10-1-generic --amdhsa-code-object-version=6 -o - %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx10-3-generic --amdhsa-code-object-version=6 -o - %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -o - %s ; GFX9-V5: gfx9-generic is only available on code object version 6 or better ; GFX101-V5: gfx10-1-generic is only available on code object version 6 or better diff --git a/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll 
b/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll index 6c324ddc654667..c69207c0472e7c 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,DAG %s -; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -global-isel=1 < %s | FileCheck -check-prefixes=GCN,GISEL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,DAG %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -global-isel=1 < %s | FileCheck -check-prefixes=GCN,GISEL %s define amdgpu_ps void @test_s_load_i8(ptr addrspace(4) inreg %in, ptr addrspace(1) %out) { ; GCN-LABEL: test_s_load_i8: diff --git a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-hang.mir b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-hang.mir index 993933b2b5c723..28d79efc00b0db 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-hang.mir +++ b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-hang.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 -# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass si-insert-waitcnts %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass si-insert-waitcnts %s -o - | FileCheck %s --- name: test diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.cond.sub.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.cond.sub.ll index 11bafa197a2f09..9b63a8a3efcf92 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.cond.sub.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.cond.sub.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX12 +; RUN: llc 
-mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX12 define float @raw_buffer_atomic_cond_sub_return(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { ; GFX12-LABEL: raw_buffer_atomic_cond_sub_return: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dot4.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dot4.f32.ll index 02e27152bf5c59..8ea10f4496a2ef 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dot4.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dot4.f32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s define float @test_amdgcn_dot4_f32_fp8_bf8(i32 %a, i32 %b, float %c) { ; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_bf8: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w32.ll index 8f1e6f3ac1a0c3..b4415c12926ac3 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12-SDAG-W32 %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12-GISEL-W32 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs 
-mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12-SDAG-W32 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12-GISEL-W32 %s declare <2 x i32> @llvm.amdgcn.global.load.tr.v2i32.p1(ptr addrspace(1)) declare <8 x i16> @llvm.amdgcn.global.load.tr.v8i16.p1(ptr addrspace(1)) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w64.ll index d5a45fb838fc7f..7ad1416789de79 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12-SDAG-W64 %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12-GISEL-W64 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12-SDAG-W64 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12-GISEL-W64 %s declare i32 @llvm.amdgcn.global.load.tr.i32.p1(ptr addrspace(1)) declare <4 x i16> @llvm.amdgcn.global.load.tr.v4i16.p1(ptr addrspace(1)) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx12.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx12.ll index f03dbb9eb16457..ff8f28dae3f8ff 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx12.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx12.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX12 -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX12 +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX12 +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX12 define amdgpu_ps void @test_bvhcnt() { ; GFX12-LABEL: test_bvhcnt: diff --git a/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir b/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir index f50688240fe8bd..ed57caadea5c56 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 -# RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass=machine-scheduler -verify-misched -o - %s | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=machine-scheduler -verify-misched -o - %s | FileCheck -check-prefix=GCN %s --- | define amdgpu_kernel void @spill_regpressure_less() #0 { diff --git a/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll b/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll index e6fbe97f8dc0a5..96fa2a45a2ddf0 100644 --- a/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll +++ b/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn 
-mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s define amdgpu_ps void @intrinsic_store_system_scope(i32 %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { ; GFX12-LABEL: intrinsic_store_system_scope: diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll index 5f662ac088a351..cb3d76cd9c0ba1 100644 --- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll +++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_negA(<8 x half> %A, <8 x half> %B, <8 x float> %C, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_f32_16x16x16_f16_negA: diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-imm.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-imm.ll index c80e5e0e3506cc..c4adc8c3212801 100644 --- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-imm.ll +++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-imm.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_imm(<8 x half> %A, <8 x half> %B, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_f32_16x16x16_f16_imm: diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-iu-modifiers.ll 
b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-iu-modifiers.ll index 5426458e6b1df9..dbb4db05a35c5d 100644 --- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-iu-modifiers.ll +++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-iu-modifiers.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8_zext_src0(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_i32_16x16x16_iu8_zext_src0: diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-swmmac-index_key.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-swmmac-index_key.ll index b0213abba90485..009288dbdf530a 100644 --- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-swmmac-index_key.ll +++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-swmmac-index_key.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_swmmac_f32_16x16x32_f16_index_key(<8 x half> %A, <16 x half> %B, <8 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1) { ; GFX12-LABEL: test_swmmac_f32_16x16x32_f16_index_key: diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32.ll index a66747567dd3d5..1012287838f120 100644 --- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32.ll +++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s 
--check-prefix=GFX12 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<8 x half> %A, <8 x half> %B, <8 x float> %C, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_f32_16x16x16_f16: diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll index 1e82e74d92c4ed..ab1121a705529d 100644 --- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll +++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_negA(<4 x half> %A, <4 x half> %B, <4 x float> %C, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_f32_16x16x16_f16_negA: diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-imm.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-imm.ll index 19b0e697183f4d..462fc01e8e79e2 100644 --- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-imm.ll +++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-imm.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_imm(<4 x half> %A, <4 x half> %B, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_f32_16x16x16_f16_imm: diff 
--git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-iu-modifiers.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-iu-modifiers.ll index fa5eb3605e67a3..161d222d10ff70 100644 --- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-iu-modifiers.ll +++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-iu-modifiers.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8_zext_src0(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_i32_16x16x16_iu8_zext_src0: diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-swmmac-index_key.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-swmmac-index_key.ll index 861eb1aaa333ab..511a116a78be59 100644 --- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-swmmac-index_key.ll +++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-swmmac-index_key.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_swmmac_f32_16x16x32_f16_index_key(<4 x half> %A, <8 x half> %B, <4 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, ptr addrspace(1) %out3) { ; GFX12-LABEL: test_swmmac_f32_16x16x32_f16_index_key: diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64.ll index a05a8f4117ecee..5fde11cb4b1b14 100644 --- 
a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64.ll +++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12 define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<4 x half> %A, <4 x half> %B, <4 x float> %C, ptr addrspace(1) %out) { ; GFX12-LABEL: test_wmma_f32_16x16x16_f16: diff --git a/llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx12-w32.mir b/llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx12-w32.mir index 47a1e06c5d7dc5..ef85de20129434 100644 --- a/llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx12-w32.mir +++ b/llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx12-w32.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GFX12 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GFX12 %s # D0 overlaps A1, B1, C1 or Index1. Overlap starts at vgpr0. 
# $D0 = wmma0 $A0, $B0, $C0 or $D0 = swmmac0 $A0, $B0, $C0, $Index0 diff --git a/llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx12-w64.mir b/llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx12-w64.mir index 34c37aa91ab80f..277db33e940dd0 100644 --- a/llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx12-w64.mir +++ b/llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx12-w64.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GFX12 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GFX12 %s # D0 overlaps A1, B1, C1 or Index1. Overlap starts at vgpr0. # $D0 = wmma0 $A0, $B0, $C0 or $D0 = swmmac0 $A0, $B0, $C0, $Index0 diff --git a/llvm/test/CodeGen/PowerPC/pr74951.ll b/llvm/test/CodeGen/PowerPC/pr74951.ll new file mode 100644 index 00000000000000..c1b2e3ee0dd68b --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pr74951.ll @@ -0,0 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc < %s -start-before=codegenprepare -verify-machineinstrs -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-ibm-aix-xcoff | FileCheck %s + +%struct.anon = type { i32 } + +@b = local_unnamed_addr global %struct.anon { i32 -1 }, align 4 +@g = local_unnamed_addr global [1 x i1] zeroinitializer, align 1 + +define noundef signext i32 @main() { +; CHECK-LABEL: main: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ld r3, L..C0(r2) # @b +; CHECK-NEXT: lwz r3, 0(r3) +; CHECK-NEXT: andi. 
r4, r3, 65535 +; CHECK-NEXT: bne cr0, L..BB0_4 +; CHECK-NEXT: # %bb.1: # %lor.rhs.i.i +; CHECK-NEXT: extsw r4, r3 +; CHECK-NEXT: neg r5, r4 +; CHECK-NEXT: rldicl r5, r5, 1, 63 +; CHECK-NEXT: xori r5, r5, 1 +; CHECK-NEXT: cmpw r4, r5 +; CHECK-NEXT: crnot 4*cr5+lt, eq +; CHECK-NEXT: li r4, 1 +; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_3 +; CHECK-NEXT: # %bb.2: # %lor.rhs.i.i +; CHECK-NEXT: li r4, 0 +; CHECK-NEXT: L..BB0_3: # %lor.rhs.i.i +; CHECK-NEXT: ld r5, L..C1(r2) # @g +; CHECK-NEXT: stb r4, 0(r5) +; CHECK-NEXT: L..BB0_4: # %g.exit +; CHECK-NEXT: ld r4, L..C1(r2) # @g +; CHECK-NEXT: neg r3, r3 +; CHECK-NEXT: rldicl r5, r3, 1, 63 +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: stb r5, 0(r4) +; CHECK-NEXT: blr +entry: + %0 = load i32, ptr @b, align 4 + %conv4.i = sext i32 %0 to i64 + %cmp.i = icmp slt i32 %0, 1 + %conv.i = zext i1 %cmp.i to i32 + %cmp1.i = icmp ne i32 %0, %conv.i + %conv3.i = trunc i32 %0 to i16 + %tobool.not.i.i = icmp eq i16 %conv3.i, 0 + br i1 %tobool.not.i.i, label %lor.rhs.i.i, label %g.exit + +lor.rhs.i.i: ; preds = %entry + store i1 %cmp1.i, ptr @g, align 1 + br label %g.exit + +g.exit: ; preds = %lor.end.i.i + %4 = trunc i64 %conv4.i to i32 + %cmp.i9.i = icmp sgt i32 %4, 0 + store i1 %cmp.i9.i, ptr @g, align 1 + ret i32 0 +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/shufflevector.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/shufflevector.ll new file mode 100644 index 00000000000000..df7778899b0d09 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/shufflevector.ll @@ -0,0 +1,1774 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=riscv32 -mattr=+v -global-isel -stop-after=irtranslator \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=RV32 %s +; RUN: llc -mtriple=riscv64 -mattr=+v -global-isel -stop-after=irtranslator \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=RV64 %s + +define 
@shufflevector_nxv1i1_0() { + ; RV32-LABEL: name: shufflevector_nxv1i1_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv1i1_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv1i1_1() { + ; RV32-LABEL: name: shufflevector_nxv1i1_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv1i1_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv1i1_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv1i1_2 + ; RV32: bb.1 (%ir-block.0): + ; 
RV32-NEXT: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv1i1_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv2i1_0() { + ; RV32-LABEL: name: shufflevector_nxv2i1_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv2i1_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv2i1_1() { + ; RV32-LABEL: name: shufflevector_nxv2i1_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: 
[[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv2i1_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv2i1_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv2i1_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv2i1_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv4i1_0() { + ; RV32-LABEL: name: shufflevector_nxv4i1_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: 
[[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv4i1_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv4i1_1() { + ; RV32-LABEL: name: shufflevector_nxv4i1_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv4i1_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv4i1_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv4i1_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv4i1_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv8i1_0() { + ; RV32-LABEL: name: shufflevector_nxv8i1_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv8i1_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv8i1_1() { + ; RV32-LABEL: name: shufflevector_nxv8i1_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: 
[[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv8i1_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv8i1_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv8i1_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv8i1_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv16i1_0() { + ; RV32-LABEL: name: shufflevector_nxv16i1_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: 
[[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv16i1_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv16i1_1() { + ; RV32-LABEL: name: shufflevector_nxv16i1_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv16i1_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv16i1_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv16i1_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), 
[[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v0 + ; + ; RV64-LABEL: name: shufflevector_nxv16i1_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1) + ; RV64-NEXT: $v0 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v0 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv1i8_0() { + ; RV32-LABEL: name: shufflevector_nxv1i8_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i8_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv1i8_1() { + ; RV32-LABEL: name: shufflevector_nxv1i8_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = 
G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i8_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv1i8_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv1i8_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i8_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv2i8_0() { + ; RV32-LABEL: name: shufflevector_nxv2i8_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR 
[[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv2i8_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv2i8_1() { + ; RV32-LABEL: name: shufflevector_nxv2i8_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv2i8_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv2i8_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv2i8_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; 
RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv2i8_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv4i8_0() { + ; RV32-LABEL: name: shufflevector_nxv4i8_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv4i8_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv4i8_1() { + ; RV32-LABEL: name: shufflevector_nxv4i8_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: 
shufflevector_nxv4i8_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv4i8_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv4i8_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv4i8_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv8i8_0() { + ; RV32-LABEL: name: shufflevector_nxv8i8_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: 
shufflevector_nxv8i8_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv8i8_1() { + ; RV32-LABEL: name: shufflevector_nxv8i8_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv8i8_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv8i8_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv8i8_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv8i8_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: 
liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv16i8_0() { + ; RV32-LABEL: name: shufflevector_nxv16i8_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv16i8_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv16i8_1() { + ; RV32-LABEL: name: shufflevector_nxv16i8_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv16i8_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; 
RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv16i8_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv16i8_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m2 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv16i8_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m2 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv1i16_0() { + ; RV32-LABEL: name: shufflevector_nxv1i16_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i16_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = 
G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv1i16_1() { + ; RV32-LABEL: name: shufflevector_nxv1i16_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i16_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv1i16_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv1i16_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i16_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY 
$v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv2i16_0() { + ; RV32-LABEL: name: shufflevector_nxv2i16_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv2i16_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv2i16_1() { + ; RV32-LABEL: name: shufflevector_nxv2i16_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv2i16_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: 
[[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv2i16_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv2i16_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv2i16_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv4i16_0() { + ; RV32-LABEL: name: shufflevector_nxv4i16_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv4i16_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: 
[[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv4i16_1() { + ; RV32-LABEL: name: shufflevector_nxv4i16_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv4i16_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv4i16_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv4i16_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv4i16_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: 
[[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv8i16_0() { + ; RV32-LABEL: name: shufflevector_nxv8i16_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv8i16_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv8i16_1() { + ; RV32-LABEL: name: shufflevector_nxv8i16_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv8i16_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; 
RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv8i16_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv8i16_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m2 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv8i16_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m2 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv16i16_0() { + ; RV32-LABEL: name: shufflevector_nxv16i16_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: shufflevector_nxv16i16_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT 
[[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv16i16_1() { + ; RV32-LABEL: name: shufflevector_nxv16i16_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: shufflevector_nxv16i16_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv16i16_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv16i16_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m4 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: shufflevector_nxv16i16_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m4 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s16) = 
G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16) + ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv1i32_0() { + ; RV32-LABEL: name: shufflevector_nxv1i32_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i32_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv1i32_1() { + ; RV32-LABEL: name: shufflevector_nxv1i32_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i32_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = 
G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv1i32_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv1i32_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i32_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv2i32_0() { + ; RV32-LABEL: name: shufflevector_nxv2i32_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv2i32_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = 
G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv2i32_1() { + ; RV32-LABEL: name: shufflevector_nxv2i32_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv2i32_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv2i32_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv2i32_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv2i32_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = 
G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv4i32_0() { + ; RV32-LABEL: name: shufflevector_nxv4i32_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv4i32_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv4i32_1() { + ; RV32-LABEL: name: shufflevector_nxv4i32_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv4i32_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; 
RV64-NEXT: PseudoRET implicit $v8m2 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv4i32_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv4i32_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m2 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv4i32_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m2 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv8i32_0() { + ; RV32-LABEL: name: shufflevector_nxv8i32_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: shufflevector_nxv8i32_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8m4 = COPY 
[[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv8i32_1() { + ; RV32-LABEL: name: shufflevector_nxv8i32_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: shufflevector_nxv8i32_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv8i32_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv8i32_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m4 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: shufflevector_nxv8i32_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m4 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; 
RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv16i32_0() { + ; RV32-LABEL: name: shufflevector_nxv16i32_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: shufflevector_nxv16i32_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv16i32_1() { + ; RV32-LABEL: name: shufflevector_nxv16i32_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: shufflevector_nxv16i32_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET 
implicit $v8m8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv16i32_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv16i32_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: shufflevector_nxv16i32_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) + ; RV64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv1i64_0() { + ; RV32-LABEL: name: shufflevector_nxv1i64_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i64_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; 
RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv1i64_1() { + ; RV32-LABEL: name: shufflevector_nxv1i64_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i64_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv1i64_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv1i64_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: shufflevector_nxv1i64_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; 
RV64-NEXT: PseudoRET implicit $v8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv2i64_0() { + ; RV32-LABEL: name: shufflevector_nxv2i64_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv2i64_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv2i64_1() { + ; RV32-LABEL: name: shufflevector_nxv2i64_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv2i64_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %a = shufflevector undef, undef, undef + 
ret %a +} + +define @shufflevector_nxv2i64_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv2i64_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m2 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: shufflevector_nxv2i64_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m2 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv4i64_0() { + ; RV32-LABEL: name: shufflevector_nxv4i64_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: shufflevector_nxv4i64_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %a = shufflevector 
poison, poison, poison + ret %a +} + +define @shufflevector_nxv4i64_1() { + ; RV32-LABEL: name: shufflevector_nxv4i64_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: shufflevector_nxv4i64_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv4i64_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv4i64_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m4 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: shufflevector_nxv4i64_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m4 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %b = 
shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv8i64_0() { + ; RV32-LABEL: name: shufflevector_nxv8i64_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: shufflevector_nxv8i64_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv8i64_1() { + ; RV32-LABEL: name: shufflevector_nxv8i64_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: shufflevector_nxv8i64_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %a = shufflevector undef, undef, undef + ret %a +} + +define 
@shufflevector_nxv8i64_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv8i64_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: shufflevector_nxv8i64_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + +define @shufflevector_nxv16i64_0() { + ; RV32-LABEL: name: shufflevector_nxv16i64_0 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[SPLAT_VECTOR]]() + ; RV32-NEXT: $v8m8 = COPY [[UV]]() + ; RV32-NEXT: $v16m8 = COPY [[UV1]]() + ; RV32-NEXT: PseudoRET implicit $v8m8, implicit $v16m8 + ; + ; RV64-LABEL: name: shufflevector_nxv16i64_0 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR 
[[EVEC]](s64) + ; RV64-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[SPLAT_VECTOR]]() + ; RV64-NEXT: $v8m8 = COPY [[UV]]() + ; RV64-NEXT: $v16m8 = COPY [[UV1]]() + ; RV64-NEXT: PseudoRET implicit $v8m8, implicit $v16m8 + %a = shufflevector poison, poison, poison + ret %a +} + +define @shufflevector_nxv16i64_1() { + ; RV32-LABEL: name: shufflevector_nxv16i64_1 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[SPLAT_VECTOR]]() + ; RV32-NEXT: $v8m8 = COPY [[UV]]() + ; RV32-NEXT: $v16m8 = COPY [[UV1]]() + ; RV32-NEXT: PseudoRET implicit $v8m8, implicit $v16m8 + ; + ; RV64-LABEL: name: shufflevector_nxv16i64_1 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[SPLAT_VECTOR]]() + ; RV64-NEXT: $v8m8 = COPY [[UV]]() + ; RV64-NEXT: $v16m8 = COPY [[UV1]]() + ; RV64-NEXT: PseudoRET implicit $v8m8, implicit $v16m8 + %a = shufflevector undef, undef, undef + ret %a +} + +define @shufflevector_nxv16i64_2( %a) { + ; RV32-LABEL: name: shufflevector_nxv16i64_2 + ; RV32: bb.1 (%ir-block.0): + ; RV32-NEXT: liveins: $v8m8, $v16m8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m8 + ; RV32-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v16m8 + ; RV32-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_() = G_CONCAT_VECTORS [[COPY]](), [[COPY1]]() + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = 
G_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS]](), [[C]](s64) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV32-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[SPLAT_VECTOR]]() + ; RV32-NEXT: $v8m8 = COPY [[UV]]() + ; RV32-NEXT: $v16m8 = COPY [[UV1]]() + ; RV32-NEXT: PseudoRET implicit $v8m8, implicit $v16m8 + ; + ; RV64-LABEL: name: shufflevector_nxv16i64_2 + ; RV64: bb.1 (%ir-block.0): + ; RV64-NEXT: liveins: $v8m8, $v16m8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m8 + ; RV64-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v16m8 + ; RV64-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_() = G_CONCAT_VECTORS [[COPY]](), [[COPY1]]() + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS]](), [[C]](s64) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) + ; RV64-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[SPLAT_VECTOR]]() + ; RV64-NEXT: $v8m8 = COPY [[UV]]() + ; RV64-NEXT: $v16m8 = COPY [[UV1]]() + ; RV64-NEXT: PseudoRET implicit $v8m8, implicit $v16m8 + %b = shufflevector %a , poison, zeroinitializer + ret %b +} + + + diff --git a/llvm/test/CodeGen/RISCV/alu64.ll b/llvm/test/CodeGen/RISCV/alu64.ll index f032756e007b68..e16f6abcca244c 100644 --- a/llvm/test/CodeGen/RISCV/alu64.ll +++ b/llvm/test/CodeGen/RISCV/alu64.ll @@ -58,7 +58,8 @@ define i64 @sltiu(i64 %a) nounwind { ; RV32I-LABEL: sltiu: ; RV32I: # %bb.0: ; RV32I-NEXT: sltiu a0, a0, 3 -; RV32I-NEXT: seqz a1, a1 +; RV32I-NEXT: snez a1, a1 +; RV32I-NEXT: addi a1, a1, -1 ; RV32I-NEXT: and a0, a1, a0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll index aa962d68fc5285..5914e45a153302 100644 --- a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll +++ b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll @@ -372,10 +372,10 @@ define i32 
@atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { ; RV32IA-NEXT: # =>This Loop Header: Depth=1 ; RV32IA-NEXT: # Child Loop BB2_3 Depth 2 ; RV32IA-NEXT: mv a3, a2 -; RV32IA-NEXT: addi a2, a2, 1 -; RV32IA-NEXT: sltu a4, a3, a1 -; RV32IA-NEXT: neg a4, a4 -; RV32IA-NEXT: and a4, a4, a2 +; RV32IA-NEXT: addi a4, a2, 1 +; RV32IA-NEXT: sltu a2, a2, a1 +; RV32IA-NEXT: neg a2, a2 +; RV32IA-NEXT: and a4, a2, a4 ; RV32IA-NEXT: .LBB2_3: # %atomicrmw.start ; RV32IA-NEXT: # Parent Loop BB2_1 Depth=1 ; RV32IA-NEXT: # => This Inner Loop Header: Depth=2 @@ -607,10 +607,10 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { ; RV64IA-NEXT: # =>This Loop Header: Depth=1 ; RV64IA-NEXT: # Child Loop BB3_3 Depth 2 ; RV64IA-NEXT: mv a3, a2 -; RV64IA-NEXT: addi a2, a2, 1 -; RV64IA-NEXT: sltu a4, a3, a1 -; RV64IA-NEXT: neg a4, a4 -; RV64IA-NEXT: and a4, a4, a2 +; RV64IA-NEXT: addi a4, a2, 1 +; RV64IA-NEXT: sltu a2, a2, a1 +; RV64IA-NEXT: neg a2, a2 +; RV64IA-NEXT: and a4, a2, a4 ; RV64IA-NEXT: .LBB3_3: # %atomicrmw.start ; RV64IA-NEXT: # Parent Loop BB3_1 Depth=1 ; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 diff --git a/llvm/test/CodeGen/RISCV/bfloat-convert.ll b/llvm/test/CodeGen/RISCV/bfloat-convert.ll index d533607ad54e38..0216d00be21854 100644 --- a/llvm/test/CodeGen/RISCV/bfloat-convert.ll +++ b/llvm/test/CodeGen/RISCV/bfloat-convert.ll @@ -456,121 +456,142 @@ define i64 @fcvt_l_bf16(bfloat %a) nounwind { define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind { ; RV32IZFBFMIN-LABEL: fcvt_l_bf16_sat: ; RV32IZFBFMIN: # %bb.0: # %start -; RV32IZFBFMIN-NEXT: addi sp, sp, -16 -; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFBFMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: addi sp, sp, -32 +; RV32IZFBFMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: sw s2, 
16(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: lui a0, %hi(.LCPI10_0) +; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0) ; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0 +; RV32IZFBFMIN-NEXT: flt.s s0, fa5, fs0 +; RV32IZFBFMIN-NEXT: neg s1, s0 ; RV32IZFBFMIN-NEXT: lui a0, 913408 ; RV32IZFBFMIN-NEXT: fmv.w.x fa5, a0 -; RV32IZFBFMIN-NEXT: fle.s s0, fa5, fs0 +; RV32IZFBFMIN-NEXT: fle.s s2, fa5, fs0 +; RV32IZFBFMIN-NEXT: neg s3, s2 ; RV32IZFBFMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFBFMIN-NEXT: call __fixsfdi +; RV32IZFBFMIN-NEXT: and a0, s3, a0 +; RV32IZFBFMIN-NEXT: or a0, s1, a0 +; RV32IZFBFMIN-NEXT: feq.s a2, fs0, fs0 +; RV32IZFBFMIN-NEXT: neg a2, a2 ; RV32IZFBFMIN-NEXT: lui a4, 524288 -; RV32IZFBFMIN-NEXT: lui a2, 524288 -; RV32IZFBFMIN-NEXT: beqz s0, .LBB10_2 +; RV32IZFBFMIN-NEXT: li a5, 1 +; RV32IZFBFMIN-NEXT: lui a3, 524288 +; RV32IZFBFMIN-NEXT: bne s2, a5, .LBB10_2 ; RV32IZFBFMIN-NEXT: # %bb.1: # %start -; RV32IZFBFMIN-NEXT: mv a2, a1 +; RV32IZFBFMIN-NEXT: mv a3, a1 ; RV32IZFBFMIN-NEXT: .LBB10_2: # %start -; RV32IZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0) -; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1) -; RV32IZFBFMIN-NEXT: flt.s a3, fa5, fs0 -; RV32IZFBFMIN-NEXT: beqz a3, .LBB10_4 +; RV32IZFBFMIN-NEXT: and a0, a2, a0 +; RV32IZFBFMIN-NEXT: beqz s0, .LBB10_4 ; RV32IZFBFMIN-NEXT: # %bb.3: -; RV32IZFBFMIN-NEXT: addi a2, a4, -1 +; RV32IZFBFMIN-NEXT: addi a3, a4, -1 ; RV32IZFBFMIN-NEXT: .LBB10_4: # %start -; RV32IZFBFMIN-NEXT: feq.s a1, fs0, fs0 -; RV32IZFBFMIN-NEXT: neg a4, a1 -; RV32IZFBFMIN-NEXT: and a1, a4, a2 -; RV32IZFBFMIN-NEXT: neg a2, a3 -; RV32IZFBFMIN-NEXT: neg a3, s0 -; RV32IZFBFMIN-NEXT: and a0, a3, a0 -; RV32IZFBFMIN-NEXT: or a0, a2, a0 -; RV32IZFBFMIN-NEXT: and a0, a4, a0 -; RV32IZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFBFMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload -; 
RV32IZFBFMIN-NEXT: addi sp, sp, 16 +; RV32IZFBFMIN-NEXT: and a1, a2, a3 +; RV32IZFBFMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IZFBFMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IZFBFMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IZFBFMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IZFBFMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32IZFBFMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload +; RV32IZFBFMIN-NEXT: addi sp, sp, 32 ; RV32IZFBFMIN-NEXT: ret ; ; R32IDZFBFMIN-LABEL: fcvt_l_bf16_sat: ; R32IDZFBFMIN: # %bb.0: # %start -; R32IDZFBFMIN-NEXT: addi sp, sp, -16 -; R32IDZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; R32IDZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; R32IDZFBFMIN-NEXT: addi sp, sp, -32 +; R32IDZFBFMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; R32IDZFBFMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; R32IDZFBFMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; R32IDZFBFMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; R32IDZFBFMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; R32IDZFBFMIN-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; R32IDZFBFMIN-NEXT: lui a0, %hi(.LCPI10_0) +; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0) ; R32IDZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0 +; R32IDZFBFMIN-NEXT: flt.s s0, fa5, fs0 +; R32IDZFBFMIN-NEXT: neg s1, s0 ; R32IDZFBFMIN-NEXT: lui a0, 913408 ; R32IDZFBFMIN-NEXT: fmv.w.x fa5, a0 -; R32IDZFBFMIN-NEXT: fle.s s0, fa5, fs0 +; R32IDZFBFMIN-NEXT: fle.s s2, fa5, fs0 +; R32IDZFBFMIN-NEXT: neg s3, s2 ; R32IDZFBFMIN-NEXT: fmv.s fa0, fs0 ; R32IDZFBFMIN-NEXT: call __fixsfdi +; R32IDZFBFMIN-NEXT: and a0, s3, a0 +; R32IDZFBFMIN-NEXT: or a0, s1, a0 +; R32IDZFBFMIN-NEXT: feq.s a2, fs0, fs0 +; R32IDZFBFMIN-NEXT: neg a2, a2 ; R32IDZFBFMIN-NEXT: lui a4, 524288 -; R32IDZFBFMIN-NEXT: lui a2, 524288 -; R32IDZFBFMIN-NEXT: beqz s0, .LBB10_2 +; R32IDZFBFMIN-NEXT: li a5, 1 +; R32IDZFBFMIN-NEXT: lui a3, 524288 +; R32IDZFBFMIN-NEXT: bne s2, a5, .LBB10_2 ; R32IDZFBFMIN-NEXT: # %bb.1: # %start -; 
R32IDZFBFMIN-NEXT: mv a2, a1 +; R32IDZFBFMIN-NEXT: mv a3, a1 ; R32IDZFBFMIN-NEXT: .LBB10_2: # %start -; R32IDZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0) -; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1) -; R32IDZFBFMIN-NEXT: flt.s a3, fa5, fs0 -; R32IDZFBFMIN-NEXT: beqz a3, .LBB10_4 +; R32IDZFBFMIN-NEXT: and a0, a2, a0 +; R32IDZFBFMIN-NEXT: beqz s0, .LBB10_4 ; R32IDZFBFMIN-NEXT: # %bb.3: -; R32IDZFBFMIN-NEXT: addi a2, a4, -1 +; R32IDZFBFMIN-NEXT: addi a3, a4, -1 ; R32IDZFBFMIN-NEXT: .LBB10_4: # %start -; R32IDZFBFMIN-NEXT: feq.s a1, fs0, fs0 -; R32IDZFBFMIN-NEXT: neg a4, a1 -; R32IDZFBFMIN-NEXT: and a1, a4, a2 -; R32IDZFBFMIN-NEXT: neg a2, a3 -; R32IDZFBFMIN-NEXT: neg a3, s0 -; R32IDZFBFMIN-NEXT: and a0, a3, a0 -; R32IDZFBFMIN-NEXT: or a0, a2, a0 -; R32IDZFBFMIN-NEXT: and a0, a4, a0 -; R32IDZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; R32IDZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; R32IDZFBFMIN-NEXT: and a1, a2, a3 +; R32IDZFBFMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; R32IDZFBFMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; R32IDZFBFMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; R32IDZFBFMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; R32IDZFBFMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; R32IDZFBFMIN-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; R32IDZFBFMIN-NEXT: addi sp, sp, 16 +; R32IDZFBFMIN-NEXT: addi sp, sp, 32 ; R32IDZFBFMIN-NEXT: ret ; ; RV32ID-LABEL: fcvt_l_bf16_sat: ; RV32ID: # %bb.0: # %start -; RV32ID-NEXT: addi sp, sp, -16 -; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32ID-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32ID-NEXT: addi sp, sp, -32 +; RV32ID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32ID-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32ID-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32ID-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32ID-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32ID-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32ID-NEXT: lui a0, %hi(.LCPI10_0) +; RV32ID-NEXT: flw 
fa5, %lo(.LCPI10_0)(a0) ; RV32ID-NEXT: fmv.x.w a0, fa0 ; RV32ID-NEXT: slli a0, a0, 16 ; RV32ID-NEXT: fmv.w.x fs0, a0 +; RV32ID-NEXT: flt.s s0, fa5, fs0 +; RV32ID-NEXT: neg s1, s0 ; RV32ID-NEXT: lui a0, 913408 ; RV32ID-NEXT: fmv.w.x fa5, a0 -; RV32ID-NEXT: fle.s s0, fa5, fs0 +; RV32ID-NEXT: fle.s s2, fa5, fs0 +; RV32ID-NEXT: neg s3, s2 ; RV32ID-NEXT: fmv.s fa0, fs0 ; RV32ID-NEXT: call __fixsfdi +; RV32ID-NEXT: and a0, s3, a0 +; RV32ID-NEXT: or a0, s1, a0 +; RV32ID-NEXT: feq.s a2, fs0, fs0 +; RV32ID-NEXT: neg a2, a2 ; RV32ID-NEXT: lui a4, 524288 -; RV32ID-NEXT: lui a2, 524288 -; RV32ID-NEXT: beqz s0, .LBB10_2 +; RV32ID-NEXT: li a5, 1 +; RV32ID-NEXT: lui a3, 524288 +; RV32ID-NEXT: bne s2, a5, .LBB10_2 ; RV32ID-NEXT: # %bb.1: # %start -; RV32ID-NEXT: mv a2, a1 +; RV32ID-NEXT: mv a3, a1 ; RV32ID-NEXT: .LBB10_2: # %start -; RV32ID-NEXT: lui a1, %hi(.LCPI10_0) -; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a1) -; RV32ID-NEXT: flt.s a3, fa5, fs0 -; RV32ID-NEXT: beqz a3, .LBB10_4 +; RV32ID-NEXT: and a0, a2, a0 +; RV32ID-NEXT: beqz s0, .LBB10_4 ; RV32ID-NEXT: # %bb.3: -; RV32ID-NEXT: addi a2, a4, -1 +; RV32ID-NEXT: addi a3, a4, -1 ; RV32ID-NEXT: .LBB10_4: # %start -; RV32ID-NEXT: feq.s a1, fs0, fs0 -; RV32ID-NEXT: neg a4, a1 -; RV32ID-NEXT: and a1, a4, a2 -; RV32ID-NEXT: neg a2, a3 -; RV32ID-NEXT: neg a3, s0 -; RV32ID-NEXT: and a0, a3, a0 -; RV32ID-NEXT: or a0, a2, a0 -; RV32ID-NEXT: and a0, a4, a0 -; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32ID-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32ID-NEXT: and a1, a2, a3 +; RV32ID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32ID-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32ID-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32ID-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32ID-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32ID-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; RV32ID-NEXT: addi sp, sp, 16 +; RV32ID-NEXT: addi sp, sp, 32 ; RV32ID-NEXT: ret ; ; CHECK64ZFBFMIN-LABEL: fcvt_l_bf16_sat: @@ -654,7 
+675,8 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind { ; CHECK32ZFBFMIN-NEXT: neg s0, a0 ; CHECK32ZFBFMIN-NEXT: fmv.w.x fa5, zero ; CHECK32ZFBFMIN-NEXT: fle.s a0, fa5, fa0 -; CHECK32ZFBFMIN-NEXT: neg s1, a0 +; CHECK32ZFBFMIN-NEXT: xori a0, a0, 1 +; CHECK32ZFBFMIN-NEXT: addi s1, a0, -1 ; CHECK32ZFBFMIN-NEXT: call __fixunssfdi ; CHECK32ZFBFMIN-NEXT: and a0, s1, a0 ; CHECK32ZFBFMIN-NEXT: or a0, s0, a0 @@ -681,7 +703,8 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind { ; RV32ID-NEXT: neg s0, a0 ; RV32ID-NEXT: fmv.w.x fa5, zero ; RV32ID-NEXT: fle.s a0, fa5, fa0 -; RV32ID-NEXT: neg s1, a0 +; RV32ID-NEXT: xori a0, a0, 1 +; RV32ID-NEXT: addi s1, a0, -1 ; RV32ID-NEXT: call __fixunssfdi ; RV32ID-NEXT: and a0, s1, a0 ; RV32ID-NEXT: or a0, s0, a0 diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll index eb8ffe75ef7697..f2e37f55521bac 100644 --- a/llvm/test/CodeGen/RISCV/double-convert.ll +++ b/llvm/test/CodeGen/RISCV/double-convert.ll @@ -749,40 +749,47 @@ define i64 @fcvt_l_d(double %a) nounwind { define i64 @fcvt_l_d_sat(double %a) nounwind { ; RV32IFD-LABEL: fcvt_l_d_sat: ; RV32IFD: # %bb.0: # %start -; RV32IFD-NEXT: addi sp, sp, -16 -; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: addi sp, sp, -32 +; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; RV32IFD-NEXT: lui a0, %hi(.LCPI12_0) ; RV32IFD-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; RV32IFD-NEXT: lui a0, %hi(.LCPI12_1) +; RV32IFD-NEXT: fld fa4, %lo(.LCPI12_1)(a0) ; RV32IFD-NEXT: fmv.d fs0, fa0 -; RV32IFD-NEXT: fle.d s0, fa5, fa0 +; RV32IFD-NEXT: flt.d s0, fa5, fa0 +; RV32IFD-NEXT: neg s1, s0 +; RV32IFD-NEXT: fle.d s2, fa4, fa0 +; 
RV32IFD-NEXT: neg s3, s2 ; RV32IFD-NEXT: call __fixdfdi +; RV32IFD-NEXT: and a0, s3, a0 +; RV32IFD-NEXT: or a0, s1, a0 +; RV32IFD-NEXT: feq.d a2, fs0, fs0 +; RV32IFD-NEXT: neg a2, a2 ; RV32IFD-NEXT: lui a4, 524288 -; RV32IFD-NEXT: lui a2, 524288 -; RV32IFD-NEXT: beqz s0, .LBB12_2 +; RV32IFD-NEXT: li a5, 1 +; RV32IFD-NEXT: lui a3, 524288 +; RV32IFD-NEXT: bne s2, a5, .LBB12_2 ; RV32IFD-NEXT: # %bb.1: # %start -; RV32IFD-NEXT: mv a2, a1 +; RV32IFD-NEXT: mv a3, a1 ; RV32IFD-NEXT: .LBB12_2: # %start -; RV32IFD-NEXT: lui a1, %hi(.LCPI12_1) -; RV32IFD-NEXT: fld fa5, %lo(.LCPI12_1)(a1) -; RV32IFD-NEXT: flt.d a3, fa5, fs0 -; RV32IFD-NEXT: beqz a3, .LBB12_4 +; RV32IFD-NEXT: and a0, a2, a0 +; RV32IFD-NEXT: beqz s0, .LBB12_4 ; RV32IFD-NEXT: # %bb.3: -; RV32IFD-NEXT: addi a2, a4, -1 +; RV32IFD-NEXT: addi a3, a4, -1 ; RV32IFD-NEXT: .LBB12_4: # %start -; RV32IFD-NEXT: feq.d a1, fs0, fs0 -; RV32IFD-NEXT: neg a4, a1 -; RV32IFD-NEXT: and a1, a4, a2 -; RV32IFD-NEXT: neg a2, a3 -; RV32IFD-NEXT: neg a3, s0 -; RV32IFD-NEXT: and a0, a3, a0 -; RV32IFD-NEXT: or a0, a2, a0 -; RV32IFD-NEXT: and a0, a4, a0 -; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: and a1, a2, a3 +; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: fcvt_l_d_sat: @@ -800,40 +807,45 @@ define i64 @fcvt_l_d_sat(double %a) nounwind { ; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) -; RV32IZFINXZDINX-NEXT: sw a1, 
12(sp) -; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) -; RV32IZFINXZDINX-NEXT: lw s1, 12(sp) -; RV32IZFINXZDINX-NEXT: call __fixdfdi +; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw a0, 0(sp) +; RV32IZFINXZDINX-NEXT: sw a1, 4(sp) +; RV32IZFINXZDINX-NEXT: lw s0, 0(sp) +; RV32IZFINXZDINX-NEXT: lw s1, 4(sp) ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI12_0) ; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI12_0+4)(a2) ; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI12_0)(a2) -; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0 +; RV32IZFINXZDINX-NEXT: fle.d s2, a2, s0 +; RV32IZFINXZDINX-NEXT: neg s3, s2 +; RV32IZFINXZDINX-NEXT: call __fixdfdi +; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI12_1) +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI12_1+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI12_1)(a2) +; RV32IZFINXZDINX-NEXT: and a0, s3, a0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a3 +; RV32IZFINXZDINX-NEXT: or a0, a2, a0 +; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a2 ; RV32IZFINXZDINX-NEXT: lui a5, 524288 -; RV32IZFINXZDINX-NEXT: lui a3, 524288 -; RV32IZFINXZDINX-NEXT: beqz a2, .LBB12_2 +; RV32IZFINXZDINX-NEXT: li a6, 1 +; RV32IZFINXZDINX-NEXT: lui a4, 524288 +; RV32IZFINXZDINX-NEXT: bne s2, a6, .LBB12_2 ; RV32IZFINXZDINX-NEXT: # %bb.1: # %start -; RV32IZFINXZDINX-NEXT: mv a3, a1 +; RV32IZFINXZDINX-NEXT: mv a4, a1 ; RV32IZFINXZDINX-NEXT: .LBB12_2: # %start -; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI12_1) -; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI12_1)(a1) -; RV32IZFINXZDINX-NEXT: lw a7, %lo(.LCPI12_1+4)(a1) -; RV32IZFINXZDINX-NEXT: flt.d a4, a6, s0 -; RV32IZFINXZDINX-NEXT: beqz a4, .LBB12_4 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 +; RV32IZFINXZDINX-NEXT: beqz a3, .LBB12_4 ; RV32IZFINXZDINX-NEXT: # %bb.3: -; RV32IZFINXZDINX-NEXT: addi a3, a5, -1 +; RV32IZFINXZDINX-NEXT: addi a4, a5, -1 ; RV32IZFINXZDINX-NEXT: .LBB12_4: # %start -; RV32IZFINXZDINX-NEXT: feq.d 
a1, s0, s0 -; RV32IZFINXZDINX-NEXT: neg a5, a1 -; RV32IZFINXZDINX-NEXT: and a1, a5, a3 -; RV32IZFINXZDINX-NEXT: neg a2, a2 -; RV32IZFINXZDINX-NEXT: and a0, a2, a0 -; RV32IZFINXZDINX-NEXT: neg a2, a4 -; RV32IZFINXZDINX-NEXT: or a0, a2, a0 -; RV32IZFINXZDINX-NEXT: and a0, a5, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a4 ; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 ; RV32IZFINXZDINX-NEXT: ret ; @@ -1013,23 +1025,23 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind { ; RV32IFD-NEXT: addi sp, sp, -16 ; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill -; RV32IFD-NEXT: fmv.d fs0, fa0 +; RV32IFD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: lui a0, %hi(.LCPI14_0) +; RV32IFD-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; RV32IFD-NEXT: flt.d a0, fa5, fa0 +; RV32IFD-NEXT: neg s0, a0 ; RV32IFD-NEXT: fcvt.d.w fa5, zero ; RV32IFD-NEXT: fle.d a0, fa5, fa0 -; RV32IFD-NEXT: neg s0, a0 +; RV32IFD-NEXT: xori a0, a0, 1 +; RV32IFD-NEXT: addi s1, a0, -1 ; RV32IFD-NEXT: call __fixunsdfdi -; RV32IFD-NEXT: lui a2, %hi(.LCPI14_0) -; RV32IFD-NEXT: fld fa5, %lo(.LCPI14_0)(a2) -; RV32IFD-NEXT: and a0, s0, a0 -; RV32IFD-NEXT: flt.d a2, fa5, fs0 -; RV32IFD-NEXT: neg a2, a2 -; RV32IFD-NEXT: or a0, a2, a0 -; RV32IFD-NEXT: and a1, s0, a1 -; RV32IFD-NEXT: or a1, a2, a1 +; RV32IFD-NEXT: and a0, s1, a0 +; RV32IFD-NEXT: or a0, s0, a0 +; RV32IFD-NEXT: and a1, s1, a1 +; RV32IFD-NEXT: or a1, s0, a1 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload +; RV32IFD-NEXT: lw s1, 4(sp) # 4-byte Folded 
Reload ; RV32IFD-NEXT: addi sp, sp, 16 ; RV32IFD-NEXT: ret ; @@ -1054,11 +1066,12 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind { ; RV32IZFINXZDINX-NEXT: lw s1, 12(sp) ; RV32IZFINXZDINX-NEXT: call __fixunsdfdi ; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero -; RV32IZFINXZDINX-NEXT: lui a4, %hi(.LCPI14_0) -; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI14_0+4)(a4) -; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI14_0)(a4) ; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0 -; RV32IZFINXZDINX-NEXT: neg a2, a2 +; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI14_0) +; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI14_0)(a3) +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI14_0+4)(a3) +; RV32IZFINXZDINX-NEXT: xori a2, a2, 1 +; RV32IZFINXZDINX-NEXT: addi a2, a2, -1 ; RV32IZFINXZDINX-NEXT: and a0, a2, a0 ; RV32IZFINXZDINX-NEXT: flt.d a3, a4, s0 ; RV32IZFINXZDINX-NEXT: neg a3, a3 diff --git a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll index b8c6e84502408f..ff2d8e00630071 100644 --- a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll +++ b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll @@ -50,41 +50,48 @@ define signext i32 @test_floor_si32(double %x) { define i64 @test_floor_si64(double %x) nounwind { ; RV32IFD-LABEL: test_floor_si64: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi sp, sp, -16 -; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: addi sp, sp, -32 +; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; RV32IFD-NEXT: call floor ; RV32IFD-NEXT: lui a0, %hi(.LCPI1_0) ; RV32IFD-NEXT: fld fa5, %lo(.LCPI1_0)(a0) +; RV32IFD-NEXT: lui a0, %hi(.LCPI1_1) +; RV32IFD-NEXT: fld fa4, %lo(.LCPI1_1)(a0) ; RV32IFD-NEXT: fmv.d fs0, fa0 -; 
RV32IFD-NEXT: fle.d s0, fa5, fa0 +; RV32IFD-NEXT: flt.d s0, fa5, fa0 +; RV32IFD-NEXT: neg s1, s0 +; RV32IFD-NEXT: fle.d s2, fa4, fa0 +; RV32IFD-NEXT: neg s3, s2 ; RV32IFD-NEXT: call __fixdfdi +; RV32IFD-NEXT: and a0, s3, a0 +; RV32IFD-NEXT: or a0, s1, a0 +; RV32IFD-NEXT: feq.d a2, fs0, fs0 +; RV32IFD-NEXT: neg a2, a2 ; RV32IFD-NEXT: lui a4, 524288 -; RV32IFD-NEXT: lui a2, 524288 -; RV32IFD-NEXT: beqz s0, .LBB1_2 +; RV32IFD-NEXT: li a5, 1 +; RV32IFD-NEXT: lui a3, 524288 +; RV32IFD-NEXT: bne s2, a5, .LBB1_2 ; RV32IFD-NEXT: # %bb.1: -; RV32IFD-NEXT: mv a2, a1 +; RV32IFD-NEXT: mv a3, a1 ; RV32IFD-NEXT: .LBB1_2: -; RV32IFD-NEXT: lui a1, %hi(.LCPI1_1) -; RV32IFD-NEXT: fld fa5, %lo(.LCPI1_1)(a1) -; RV32IFD-NEXT: flt.d a3, fa5, fs0 -; RV32IFD-NEXT: beqz a3, .LBB1_4 +; RV32IFD-NEXT: and a0, a2, a0 +; RV32IFD-NEXT: beqz s0, .LBB1_4 ; RV32IFD-NEXT: # %bb.3: -; RV32IFD-NEXT: addi a2, a4, -1 +; RV32IFD-NEXT: addi a3, a4, -1 ; RV32IFD-NEXT: .LBB1_4: -; RV32IFD-NEXT: feq.d a1, fs0, fs0 -; RV32IFD-NEXT: neg a4, a1 -; RV32IFD-NEXT: and a1, a4, a2 -; RV32IFD-NEXT: neg a2, a3 -; RV32IFD-NEXT: neg a3, s0 -; RV32IFD-NEXT: and a0, a3, a0 -; RV32IFD-NEXT: or a0, a2, a0 -; RV32IFD-NEXT: and a0, a4, a0 -; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: and a1, a2, a3 +; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: test_floor_si64: @@ -101,44 +108,47 @@ define i64 @test_floor_si64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: addi sp, sp, -32 ; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 
4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s3, 16(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call floor -; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) -; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) -; RV32IZFINXZDINX-NEXT: lw s2, 8(sp) -; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) +; RV32IZFINXZDINX-NEXT: sw a0, 0(sp) +; RV32IZFINXZDINX-NEXT: sw a1, 4(sp) +; RV32IZFINXZDINX-NEXT: lw s0, 0(sp) +; RV32IZFINXZDINX-NEXT: lw s1, 4(sp) ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI1_0) ; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI1_0+4)(a2) ; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI1_0)(a2) -; RV32IZFINXZDINX-NEXT: fle.d s0, a2, s2 +; RV32IZFINXZDINX-NEXT: fle.d s2, a2, s0 +; RV32IZFINXZDINX-NEXT: neg s3, s2 ; RV32IZFINXZDINX-NEXT: call __fixdfdi +; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI1_1) +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI1_1+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI1_1)(a2) +; RV32IZFINXZDINX-NEXT: and a0, s3, a0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a3 +; RV32IZFINXZDINX-NEXT: or a0, a2, a0 +; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a2 +; RV32IZFINXZDINX-NEXT: lui a5, 524288 +; RV32IZFINXZDINX-NEXT: li a6, 1 ; RV32IZFINXZDINX-NEXT: lui a4, 524288 -; RV32IZFINXZDINX-NEXT: lui a2, 524288 -; RV32IZFINXZDINX-NEXT: beqz s0, .LBB1_2 +; RV32IZFINXZDINX-NEXT: bne s2, a6, .LBB1_2 ; RV32IZFINXZDINX-NEXT: # %bb.1: -; RV32IZFINXZDINX-NEXT: mv a2, a1 +; RV32IZFINXZDINX-NEXT: mv a4, a1 ; RV32IZFINXZDINX-NEXT: .LBB1_2: -; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI1_1) -; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI1_1)(a1) -; RV32IZFINXZDINX-NEXT: lw a7, %lo(.LCPI1_1+4)(a1) -; RV32IZFINXZDINX-NEXT: flt.d a3, a6, s2 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 ; RV32IZFINXZDINX-NEXT: beqz a3, 
.LBB1_4 ; RV32IZFINXZDINX-NEXT: # %bb.3: -; RV32IZFINXZDINX-NEXT: addi a2, a4, -1 +; RV32IZFINXZDINX-NEXT: addi a4, a5, -1 ; RV32IZFINXZDINX-NEXT: .LBB1_4: -; RV32IZFINXZDINX-NEXT: feq.d a1, s2, s2 -; RV32IZFINXZDINX-NEXT: neg a4, a1 -; RV32IZFINXZDINX-NEXT: and a1, a4, a2 -; RV32IZFINXZDINX-NEXT: neg a2, s0 -; RV32IZFINXZDINX-NEXT: and a0, a2, a0 -; RV32IZFINXZDINX-NEXT: neg a2, a3 -; RV32IZFINXZDINX-NEXT: or a0, a2, a0 -; RV32IZFINXZDINX-NEXT: and a0, a4, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a4 ; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 20(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s3, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 ; RV32IZFINXZDINX-NEXT: ret ; @@ -208,7 +218,8 @@ define i64 @test_floor_ui64(double %x) nounwind { ; RV32IFD-NEXT: neg s0, a0 ; RV32IFD-NEXT: fcvt.d.w fa5, zero ; RV32IFD-NEXT: fle.d a0, fa5, fa0 -; RV32IFD-NEXT: neg s1, a0 +; RV32IFD-NEXT: xori a0, a0, 1 +; RV32IFD-NEXT: addi s1, a0, -1 ; RV32IFD-NEXT: call __fixunsdfdi ; RV32IFD-NEXT: and a0, s1, a0 ; RV32IFD-NEXT: or a0, s0, a0 @@ -235,29 +246,28 @@ define i64 @test_floor_ui64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call floor ; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) ; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) ; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) ; RV32IZFINXZDINX-NEXT: lw s1, 12(sp) +; RV32IZFINXZDINX-NEXT: call __fixunsdfdi ; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero ; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0 -; 
RV32IZFINXZDINX-NEXT: neg s2, a2 -; RV32IZFINXZDINX-NEXT: call __fixunsdfdi -; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI3_0) -; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI3_0+4)(a2) -; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI3_0)(a2) -; RV32IZFINXZDINX-NEXT: and a0, s2, a0 -; RV32IZFINXZDINX-NEXT: flt.d a2, a2, s0 -; RV32IZFINXZDINX-NEXT: neg a2, a2 -; RV32IZFINXZDINX-NEXT: or a0, a2, a0 -; RV32IZFINXZDINX-NEXT: and a1, s2, a1 -; RV32IZFINXZDINX-NEXT: or a1, a2, a1 +; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI3_0) +; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI3_0)(a3) +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI3_0+4)(a3) +; RV32IZFINXZDINX-NEXT: xori a2, a2, 1 +; RV32IZFINXZDINX-NEXT: addi a2, a2, -1 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a4, s0 +; RV32IZFINXZDINX-NEXT: neg a3, a3 +; RV32IZFINXZDINX-NEXT: or a0, a3, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a1 +; RV32IZFINXZDINX-NEXT: or a1, a3, a1 ; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 ; RV32IZFINXZDINX-NEXT: ret ; @@ -316,41 +326,48 @@ define signext i32 @test_ceil_si32(double %x) { define i64 @test_ceil_si64(double %x) nounwind { ; RV32IFD-LABEL: test_ceil_si64: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi sp, sp, -16 -; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: addi sp, sp, -32 +; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; RV32IFD-NEXT: call ceil ; RV32IFD-NEXT: lui a0, %hi(.LCPI5_0) ; RV32IFD-NEXT: fld fa5, 
%lo(.LCPI5_0)(a0) +; RV32IFD-NEXT: lui a0, %hi(.LCPI5_1) +; RV32IFD-NEXT: fld fa4, %lo(.LCPI5_1)(a0) ; RV32IFD-NEXT: fmv.d fs0, fa0 -; RV32IFD-NEXT: fle.d s0, fa5, fa0 +; RV32IFD-NEXT: flt.d s0, fa5, fa0 +; RV32IFD-NEXT: neg s1, s0 +; RV32IFD-NEXT: fle.d s2, fa4, fa0 +; RV32IFD-NEXT: neg s3, s2 ; RV32IFD-NEXT: call __fixdfdi +; RV32IFD-NEXT: and a0, s3, a0 +; RV32IFD-NEXT: or a0, s1, a0 +; RV32IFD-NEXT: feq.d a2, fs0, fs0 +; RV32IFD-NEXT: neg a2, a2 ; RV32IFD-NEXT: lui a4, 524288 -; RV32IFD-NEXT: lui a2, 524288 -; RV32IFD-NEXT: beqz s0, .LBB5_2 +; RV32IFD-NEXT: li a5, 1 +; RV32IFD-NEXT: lui a3, 524288 +; RV32IFD-NEXT: bne s2, a5, .LBB5_2 ; RV32IFD-NEXT: # %bb.1: -; RV32IFD-NEXT: mv a2, a1 +; RV32IFD-NEXT: mv a3, a1 ; RV32IFD-NEXT: .LBB5_2: -; RV32IFD-NEXT: lui a1, %hi(.LCPI5_1) -; RV32IFD-NEXT: fld fa5, %lo(.LCPI5_1)(a1) -; RV32IFD-NEXT: flt.d a3, fa5, fs0 -; RV32IFD-NEXT: beqz a3, .LBB5_4 +; RV32IFD-NEXT: and a0, a2, a0 +; RV32IFD-NEXT: beqz s0, .LBB5_4 ; RV32IFD-NEXT: # %bb.3: -; RV32IFD-NEXT: addi a2, a4, -1 +; RV32IFD-NEXT: addi a3, a4, -1 ; RV32IFD-NEXT: .LBB5_4: -; RV32IFD-NEXT: feq.d a1, fs0, fs0 -; RV32IFD-NEXT: neg a4, a1 -; RV32IFD-NEXT: and a1, a4, a2 -; RV32IFD-NEXT: neg a2, a3 -; RV32IFD-NEXT: neg a3, s0 -; RV32IFD-NEXT: and a0, a3, a0 -; RV32IFD-NEXT: or a0, a2, a0 -; RV32IFD-NEXT: and a0, a4, a0 -; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: and a1, a2, a3 +; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: test_ceil_si64: @@ -367,44 +384,47 @@ define i64 @test_ceil_si64(double %x) nounwind { ; 
RV32IZFINXZDINX-NEXT: addi sp, sp, -32 ; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s3, 16(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call ceil -; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) -; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) -; RV32IZFINXZDINX-NEXT: lw s2, 8(sp) -; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) +; RV32IZFINXZDINX-NEXT: sw a0, 0(sp) +; RV32IZFINXZDINX-NEXT: sw a1, 4(sp) +; RV32IZFINXZDINX-NEXT: lw s0, 0(sp) +; RV32IZFINXZDINX-NEXT: lw s1, 4(sp) ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI5_0) ; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI5_0+4)(a2) ; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI5_0)(a2) -; RV32IZFINXZDINX-NEXT: fle.d s0, a2, s2 +; RV32IZFINXZDINX-NEXT: fle.d s2, a2, s0 +; RV32IZFINXZDINX-NEXT: neg s3, s2 ; RV32IZFINXZDINX-NEXT: call __fixdfdi +; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI5_1) +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI5_1+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI5_1)(a2) +; RV32IZFINXZDINX-NEXT: and a0, s3, a0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a3 +; RV32IZFINXZDINX-NEXT: or a0, a2, a0 +; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a2 +; RV32IZFINXZDINX-NEXT: lui a5, 524288 +; RV32IZFINXZDINX-NEXT: li a6, 1 ; RV32IZFINXZDINX-NEXT: lui a4, 524288 -; RV32IZFINXZDINX-NEXT: lui a2, 524288 -; RV32IZFINXZDINX-NEXT: beqz s0, .LBB5_2 +; RV32IZFINXZDINX-NEXT: bne s2, a6, .LBB5_2 ; RV32IZFINXZDINX-NEXT: # %bb.1: -; RV32IZFINXZDINX-NEXT: mv a2, a1 +; RV32IZFINXZDINX-NEXT: mv a4, a1 ; RV32IZFINXZDINX-NEXT: .LBB5_2: -; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI5_1) -; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI5_1)(a1) -; RV32IZFINXZDINX-NEXT: lw a7, 
%lo(.LCPI5_1+4)(a1) -; RV32IZFINXZDINX-NEXT: flt.d a3, a6, s2 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 ; RV32IZFINXZDINX-NEXT: beqz a3, .LBB5_4 ; RV32IZFINXZDINX-NEXT: # %bb.3: -; RV32IZFINXZDINX-NEXT: addi a2, a4, -1 +; RV32IZFINXZDINX-NEXT: addi a4, a5, -1 ; RV32IZFINXZDINX-NEXT: .LBB5_4: -; RV32IZFINXZDINX-NEXT: feq.d a1, s2, s2 -; RV32IZFINXZDINX-NEXT: neg a4, a1 -; RV32IZFINXZDINX-NEXT: and a1, a4, a2 -; RV32IZFINXZDINX-NEXT: neg a2, s0 -; RV32IZFINXZDINX-NEXT: and a0, a2, a0 -; RV32IZFINXZDINX-NEXT: neg a2, a3 -; RV32IZFINXZDINX-NEXT: or a0, a2, a0 -; RV32IZFINXZDINX-NEXT: and a0, a4, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a4 ; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 20(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s3, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 ; RV32IZFINXZDINX-NEXT: ret ; @@ -474,7 +494,8 @@ define i64 @test_ceil_ui64(double %x) nounwind { ; RV32IFD-NEXT: neg s0, a0 ; RV32IFD-NEXT: fcvt.d.w fa5, zero ; RV32IFD-NEXT: fle.d a0, fa5, fa0 -; RV32IFD-NEXT: neg s1, a0 +; RV32IFD-NEXT: xori a0, a0, 1 +; RV32IFD-NEXT: addi s1, a0, -1 ; RV32IFD-NEXT: call __fixunsdfdi ; RV32IFD-NEXT: and a0, s1, a0 ; RV32IFD-NEXT: or a0, s0, a0 @@ -501,29 +522,28 @@ define i64 @test_ceil_ui64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call ceil ; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) ; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) ; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) ; RV32IZFINXZDINX-NEXT: lw s1, 12(sp) +; 
RV32IZFINXZDINX-NEXT: call __fixunsdfdi ; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero ; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0 -; RV32IZFINXZDINX-NEXT: neg s2, a2 -; RV32IZFINXZDINX-NEXT: call __fixunsdfdi -; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI7_0) -; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI7_0+4)(a2) -; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI7_0)(a2) -; RV32IZFINXZDINX-NEXT: and a0, s2, a0 -; RV32IZFINXZDINX-NEXT: flt.d a2, a2, s0 -; RV32IZFINXZDINX-NEXT: neg a2, a2 -; RV32IZFINXZDINX-NEXT: or a0, a2, a0 -; RV32IZFINXZDINX-NEXT: and a1, s2, a1 -; RV32IZFINXZDINX-NEXT: or a1, a2, a1 +; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI7_0) +; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI7_0)(a3) +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI7_0+4)(a3) +; RV32IZFINXZDINX-NEXT: xori a2, a2, 1 +; RV32IZFINXZDINX-NEXT: addi a2, a2, -1 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a4, s0 +; RV32IZFINXZDINX-NEXT: neg a3, a3 +; RV32IZFINXZDINX-NEXT: or a0, a3, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a1 +; RV32IZFINXZDINX-NEXT: or a1, a3, a1 ; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 ; RV32IZFINXZDINX-NEXT: ret ; @@ -582,41 +602,48 @@ define signext i32 @test_trunc_si32(double %x) { define i64 @test_trunc_si64(double %x) nounwind { ; RV32IFD-LABEL: test_trunc_si64: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi sp, sp, -16 -; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: addi sp, sp, -32 +; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: 
fsd fs0, 0(sp) # 8-byte Folded Spill ; RV32IFD-NEXT: call trunc ; RV32IFD-NEXT: lui a0, %hi(.LCPI9_0) ; RV32IFD-NEXT: fld fa5, %lo(.LCPI9_0)(a0) +; RV32IFD-NEXT: lui a0, %hi(.LCPI9_1) +; RV32IFD-NEXT: fld fa4, %lo(.LCPI9_1)(a0) ; RV32IFD-NEXT: fmv.d fs0, fa0 -; RV32IFD-NEXT: fle.d s0, fa5, fa0 +; RV32IFD-NEXT: flt.d s0, fa5, fa0 +; RV32IFD-NEXT: neg s1, s0 +; RV32IFD-NEXT: fle.d s2, fa4, fa0 +; RV32IFD-NEXT: neg s3, s2 ; RV32IFD-NEXT: call __fixdfdi +; RV32IFD-NEXT: and a0, s3, a0 +; RV32IFD-NEXT: or a0, s1, a0 +; RV32IFD-NEXT: feq.d a2, fs0, fs0 +; RV32IFD-NEXT: neg a2, a2 ; RV32IFD-NEXT: lui a4, 524288 -; RV32IFD-NEXT: lui a2, 524288 -; RV32IFD-NEXT: beqz s0, .LBB9_2 +; RV32IFD-NEXT: li a5, 1 +; RV32IFD-NEXT: lui a3, 524288 +; RV32IFD-NEXT: bne s2, a5, .LBB9_2 ; RV32IFD-NEXT: # %bb.1: -; RV32IFD-NEXT: mv a2, a1 +; RV32IFD-NEXT: mv a3, a1 ; RV32IFD-NEXT: .LBB9_2: -; RV32IFD-NEXT: lui a1, %hi(.LCPI9_1) -; RV32IFD-NEXT: fld fa5, %lo(.LCPI9_1)(a1) -; RV32IFD-NEXT: flt.d a3, fa5, fs0 -; RV32IFD-NEXT: beqz a3, .LBB9_4 +; RV32IFD-NEXT: and a0, a2, a0 +; RV32IFD-NEXT: beqz s0, .LBB9_4 ; RV32IFD-NEXT: # %bb.3: -; RV32IFD-NEXT: addi a2, a4, -1 +; RV32IFD-NEXT: addi a3, a4, -1 ; RV32IFD-NEXT: .LBB9_4: -; RV32IFD-NEXT: feq.d a1, fs0, fs0 -; RV32IFD-NEXT: neg a4, a1 -; RV32IFD-NEXT: and a1, a4, a2 -; RV32IFD-NEXT: neg a2, a3 -; RV32IFD-NEXT: neg a3, s0 -; RV32IFD-NEXT: and a0, a3, a0 -; RV32IFD-NEXT: or a0, a2, a0 -; RV32IFD-NEXT: and a0, a4, a0 -; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: and a1, a2, a3 +; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: addi sp, sp, 32 ; 
RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: test_trunc_si64: @@ -633,44 +660,47 @@ define i64 @test_trunc_si64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: addi sp, sp, -32 ; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s3, 16(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call trunc -; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) -; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) -; RV32IZFINXZDINX-NEXT: lw s2, 8(sp) -; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) +; RV32IZFINXZDINX-NEXT: sw a0, 0(sp) +; RV32IZFINXZDINX-NEXT: sw a1, 4(sp) +; RV32IZFINXZDINX-NEXT: lw s0, 0(sp) +; RV32IZFINXZDINX-NEXT: lw s1, 4(sp) ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI9_0) ; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI9_0+4)(a2) ; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI9_0)(a2) -; RV32IZFINXZDINX-NEXT: fle.d s0, a2, s2 +; RV32IZFINXZDINX-NEXT: fle.d s2, a2, s0 +; RV32IZFINXZDINX-NEXT: neg s3, s2 ; RV32IZFINXZDINX-NEXT: call __fixdfdi +; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI9_1) +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI9_1+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI9_1)(a2) +; RV32IZFINXZDINX-NEXT: and a0, s3, a0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a3 +; RV32IZFINXZDINX-NEXT: or a0, a2, a0 +; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a2 +; RV32IZFINXZDINX-NEXT: lui a5, 524288 +; RV32IZFINXZDINX-NEXT: li a6, 1 ; RV32IZFINXZDINX-NEXT: lui a4, 524288 -; RV32IZFINXZDINX-NEXT: lui a2, 524288 -; RV32IZFINXZDINX-NEXT: beqz s0, .LBB9_2 +; RV32IZFINXZDINX-NEXT: bne s2, a6, .LBB9_2 ; RV32IZFINXZDINX-NEXT: # %bb.1: -; RV32IZFINXZDINX-NEXT: mv a2, a1 +; RV32IZFINXZDINX-NEXT: mv a4, a1 ; RV32IZFINXZDINX-NEXT: .LBB9_2: 
-; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI9_1) -; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI9_1)(a1) -; RV32IZFINXZDINX-NEXT: lw a7, %lo(.LCPI9_1+4)(a1) -; RV32IZFINXZDINX-NEXT: flt.d a3, a6, s2 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 ; RV32IZFINXZDINX-NEXT: beqz a3, .LBB9_4 ; RV32IZFINXZDINX-NEXT: # %bb.3: -; RV32IZFINXZDINX-NEXT: addi a2, a4, -1 +; RV32IZFINXZDINX-NEXT: addi a4, a5, -1 ; RV32IZFINXZDINX-NEXT: .LBB9_4: -; RV32IZFINXZDINX-NEXT: feq.d a1, s2, s2 -; RV32IZFINXZDINX-NEXT: neg a4, a1 -; RV32IZFINXZDINX-NEXT: and a1, a4, a2 -; RV32IZFINXZDINX-NEXT: neg a2, s0 -; RV32IZFINXZDINX-NEXT: and a0, a2, a0 -; RV32IZFINXZDINX-NEXT: neg a2, a3 -; RV32IZFINXZDINX-NEXT: or a0, a2, a0 -; RV32IZFINXZDINX-NEXT: and a0, a4, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a4 ; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 20(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s3, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 ; RV32IZFINXZDINX-NEXT: ret ; @@ -740,7 +770,8 @@ define i64 @test_trunc_ui64(double %x) nounwind { ; RV32IFD-NEXT: neg s0, a0 ; RV32IFD-NEXT: fcvt.d.w fa5, zero ; RV32IFD-NEXT: fle.d a0, fa5, fa0 -; RV32IFD-NEXT: neg s1, a0 +; RV32IFD-NEXT: xori a0, a0, 1 +; RV32IFD-NEXT: addi s1, a0, -1 ; RV32IFD-NEXT: call __fixunsdfdi ; RV32IFD-NEXT: and a0, s1, a0 ; RV32IFD-NEXT: or a0, s0, a0 @@ -767,29 +798,28 @@ define i64 @test_trunc_ui64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call trunc ; RV32IZFINXZDINX-NEXT: 
sw a0, 8(sp) ; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) ; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) ; RV32IZFINXZDINX-NEXT: lw s1, 12(sp) +; RV32IZFINXZDINX-NEXT: call __fixunsdfdi ; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero ; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0 -; RV32IZFINXZDINX-NEXT: neg s2, a2 -; RV32IZFINXZDINX-NEXT: call __fixunsdfdi -; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI11_0) -; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI11_0+4)(a2) -; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI11_0)(a2) -; RV32IZFINXZDINX-NEXT: and a0, s2, a0 -; RV32IZFINXZDINX-NEXT: flt.d a2, a2, s0 -; RV32IZFINXZDINX-NEXT: neg a2, a2 -; RV32IZFINXZDINX-NEXT: or a0, a2, a0 -; RV32IZFINXZDINX-NEXT: and a1, s2, a1 -; RV32IZFINXZDINX-NEXT: or a1, a2, a1 +; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI11_0) +; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI11_0)(a3) +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI11_0+4)(a3) +; RV32IZFINXZDINX-NEXT: xori a2, a2, 1 +; RV32IZFINXZDINX-NEXT: addi a2, a2, -1 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a4, s0 +; RV32IZFINXZDINX-NEXT: neg a3, a3 +; RV32IZFINXZDINX-NEXT: or a0, a3, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a1 +; RV32IZFINXZDINX-NEXT: or a1, a3, a1 ; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 ; RV32IZFINXZDINX-NEXT: ret ; @@ -848,41 +878,48 @@ define signext i32 @test_round_si32(double %x) { define i64 @test_round_si64(double %x) nounwind { ; RV32IFD-LABEL: test_round_si64: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi sp, sp, -16 -; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: addi sp, sp, -32 +; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s1, 20(sp) # 4-byte 
Folded Spill +; RV32IFD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; RV32IFD-NEXT: call round ; RV32IFD-NEXT: lui a0, %hi(.LCPI13_0) ; RV32IFD-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; RV32IFD-NEXT: lui a0, %hi(.LCPI13_1) +; RV32IFD-NEXT: fld fa4, %lo(.LCPI13_1)(a0) ; RV32IFD-NEXT: fmv.d fs0, fa0 -; RV32IFD-NEXT: fle.d s0, fa5, fa0 +; RV32IFD-NEXT: flt.d s0, fa5, fa0 +; RV32IFD-NEXT: neg s1, s0 +; RV32IFD-NEXT: fle.d s2, fa4, fa0 +; RV32IFD-NEXT: neg s3, s2 ; RV32IFD-NEXT: call __fixdfdi +; RV32IFD-NEXT: and a0, s3, a0 +; RV32IFD-NEXT: or a0, s1, a0 +; RV32IFD-NEXT: feq.d a2, fs0, fs0 +; RV32IFD-NEXT: neg a2, a2 ; RV32IFD-NEXT: lui a4, 524288 -; RV32IFD-NEXT: lui a2, 524288 -; RV32IFD-NEXT: beqz s0, .LBB13_2 +; RV32IFD-NEXT: li a5, 1 +; RV32IFD-NEXT: lui a3, 524288 +; RV32IFD-NEXT: bne s2, a5, .LBB13_2 ; RV32IFD-NEXT: # %bb.1: -; RV32IFD-NEXT: mv a2, a1 +; RV32IFD-NEXT: mv a3, a1 ; RV32IFD-NEXT: .LBB13_2: -; RV32IFD-NEXT: lui a1, %hi(.LCPI13_1) -; RV32IFD-NEXT: fld fa5, %lo(.LCPI13_1)(a1) -; RV32IFD-NEXT: flt.d a3, fa5, fs0 -; RV32IFD-NEXT: beqz a3, .LBB13_4 +; RV32IFD-NEXT: and a0, a2, a0 +; RV32IFD-NEXT: beqz s0, .LBB13_4 ; RV32IFD-NEXT: # %bb.3: -; RV32IFD-NEXT: addi a2, a4, -1 +; RV32IFD-NEXT: addi a3, a4, -1 ; RV32IFD-NEXT: .LBB13_4: -; RV32IFD-NEXT: feq.d a1, fs0, fs0 -; RV32IFD-NEXT: neg a4, a1 -; RV32IFD-NEXT: and a1, a4, a2 -; RV32IFD-NEXT: neg a2, a3 -; RV32IFD-NEXT: neg a3, s0 -; RV32IFD-NEXT: and a0, a3, a0 -; RV32IFD-NEXT: or a0, a2, a0 -; RV32IFD-NEXT: and a0, a4, a0 -; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: and a1, a2, a3 +; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s3, 12(sp) # 
4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: test_round_si64: @@ -899,44 +936,47 @@ define i64 @test_round_si64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: addi sp, sp, -32 ; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s3, 16(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call round -; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) -; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) -; RV32IZFINXZDINX-NEXT: lw s2, 8(sp) -; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) +; RV32IZFINXZDINX-NEXT: sw a0, 0(sp) +; RV32IZFINXZDINX-NEXT: sw a1, 4(sp) +; RV32IZFINXZDINX-NEXT: lw s0, 0(sp) +; RV32IZFINXZDINX-NEXT: lw s1, 4(sp) ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI13_0) ; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI13_0+4)(a2) ; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI13_0)(a2) -; RV32IZFINXZDINX-NEXT: fle.d s0, a2, s2 +; RV32IZFINXZDINX-NEXT: fle.d s2, a2, s0 +; RV32IZFINXZDINX-NEXT: neg s3, s2 ; RV32IZFINXZDINX-NEXT: call __fixdfdi +; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI13_1) +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI13_1+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI13_1)(a2) +; RV32IZFINXZDINX-NEXT: and a0, s3, a0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a3 +; RV32IZFINXZDINX-NEXT: or a0, a2, a0 +; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a2 +; RV32IZFINXZDINX-NEXT: lui a5, 524288 +; RV32IZFINXZDINX-NEXT: li a6, 1 ; RV32IZFINXZDINX-NEXT: lui a4, 524288 -; RV32IZFINXZDINX-NEXT: lui a2, 524288 -; RV32IZFINXZDINX-NEXT: beqz s0, .LBB13_2 +; RV32IZFINXZDINX-NEXT: bne s2, 
a6, .LBB13_2 ; RV32IZFINXZDINX-NEXT: # %bb.1: -; RV32IZFINXZDINX-NEXT: mv a2, a1 +; RV32IZFINXZDINX-NEXT: mv a4, a1 ; RV32IZFINXZDINX-NEXT: .LBB13_2: -; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI13_1) -; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI13_1)(a1) -; RV32IZFINXZDINX-NEXT: lw a7, %lo(.LCPI13_1+4)(a1) -; RV32IZFINXZDINX-NEXT: flt.d a3, a6, s2 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 ; RV32IZFINXZDINX-NEXT: beqz a3, .LBB13_4 ; RV32IZFINXZDINX-NEXT: # %bb.3: -; RV32IZFINXZDINX-NEXT: addi a2, a4, -1 +; RV32IZFINXZDINX-NEXT: addi a4, a5, -1 ; RV32IZFINXZDINX-NEXT: .LBB13_4: -; RV32IZFINXZDINX-NEXT: feq.d a1, s2, s2 -; RV32IZFINXZDINX-NEXT: neg a4, a1 -; RV32IZFINXZDINX-NEXT: and a1, a4, a2 -; RV32IZFINXZDINX-NEXT: neg a2, s0 -; RV32IZFINXZDINX-NEXT: and a0, a2, a0 -; RV32IZFINXZDINX-NEXT: neg a2, a3 -; RV32IZFINXZDINX-NEXT: or a0, a2, a0 -; RV32IZFINXZDINX-NEXT: and a0, a4, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a4 ; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 20(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s3, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 ; RV32IZFINXZDINX-NEXT: ret ; @@ -1006,7 +1046,8 @@ define i64 @test_round_ui64(double %x) nounwind { ; RV32IFD-NEXT: neg s0, a0 ; RV32IFD-NEXT: fcvt.d.w fa5, zero ; RV32IFD-NEXT: fle.d a0, fa5, fa0 -; RV32IFD-NEXT: neg s1, a0 +; RV32IFD-NEXT: xori a0, a0, 1 +; RV32IFD-NEXT: addi s1, a0, -1 ; RV32IFD-NEXT: call __fixunsdfdi ; RV32IFD-NEXT: and a0, s1, a0 ; RV32IFD-NEXT: or a0, s0, a0 @@ -1033,29 +1074,28 @@ define i64 @test_round_ui64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; 
RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call round ; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) ; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) ; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) ; RV32IZFINXZDINX-NEXT: lw s1, 12(sp) +; RV32IZFINXZDINX-NEXT: call __fixunsdfdi ; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero ; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0 -; RV32IZFINXZDINX-NEXT: neg s2, a2 -; RV32IZFINXZDINX-NEXT: call __fixunsdfdi -; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI15_0) -; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI15_0+4)(a2) -; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI15_0)(a2) -; RV32IZFINXZDINX-NEXT: and a0, s2, a0 -; RV32IZFINXZDINX-NEXT: flt.d a2, a2, s0 -; RV32IZFINXZDINX-NEXT: neg a2, a2 -; RV32IZFINXZDINX-NEXT: or a0, a2, a0 -; RV32IZFINXZDINX-NEXT: and a1, s2, a1 -; RV32IZFINXZDINX-NEXT: or a1, a2, a1 +; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI15_0) +; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI15_0)(a3) +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI15_0+4)(a3) +; RV32IZFINXZDINX-NEXT: xori a2, a2, 1 +; RV32IZFINXZDINX-NEXT: addi a2, a2, -1 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a4, s0 +; RV32IZFINXZDINX-NEXT: neg a3, a3 +; RV32IZFINXZDINX-NEXT: or a0, a3, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a1 +; RV32IZFINXZDINX-NEXT: or a1, a3, a1 ; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 ; RV32IZFINXZDINX-NEXT: ret ; @@ -1114,41 +1154,48 @@ define signext i32 @test_roundeven_si32(double %x) { define i64 @test_roundeven_si64(double %x) nounwind { ; RV32IFD-LABEL: test_roundeven_si64: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi sp, sp, -16 -; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte 
Folded Spill +; RV32IFD-NEXT: addi sp, sp, -32 +; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; RV32IFD-NEXT: call roundeven ; RV32IFD-NEXT: lui a0, %hi(.LCPI17_0) ; RV32IFD-NEXT: fld fa5, %lo(.LCPI17_0)(a0) +; RV32IFD-NEXT: lui a0, %hi(.LCPI17_1) +; RV32IFD-NEXT: fld fa4, %lo(.LCPI17_1)(a0) ; RV32IFD-NEXT: fmv.d fs0, fa0 -; RV32IFD-NEXT: fle.d s0, fa5, fa0 +; RV32IFD-NEXT: flt.d s0, fa5, fa0 +; RV32IFD-NEXT: neg s1, s0 +; RV32IFD-NEXT: fle.d s2, fa4, fa0 +; RV32IFD-NEXT: neg s3, s2 ; RV32IFD-NEXT: call __fixdfdi +; RV32IFD-NEXT: and a0, s3, a0 +; RV32IFD-NEXT: or a0, s1, a0 +; RV32IFD-NEXT: feq.d a2, fs0, fs0 +; RV32IFD-NEXT: neg a2, a2 ; RV32IFD-NEXT: lui a4, 524288 -; RV32IFD-NEXT: lui a2, 524288 -; RV32IFD-NEXT: beqz s0, .LBB17_2 +; RV32IFD-NEXT: li a5, 1 +; RV32IFD-NEXT: lui a3, 524288 +; RV32IFD-NEXT: bne s2, a5, .LBB17_2 ; RV32IFD-NEXT: # %bb.1: -; RV32IFD-NEXT: mv a2, a1 +; RV32IFD-NEXT: mv a3, a1 ; RV32IFD-NEXT: .LBB17_2: -; RV32IFD-NEXT: lui a1, %hi(.LCPI17_1) -; RV32IFD-NEXT: fld fa5, %lo(.LCPI17_1)(a1) -; RV32IFD-NEXT: flt.d a3, fa5, fs0 -; RV32IFD-NEXT: beqz a3, .LBB17_4 +; RV32IFD-NEXT: and a0, a2, a0 +; RV32IFD-NEXT: beqz s0, .LBB17_4 ; RV32IFD-NEXT: # %bb.3: -; RV32IFD-NEXT: addi a2, a4, -1 +; RV32IFD-NEXT: addi a3, a4, -1 ; RV32IFD-NEXT: .LBB17_4: -; RV32IFD-NEXT: feq.d a1, fs0, fs0 -; RV32IFD-NEXT: neg a4, a1 -; RV32IFD-NEXT: and a1, a4, a2 -; RV32IFD-NEXT: neg a2, a3 -; RV32IFD-NEXT: neg a3, s0 -; RV32IFD-NEXT: and a0, a3, a0 -; RV32IFD-NEXT: or a0, a2, a0 -; RV32IFD-NEXT: and a0, a4, a0 -; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: and a1, a2, a3 +; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; 
RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: test_roundeven_si64: @@ -1165,44 +1212,47 @@ define i64 @test_roundeven_si64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: addi sp, sp, -32 ; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s3, 16(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call roundeven -; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) -; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) -; RV32IZFINXZDINX-NEXT: lw s2, 8(sp) -; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) +; RV32IZFINXZDINX-NEXT: sw a0, 0(sp) +; RV32IZFINXZDINX-NEXT: sw a1, 4(sp) +; RV32IZFINXZDINX-NEXT: lw s0, 0(sp) +; RV32IZFINXZDINX-NEXT: lw s1, 4(sp) ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI17_0) ; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI17_0+4)(a2) ; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI17_0)(a2) -; RV32IZFINXZDINX-NEXT: fle.d s0, a2, s2 +; RV32IZFINXZDINX-NEXT: fle.d s2, a2, s0 +; RV32IZFINXZDINX-NEXT: neg s3, s2 ; RV32IZFINXZDINX-NEXT: call __fixdfdi +; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI17_1) +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI17_1+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI17_1)(a2) +; RV32IZFINXZDINX-NEXT: and a0, s3, a0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a3 +; RV32IZFINXZDINX-NEXT: or a0, a2, a0 +; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a2 +; 
RV32IZFINXZDINX-NEXT: lui a5, 524288 +; RV32IZFINXZDINX-NEXT: li a6, 1 ; RV32IZFINXZDINX-NEXT: lui a4, 524288 -; RV32IZFINXZDINX-NEXT: lui a2, 524288 -; RV32IZFINXZDINX-NEXT: beqz s0, .LBB17_2 +; RV32IZFINXZDINX-NEXT: bne s2, a6, .LBB17_2 ; RV32IZFINXZDINX-NEXT: # %bb.1: -; RV32IZFINXZDINX-NEXT: mv a2, a1 +; RV32IZFINXZDINX-NEXT: mv a4, a1 ; RV32IZFINXZDINX-NEXT: .LBB17_2: -; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI17_1) -; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI17_1)(a1) -; RV32IZFINXZDINX-NEXT: lw a7, %lo(.LCPI17_1+4)(a1) -; RV32IZFINXZDINX-NEXT: flt.d a3, a6, s2 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 ; RV32IZFINXZDINX-NEXT: beqz a3, .LBB17_4 ; RV32IZFINXZDINX-NEXT: # %bb.3: -; RV32IZFINXZDINX-NEXT: addi a2, a4, -1 +; RV32IZFINXZDINX-NEXT: addi a4, a5, -1 ; RV32IZFINXZDINX-NEXT: .LBB17_4: -; RV32IZFINXZDINX-NEXT: feq.d a1, s2, s2 -; RV32IZFINXZDINX-NEXT: neg a4, a1 -; RV32IZFINXZDINX-NEXT: and a1, a4, a2 -; RV32IZFINXZDINX-NEXT: neg a2, s0 -; RV32IZFINXZDINX-NEXT: and a0, a2, a0 -; RV32IZFINXZDINX-NEXT: neg a2, a3 -; RV32IZFINXZDINX-NEXT: or a0, a2, a0 -; RV32IZFINXZDINX-NEXT: and a0, a4, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a4 ; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 20(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s3, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 ; RV32IZFINXZDINX-NEXT: ret ; @@ -1272,7 +1322,8 @@ define i64 @test_roundeven_ui64(double %x) nounwind { ; RV32IFD-NEXT: neg s0, a0 ; RV32IFD-NEXT: fcvt.d.w fa5, zero ; RV32IFD-NEXT: fle.d a0, fa5, fa0 -; RV32IFD-NEXT: neg s1, a0 +; RV32IFD-NEXT: xori a0, a0, 1 +; RV32IFD-NEXT: addi s1, a0, -1 ; RV32IFD-NEXT: call __fixunsdfdi ; RV32IFD-NEXT: and a0, s1, a0 ; RV32IFD-NEXT: 
or a0, s0, a0 @@ -1299,29 +1350,28 @@ define i64 @test_roundeven_ui64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call roundeven ; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) ; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) ; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) ; RV32IZFINXZDINX-NEXT: lw s1, 12(sp) +; RV32IZFINXZDINX-NEXT: call __fixunsdfdi ; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero ; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0 -; RV32IZFINXZDINX-NEXT: neg s2, a2 -; RV32IZFINXZDINX-NEXT: call __fixunsdfdi -; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI19_0) -; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI19_0+4)(a2) -; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI19_0)(a2) -; RV32IZFINXZDINX-NEXT: and a0, s2, a0 -; RV32IZFINXZDINX-NEXT: flt.d a2, a2, s0 -; RV32IZFINXZDINX-NEXT: neg a2, a2 -; RV32IZFINXZDINX-NEXT: or a0, a2, a0 -; RV32IZFINXZDINX-NEXT: and a1, s2, a1 -; RV32IZFINXZDINX-NEXT: or a1, a2, a1 +; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI19_0) +; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI19_0)(a3) +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI19_0+4)(a3) +; RV32IZFINXZDINX-NEXT: xori a2, a2, 1 +; RV32IZFINXZDINX-NEXT: addi a2, a2, -1 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a4, s0 +; RV32IZFINXZDINX-NEXT: neg a3, a3 +; RV32IZFINXZDINX-NEXT: or a0, a3, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a1 +; RV32IZFINXZDINX-NEXT: or a1, a3, a1 ; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 ; RV32IZFINXZDINX-NEXT: ret ; @@ -1380,41 +1430,48 @@ define signext i32 @test_rint_si32(double %x) { define i64 
@test_rint_si64(double %x) nounwind { ; RV32IFD-LABEL: test_rint_si64: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi sp, sp, -16 -; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: addi sp, sp, -32 +; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; RV32IFD-NEXT: call rint ; RV32IFD-NEXT: lui a0, %hi(.LCPI21_0) ; RV32IFD-NEXT: fld fa5, %lo(.LCPI21_0)(a0) +; RV32IFD-NEXT: lui a0, %hi(.LCPI21_1) +; RV32IFD-NEXT: fld fa4, %lo(.LCPI21_1)(a0) ; RV32IFD-NEXT: fmv.d fs0, fa0 -; RV32IFD-NEXT: fle.d s0, fa5, fa0 +; RV32IFD-NEXT: flt.d s0, fa5, fa0 +; RV32IFD-NEXT: neg s1, s0 +; RV32IFD-NEXT: fle.d s2, fa4, fa0 +; RV32IFD-NEXT: neg s3, s2 ; RV32IFD-NEXT: call __fixdfdi +; RV32IFD-NEXT: and a0, s3, a0 +; RV32IFD-NEXT: or a0, s1, a0 +; RV32IFD-NEXT: feq.d a2, fs0, fs0 +; RV32IFD-NEXT: neg a2, a2 ; RV32IFD-NEXT: lui a4, 524288 -; RV32IFD-NEXT: lui a2, 524288 -; RV32IFD-NEXT: beqz s0, .LBB21_2 +; RV32IFD-NEXT: li a5, 1 +; RV32IFD-NEXT: lui a3, 524288 +; RV32IFD-NEXT: bne s2, a5, .LBB21_2 ; RV32IFD-NEXT: # %bb.1: -; RV32IFD-NEXT: mv a2, a1 +; RV32IFD-NEXT: mv a3, a1 ; RV32IFD-NEXT: .LBB21_2: -; RV32IFD-NEXT: lui a1, %hi(.LCPI21_1) -; RV32IFD-NEXT: fld fa5, %lo(.LCPI21_1)(a1) -; RV32IFD-NEXT: flt.d a3, fa5, fs0 -; RV32IFD-NEXT: beqz a3, .LBB21_4 +; RV32IFD-NEXT: and a0, a2, a0 +; RV32IFD-NEXT: beqz s0, .LBB21_4 ; RV32IFD-NEXT: # %bb.3: -; RV32IFD-NEXT: addi a2, a4, -1 +; RV32IFD-NEXT: addi a3, a4, -1 ; RV32IFD-NEXT: .LBB21_4: -; RV32IFD-NEXT: feq.d a1, fs0, fs0 -; RV32IFD-NEXT: neg a4, a1 -; RV32IFD-NEXT: and a1, a4, a2 -; RV32IFD-NEXT: neg a2, a3 -; RV32IFD-NEXT: neg a3, s0 -; RV32IFD-NEXT: and a0, a3, a0 -; RV32IFD-NEXT: or a0, a2, a0 -; RV32IFD-NEXT: 
and a0, a4, a0 -; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: and a1, a2, a3 +; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: test_rint_si64: @@ -1431,44 +1488,47 @@ define i64 @test_rint_si64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: addi sp, sp, -32 ; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s3, 16(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call rint -; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) -; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) -; RV32IZFINXZDINX-NEXT: lw s2, 8(sp) -; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) +; RV32IZFINXZDINX-NEXT: sw a0, 0(sp) +; RV32IZFINXZDINX-NEXT: sw a1, 4(sp) +; RV32IZFINXZDINX-NEXT: lw s0, 0(sp) +; RV32IZFINXZDINX-NEXT: lw s1, 4(sp) ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI21_0) ; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI21_0+4)(a2) ; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI21_0)(a2) -; RV32IZFINXZDINX-NEXT: fle.d s0, a2, s2 +; RV32IZFINXZDINX-NEXT: fle.d s2, a2, s0 +; RV32IZFINXZDINX-NEXT: neg s3, s2 ; RV32IZFINXZDINX-NEXT: call __fixdfdi +; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI21_1) +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI21_1+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI21_1)(a2) +; RV32IZFINXZDINX-NEXT: and a0, s3, a0 +; 
RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a3 +; RV32IZFINXZDINX-NEXT: or a0, a2, a0 +; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a2 +; RV32IZFINXZDINX-NEXT: lui a5, 524288 +; RV32IZFINXZDINX-NEXT: li a6, 1 ; RV32IZFINXZDINX-NEXT: lui a4, 524288 -; RV32IZFINXZDINX-NEXT: lui a2, 524288 -; RV32IZFINXZDINX-NEXT: beqz s0, .LBB21_2 +; RV32IZFINXZDINX-NEXT: bne s2, a6, .LBB21_2 ; RV32IZFINXZDINX-NEXT: # %bb.1: -; RV32IZFINXZDINX-NEXT: mv a2, a1 +; RV32IZFINXZDINX-NEXT: mv a4, a1 ; RV32IZFINXZDINX-NEXT: .LBB21_2: -; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI21_1) -; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI21_1)(a1) -; RV32IZFINXZDINX-NEXT: lw a7, %lo(.LCPI21_1+4)(a1) -; RV32IZFINXZDINX-NEXT: flt.d a3, a6, s2 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 ; RV32IZFINXZDINX-NEXT: beqz a3, .LBB21_4 ; RV32IZFINXZDINX-NEXT: # %bb.3: -; RV32IZFINXZDINX-NEXT: addi a2, a4, -1 +; RV32IZFINXZDINX-NEXT: addi a4, a5, -1 ; RV32IZFINXZDINX-NEXT: .LBB21_4: -; RV32IZFINXZDINX-NEXT: feq.d a1, s2, s2 -; RV32IZFINXZDINX-NEXT: neg a4, a1 -; RV32IZFINXZDINX-NEXT: and a1, a4, a2 -; RV32IZFINXZDINX-NEXT: neg a2, s0 -; RV32IZFINXZDINX-NEXT: and a0, a2, a0 -; RV32IZFINXZDINX-NEXT: neg a2, a3 -; RV32IZFINXZDINX-NEXT: or a0, a2, a0 -; RV32IZFINXZDINX-NEXT: and a0, a4, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a4 ; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 20(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s3, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 ; RV32IZFINXZDINX-NEXT: ret ; @@ -1538,7 +1598,8 @@ define i64 @test_rint_ui64(double %x) nounwind { ; RV32IFD-NEXT: neg s0, a0 ; RV32IFD-NEXT: fcvt.d.w fa5, zero ; RV32IFD-NEXT: 
fle.d a0, fa5, fa0 -; RV32IFD-NEXT: neg s1, a0 +; RV32IFD-NEXT: xori a0, a0, 1 +; RV32IFD-NEXT: addi s1, a0, -1 ; RV32IFD-NEXT: call __fixunsdfdi ; RV32IFD-NEXT: and a0, s1, a0 ; RV32IFD-NEXT: or a0, s0, a0 @@ -1565,29 +1626,28 @@ define i64 @test_rint_ui64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call rint ; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) ; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) ; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) ; RV32IZFINXZDINX-NEXT: lw s1, 12(sp) +; RV32IZFINXZDINX-NEXT: call __fixunsdfdi ; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero ; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0 -; RV32IZFINXZDINX-NEXT: neg s2, a2 -; RV32IZFINXZDINX-NEXT: call __fixunsdfdi -; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI23_0) -; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI23_0+4)(a2) -; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI23_0)(a2) -; RV32IZFINXZDINX-NEXT: and a0, s2, a0 -; RV32IZFINXZDINX-NEXT: flt.d a2, a2, s0 -; RV32IZFINXZDINX-NEXT: neg a2, a2 -; RV32IZFINXZDINX-NEXT: or a0, a2, a0 -; RV32IZFINXZDINX-NEXT: and a1, s2, a1 -; RV32IZFINXZDINX-NEXT: or a1, a2, a1 +; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI23_0) +; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI23_0)(a3) +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI23_0+4)(a3) +; RV32IZFINXZDINX-NEXT: xori a2, a2, 1 +; RV32IZFINXZDINX-NEXT: addi a2, a2, -1 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a4, s0 +; RV32IZFINXZDINX-NEXT: neg a3, a3 +; RV32IZFINXZDINX-NEXT: or a0, a3, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a1 +; RV32IZFINXZDINX-NEXT: or a1, a3, a1 ; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte 
Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 ; RV32IZFINXZDINX-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll index f1e444b5b624b4..1a0e4e18291158 100644 --- a/llvm/test/CodeGen/RISCV/float-convert.ll +++ b/llvm/test/CodeGen/RISCV/float-convert.ll @@ -275,24 +275,26 @@ define i32 @fcvt_wu_s_sat(float %a) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lui a1, 325632 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: call __gtsf2 +; RV32I-NEXT: sgtz a0, a0 +; RV32I-NEXT: neg s1, a0 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2 ; RV32I-NEXT: slti a0, a0, 0 -; RV32I-NEXT: addi s1, a0, -1 +; RV32I-NEXT: addi s2, a0, -1 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixunssfsi -; RV32I-NEXT: and s1, s1, a0 -; RV32I-NEXT: lui a1, 325632 -; RV32I-NEXT: addi a1, a1, -1 -; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: call __gtsf2 -; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: or a0, a0, s1 +; RV32I-NEXT: and a0, s2, a0 +; RV32I-NEXT: or a0, s1, a0 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; @@ -613,40 +615,47 @@ define i64 @fcvt_l_s(float %a) nounwind { define i64 @fcvt_l_s_sat(float %a) nounwind { ; RV32IF-LABEL: fcvt_l_s_sat: ; RV32IF: # %bb.0: # %start -; RV32IF-NEXT: addi sp, sp, -16 -; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: addi sp, sp, -32 +; RV32IF-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s0, 24(sp) # 4-byte Folded 
Spill +; RV32IF-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill +; RV32IF-NEXT: lui a0, %hi(.LCPI12_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI12_0)(a0) ; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: flt.s s0, fa5, fa0 +; RV32IF-NEXT: neg s1, s0 ; RV32IF-NEXT: lui a0, 913408 ; RV32IF-NEXT: fmv.w.x fa5, a0 -; RV32IF-NEXT: fle.s s0, fa5, fa0 +; RV32IF-NEXT: fle.s s2, fa5, fa0 +; RV32IF-NEXT: neg s3, s2 ; RV32IF-NEXT: call __fixsfdi +; RV32IF-NEXT: and a0, s3, a0 +; RV32IF-NEXT: or a0, s1, a0 +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; RV32IF-NEXT: neg a2, a2 ; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: lui a2, 524288 -; RV32IF-NEXT: beqz s0, .LBB12_2 +; RV32IF-NEXT: li a5, 1 +; RV32IF-NEXT: lui a3, 524288 +; RV32IF-NEXT: bne s2, a5, .LBB12_2 ; RV32IF-NEXT: # %bb.1: # %start -; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: mv a3, a1 ; RV32IF-NEXT: .LBB12_2: # %start -; RV32IF-NEXT: lui a1, %hi(.LCPI12_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI12_0)(a1) -; RV32IF-NEXT: flt.s a3, fa5, fs0 -; RV32IF-NEXT: beqz a3, .LBB12_4 +; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: beqz s0, .LBB12_4 ; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: addi a2, a4, -1 +; RV32IF-NEXT: addi a3, a4, -1 ; RV32IF-NEXT: .LBB12_4: # %start -; RV32IF-NEXT: feq.s a1, fs0, fs0 -; RV32IF-NEXT: neg a4, a1 -; RV32IF-NEXT: and a1, a4, a2 -; RV32IF-NEXT: neg a2, a3 -; RV32IF-NEXT: neg a3, s0 -; RV32IF-NEXT: and a0, a3, a0 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a0, a4, a0 -; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload -; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: and a1, a2, a3 +; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s2, 16(sp) # 
4-byte Folded Reload +; RV32IF-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 32 ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: fcvt_l_s_sat: @@ -664,35 +673,38 @@ define i64 @fcvt_l_s_sat(float %a) nounwind { ; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: mv s0, a0 ; RV32IZFINX-NEXT: lui a0, 913408 ; RV32IZFINX-NEXT: fle.s s1, a0, s0 +; RV32IZFINX-NEXT: neg s2, s1 ; RV32IZFINX-NEXT: mv a0, s0 ; RV32IZFINX-NEXT: call __fixsfdi +; RV32IZFINX-NEXT: lui a2, %hi(.LCPI12_0) +; RV32IZFINX-NEXT: lw a2, %lo(.LCPI12_0)(a2) +; RV32IZFINX-NEXT: and a0, s2, a0 +; RV32IZFINX-NEXT: flt.s a3, a2, s0 +; RV32IZFINX-NEXT: neg a2, a3 +; RV32IZFINX-NEXT: or a0, a2, a0 +; RV32IZFINX-NEXT: feq.s a2, s0, s0 +; RV32IZFINX-NEXT: neg a2, a2 +; RV32IZFINX-NEXT: lui a5, 524288 +; RV32IZFINX-NEXT: li a6, 1 ; RV32IZFINX-NEXT: lui a4, 524288 -; RV32IZFINX-NEXT: lui a2, 524288 -; RV32IZFINX-NEXT: beqz s1, .LBB12_2 +; RV32IZFINX-NEXT: bne s1, a6, .LBB12_2 ; RV32IZFINX-NEXT: # %bb.1: # %start -; RV32IZFINX-NEXT: mv a2, a1 +; RV32IZFINX-NEXT: mv a4, a1 ; RV32IZFINX-NEXT: .LBB12_2: # %start -; RV32IZFINX-NEXT: lui a1, %hi(.LCPI12_0) -; RV32IZFINX-NEXT: lw a1, %lo(.LCPI12_0)(a1) -; RV32IZFINX-NEXT: flt.s a3, a1, s0 +; RV32IZFINX-NEXT: and a0, a2, a0 ; RV32IZFINX-NEXT: beqz a3, .LBB12_4 ; RV32IZFINX-NEXT: # %bb.3: -; RV32IZFINX-NEXT: addi a2, a4, -1 +; RV32IZFINX-NEXT: addi a4, a5, -1 ; RV32IZFINX-NEXT: .LBB12_4: # %start -; RV32IZFINX-NEXT: feq.s a1, s0, s0 -; RV32IZFINX-NEXT: neg a4, a1 -; RV32IZFINX-NEXT: and a1, a4, a2 -; RV32IZFINX-NEXT: neg a2, s1 -; RV32IZFINX-NEXT: and a0, a2, a0 -; RV32IZFINX-NEXT: neg a2, a3 -; RV32IZFINX-NEXT: or a0, a2, a0 -; RV32IZFINX-NEXT: and a0, a4, a0 +; RV32IZFINX-NEXT: and a1, a2, a4 ; RV32IZFINX-NEXT: lw ra, 
12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: addi sp, sp, 16 ; RV32IZFINX-NEXT: ret ; @@ -863,23 +875,23 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: lui a0, %hi(.LCPI14_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI14_0)(a0) +; RV32IF-NEXT: flt.s a0, fa5, fa0 +; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: fmv.w.x fa5, zero ; RV32IF-NEXT: fle.s a0, fa5, fa0 -; RV32IF-NEXT: neg s0, a0 +; RV32IF-NEXT: xori a0, a0, 1 +; RV32IF-NEXT: addi s1, a0, -1 ; RV32IF-NEXT: call __fixunssfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI14_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI14_0)(a2) -; RV32IF-NEXT: and a0, s0, a0 -; RV32IF-NEXT: flt.s a2, fa5, fs0 -; RV32IF-NEXT: neg a2, a2 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a1, s0, a1 -; RV32IF-NEXT: or a1, a2, a1 +; RV32IF-NEXT: and a0, s1, a0 +; RV32IF-NEXT: or a0, s0, a0 +; RV32IF-NEXT: and a1, s1, a1 +; RV32IF-NEXT: or a1, s0, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -898,19 +910,18 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind { ; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFINX-NEXT: mv s0, a0 -; RV32IZFINX-NEXT: fle.s a0, zero, a0 -; RV32IZFINX-NEXT: neg s1, a0 -; RV32IZFINX-NEXT: mv a0, s0 +; RV32IZFINX-NEXT: lui a1, %hi(.LCPI14_0) +; 
RV32IZFINX-NEXT: lw a1, %lo(.LCPI14_0)(a1) +; RV32IZFINX-NEXT: flt.s a1, a1, a0 +; RV32IZFINX-NEXT: neg s0, a1 +; RV32IZFINX-NEXT: fle.s a1, zero, a0 +; RV32IZFINX-NEXT: xori a1, a1, 1 +; RV32IZFINX-NEXT: addi s1, a1, -1 ; RV32IZFINX-NEXT: call __fixunssfdi -; RV32IZFINX-NEXT: lui a2, %hi(.LCPI14_0) -; RV32IZFINX-NEXT: lw a2, %lo(.LCPI14_0)(a2) ; RV32IZFINX-NEXT: and a0, s1, a0 -; RV32IZFINX-NEXT: flt.s a2, a2, s0 -; RV32IZFINX-NEXT: neg a2, a2 -; RV32IZFINX-NEXT: or a0, a2, a0 +; RV32IZFINX-NEXT: or a0, s0, a0 ; RV32IZFINX-NEXT: and a1, s1, a1 -; RV32IZFINX-NEXT: or a1, a2, a1 +; RV32IZFINX-NEXT: or a1, s0, a1 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -928,36 +939,33 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind { ; ; RV32I-LABEL: fcvt_lu_s_sat: ; RV32I: # %bb.0: # %start -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lui a1, 391168 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: call __gtsf2 +; RV32I-NEXT: sgtz a0, a0 +; RV32I-NEXT: neg s1, a0 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2 ; RV32I-NEXT: slti a0, a0, 0 ; RV32I-NEXT: addi s2, a0, -1 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixunssfdi -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: and s3, s2, a0 -; RV32I-NEXT: lui a1, 391168 -; RV32I-NEXT: addi a1, a1, -1 -; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: call __gtsf2 -; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: 
neg a1, a0 -; RV32I-NEXT: or a0, a1, s3 -; RV32I-NEXT: and a2, s2, s1 -; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: and a0, s2, a0 +; RV32I-NEXT: or a0, s1, a0 +; RV32I-NEXT: and a1, s2, a1 +; RV32I-NEXT: or a1, s1, a1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV64I-LABEL: fcvt_lu_s_sat: @@ -966,24 +974,26 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lui a1, 391168 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: call __gtsf2 +; RV64I-NEXT: sgtz a0, a0 +; RV64I-NEXT: neg s1, a0 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __gesf2 ; RV64I-NEXT: slti a0, a0, 0 -; RV64I-NEXT: addi s1, a0, -1 +; RV64I-NEXT: addi s2, a0, -1 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixunssfdi -; RV64I-NEXT: and s1, s1, a0 -; RV64I-NEXT: lui a1, 391168 -; RV64I-NEXT: addiw a1, a1, -1 -; RV64I-NEXT: mv a0, s0 -; RV64I-NEXT: call __gtsf2 -; RV64I-NEXT: sgtz a0, a0 -; RV64I-NEXT: neg a0, a0 -; RV64I-NEXT: or a0, a0, s1 +; RV64I-NEXT: and a0, s2, a0 +; RV64I-NEXT: or a0, s1, a0 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret start: @@ 
-2089,24 +2099,26 @@ define zeroext i32 @fcvt_wu_s_sat_zext(float %a) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lui a1, 325632 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: call __gtsf2 +; RV32I-NEXT: sgtz a0, a0 +; RV32I-NEXT: neg s1, a0 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2 ; RV32I-NEXT: slti a0, a0, 0 -; RV32I-NEXT: addi s1, a0, -1 +; RV32I-NEXT: addi s2, a0, -1 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixunssfsi -; RV32I-NEXT: and s1, s1, a0 -; RV32I-NEXT: lui a1, 325632 -; RV32I-NEXT: addi a1, a1, -1 -; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: call __gtsf2 -; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: or a0, a0, s1 +; RV32I-NEXT: and a0, s2, a0 +; RV32I-NEXT: or a0, s1, a0 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll index 5e99c7eb905628..f91aac11876d41 100644 --- a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll +++ b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll @@ -37,7 +37,8 @@ define i64 @test_floor_si64(float %x) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -52,32 +53,34 @@ define i64 @test_floor_si64(float %x) nounwind { ; RV32IF-NEXT: 
lui a0, 913408 ; RV32IF-NEXT: fmv.w.x fa5, a0 ; RV32IF-NEXT: fle.s s0, fa5, fs0 +; RV32IF-NEXT: neg s1, s0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixsfdi +; RV32IF-NEXT: lui a2, %hi(.LCPI1_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI1_0)(a2) +; RV32IF-NEXT: and a0, s1, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a2, a3 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: lui a5, 524288 +; RV32IF-NEXT: li a6, 1 ; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: lui a2, 524288 -; RV32IF-NEXT: beqz s0, .LBB1_4 +; RV32IF-NEXT: bne s0, a6, .LBB1_4 ; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: mv a4, a1 ; RV32IF-NEXT: .LBB1_4: -; RV32IF-NEXT: lui a1, %hi(.LCPI1_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI1_0)(a1) -; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: and a0, a2, a0 ; RV32IF-NEXT: beqz a3, .LBB1_6 ; RV32IF-NEXT: # %bb.5: -; RV32IF-NEXT: addi a2, a4, -1 +; RV32IF-NEXT: addi a4, a5, -1 ; RV32IF-NEXT: .LBB1_6: -; RV32IF-NEXT: feq.s a1, fs0, fs0 -; RV32IF-NEXT: neg a4, a1 -; RV32IF-NEXT: and a1, a4, a2 -; RV32IF-NEXT: neg a2, s0 -; RV32IF-NEXT: and a0, a2, a0 -; RV32IF-NEXT: neg a2, a3 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a0, a4, a0 +; RV32IF-NEXT: and a1, a2, a4 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -115,23 +118,24 @@ define i64 @test_floor_si64(float %x) nounwind { ; RV32IZFINX-NEXT: lui a2, %hi(.LCPI1_0) ; RV32IZFINX-NEXT: lw a2, %lo(.LCPI1_0)(a2) ; RV32IZFINX-NEXT: and a0, s2, a0 -; RV32IZFINX-NEXT: flt.s a4, a2, s0 -; RV32IZFINX-NEXT: neg a2, a4 +; RV32IZFINX-NEXT: flt.s a3, a2, s0 +; RV32IZFINX-NEXT: neg a2, a3 ; RV32IZFINX-NEXT: or a0, a2, a0 ; RV32IZFINX-NEXT: feq.s a2, s0, s0 ; 
RV32IZFINX-NEXT: neg a2, a2 ; RV32IZFINX-NEXT: lui a5, 524288 -; RV32IZFINX-NEXT: lui a3, 524288 -; RV32IZFINX-NEXT: beqz s1, .LBB1_4 +; RV32IZFINX-NEXT: li a6, 1 +; RV32IZFINX-NEXT: lui a4, 524288 +; RV32IZFINX-NEXT: bne s1, a6, .LBB1_4 ; RV32IZFINX-NEXT: # %bb.3: -; RV32IZFINX-NEXT: mv a3, a1 +; RV32IZFINX-NEXT: mv a4, a1 ; RV32IZFINX-NEXT: .LBB1_4: ; RV32IZFINX-NEXT: and a0, a2, a0 -; RV32IZFINX-NEXT: beqz a4, .LBB1_6 +; RV32IZFINX-NEXT: beqz a3, .LBB1_6 ; RV32IZFINX-NEXT: # %bb.5: -; RV32IZFINX-NEXT: addi a3, a5, -1 +; RV32IZFINX-NEXT: addi a4, a5, -1 ; RV32IZFINX-NEXT: .LBB1_6: -; RV32IZFINX-NEXT: and a1, a2, a3 +; RV32IZFINX-NEXT: and a1, a2, a4 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -180,8 +184,7 @@ define i64 @test_floor_ui64(float %x) nounwind { ; RV32IF: # %bb.0: ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -193,22 +196,22 @@ define i64 @test_floor_ui64(float %x) nounwind { ; RV32IF-NEXT: fcvt.s.w fa5, a0, rdn ; RV32IF-NEXT: fsgnj.s fs0, fa5, fs0 ; RV32IF-NEXT: .LBB3_2: -; RV32IF-NEXT: fmv.w.x fa5, zero -; RV32IF-NEXT: fle.s a0, fa5, fs0 -; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI3_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI3_0)(a2) -; RV32IF-NEXT: and a0, s0, a0 -; RV32IF-NEXT: flt.s a2, fa5, fs0 -; RV32IF-NEXT: neg a2, a2 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a1, s0, a1 -; RV32IF-NEXT: or a1, a2, a1 +; RV32IF-NEXT: fmv.w.x fa5, zero +; RV32IF-NEXT: fle.s a2, fa5, fs0 +; RV32IF-NEXT: lui a3, %hi(.LCPI3_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI3_0)(a3) +; 
RV32IF-NEXT: xori a2, a2, 1 +; RV32IF-NEXT: addi a2, a2, -1 +; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a3, a3 +; RV32IF-NEXT: or a0, a3, a0 +; RV32IF-NEXT: and a1, a2, a1 +; RV32IF-NEXT: or a1, a3, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -226,7 +229,6 @@ define i64 @test_floor_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: addi sp, sp, -16 ; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: mv s0, a0 ; RV32IZFINX-NEXT: lui a0, 307200 ; RV32IZFINX-NEXT: fabs.s a1, s0 @@ -237,21 +239,21 @@ define i64 @test_floor_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: fcvt.s.w a0, a0, rdn ; RV32IZFINX-NEXT: fsgnj.s s0, a0, s0 ; RV32IZFINX-NEXT: .LBB3_2: -; RV32IZFINX-NEXT: fle.s a0, zero, s0 -; RV32IZFINX-NEXT: neg s1, a0 ; RV32IZFINX-NEXT: mv a0, s0 ; RV32IZFINX-NEXT: call __fixunssfdi -; RV32IZFINX-NEXT: lui a2, %hi(.LCPI3_0) -; RV32IZFINX-NEXT: lw a2, %lo(.LCPI3_0)(a2) -; RV32IZFINX-NEXT: and a0, s1, a0 -; RV32IZFINX-NEXT: flt.s a2, a2, s0 -; RV32IZFINX-NEXT: neg a2, a2 -; RV32IZFINX-NEXT: or a0, a2, a0 -; RV32IZFINX-NEXT: and a1, s1, a1 -; RV32IZFINX-NEXT: or a1, a2, a1 +; RV32IZFINX-NEXT: fle.s a2, zero, s0 +; RV32IZFINX-NEXT: lui a3, %hi(.LCPI3_0) +; RV32IZFINX-NEXT: lw a3, %lo(.LCPI3_0)(a3) +; RV32IZFINX-NEXT: xori a2, a2, 1 +; RV32IZFINX-NEXT: addi a2, a2, -1 +; RV32IZFINX-NEXT: and a0, a2, a0 +; RV32IZFINX-NEXT: flt.s a3, a3, s0 +; RV32IZFINX-NEXT: neg a3, a3 +; RV32IZFINX-NEXT: or a0, a3, a0 +; RV32IZFINX-NEXT: and a1, a2, a1 +; RV32IZFINX-NEXT: or a1, a3, a1 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; 
RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: addi sp, sp, 16 ; RV32IZFINX-NEXT: ret ; @@ -297,7 +299,8 @@ define i64 @test_ceil_si64(float %x) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -312,32 +315,34 @@ define i64 @test_ceil_si64(float %x) nounwind { ; RV32IF-NEXT: lui a0, 913408 ; RV32IF-NEXT: fmv.w.x fa5, a0 ; RV32IF-NEXT: fle.s s0, fa5, fs0 +; RV32IF-NEXT: neg s1, s0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixsfdi +; RV32IF-NEXT: lui a2, %hi(.LCPI5_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI5_0)(a2) +; RV32IF-NEXT: and a0, s1, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a2, a3 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: lui a5, 524288 +; RV32IF-NEXT: li a6, 1 ; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: lui a2, 524288 -; RV32IF-NEXT: beqz s0, .LBB5_4 +; RV32IF-NEXT: bne s0, a6, .LBB5_4 ; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: mv a4, a1 ; RV32IF-NEXT: .LBB5_4: -; RV32IF-NEXT: lui a1, %hi(.LCPI5_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI5_0)(a1) -; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: and a0, a2, a0 ; RV32IF-NEXT: beqz a3, .LBB5_6 ; RV32IF-NEXT: # %bb.5: -; RV32IF-NEXT: addi a2, a4, -1 +; RV32IF-NEXT: addi a4, a5, -1 ; RV32IF-NEXT: .LBB5_6: -; RV32IF-NEXT: feq.s a1, fs0, fs0 -; RV32IF-NEXT: neg a4, a1 -; RV32IF-NEXT: and a1, a4, a2 -; RV32IF-NEXT: neg a2, s0 -; RV32IF-NEXT: and a0, a2, a0 -; RV32IF-NEXT: neg a2, a3 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a0, a4, a0 +; RV32IF-NEXT: and a1, a2, a4 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw 
s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -375,23 +380,24 @@ define i64 @test_ceil_si64(float %x) nounwind { ; RV32IZFINX-NEXT: lui a2, %hi(.LCPI5_0) ; RV32IZFINX-NEXT: lw a2, %lo(.LCPI5_0)(a2) ; RV32IZFINX-NEXT: and a0, s2, a0 -; RV32IZFINX-NEXT: flt.s a4, a2, s0 -; RV32IZFINX-NEXT: neg a2, a4 +; RV32IZFINX-NEXT: flt.s a3, a2, s0 +; RV32IZFINX-NEXT: neg a2, a3 ; RV32IZFINX-NEXT: or a0, a2, a0 ; RV32IZFINX-NEXT: feq.s a2, s0, s0 ; RV32IZFINX-NEXT: neg a2, a2 ; RV32IZFINX-NEXT: lui a5, 524288 -; RV32IZFINX-NEXT: lui a3, 524288 -; RV32IZFINX-NEXT: beqz s1, .LBB5_4 +; RV32IZFINX-NEXT: li a6, 1 +; RV32IZFINX-NEXT: lui a4, 524288 +; RV32IZFINX-NEXT: bne s1, a6, .LBB5_4 ; RV32IZFINX-NEXT: # %bb.3: -; RV32IZFINX-NEXT: mv a3, a1 +; RV32IZFINX-NEXT: mv a4, a1 ; RV32IZFINX-NEXT: .LBB5_4: ; RV32IZFINX-NEXT: and a0, a2, a0 -; RV32IZFINX-NEXT: beqz a4, .LBB5_6 +; RV32IZFINX-NEXT: beqz a3, .LBB5_6 ; RV32IZFINX-NEXT: # %bb.5: -; RV32IZFINX-NEXT: addi a3, a5, -1 +; RV32IZFINX-NEXT: addi a4, a5, -1 ; RV32IZFINX-NEXT: .LBB5_6: -; RV32IZFINX-NEXT: and a1, a2, a3 +; RV32IZFINX-NEXT: and a1, a2, a4 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -440,8 +446,7 @@ define i64 @test_ceil_ui64(float %x) nounwind { ; RV32IF: # %bb.0: ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -453,22 +458,22 @@ define i64 @test_ceil_ui64(float %x) nounwind { ; RV32IF-NEXT: fcvt.s.w fa5, a0, rup ; 
RV32IF-NEXT: fsgnj.s fs0, fa5, fs0 ; RV32IF-NEXT: .LBB7_2: -; RV32IF-NEXT: fmv.w.x fa5, zero -; RV32IF-NEXT: fle.s a0, fa5, fs0 -; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI7_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI7_0)(a2) -; RV32IF-NEXT: and a0, s0, a0 -; RV32IF-NEXT: flt.s a2, fa5, fs0 -; RV32IF-NEXT: neg a2, a2 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a1, s0, a1 -; RV32IF-NEXT: or a1, a2, a1 +; RV32IF-NEXT: fmv.w.x fa5, zero +; RV32IF-NEXT: fle.s a2, fa5, fs0 +; RV32IF-NEXT: lui a3, %hi(.LCPI7_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI7_0)(a3) +; RV32IF-NEXT: xori a2, a2, 1 +; RV32IF-NEXT: addi a2, a2, -1 +; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a3, a3 +; RV32IF-NEXT: or a0, a3, a0 +; RV32IF-NEXT: and a1, a2, a1 +; RV32IF-NEXT: or a1, a3, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -486,7 +491,6 @@ define i64 @test_ceil_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: addi sp, sp, -16 ; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: mv s0, a0 ; RV32IZFINX-NEXT: lui a0, 307200 ; RV32IZFINX-NEXT: fabs.s a1, s0 @@ -497,21 +501,21 @@ define i64 @test_ceil_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: fcvt.s.w a0, a0, rup ; RV32IZFINX-NEXT: fsgnj.s s0, a0, s0 ; RV32IZFINX-NEXT: .LBB7_2: -; RV32IZFINX-NEXT: fle.s a0, zero, s0 -; RV32IZFINX-NEXT: neg s1, a0 ; RV32IZFINX-NEXT: mv a0, s0 ; RV32IZFINX-NEXT: call __fixunssfdi -; RV32IZFINX-NEXT: lui a2, %hi(.LCPI7_0) -; RV32IZFINX-NEXT: lw a2, %lo(.LCPI7_0)(a2) -; RV32IZFINX-NEXT: and a0, s1, a0 -; RV32IZFINX-NEXT: flt.s a2, a2, s0 -; RV32IZFINX-NEXT: 
neg a2, a2 -; RV32IZFINX-NEXT: or a0, a2, a0 -; RV32IZFINX-NEXT: and a1, s1, a1 -; RV32IZFINX-NEXT: or a1, a2, a1 +; RV32IZFINX-NEXT: fle.s a2, zero, s0 +; RV32IZFINX-NEXT: lui a3, %hi(.LCPI7_0) +; RV32IZFINX-NEXT: lw a3, %lo(.LCPI7_0)(a3) +; RV32IZFINX-NEXT: xori a2, a2, 1 +; RV32IZFINX-NEXT: addi a2, a2, -1 +; RV32IZFINX-NEXT: and a0, a2, a0 +; RV32IZFINX-NEXT: flt.s a3, a3, s0 +; RV32IZFINX-NEXT: neg a3, a3 +; RV32IZFINX-NEXT: or a0, a3, a0 +; RV32IZFINX-NEXT: and a1, a2, a1 +; RV32IZFINX-NEXT: or a1, a3, a1 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: addi sp, sp, 16 ; RV32IZFINX-NEXT: ret ; @@ -557,7 +561,8 @@ define i64 @test_trunc_si64(float %x) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -572,32 +577,34 @@ define i64 @test_trunc_si64(float %x) nounwind { ; RV32IF-NEXT: lui a0, 913408 ; RV32IF-NEXT: fmv.w.x fa5, a0 ; RV32IF-NEXT: fle.s s0, fa5, fs0 +; RV32IF-NEXT: neg s1, s0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixsfdi +; RV32IF-NEXT: lui a2, %hi(.LCPI9_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI9_0)(a2) +; RV32IF-NEXT: and a0, s1, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a2, a3 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: lui a5, 524288 +; RV32IF-NEXT: li a6, 1 ; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: lui a2, 524288 -; RV32IF-NEXT: beqz s0, .LBB9_4 +; RV32IF-NEXT: bne s0, a6, .LBB9_4 ; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: mv a4, a1 ; RV32IF-NEXT: .LBB9_4: 
-; RV32IF-NEXT: lui a1, %hi(.LCPI9_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI9_0)(a1) -; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: and a0, a2, a0 ; RV32IF-NEXT: beqz a3, .LBB9_6 ; RV32IF-NEXT: # %bb.5: -; RV32IF-NEXT: addi a2, a4, -1 +; RV32IF-NEXT: addi a4, a5, -1 ; RV32IF-NEXT: .LBB9_6: -; RV32IF-NEXT: feq.s a1, fs0, fs0 -; RV32IF-NEXT: neg a4, a1 -; RV32IF-NEXT: and a1, a4, a2 -; RV32IF-NEXT: neg a2, s0 -; RV32IF-NEXT: and a0, a2, a0 -; RV32IF-NEXT: neg a2, a3 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a0, a4, a0 +; RV32IF-NEXT: and a1, a2, a4 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -635,23 +642,24 @@ define i64 @test_trunc_si64(float %x) nounwind { ; RV32IZFINX-NEXT: lui a2, %hi(.LCPI9_0) ; RV32IZFINX-NEXT: lw a2, %lo(.LCPI9_0)(a2) ; RV32IZFINX-NEXT: and a0, s2, a0 -; RV32IZFINX-NEXT: flt.s a4, a2, s0 -; RV32IZFINX-NEXT: neg a2, a4 +; RV32IZFINX-NEXT: flt.s a3, a2, s0 +; RV32IZFINX-NEXT: neg a2, a3 ; RV32IZFINX-NEXT: or a0, a2, a0 ; RV32IZFINX-NEXT: feq.s a2, s0, s0 ; RV32IZFINX-NEXT: neg a2, a2 ; RV32IZFINX-NEXT: lui a5, 524288 -; RV32IZFINX-NEXT: lui a3, 524288 -; RV32IZFINX-NEXT: beqz s1, .LBB9_4 +; RV32IZFINX-NEXT: li a6, 1 +; RV32IZFINX-NEXT: lui a4, 524288 +; RV32IZFINX-NEXT: bne s1, a6, .LBB9_4 ; RV32IZFINX-NEXT: # %bb.3: -; RV32IZFINX-NEXT: mv a3, a1 +; RV32IZFINX-NEXT: mv a4, a1 ; RV32IZFINX-NEXT: .LBB9_4: ; RV32IZFINX-NEXT: and a0, a2, a0 -; RV32IZFINX-NEXT: beqz a4, .LBB9_6 +; RV32IZFINX-NEXT: beqz a3, .LBB9_6 ; RV32IZFINX-NEXT: # %bb.5: -; RV32IZFINX-NEXT: addi a3, a5, -1 +; RV32IZFINX-NEXT: addi a4, a5, -1 ; RV32IZFINX-NEXT: .LBB9_6: -; RV32IZFINX-NEXT: and a1, a2, a3 +; RV32IZFINX-NEXT: and a1, a2, a4 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; 
RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -700,8 +708,7 @@ define i64 @test_trunc_ui64(float %x) nounwind { ; RV32IF: # %bb.0: ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -713,22 +720,22 @@ define i64 @test_trunc_ui64(float %x) nounwind { ; RV32IF-NEXT: fcvt.s.w fa5, a0, rtz ; RV32IF-NEXT: fsgnj.s fs0, fa5, fs0 ; RV32IF-NEXT: .LBB11_2: -; RV32IF-NEXT: fmv.w.x fa5, zero -; RV32IF-NEXT: fle.s a0, fa5, fs0 -; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI11_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI11_0)(a2) -; RV32IF-NEXT: and a0, s0, a0 -; RV32IF-NEXT: flt.s a2, fa5, fs0 -; RV32IF-NEXT: neg a2, a2 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a1, s0, a1 -; RV32IF-NEXT: or a1, a2, a1 +; RV32IF-NEXT: fmv.w.x fa5, zero +; RV32IF-NEXT: fle.s a2, fa5, fs0 +; RV32IF-NEXT: lui a3, %hi(.LCPI11_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI11_0)(a3) +; RV32IF-NEXT: xori a2, a2, 1 +; RV32IF-NEXT: addi a2, a2, -1 +; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a3, a3 +; RV32IF-NEXT: or a0, a3, a0 +; RV32IF-NEXT: and a1, a2, a1 +; RV32IF-NEXT: or a1, a3, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -746,7 +753,6 @@ define i64 @test_trunc_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: addi sp, sp, -16 ; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill 
-; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: mv s0, a0 ; RV32IZFINX-NEXT: lui a0, 307200 ; RV32IZFINX-NEXT: fabs.s a1, s0 @@ -757,21 +763,21 @@ define i64 @test_trunc_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: fcvt.s.w a0, a0, rtz ; RV32IZFINX-NEXT: fsgnj.s s0, a0, s0 ; RV32IZFINX-NEXT: .LBB11_2: -; RV32IZFINX-NEXT: fle.s a0, zero, s0 -; RV32IZFINX-NEXT: neg s1, a0 ; RV32IZFINX-NEXT: mv a0, s0 ; RV32IZFINX-NEXT: call __fixunssfdi -; RV32IZFINX-NEXT: lui a2, %hi(.LCPI11_0) -; RV32IZFINX-NEXT: lw a2, %lo(.LCPI11_0)(a2) -; RV32IZFINX-NEXT: and a0, s1, a0 -; RV32IZFINX-NEXT: flt.s a2, a2, s0 -; RV32IZFINX-NEXT: neg a2, a2 -; RV32IZFINX-NEXT: or a0, a2, a0 -; RV32IZFINX-NEXT: and a1, s1, a1 -; RV32IZFINX-NEXT: or a1, a2, a1 +; RV32IZFINX-NEXT: fle.s a2, zero, s0 +; RV32IZFINX-NEXT: lui a3, %hi(.LCPI11_0) +; RV32IZFINX-NEXT: lw a3, %lo(.LCPI11_0)(a3) +; RV32IZFINX-NEXT: xori a2, a2, 1 +; RV32IZFINX-NEXT: addi a2, a2, -1 +; RV32IZFINX-NEXT: and a0, a2, a0 +; RV32IZFINX-NEXT: flt.s a3, a3, s0 +; RV32IZFINX-NEXT: neg a3, a3 +; RV32IZFINX-NEXT: or a0, a3, a0 +; RV32IZFINX-NEXT: and a1, a2, a1 +; RV32IZFINX-NEXT: or a1, a3, a1 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: addi sp, sp, 16 ; RV32IZFINX-NEXT: ret ; @@ -817,7 +823,8 @@ define i64 @test_round_si64(float %x) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -832,32 +839,34 @@ define i64 @test_round_si64(float %x) nounwind { ; RV32IF-NEXT: lui a0, 913408 ; RV32IF-NEXT: fmv.w.x fa5, a0 ; RV32IF-NEXT: fle.s s0, 
fa5, fs0 +; RV32IF-NEXT: neg s1, s0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixsfdi +; RV32IF-NEXT: lui a2, %hi(.LCPI13_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI13_0)(a2) +; RV32IF-NEXT: and a0, s1, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a2, a3 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: lui a5, 524288 +; RV32IF-NEXT: li a6, 1 ; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: lui a2, 524288 -; RV32IF-NEXT: beqz s0, .LBB13_4 +; RV32IF-NEXT: bne s0, a6, .LBB13_4 ; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: mv a4, a1 ; RV32IF-NEXT: .LBB13_4: -; RV32IF-NEXT: lui a1, %hi(.LCPI13_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI13_0)(a1) -; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: and a0, a2, a0 ; RV32IF-NEXT: beqz a3, .LBB13_6 ; RV32IF-NEXT: # %bb.5: -; RV32IF-NEXT: addi a2, a4, -1 +; RV32IF-NEXT: addi a4, a5, -1 ; RV32IF-NEXT: .LBB13_6: -; RV32IF-NEXT: feq.s a1, fs0, fs0 -; RV32IF-NEXT: neg a4, a1 -; RV32IF-NEXT: and a1, a4, a2 -; RV32IF-NEXT: neg a2, s0 -; RV32IF-NEXT: and a0, a2, a0 -; RV32IF-NEXT: neg a2, a3 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a0, a4, a0 +; RV32IF-NEXT: and a1, a2, a4 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -895,23 +904,24 @@ define i64 @test_round_si64(float %x) nounwind { ; RV32IZFINX-NEXT: lui a2, %hi(.LCPI13_0) ; RV32IZFINX-NEXT: lw a2, %lo(.LCPI13_0)(a2) ; RV32IZFINX-NEXT: and a0, s2, a0 -; RV32IZFINX-NEXT: flt.s a4, a2, s0 -; RV32IZFINX-NEXT: neg a2, a4 +; RV32IZFINX-NEXT: flt.s a3, a2, s0 +; RV32IZFINX-NEXT: neg a2, a3 ; RV32IZFINX-NEXT: or a0, a2, a0 ; RV32IZFINX-NEXT: feq.s a2, s0, s0 ; RV32IZFINX-NEXT: neg a2, a2 ; RV32IZFINX-NEXT: lui a5, 524288 -; 
RV32IZFINX-NEXT: lui a3, 524288 -; RV32IZFINX-NEXT: beqz s1, .LBB13_4 +; RV32IZFINX-NEXT: li a6, 1 +; RV32IZFINX-NEXT: lui a4, 524288 +; RV32IZFINX-NEXT: bne s1, a6, .LBB13_4 ; RV32IZFINX-NEXT: # %bb.3: -; RV32IZFINX-NEXT: mv a3, a1 +; RV32IZFINX-NEXT: mv a4, a1 ; RV32IZFINX-NEXT: .LBB13_4: ; RV32IZFINX-NEXT: and a0, a2, a0 -; RV32IZFINX-NEXT: beqz a4, .LBB13_6 +; RV32IZFINX-NEXT: beqz a3, .LBB13_6 ; RV32IZFINX-NEXT: # %bb.5: -; RV32IZFINX-NEXT: addi a3, a5, -1 +; RV32IZFINX-NEXT: addi a4, a5, -1 ; RV32IZFINX-NEXT: .LBB13_6: -; RV32IZFINX-NEXT: and a1, a2, a3 +; RV32IZFINX-NEXT: and a1, a2, a4 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -960,8 +970,7 @@ define i64 @test_round_ui64(float %x) nounwind { ; RV32IF: # %bb.0: ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -973,22 +982,22 @@ define i64 @test_round_ui64(float %x) nounwind { ; RV32IF-NEXT: fcvt.s.w fa5, a0, rmm ; RV32IF-NEXT: fsgnj.s fs0, fa5, fs0 ; RV32IF-NEXT: .LBB15_2: -; RV32IF-NEXT: fmv.w.x fa5, zero -; RV32IF-NEXT: fle.s a0, fa5, fs0 -; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI15_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI15_0)(a2) -; RV32IF-NEXT: and a0, s0, a0 -; RV32IF-NEXT: flt.s a2, fa5, fs0 -; RV32IF-NEXT: neg a2, a2 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a1, s0, a1 -; RV32IF-NEXT: or a1, a2, a1 +; RV32IF-NEXT: fmv.w.x fa5, zero +; RV32IF-NEXT: fle.s a2, fa5, fs0 +; RV32IF-NEXT: lui a3, %hi(.LCPI15_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI15_0)(a3) +; RV32IF-NEXT: xori a2, a2, 1 +; RV32IF-NEXT: addi a2, a2, -1 +; 
RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a3, a3 +; RV32IF-NEXT: or a0, a3, a0 +; RV32IF-NEXT: and a1, a2, a1 +; RV32IF-NEXT: or a1, a3, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -1006,7 +1015,6 @@ define i64 @test_round_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: addi sp, sp, -16 ; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: mv s0, a0 ; RV32IZFINX-NEXT: lui a0, 307200 ; RV32IZFINX-NEXT: fabs.s a1, s0 @@ -1017,21 +1025,21 @@ define i64 @test_round_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: fcvt.s.w a0, a0, rmm ; RV32IZFINX-NEXT: fsgnj.s s0, a0, s0 ; RV32IZFINX-NEXT: .LBB15_2: -; RV32IZFINX-NEXT: fle.s a0, zero, s0 -; RV32IZFINX-NEXT: neg s1, a0 ; RV32IZFINX-NEXT: mv a0, s0 ; RV32IZFINX-NEXT: call __fixunssfdi -; RV32IZFINX-NEXT: lui a2, %hi(.LCPI15_0) -; RV32IZFINX-NEXT: lw a2, %lo(.LCPI15_0)(a2) -; RV32IZFINX-NEXT: and a0, s1, a0 -; RV32IZFINX-NEXT: flt.s a2, a2, s0 -; RV32IZFINX-NEXT: neg a2, a2 -; RV32IZFINX-NEXT: or a0, a2, a0 -; RV32IZFINX-NEXT: and a1, s1, a1 -; RV32IZFINX-NEXT: or a1, a2, a1 +; RV32IZFINX-NEXT: fle.s a2, zero, s0 +; RV32IZFINX-NEXT: lui a3, %hi(.LCPI15_0) +; RV32IZFINX-NEXT: lw a3, %lo(.LCPI15_0)(a3) +; RV32IZFINX-NEXT: xori a2, a2, 1 +; RV32IZFINX-NEXT: addi a2, a2, -1 +; RV32IZFINX-NEXT: and a0, a2, a0 +; RV32IZFINX-NEXT: flt.s a3, a3, s0 +; RV32IZFINX-NEXT: neg a3, a3 +; RV32IZFINX-NEXT: or a0, a3, a0 +; RV32IZFINX-NEXT: and a1, a2, a1 +; RV32IZFINX-NEXT: or a1, a3, a1 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; 
RV32IZFINX-NEXT: addi sp, sp, 16 ; RV32IZFINX-NEXT: ret ; @@ -1077,7 +1085,8 @@ define i64 @test_roundeven_si64(float %x) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -1092,32 +1101,34 @@ define i64 @test_roundeven_si64(float %x) nounwind { ; RV32IF-NEXT: lui a0, 913408 ; RV32IF-NEXT: fmv.w.x fa5, a0 ; RV32IF-NEXT: fle.s s0, fa5, fs0 +; RV32IF-NEXT: neg s1, s0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixsfdi +; RV32IF-NEXT: lui a2, %hi(.LCPI17_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI17_0)(a2) +; RV32IF-NEXT: and a0, s1, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a2, a3 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: lui a5, 524288 +; RV32IF-NEXT: li a6, 1 ; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: lui a2, 524288 -; RV32IF-NEXT: beqz s0, .LBB17_4 +; RV32IF-NEXT: bne s0, a6, .LBB17_4 ; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: mv a4, a1 ; RV32IF-NEXT: .LBB17_4: -; RV32IF-NEXT: lui a1, %hi(.LCPI17_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI17_0)(a1) -; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: and a0, a2, a0 ; RV32IF-NEXT: beqz a3, .LBB17_6 ; RV32IF-NEXT: # %bb.5: -; RV32IF-NEXT: addi a2, a4, -1 +; RV32IF-NEXT: addi a4, a5, -1 ; RV32IF-NEXT: .LBB17_6: -; RV32IF-NEXT: feq.s a1, fs0, fs0 -; RV32IF-NEXT: neg a4, a1 -; RV32IF-NEXT: and a1, a4, a2 -; RV32IF-NEXT: neg a2, s0 -; RV32IF-NEXT: and a0, a2, a0 -; RV32IF-NEXT: neg a2, a3 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a0, a4, a0 +; RV32IF-NEXT: and a1, a2, a4 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload 
-; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -1155,23 +1166,24 @@ define i64 @test_roundeven_si64(float %x) nounwind { ; RV32IZFINX-NEXT: lui a2, %hi(.LCPI17_0) ; RV32IZFINX-NEXT: lw a2, %lo(.LCPI17_0)(a2) ; RV32IZFINX-NEXT: and a0, s2, a0 -; RV32IZFINX-NEXT: flt.s a4, a2, s0 -; RV32IZFINX-NEXT: neg a2, a4 +; RV32IZFINX-NEXT: flt.s a3, a2, s0 +; RV32IZFINX-NEXT: neg a2, a3 ; RV32IZFINX-NEXT: or a0, a2, a0 ; RV32IZFINX-NEXT: feq.s a2, s0, s0 ; RV32IZFINX-NEXT: neg a2, a2 ; RV32IZFINX-NEXT: lui a5, 524288 -; RV32IZFINX-NEXT: lui a3, 524288 -; RV32IZFINX-NEXT: beqz s1, .LBB17_4 +; RV32IZFINX-NEXT: li a6, 1 +; RV32IZFINX-NEXT: lui a4, 524288 +; RV32IZFINX-NEXT: bne s1, a6, .LBB17_4 ; RV32IZFINX-NEXT: # %bb.3: -; RV32IZFINX-NEXT: mv a3, a1 +; RV32IZFINX-NEXT: mv a4, a1 ; RV32IZFINX-NEXT: .LBB17_4: ; RV32IZFINX-NEXT: and a0, a2, a0 -; RV32IZFINX-NEXT: beqz a4, .LBB17_6 +; RV32IZFINX-NEXT: beqz a3, .LBB17_6 ; RV32IZFINX-NEXT: # %bb.5: -; RV32IZFINX-NEXT: addi a3, a5, -1 +; RV32IZFINX-NEXT: addi a4, a5, -1 ; RV32IZFINX-NEXT: .LBB17_6: -; RV32IZFINX-NEXT: and a1, a2, a3 +; RV32IZFINX-NEXT: and a1, a2, a4 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -1220,8 +1232,7 @@ define i64 @test_roundeven_ui64(float %x) nounwind { ; RV32IF: # %bb.0: ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -1233,22 +1244,22 @@ define i64 @test_roundeven_ui64(float %x) nounwind { ; RV32IF-NEXT: fcvt.s.w fa5, a0, rne ; 
RV32IF-NEXT: fsgnj.s fs0, fa5, fs0 ; RV32IF-NEXT: .LBB19_2: -; RV32IF-NEXT: fmv.w.x fa5, zero -; RV32IF-NEXT: fle.s a0, fa5, fs0 -; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI19_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI19_0)(a2) -; RV32IF-NEXT: and a0, s0, a0 -; RV32IF-NEXT: flt.s a2, fa5, fs0 -; RV32IF-NEXT: neg a2, a2 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a1, s0, a1 -; RV32IF-NEXT: or a1, a2, a1 +; RV32IF-NEXT: fmv.w.x fa5, zero +; RV32IF-NEXT: fle.s a2, fa5, fs0 +; RV32IF-NEXT: lui a3, %hi(.LCPI19_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI19_0)(a3) +; RV32IF-NEXT: xori a2, a2, 1 +; RV32IF-NEXT: addi a2, a2, -1 +; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a3, a3 +; RV32IF-NEXT: or a0, a3, a0 +; RV32IF-NEXT: and a1, a2, a1 +; RV32IF-NEXT: or a1, a3, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -1266,7 +1277,6 @@ define i64 @test_roundeven_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: addi sp, sp, -16 ; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: mv s0, a0 ; RV32IZFINX-NEXT: lui a0, 307200 ; RV32IZFINX-NEXT: fabs.s a1, s0 @@ -1277,21 +1287,21 @@ define i64 @test_roundeven_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: fcvt.s.w a0, a0, rne ; RV32IZFINX-NEXT: fsgnj.s s0, a0, s0 ; RV32IZFINX-NEXT: .LBB19_2: -; RV32IZFINX-NEXT: fle.s a0, zero, s0 -; RV32IZFINX-NEXT: neg s1, a0 ; RV32IZFINX-NEXT: mv a0, s0 ; RV32IZFINX-NEXT: call __fixunssfdi -; RV32IZFINX-NEXT: lui a2, %hi(.LCPI19_0) -; RV32IZFINX-NEXT: lw a2, %lo(.LCPI19_0)(a2) -; RV32IZFINX-NEXT: and a0, s1, a0 -; RV32IZFINX-NEXT: flt.s a2, a2, 
s0 -; RV32IZFINX-NEXT: neg a2, a2 -; RV32IZFINX-NEXT: or a0, a2, a0 -; RV32IZFINX-NEXT: and a1, s1, a1 -; RV32IZFINX-NEXT: or a1, a2, a1 +; RV32IZFINX-NEXT: fle.s a2, zero, s0 +; RV32IZFINX-NEXT: lui a3, %hi(.LCPI19_0) +; RV32IZFINX-NEXT: lw a3, %lo(.LCPI19_0)(a3) +; RV32IZFINX-NEXT: xori a2, a2, 1 +; RV32IZFINX-NEXT: addi a2, a2, -1 +; RV32IZFINX-NEXT: and a0, a2, a0 +; RV32IZFINX-NEXT: flt.s a3, a3, s0 +; RV32IZFINX-NEXT: neg a3, a3 +; RV32IZFINX-NEXT: or a0, a3, a0 +; RV32IZFINX-NEXT: and a1, a2, a1 +; RV32IZFINX-NEXT: or a1, a3, a1 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: addi sp, sp, 16 ; RV32IZFINX-NEXT: ret ; @@ -1337,7 +1347,8 @@ define i64 @test_rint_si64(float %x) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -1352,32 +1363,34 @@ define i64 @test_rint_si64(float %x) nounwind { ; RV32IF-NEXT: lui a0, 913408 ; RV32IF-NEXT: fmv.w.x fa5, a0 ; RV32IF-NEXT: fle.s s0, fa5, fs0 +; RV32IF-NEXT: neg s1, s0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixsfdi +; RV32IF-NEXT: lui a2, %hi(.LCPI21_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI21_0)(a2) +; RV32IF-NEXT: and a0, s1, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a2, a3 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: lui a5, 524288 +; RV32IF-NEXT: li a6, 1 ; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: lui a2, 524288 -; RV32IF-NEXT: beqz s0, .LBB21_4 +; RV32IF-NEXT: bne s0, a6, .LBB21_4 ; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: mv 
a4, a1 ; RV32IF-NEXT: .LBB21_4: -; RV32IF-NEXT: lui a1, %hi(.LCPI21_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI21_0)(a1) -; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: and a0, a2, a0 ; RV32IF-NEXT: beqz a3, .LBB21_6 ; RV32IF-NEXT: # %bb.5: -; RV32IF-NEXT: addi a2, a4, -1 +; RV32IF-NEXT: addi a4, a5, -1 ; RV32IF-NEXT: .LBB21_6: -; RV32IF-NEXT: feq.s a1, fs0, fs0 -; RV32IF-NEXT: neg a4, a1 -; RV32IF-NEXT: and a1, a4, a2 -; RV32IF-NEXT: neg a2, s0 -; RV32IF-NEXT: and a0, a2, a0 -; RV32IF-NEXT: neg a2, a3 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a0, a4, a0 +; RV32IF-NEXT: and a1, a2, a4 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -1415,23 +1428,24 @@ define i64 @test_rint_si64(float %x) nounwind { ; RV32IZFINX-NEXT: lui a2, %hi(.LCPI21_0) ; RV32IZFINX-NEXT: lw a2, %lo(.LCPI21_0)(a2) ; RV32IZFINX-NEXT: and a0, s2, a0 -; RV32IZFINX-NEXT: flt.s a4, a2, s0 -; RV32IZFINX-NEXT: neg a2, a4 +; RV32IZFINX-NEXT: flt.s a3, a2, s0 +; RV32IZFINX-NEXT: neg a2, a3 ; RV32IZFINX-NEXT: or a0, a2, a0 ; RV32IZFINX-NEXT: feq.s a2, s0, s0 ; RV32IZFINX-NEXT: neg a2, a2 ; RV32IZFINX-NEXT: lui a5, 524288 -; RV32IZFINX-NEXT: lui a3, 524288 -; RV32IZFINX-NEXT: beqz s1, .LBB21_4 +; RV32IZFINX-NEXT: li a6, 1 +; RV32IZFINX-NEXT: lui a4, 524288 +; RV32IZFINX-NEXT: bne s1, a6, .LBB21_4 ; RV32IZFINX-NEXT: # %bb.3: -; RV32IZFINX-NEXT: mv a3, a1 +; RV32IZFINX-NEXT: mv a4, a1 ; RV32IZFINX-NEXT: .LBB21_4: ; RV32IZFINX-NEXT: and a0, a2, a0 -; RV32IZFINX-NEXT: beqz a4, .LBB21_6 +; RV32IZFINX-NEXT: beqz a3, .LBB21_6 ; RV32IZFINX-NEXT: # %bb.5: -; RV32IZFINX-NEXT: addi a3, a5, -1 +; RV32IZFINX-NEXT: addi a4, a5, -1 ; RV32IZFINX-NEXT: .LBB21_6: -; RV32IZFINX-NEXT: and a1, a2, a3 +; RV32IZFINX-NEXT: and a1, a2, a4 ; RV32IZFINX-NEXT: lw 
ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -1480,8 +1494,7 @@ define i64 @test_rint_ui64(float %x) nounwind { ; RV32IF: # %bb.0: ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -1493,22 +1506,22 @@ define i64 @test_rint_ui64(float %x) nounwind { ; RV32IF-NEXT: fcvt.s.w fa5, a0 ; RV32IF-NEXT: fsgnj.s fs0, fa5, fs0 ; RV32IF-NEXT: .LBB23_2: -; RV32IF-NEXT: fmv.w.x fa5, zero -; RV32IF-NEXT: fle.s a0, fa5, fs0 -; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI23_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI23_0)(a2) -; RV32IF-NEXT: and a0, s0, a0 -; RV32IF-NEXT: flt.s a2, fa5, fs0 -; RV32IF-NEXT: neg a2, a2 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a1, s0, a1 -; RV32IF-NEXT: or a1, a2, a1 +; RV32IF-NEXT: fmv.w.x fa5, zero +; RV32IF-NEXT: fle.s a2, fa5, fs0 +; RV32IF-NEXT: lui a3, %hi(.LCPI23_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI23_0)(a3) +; RV32IF-NEXT: xori a2, a2, 1 +; RV32IF-NEXT: addi a2, a2, -1 +; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a3, a3 +; RV32IF-NEXT: or a0, a3, a0 +; RV32IF-NEXT: and a1, a2, a1 +; RV32IF-NEXT: or a1, a3, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -1526,7 +1539,6 @@ define i64 @test_rint_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: addi sp, sp, -16 ; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: 
sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: mv s0, a0 ; RV32IZFINX-NEXT: lui a0, 307200 ; RV32IZFINX-NEXT: fabs.s a1, s0 @@ -1537,21 +1549,21 @@ define i64 @test_rint_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: fcvt.s.w a0, a0 ; RV32IZFINX-NEXT: fsgnj.s s0, a0, s0 ; RV32IZFINX-NEXT: .LBB23_2: -; RV32IZFINX-NEXT: fle.s a0, zero, s0 -; RV32IZFINX-NEXT: neg s1, a0 ; RV32IZFINX-NEXT: mv a0, s0 ; RV32IZFINX-NEXT: call __fixunssfdi -; RV32IZFINX-NEXT: lui a2, %hi(.LCPI23_0) -; RV32IZFINX-NEXT: lw a2, %lo(.LCPI23_0)(a2) -; RV32IZFINX-NEXT: and a0, s1, a0 -; RV32IZFINX-NEXT: flt.s a2, a2, s0 -; RV32IZFINX-NEXT: neg a2, a2 -; RV32IZFINX-NEXT: or a0, a2, a0 -; RV32IZFINX-NEXT: and a1, s1, a1 -; RV32IZFINX-NEXT: or a1, a2, a1 +; RV32IZFINX-NEXT: fle.s a2, zero, s0 +; RV32IZFINX-NEXT: lui a3, %hi(.LCPI23_0) +; RV32IZFINX-NEXT: lw a3, %lo(.LCPI23_0)(a3) +; RV32IZFINX-NEXT: xori a2, a2, 1 +; RV32IZFINX-NEXT: addi a2, a2, -1 +; RV32IZFINX-NEXT: and a0, a2, a0 +; RV32IZFINX-NEXT: flt.s a3, a3, s0 +; RV32IZFINX-NEXT: neg a3, a3 +; RV32IZFINX-NEXT: or a0, a3, a0 +; RV32IZFINX-NEXT: and a1, a2, a1 +; RV32IZFINX-NEXT: or a1, a3, a1 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: addi sp, sp, 16 ; RV32IZFINX-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/forced-atomics.ll b/llvm/test/CodeGen/RISCV/forced-atomics.ll index f6a53a9d76dd35..659e0748dd5325 100644 --- a/llvm/test/CodeGen/RISCV/forced-atomics.ll +++ b/llvm/test/CodeGen/RISCV/forced-atomics.ll @@ -3672,7 +3672,8 @@ define i64 @rmw64_umin_seq_cst(ptr %p) nounwind { ; RV32-NEXT: .LBB52_2: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32-NEXT: sltiu a0, a4, 2 -; RV32-NEXT: seqz a2, a1 +; RV32-NEXT: snez a2, a1 +; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: and a0, a2, a0 ; RV32-NEXT: mv a2, a4 ; RV32-NEXT: 
bnez a0, .LBB52_1 diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll index 9e93ad0043a7e0..630d16e7c888b9 100644 --- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll @@ -115,7 +115,8 @@ define i32 @utest_f64i32(double %x) { ; RV32IF-NEXT: .cfi_offset ra, -4 ; RV32IF-NEXT: call __fixunsdfdi ; RV32IF-NEXT: sltiu a2, a0, -1 -; RV32IF-NEXT: seqz a1, a1 +; RV32IF-NEXT: snez a1, a1 +; RV32IF-NEXT: addi a1, a1, -1 ; RV32IF-NEXT: and a1, a1, a2 ; RV32IF-NEXT: addi a1, a1, -1 ; RV32IF-NEXT: or a0, a1, a0 @@ -430,7 +431,8 @@ define i32 @utesth_f16i32(half %x) { ; RV32-NEXT: call __extendhfsf2 ; RV32-NEXT: call __fixunssfdi ; RV32-NEXT: sltiu a2, a0, -1 -; RV32-NEXT: seqz a1, a1 +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: or a0, a1, a0 @@ -1043,8 +1045,8 @@ define i64 @stest_f64i64(double %x) { ; RV32IF-NEXT: mv a1, a0 ; RV32IF-NEXT: addi a0, sp, 8 ; RV32IF-NEXT: call __fixdfti -; RV32IF-NEXT: lw a0, 20(sp) -; RV32IF-NEXT: lw a2, 16(sp) +; RV32IF-NEXT: lw a0, 16(sp) +; RV32IF-NEXT: lw a2, 20(sp) ; RV32IF-NEXT: lw a1, 12(sp) ; RV32IF-NEXT: lw a4, 8(sp) ; RV32IF-NEXT: lui a3, 524288 @@ -1052,25 +1054,25 @@ define i64 @stest_f64i64(double %x) { ; RV32IF-NEXT: beq a1, a5, .LBB18_2 ; RV32IF-NEXT: # %bb.1: # %entry ; RV32IF-NEXT: sltu a6, a1, a5 -; RV32IF-NEXT: or a7, a2, a0 +; RV32IF-NEXT: or a7, a0, a2 ; RV32IF-NEXT: bnez a7, .LBB18_3 ; RV32IF-NEXT: j .LBB18_4 ; RV32IF-NEXT: .LBB18_2: ; RV32IF-NEXT: sltiu a6, a4, -1 -; RV32IF-NEXT: or a7, a2, a0 +; RV32IF-NEXT: or a7, a0, a2 ; RV32IF-NEXT: beqz a7, .LBB18_4 ; RV32IF-NEXT: .LBB18_3: # %entry -; RV32IF-NEXT: slti a6, a0, 0 +; RV32IF-NEXT: slti a6, a2, 0 ; RV32IF-NEXT: .LBB18_4: # %entry -; RV32IF-NEXT: neg a7, a6 -; RV32IF-NEXT: addi t0, a6, -1 +; RV32IF-NEXT: addi a7, a6, -1 +; RV32IF-NEXT: neg t0, a6 ; RV32IF-NEXT: bnez a6, .LBB18_6 ; RV32IF-NEXT: # %bb.5: # %entry ; 
RV32IF-NEXT: mv a1, a5 ; RV32IF-NEXT: .LBB18_6: # %entry -; RV32IF-NEXT: or a4, t0, a4 -; RV32IF-NEXT: and a5, a7, a0 -; RV32IF-NEXT: and a2, a7, a2 +; RV32IF-NEXT: or a4, a7, a4 +; RV32IF-NEXT: and a2, t0, a2 +; RV32IF-NEXT: and a5, t0, a0 ; RV32IF-NEXT: beq a1, a3, .LBB18_8 ; RV32IF-NEXT: # %bb.7: # %entry ; RV32IF-NEXT: sltu a0, a3, a1 @@ -1078,11 +1080,11 @@ define i64 @stest_f64i64(double %x) { ; RV32IF-NEXT: .LBB18_8: ; RV32IF-NEXT: snez a0, a4 ; RV32IF-NEXT: .LBB18_9: # %entry -; RV32IF-NEXT: and a2, a2, a5 +; RV32IF-NEXT: and a5, a5, a2 ; RV32IF-NEXT: li a3, -1 -; RV32IF-NEXT: beq a2, a3, .LBB18_11 +; RV32IF-NEXT: beq a5, a3, .LBB18_11 ; RV32IF-NEXT: # %bb.10: # %entry -; RV32IF-NEXT: slti a0, a5, 0 +; RV32IF-NEXT: slti a0, a2, 0 ; RV32IF-NEXT: xori a0, a0, 1 ; RV32IF-NEXT: .LBB18_11: # %entry ; RV32IF-NEXT: bnez a0, .LBB18_13 @@ -1142,8 +1144,8 @@ define i64 @stest_f64i64(double %x) { ; RV32IFD-NEXT: .cfi_offset ra, -4 ; RV32IFD-NEXT: addi a0, sp, 8 ; RV32IFD-NEXT: call __fixdfti -; RV32IFD-NEXT: lw a0, 20(sp) -; RV32IFD-NEXT: lw a2, 16(sp) +; RV32IFD-NEXT: lw a0, 16(sp) +; RV32IFD-NEXT: lw a2, 20(sp) ; RV32IFD-NEXT: lw a1, 12(sp) ; RV32IFD-NEXT: lw a4, 8(sp) ; RV32IFD-NEXT: lui a3, 524288 @@ -1151,25 +1153,25 @@ define i64 @stest_f64i64(double %x) { ; RV32IFD-NEXT: beq a1, a5, .LBB18_2 ; RV32IFD-NEXT: # %bb.1: # %entry ; RV32IFD-NEXT: sltu a6, a1, a5 -; RV32IFD-NEXT: or a7, a2, a0 +; RV32IFD-NEXT: or a7, a0, a2 ; RV32IFD-NEXT: bnez a7, .LBB18_3 ; RV32IFD-NEXT: j .LBB18_4 ; RV32IFD-NEXT: .LBB18_2: ; RV32IFD-NEXT: sltiu a6, a4, -1 -; RV32IFD-NEXT: or a7, a2, a0 +; RV32IFD-NEXT: or a7, a0, a2 ; RV32IFD-NEXT: beqz a7, .LBB18_4 ; RV32IFD-NEXT: .LBB18_3: # %entry -; RV32IFD-NEXT: slti a6, a0, 0 +; RV32IFD-NEXT: slti a6, a2, 0 ; RV32IFD-NEXT: .LBB18_4: # %entry -; RV32IFD-NEXT: neg a7, a6 -; RV32IFD-NEXT: addi t0, a6, -1 +; RV32IFD-NEXT: addi a7, a6, -1 +; RV32IFD-NEXT: neg t0, a6 ; RV32IFD-NEXT: bnez a6, .LBB18_6 ; RV32IFD-NEXT: # %bb.5: # %entry ; 
RV32IFD-NEXT: mv a1, a5 ; RV32IFD-NEXT: .LBB18_6: # %entry -; RV32IFD-NEXT: or a4, t0, a4 -; RV32IFD-NEXT: and a5, a7, a0 -; RV32IFD-NEXT: and a2, a7, a2 +; RV32IFD-NEXT: or a4, a7, a4 +; RV32IFD-NEXT: and a2, t0, a2 +; RV32IFD-NEXT: and a5, t0, a0 ; RV32IFD-NEXT: beq a1, a3, .LBB18_8 ; RV32IFD-NEXT: # %bb.7: # %entry ; RV32IFD-NEXT: sltu a0, a3, a1 @@ -1177,11 +1179,11 @@ define i64 @stest_f64i64(double %x) { ; RV32IFD-NEXT: .LBB18_8: ; RV32IFD-NEXT: snez a0, a4 ; RV32IFD-NEXT: .LBB18_9: # %entry -; RV32IFD-NEXT: and a2, a2, a5 +; RV32IFD-NEXT: and a5, a5, a2 ; RV32IFD-NEXT: li a3, -1 -; RV32IFD-NEXT: beq a2, a3, .LBB18_11 +; RV32IFD-NEXT: beq a5, a3, .LBB18_11 ; RV32IFD-NEXT: # %bb.10: # %entry -; RV32IFD-NEXT: slti a0, a5, 0 +; RV32IFD-NEXT: slti a0, a2, 0 ; RV32IFD-NEXT: xori a0, a0, 1 ; RV32IFD-NEXT: .LBB18_11: # %entry ; RV32IFD-NEXT: bnez a0, .LBB18_13 @@ -1227,8 +1229,10 @@ define i64 @utest_f64i64(double %x) { ; RV32IF-NEXT: lw a1, 20(sp) ; RV32IF-NEXT: lw a2, 12(sp) ; RV32IF-NEXT: lw a3, 8(sp) -; RV32IF-NEXT: or a4, a1, a0 -; RV32IF-NEXT: seqz a4, a4 +; RV32IF-NEXT: seqz a4, a0 +; RV32IF-NEXT: snez a5, a1 +; RV32IF-NEXT: addi a5, a5, -1 +; RV32IF-NEXT: and a4, a5, a4 ; RV32IF-NEXT: xori a0, a0, 1 ; RV32IF-NEXT: or a0, a0, a1 ; RV32IF-NEXT: seqz a0, a0 @@ -1267,8 +1271,10 @@ define i64 @utest_f64i64(double %x) { ; RV32IFD-NEXT: lw a1, 20(sp) ; RV32IFD-NEXT: lw a2, 12(sp) ; RV32IFD-NEXT: lw a3, 8(sp) -; RV32IFD-NEXT: or a4, a1, a0 -; RV32IFD-NEXT: seqz a4, a4 +; RV32IFD-NEXT: seqz a4, a0 +; RV32IFD-NEXT: snez a5, a1 +; RV32IFD-NEXT: addi a5, a5, -1 +; RV32IFD-NEXT: and a4, a5, a4 ; RV32IFD-NEXT: xori a0, a0, 1 ; RV32IFD-NEXT: or a0, a0, a1 ; RV32IFD-NEXT: seqz a0, a0 @@ -1318,8 +1324,8 @@ define i64 @ustest_f64i64(double %x) { ; RV32IF-NEXT: # %bb.4: # %entry ; RV32IF-NEXT: li a0, 1 ; RV32IF-NEXT: .LBB20_5: # %entry -; RV32IF-NEXT: lw a3, 8(sp) -; RV32IF-NEXT: lw a4, 12(sp) +; RV32IF-NEXT: lw a4, 8(sp) +; RV32IF-NEXT: lw a3, 12(sp) ; RV32IF-NEXT: and a5, 
a2, a1 ; RV32IF-NEXT: beqz a5, .LBB20_7 ; RV32IF-NEXT: # %bb.6: # %entry @@ -1328,17 +1334,18 @@ define i64 @ustest_f64i64(double %x) { ; RV32IF-NEXT: .LBB20_7: ; RV32IF-NEXT: snez a1, a0 ; RV32IF-NEXT: .LBB20_8: # %entry -; RV32IF-NEXT: and a4, a2, a4 +; RV32IF-NEXT: and a3, a2, a3 ; RV32IF-NEXT: or a0, a0, a5 -; RV32IF-NEXT: and a2, a2, a3 +; RV32IF-NEXT: and a2, a2, a4 ; RV32IF-NEXT: bnez a0, .LBB20_10 ; RV32IF-NEXT: # %bb.9: -; RV32IF-NEXT: or a0, a2, a4 -; RV32IF-NEXT: snez a1, a0 +; RV32IF-NEXT: snez a0, a3 +; RV32IF-NEXT: snez a1, a2 +; RV32IF-NEXT: or a1, a1, a0 ; RV32IF-NEXT: .LBB20_10: # %entry ; RV32IF-NEXT: neg a1, a1 ; RV32IF-NEXT: and a0, a1, a2 -; RV32IF-NEXT: and a1, a1, a4 +; RV32IF-NEXT: and a1, a1, a3 ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 32 ; RV32IF-NEXT: ret @@ -1397,8 +1404,8 @@ define i64 @ustest_f64i64(double %x) { ; RV32IFD-NEXT: # %bb.4: # %entry ; RV32IFD-NEXT: li a0, 1 ; RV32IFD-NEXT: .LBB20_5: # %entry -; RV32IFD-NEXT: lw a3, 8(sp) -; RV32IFD-NEXT: lw a4, 12(sp) +; RV32IFD-NEXT: lw a4, 8(sp) +; RV32IFD-NEXT: lw a3, 12(sp) ; RV32IFD-NEXT: and a5, a2, a1 ; RV32IFD-NEXT: beqz a5, .LBB20_7 ; RV32IFD-NEXT: # %bb.6: # %entry @@ -1407,17 +1414,18 @@ define i64 @ustest_f64i64(double %x) { ; RV32IFD-NEXT: .LBB20_7: ; RV32IFD-NEXT: snez a1, a0 ; RV32IFD-NEXT: .LBB20_8: # %entry -; RV32IFD-NEXT: and a4, a2, a4 +; RV32IFD-NEXT: and a3, a2, a3 ; RV32IFD-NEXT: or a0, a0, a5 -; RV32IFD-NEXT: and a2, a2, a3 +; RV32IFD-NEXT: and a2, a2, a4 ; RV32IFD-NEXT: bnez a0, .LBB20_10 ; RV32IFD-NEXT: # %bb.9: -; RV32IFD-NEXT: or a0, a2, a4 -; RV32IFD-NEXT: snez a1, a0 +; RV32IFD-NEXT: snez a0, a3 +; RV32IFD-NEXT: snez a1, a2 +; RV32IFD-NEXT: or a1, a1, a0 ; RV32IFD-NEXT: .LBB20_10: # %entry ; RV32IFD-NEXT: neg a1, a1 ; RV32IFD-NEXT: and a0, a1, a2 -; RV32IFD-NEXT: and a1, a1, a4 +; RV32IFD-NEXT: and a1, a1, a3 ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret @@ 
-1440,8 +1448,8 @@ define i64 @stest_f32i64(float %x) { ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti -; RV32-NEXT: lw a0, 20(sp) -; RV32-NEXT: lw a2, 16(sp) +; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: lw a2, 20(sp) ; RV32-NEXT: lw a1, 12(sp) ; RV32-NEXT: lw a4, 8(sp) ; RV32-NEXT: lui a3, 524288 @@ -1449,25 +1457,25 @@ define i64 @stest_f32i64(float %x) { ; RV32-NEXT: beq a1, a5, .LBB21_2 ; RV32-NEXT: # %bb.1: # %entry ; RV32-NEXT: sltu a6, a1, a5 -; RV32-NEXT: or a7, a2, a0 +; RV32-NEXT: or a7, a0, a2 ; RV32-NEXT: bnez a7, .LBB21_3 ; RV32-NEXT: j .LBB21_4 ; RV32-NEXT: .LBB21_2: ; RV32-NEXT: sltiu a6, a4, -1 -; RV32-NEXT: or a7, a2, a0 +; RV32-NEXT: or a7, a0, a2 ; RV32-NEXT: beqz a7, .LBB21_4 ; RV32-NEXT: .LBB21_3: # %entry -; RV32-NEXT: slti a6, a0, 0 +; RV32-NEXT: slti a6, a2, 0 ; RV32-NEXT: .LBB21_4: # %entry -; RV32-NEXT: neg a7, a6 -; RV32-NEXT: addi t0, a6, -1 +; RV32-NEXT: addi a7, a6, -1 +; RV32-NEXT: neg t0, a6 ; RV32-NEXT: bnez a6, .LBB21_6 ; RV32-NEXT: # %bb.5: # %entry ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB21_6: # %entry -; RV32-NEXT: or a4, t0, a4 -; RV32-NEXT: and a5, a7, a0 -; RV32-NEXT: and a2, a7, a2 +; RV32-NEXT: or a4, a7, a4 +; RV32-NEXT: and a2, t0, a2 +; RV32-NEXT: and a5, t0, a0 ; RV32-NEXT: beq a1, a3, .LBB21_8 ; RV32-NEXT: # %bb.7: # %entry ; RV32-NEXT: sltu a0, a3, a1 @@ -1475,11 +1483,11 @@ define i64 @stest_f32i64(float %x) { ; RV32-NEXT: .LBB21_8: ; RV32-NEXT: snez a0, a4 ; RV32-NEXT: .LBB21_9: # %entry -; RV32-NEXT: and a2, a2, a5 +; RV32-NEXT: and a5, a5, a2 ; RV32-NEXT: li a3, -1 -; RV32-NEXT: beq a2, a3, .LBB21_11 +; RV32-NEXT: beq a5, a3, .LBB21_11 ; RV32-NEXT: # %bb.10: # %entry -; RV32-NEXT: slti a0, a5, 0 +; RV32-NEXT: slti a0, a2, 0 ; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: .LBB21_11: # %entry ; RV32-NEXT: bnez a0, .LBB21_13 @@ -1523,8 +1531,10 @@ define i64 @utest_f32i64(float %x) { ; RV32-NEXT: lw a1, 20(sp) ; RV32-NEXT: lw a2, 12(sp) ; RV32-NEXT: lw a3, 8(sp) -; RV32-NEXT: or a4, a1, 
a0 -; RV32-NEXT: seqz a4, a4 +; RV32-NEXT: seqz a4, a0 +; RV32-NEXT: snez a5, a1 +; RV32-NEXT: addi a5, a5, -1 +; RV32-NEXT: and a4, a5, a4 ; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: or a0, a0, a1 ; RV32-NEXT: seqz a0, a0 @@ -1586,8 +1596,8 @@ define i64 @ustest_f32i64(float %x) { ; RV32-NEXT: # %bb.4: # %entry ; RV32-NEXT: li a0, 1 ; RV32-NEXT: .LBB23_5: # %entry -; RV32-NEXT: lw a3, 8(sp) -; RV32-NEXT: lw a4, 12(sp) +; RV32-NEXT: lw a4, 8(sp) +; RV32-NEXT: lw a3, 12(sp) ; RV32-NEXT: and a5, a2, a1 ; RV32-NEXT: beqz a5, .LBB23_7 ; RV32-NEXT: # %bb.6: # %entry @@ -1596,17 +1606,18 @@ define i64 @ustest_f32i64(float %x) { ; RV32-NEXT: .LBB23_7: ; RV32-NEXT: snez a1, a0 ; RV32-NEXT: .LBB23_8: # %entry -; RV32-NEXT: and a4, a2, a4 +; RV32-NEXT: and a3, a2, a3 ; RV32-NEXT: or a0, a0, a5 -; RV32-NEXT: and a2, a2, a3 +; RV32-NEXT: and a2, a2, a4 ; RV32-NEXT: bnez a0, .LBB23_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: or a0, a2, a4 -; RV32-NEXT: snez a1, a0 +; RV32-NEXT: snez a0, a3 +; RV32-NEXT: snez a1, a2 +; RV32-NEXT: or a1, a1, a0 ; RV32-NEXT: .LBB23_10: # %entry ; RV32-NEXT: neg a1, a1 ; RV32-NEXT: and a0, a1, a2 -; RV32-NEXT: and a1, a1, a4 +; RV32-NEXT: and a1, a1, a3 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret @@ -1657,8 +1668,8 @@ define i64 @stest_f16i64(half %x) { ; RV32-NEXT: call __extendhfsf2 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti -; RV32-NEXT: lw a0, 20(sp) -; RV32-NEXT: lw a2, 16(sp) +; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: lw a2, 20(sp) ; RV32-NEXT: lw a1, 12(sp) ; RV32-NEXT: lw a4, 8(sp) ; RV32-NEXT: lui a3, 524288 @@ -1666,25 +1677,25 @@ define i64 @stest_f16i64(half %x) { ; RV32-NEXT: beq a1, a5, .LBB24_2 ; RV32-NEXT: # %bb.1: # %entry ; RV32-NEXT: sltu a6, a1, a5 -; RV32-NEXT: or a7, a2, a0 +; RV32-NEXT: or a7, a0, a2 ; RV32-NEXT: bnez a7, .LBB24_3 ; RV32-NEXT: j .LBB24_4 ; RV32-NEXT: .LBB24_2: ; RV32-NEXT: sltiu a6, a4, -1 -; RV32-NEXT: or a7, a2, a0 +; RV32-NEXT: or a7, a0, a2 ; 
RV32-NEXT: beqz a7, .LBB24_4 ; RV32-NEXT: .LBB24_3: # %entry -; RV32-NEXT: slti a6, a0, 0 +; RV32-NEXT: slti a6, a2, 0 ; RV32-NEXT: .LBB24_4: # %entry -; RV32-NEXT: neg a7, a6 -; RV32-NEXT: addi t0, a6, -1 +; RV32-NEXT: addi a7, a6, -1 +; RV32-NEXT: neg t0, a6 ; RV32-NEXT: bnez a6, .LBB24_6 ; RV32-NEXT: # %bb.5: # %entry ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB24_6: # %entry -; RV32-NEXT: or a4, t0, a4 -; RV32-NEXT: and a5, a7, a0 -; RV32-NEXT: and a2, a7, a2 +; RV32-NEXT: or a4, a7, a4 +; RV32-NEXT: and a2, t0, a2 +; RV32-NEXT: and a5, t0, a0 ; RV32-NEXT: beq a1, a3, .LBB24_8 ; RV32-NEXT: # %bb.7: # %entry ; RV32-NEXT: sltu a0, a3, a1 @@ -1692,11 +1703,11 @@ define i64 @stest_f16i64(half %x) { ; RV32-NEXT: .LBB24_8: ; RV32-NEXT: snez a0, a4 ; RV32-NEXT: .LBB24_9: # %entry -; RV32-NEXT: and a2, a2, a5 +; RV32-NEXT: and a5, a5, a2 ; RV32-NEXT: li a3, -1 -; RV32-NEXT: beq a2, a3, .LBB24_11 +; RV32-NEXT: beq a5, a3, .LBB24_11 ; RV32-NEXT: # %bb.10: # %entry -; RV32-NEXT: slti a0, a5, 0 +; RV32-NEXT: slti a0, a2, 0 ; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: .LBB24_11: # %entry ; RV32-NEXT: bnez a0, .LBB24_13 @@ -1772,8 +1783,10 @@ define i64 @utesth_f16i64(half %x) { ; RV32-NEXT: lw a1, 20(sp) ; RV32-NEXT: lw a2, 12(sp) ; RV32-NEXT: lw a3, 8(sp) -; RV32-NEXT: or a4, a1, a0 -; RV32-NEXT: seqz a4, a4 +; RV32-NEXT: seqz a4, a0 +; RV32-NEXT: snez a5, a1 +; RV32-NEXT: addi a5, a5, -1 +; RV32-NEXT: and a4, a5, a4 ; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: or a0, a0, a1 ; RV32-NEXT: seqz a0, a0 @@ -1837,8 +1850,8 @@ define i64 @ustest_f16i64(half %x) { ; RV32-NEXT: # %bb.4: # %entry ; RV32-NEXT: li a0, 1 ; RV32-NEXT: .LBB26_5: # %entry -; RV32-NEXT: lw a3, 8(sp) -; RV32-NEXT: lw a4, 12(sp) +; RV32-NEXT: lw a4, 8(sp) +; RV32-NEXT: lw a3, 12(sp) ; RV32-NEXT: and a5, a2, a1 ; RV32-NEXT: beqz a5, .LBB26_7 ; RV32-NEXT: # %bb.6: # %entry @@ -1847,17 +1860,18 @@ define i64 @ustest_f16i64(half %x) { ; RV32-NEXT: .LBB26_7: ; RV32-NEXT: snez a1, a0 ; RV32-NEXT: .LBB26_8: # %entry -; 
RV32-NEXT: and a4, a2, a4 +; RV32-NEXT: and a3, a2, a3 ; RV32-NEXT: or a0, a0, a5 -; RV32-NEXT: and a2, a2, a3 +; RV32-NEXT: and a2, a2, a4 ; RV32-NEXT: bnez a0, .LBB26_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: or a0, a2, a4 -; RV32-NEXT: snez a1, a0 +; RV32-NEXT: snez a0, a3 +; RV32-NEXT: snez a1, a2 +; RV32-NEXT: or a1, a1, a0 ; RV32-NEXT: .LBB26_10: # %entry ; RV32-NEXT: neg a1, a1 ; RV32-NEXT: and a0, a1, a2 -; RV32-NEXT: and a1, a1, a4 +; RV32-NEXT: and a1, a1, a3 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret @@ -2891,8 +2905,8 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32IF-NEXT: mv a1, a0 ; RV32IF-NEXT: addi a0, sp, 8 ; RV32IF-NEXT: call __fixdfti -; RV32IF-NEXT: lw a0, 20(sp) -; RV32IF-NEXT: lw a2, 16(sp) +; RV32IF-NEXT: lw a0, 16(sp) +; RV32IF-NEXT: lw a2, 20(sp) ; RV32IF-NEXT: lw a1, 12(sp) ; RV32IF-NEXT: lw a4, 8(sp) ; RV32IF-NEXT: lui a3, 524288 @@ -2900,25 +2914,25 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32IF-NEXT: beq a1, a5, .LBB45_2 ; RV32IF-NEXT: # %bb.1: # %entry ; RV32IF-NEXT: sltu a6, a1, a5 -; RV32IF-NEXT: or a7, a2, a0 +; RV32IF-NEXT: or a7, a0, a2 ; RV32IF-NEXT: bnez a7, .LBB45_3 ; RV32IF-NEXT: j .LBB45_4 ; RV32IF-NEXT: .LBB45_2: ; RV32IF-NEXT: sltiu a6, a4, -1 -; RV32IF-NEXT: or a7, a2, a0 +; RV32IF-NEXT: or a7, a0, a2 ; RV32IF-NEXT: beqz a7, .LBB45_4 ; RV32IF-NEXT: .LBB45_3: # %entry -; RV32IF-NEXT: slti a6, a0, 0 +; RV32IF-NEXT: slti a6, a2, 0 ; RV32IF-NEXT: .LBB45_4: # %entry -; RV32IF-NEXT: neg a7, a6 -; RV32IF-NEXT: addi t0, a6, -1 +; RV32IF-NEXT: addi a7, a6, -1 +; RV32IF-NEXT: neg t0, a6 ; RV32IF-NEXT: bnez a6, .LBB45_6 ; RV32IF-NEXT: # %bb.5: # %entry ; RV32IF-NEXT: mv a1, a5 ; RV32IF-NEXT: .LBB45_6: # %entry -; RV32IF-NEXT: or a4, t0, a4 -; RV32IF-NEXT: and a5, a7, a0 -; RV32IF-NEXT: and a2, a7, a2 +; RV32IF-NEXT: or a4, a7, a4 +; RV32IF-NEXT: and a2, t0, a2 +; RV32IF-NEXT: and a5, t0, a0 ; RV32IF-NEXT: beq a1, a3, .LBB45_8 ; RV32IF-NEXT: # %bb.7: # %entry ; RV32IF-NEXT: 
sltu a0, a3, a1 @@ -2926,11 +2940,11 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32IF-NEXT: .LBB45_8: ; RV32IF-NEXT: snez a0, a4 ; RV32IF-NEXT: .LBB45_9: # %entry -; RV32IF-NEXT: and a2, a2, a5 +; RV32IF-NEXT: and a5, a5, a2 ; RV32IF-NEXT: li a3, -1 -; RV32IF-NEXT: beq a2, a3, .LBB45_11 +; RV32IF-NEXT: beq a5, a3, .LBB45_11 ; RV32IF-NEXT: # %bb.10: # %entry -; RV32IF-NEXT: slti a0, a5, 0 +; RV32IF-NEXT: slti a0, a2, 0 ; RV32IF-NEXT: xori a0, a0, 1 ; RV32IF-NEXT: .LBB45_11: # %entry ; RV32IF-NEXT: bnez a0, .LBB45_13 @@ -2990,8 +3004,8 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32IFD-NEXT: .cfi_offset ra, -4 ; RV32IFD-NEXT: addi a0, sp, 8 ; RV32IFD-NEXT: call __fixdfti -; RV32IFD-NEXT: lw a0, 20(sp) -; RV32IFD-NEXT: lw a2, 16(sp) +; RV32IFD-NEXT: lw a0, 16(sp) +; RV32IFD-NEXT: lw a2, 20(sp) ; RV32IFD-NEXT: lw a1, 12(sp) ; RV32IFD-NEXT: lw a4, 8(sp) ; RV32IFD-NEXT: lui a3, 524288 @@ -2999,25 +3013,25 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32IFD-NEXT: beq a1, a5, .LBB45_2 ; RV32IFD-NEXT: # %bb.1: # %entry ; RV32IFD-NEXT: sltu a6, a1, a5 -; RV32IFD-NEXT: or a7, a2, a0 +; RV32IFD-NEXT: or a7, a0, a2 ; RV32IFD-NEXT: bnez a7, .LBB45_3 ; RV32IFD-NEXT: j .LBB45_4 ; RV32IFD-NEXT: .LBB45_2: ; RV32IFD-NEXT: sltiu a6, a4, -1 -; RV32IFD-NEXT: or a7, a2, a0 +; RV32IFD-NEXT: or a7, a0, a2 ; RV32IFD-NEXT: beqz a7, .LBB45_4 ; RV32IFD-NEXT: .LBB45_3: # %entry -; RV32IFD-NEXT: slti a6, a0, 0 +; RV32IFD-NEXT: slti a6, a2, 0 ; RV32IFD-NEXT: .LBB45_4: # %entry -; RV32IFD-NEXT: neg a7, a6 -; RV32IFD-NEXT: addi t0, a6, -1 +; RV32IFD-NEXT: addi a7, a6, -1 +; RV32IFD-NEXT: neg t0, a6 ; RV32IFD-NEXT: bnez a6, .LBB45_6 ; RV32IFD-NEXT: # %bb.5: # %entry ; RV32IFD-NEXT: mv a1, a5 ; RV32IFD-NEXT: .LBB45_6: # %entry -; RV32IFD-NEXT: or a4, t0, a4 -; RV32IFD-NEXT: and a5, a7, a0 -; RV32IFD-NEXT: and a2, a7, a2 +; RV32IFD-NEXT: or a4, a7, a4 +; RV32IFD-NEXT: and a2, t0, a2 +; RV32IFD-NEXT: and a5, t0, a0 ; RV32IFD-NEXT: beq a1, a3, .LBB45_8 ; RV32IFD-NEXT: # %bb.7: # %entry ; 
RV32IFD-NEXT: sltu a0, a3, a1 @@ -3025,11 +3039,11 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32IFD-NEXT: .LBB45_8: ; RV32IFD-NEXT: snez a0, a4 ; RV32IFD-NEXT: .LBB45_9: # %entry -; RV32IFD-NEXT: and a2, a2, a5 +; RV32IFD-NEXT: and a5, a5, a2 ; RV32IFD-NEXT: li a3, -1 -; RV32IFD-NEXT: beq a2, a3, .LBB45_11 +; RV32IFD-NEXT: beq a5, a3, .LBB45_11 ; RV32IFD-NEXT: # %bb.10: # %entry -; RV32IFD-NEXT: slti a0, a5, 0 +; RV32IFD-NEXT: slti a0, a2, 0 ; RV32IFD-NEXT: xori a0, a0, 1 ; RV32IFD-NEXT: .LBB45_11: # %entry ; RV32IFD-NEXT: bnez a0, .LBB45_13 @@ -3073,8 +3087,10 @@ define i64 @utest_f64i64_mm(double %x) { ; RV32IF-NEXT: lw a1, 20(sp) ; RV32IF-NEXT: lw a2, 12(sp) ; RV32IF-NEXT: lw a3, 8(sp) -; RV32IF-NEXT: or a4, a1, a0 -; RV32IF-NEXT: seqz a4, a4 +; RV32IF-NEXT: seqz a4, a0 +; RV32IF-NEXT: snez a5, a1 +; RV32IF-NEXT: addi a5, a5, -1 +; RV32IF-NEXT: and a4, a5, a4 ; RV32IF-NEXT: xori a0, a0, 1 ; RV32IF-NEXT: or a0, a0, a1 ; RV32IF-NEXT: seqz a0, a0 @@ -3113,8 +3129,10 @@ define i64 @utest_f64i64_mm(double %x) { ; RV32IFD-NEXT: lw a1, 20(sp) ; RV32IFD-NEXT: lw a2, 12(sp) ; RV32IFD-NEXT: lw a3, 8(sp) -; RV32IFD-NEXT: or a4, a1, a0 -; RV32IFD-NEXT: seqz a4, a4 +; RV32IFD-NEXT: seqz a4, a0 +; RV32IFD-NEXT: snez a5, a1 +; RV32IFD-NEXT: addi a5, a5, -1 +; RV32IFD-NEXT: and a4, a5, a4 ; RV32IFD-NEXT: xori a0, a0, 1 ; RV32IFD-NEXT: or a0, a0, a1 ; RV32IFD-NEXT: seqz a0, a0 @@ -3144,30 +3162,30 @@ define i64 @ustest_f64i64_mm(double %x) { ; RV32IF-NEXT: mv a1, a0 ; RV32IF-NEXT: addi a0, sp, 8 ; RV32IF-NEXT: call __fixdfti -; RV32IF-NEXT: lw a0, 8(sp) -; RV32IF-NEXT: lw a1, 12(sp) -; RV32IF-NEXT: lw a2, 20(sp) +; RV32IF-NEXT: lw a0, 20(sp) +; RV32IF-NEXT: lw a1, 8(sp) +; RV32IF-NEXT: lw a2, 12(sp) ; RV32IF-NEXT: lw a3, 16(sp) -; RV32IF-NEXT: beqz a2, .LBB47_2 +; RV32IF-NEXT: beqz a0, .LBB47_2 ; RV32IF-NEXT: # %bb.1: # %entry -; RV32IF-NEXT: slti a4, a2, 0 +; RV32IF-NEXT: slti a4, a0, 0 ; RV32IF-NEXT: j .LBB47_3 ; RV32IF-NEXT: .LBB47_2: ; RV32IF-NEXT: seqz a4, a3 ; 
RV32IF-NEXT: .LBB47_3: # %entry ; RV32IF-NEXT: xori a3, a3, 1 -; RV32IF-NEXT: or a3, a3, a2 +; RV32IF-NEXT: or a3, a3, a0 ; RV32IF-NEXT: seqz a3, a3 ; RV32IF-NEXT: addi a3, a3, -1 ; RV32IF-NEXT: and a3, a3, a4 ; RV32IF-NEXT: neg a3, a3 +; RV32IF-NEXT: and a2, a3, a2 ; RV32IF-NEXT: and a1, a3, a1 ; RV32IF-NEXT: and a0, a3, a0 -; RV32IF-NEXT: and a2, a3, a2 -; RV32IF-NEXT: slti a2, a2, 0 -; RV32IF-NEXT: addi a2, a2, -1 -; RV32IF-NEXT: and a0, a2, a0 -; RV32IF-NEXT: and a1, a2, a1 +; RV32IF-NEXT: slti a0, a0, 0 +; RV32IF-NEXT: addi a3, a0, -1 +; RV32IF-NEXT: and a0, a3, a1 +; RV32IF-NEXT: and a1, a3, a2 ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 32 ; RV32IF-NEXT: ret @@ -3202,30 +3220,30 @@ define i64 @ustest_f64i64_mm(double %x) { ; RV32IFD-NEXT: .cfi_offset ra, -4 ; RV32IFD-NEXT: addi a0, sp, 8 ; RV32IFD-NEXT: call __fixdfti -; RV32IFD-NEXT: lw a0, 8(sp) -; RV32IFD-NEXT: lw a1, 12(sp) -; RV32IFD-NEXT: lw a2, 20(sp) +; RV32IFD-NEXT: lw a0, 20(sp) +; RV32IFD-NEXT: lw a1, 8(sp) +; RV32IFD-NEXT: lw a2, 12(sp) ; RV32IFD-NEXT: lw a3, 16(sp) -; RV32IFD-NEXT: beqz a2, .LBB47_2 +; RV32IFD-NEXT: beqz a0, .LBB47_2 ; RV32IFD-NEXT: # %bb.1: # %entry -; RV32IFD-NEXT: slti a4, a2, 0 +; RV32IFD-NEXT: slti a4, a0, 0 ; RV32IFD-NEXT: j .LBB47_3 ; RV32IFD-NEXT: .LBB47_2: ; RV32IFD-NEXT: seqz a4, a3 ; RV32IFD-NEXT: .LBB47_3: # %entry ; RV32IFD-NEXT: xori a3, a3, 1 -; RV32IFD-NEXT: or a3, a3, a2 +; RV32IFD-NEXT: or a3, a3, a0 ; RV32IFD-NEXT: seqz a3, a3 ; RV32IFD-NEXT: addi a3, a3, -1 ; RV32IFD-NEXT: and a3, a3, a4 ; RV32IFD-NEXT: neg a3, a3 +; RV32IFD-NEXT: and a2, a3, a2 ; RV32IFD-NEXT: and a1, a3, a1 ; RV32IFD-NEXT: and a0, a3, a0 -; RV32IFD-NEXT: and a2, a3, a2 -; RV32IFD-NEXT: slti a2, a2, 0 -; RV32IFD-NEXT: addi a2, a2, -1 -; RV32IFD-NEXT: and a0, a2, a0 -; RV32IFD-NEXT: and a1, a2, a1 +; RV32IFD-NEXT: slti a0, a0, 0 +; RV32IFD-NEXT: addi a3, a0, -1 +; RV32IFD-NEXT: and a0, a3, a1 +; RV32IFD-NEXT: and a1, a3, a2 ; RV32IFD-NEXT: lw ra, 28(sp) # 
4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret @@ -3246,8 +3264,8 @@ define i64 @stest_f32i64_mm(float %x) { ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti -; RV32-NEXT: lw a0, 20(sp) -; RV32-NEXT: lw a2, 16(sp) +; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: lw a2, 20(sp) ; RV32-NEXT: lw a1, 12(sp) ; RV32-NEXT: lw a4, 8(sp) ; RV32-NEXT: lui a3, 524288 @@ -3255,25 +3273,25 @@ define i64 @stest_f32i64_mm(float %x) { ; RV32-NEXT: beq a1, a5, .LBB48_2 ; RV32-NEXT: # %bb.1: # %entry ; RV32-NEXT: sltu a6, a1, a5 -; RV32-NEXT: or a7, a2, a0 +; RV32-NEXT: or a7, a0, a2 ; RV32-NEXT: bnez a7, .LBB48_3 ; RV32-NEXT: j .LBB48_4 ; RV32-NEXT: .LBB48_2: ; RV32-NEXT: sltiu a6, a4, -1 -; RV32-NEXT: or a7, a2, a0 +; RV32-NEXT: or a7, a0, a2 ; RV32-NEXT: beqz a7, .LBB48_4 ; RV32-NEXT: .LBB48_3: # %entry -; RV32-NEXT: slti a6, a0, 0 +; RV32-NEXT: slti a6, a2, 0 ; RV32-NEXT: .LBB48_4: # %entry -; RV32-NEXT: neg a7, a6 -; RV32-NEXT: addi t0, a6, -1 +; RV32-NEXT: addi a7, a6, -1 +; RV32-NEXT: neg t0, a6 ; RV32-NEXT: bnez a6, .LBB48_6 ; RV32-NEXT: # %bb.5: # %entry ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB48_6: # %entry -; RV32-NEXT: or a4, t0, a4 -; RV32-NEXT: and a5, a7, a0 -; RV32-NEXT: and a2, a7, a2 +; RV32-NEXT: or a4, a7, a4 +; RV32-NEXT: and a2, t0, a2 +; RV32-NEXT: and a5, t0, a0 ; RV32-NEXT: beq a1, a3, .LBB48_8 ; RV32-NEXT: # %bb.7: # %entry ; RV32-NEXT: sltu a0, a3, a1 @@ -3281,11 +3299,11 @@ define i64 @stest_f32i64_mm(float %x) { ; RV32-NEXT: .LBB48_8: ; RV32-NEXT: snez a0, a4 ; RV32-NEXT: .LBB48_9: # %entry -; RV32-NEXT: and a2, a2, a5 +; RV32-NEXT: and a5, a5, a2 ; RV32-NEXT: li a3, -1 -; RV32-NEXT: beq a2, a3, .LBB48_11 +; RV32-NEXT: beq a5, a3, .LBB48_11 ; RV32-NEXT: # %bb.10: # %entry -; RV32-NEXT: slti a0, a5, 0 +; RV32-NEXT: slti a0, a2, 0 ; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: .LBB48_11: # %entry ; RV32-NEXT: bnez a0, .LBB48_13 @@ -3327,8 +3345,10 @@ define i64 @utest_f32i64_mm(float %x) { ; RV32-NEXT: lw 
a1, 20(sp) ; RV32-NEXT: lw a2, 12(sp) ; RV32-NEXT: lw a3, 8(sp) -; RV32-NEXT: or a4, a1, a0 -; RV32-NEXT: seqz a4, a4 +; RV32-NEXT: seqz a4, a0 +; RV32-NEXT: snez a5, a1 +; RV32-NEXT: addi a5, a5, -1 +; RV32-NEXT: and a4, a5, a4 ; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: or a0, a0, a1 ; RV32-NEXT: seqz a0, a0 @@ -3370,30 +3390,30 @@ define i64 @ustest_f32i64_mm(float %x) { ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti -; RV32-NEXT: lw a0, 8(sp) -; RV32-NEXT: lw a1, 12(sp) -; RV32-NEXT: lw a2, 20(sp) +; RV32-NEXT: lw a0, 20(sp) +; RV32-NEXT: lw a1, 8(sp) +; RV32-NEXT: lw a2, 12(sp) ; RV32-NEXT: lw a3, 16(sp) -; RV32-NEXT: beqz a2, .LBB50_2 +; RV32-NEXT: beqz a0, .LBB50_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: slti a4, a2, 0 +; RV32-NEXT: slti a4, a0, 0 ; RV32-NEXT: j .LBB50_3 ; RV32-NEXT: .LBB50_2: ; RV32-NEXT: seqz a4, a3 ; RV32-NEXT: .LBB50_3: # %entry ; RV32-NEXT: xori a3, a3, 1 -; RV32-NEXT: or a3, a3, a2 +; RV32-NEXT: or a3, a3, a0 ; RV32-NEXT: seqz a3, a3 ; RV32-NEXT: addi a3, a3, -1 ; RV32-NEXT: and a3, a3, a4 ; RV32-NEXT: neg a3, a3 +; RV32-NEXT: and a2, a3, a2 ; RV32-NEXT: and a1, a3, a1 ; RV32-NEXT: and a0, a3, a0 -; RV32-NEXT: and a2, a3, a2 -; RV32-NEXT: slti a2, a2, 0 -; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: and a0, a2, a0 -; RV32-NEXT: and a1, a2, a1 +; RV32-NEXT: slti a0, a0, 0 +; RV32-NEXT: addi a3, a0, -1 +; RV32-NEXT: and a0, a3, a1 +; RV32-NEXT: and a1, a3, a2 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret @@ -3437,8 +3457,8 @@ define i64 @stest_f16i64_mm(half %x) { ; RV32-NEXT: call __extendhfsf2 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti -; RV32-NEXT: lw a0, 20(sp) -; RV32-NEXT: lw a2, 16(sp) +; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: lw a2, 20(sp) ; RV32-NEXT: lw a1, 12(sp) ; RV32-NEXT: lw a4, 8(sp) ; RV32-NEXT: lui a3, 524288 @@ -3446,25 +3466,25 @@ define i64 @stest_f16i64_mm(half %x) { ; RV32-NEXT: beq a1, a5, .LBB51_2 ; RV32-NEXT: # 
%bb.1: # %entry ; RV32-NEXT: sltu a6, a1, a5 -; RV32-NEXT: or a7, a2, a0 +; RV32-NEXT: or a7, a0, a2 ; RV32-NEXT: bnez a7, .LBB51_3 ; RV32-NEXT: j .LBB51_4 ; RV32-NEXT: .LBB51_2: ; RV32-NEXT: sltiu a6, a4, -1 -; RV32-NEXT: or a7, a2, a0 +; RV32-NEXT: or a7, a0, a2 ; RV32-NEXT: beqz a7, .LBB51_4 ; RV32-NEXT: .LBB51_3: # %entry -; RV32-NEXT: slti a6, a0, 0 +; RV32-NEXT: slti a6, a2, 0 ; RV32-NEXT: .LBB51_4: # %entry -; RV32-NEXT: neg a7, a6 -; RV32-NEXT: addi t0, a6, -1 +; RV32-NEXT: addi a7, a6, -1 +; RV32-NEXT: neg t0, a6 ; RV32-NEXT: bnez a6, .LBB51_6 ; RV32-NEXT: # %bb.5: # %entry ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB51_6: # %entry -; RV32-NEXT: or a4, t0, a4 -; RV32-NEXT: and a5, a7, a0 -; RV32-NEXT: and a2, a7, a2 +; RV32-NEXT: or a4, a7, a4 +; RV32-NEXT: and a2, t0, a2 +; RV32-NEXT: and a5, t0, a0 ; RV32-NEXT: beq a1, a3, .LBB51_8 ; RV32-NEXT: # %bb.7: # %entry ; RV32-NEXT: sltu a0, a3, a1 @@ -3472,11 +3492,11 @@ define i64 @stest_f16i64_mm(half %x) { ; RV32-NEXT: .LBB51_8: ; RV32-NEXT: snez a0, a4 ; RV32-NEXT: .LBB51_9: # %entry -; RV32-NEXT: and a2, a2, a5 +; RV32-NEXT: and a5, a5, a2 ; RV32-NEXT: li a3, -1 -; RV32-NEXT: beq a2, a3, .LBB51_11 +; RV32-NEXT: beq a5, a3, .LBB51_11 ; RV32-NEXT: # %bb.10: # %entry -; RV32-NEXT: slti a0, a5, 0 +; RV32-NEXT: slti a0, a2, 0 ; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: .LBB51_11: # %entry ; RV32-NEXT: bnez a0, .LBB51_13 @@ -3550,8 +3570,10 @@ define i64 @utesth_f16i64_mm(half %x) { ; RV32-NEXT: lw a1, 20(sp) ; RV32-NEXT: lw a2, 12(sp) ; RV32-NEXT: lw a3, 8(sp) -; RV32-NEXT: or a4, a1, a0 -; RV32-NEXT: seqz a4, a4 +; RV32-NEXT: seqz a4, a0 +; RV32-NEXT: snez a5, a1 +; RV32-NEXT: addi a5, a5, -1 +; RV32-NEXT: and a4, a5, a4 ; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: or a0, a0, a1 ; RV32-NEXT: seqz a0, a0 @@ -3595,30 +3617,30 @@ define i64 @ustest_f16i64_mm(half %x) { ; RV32-NEXT: call __extendhfsf2 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti -; RV32-NEXT: lw a0, 8(sp) -; RV32-NEXT: lw a1, 12(sp) -; RV32-NEXT: 
lw a2, 20(sp) +; RV32-NEXT: lw a0, 20(sp) +; RV32-NEXT: lw a1, 8(sp) +; RV32-NEXT: lw a2, 12(sp) ; RV32-NEXT: lw a3, 16(sp) -; RV32-NEXT: beqz a2, .LBB53_2 +; RV32-NEXT: beqz a0, .LBB53_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: slti a4, a2, 0 +; RV32-NEXT: slti a4, a0, 0 ; RV32-NEXT: j .LBB53_3 ; RV32-NEXT: .LBB53_2: ; RV32-NEXT: seqz a4, a3 ; RV32-NEXT: .LBB53_3: # %entry ; RV32-NEXT: xori a3, a3, 1 -; RV32-NEXT: or a3, a3, a2 +; RV32-NEXT: or a3, a3, a0 ; RV32-NEXT: seqz a3, a3 ; RV32-NEXT: addi a3, a3, -1 ; RV32-NEXT: and a3, a3, a4 ; RV32-NEXT: neg a3, a3 +; RV32-NEXT: and a2, a3, a2 ; RV32-NEXT: and a1, a3, a1 ; RV32-NEXT: and a0, a3, a0 -; RV32-NEXT: and a2, a3, a2 -; RV32-NEXT: slti a2, a2, 0 -; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: and a0, a2, a0 -; RV32-NEXT: and a1, a2, a1 +; RV32-NEXT: slti a0, a0, 0 +; RV32-NEXT: addi a3, a0, -1 +; RV32-NEXT: and a0, a3, a1 +; RV32-NEXT: and a1, a3, a2 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll index daaceed3941c53..518cd7da2ab771 100644 --- a/llvm/test/CodeGen/RISCV/half-convert.ll +++ b/llvm/test/CodeGen/RISCV/half-convert.ll @@ -2145,41 +2145,48 @@ define i64 @fcvt_l_h(half %a) nounwind { define i64 @fcvt_l_h_sat(half %a) nounwind { ; RV32IZFH-LABEL: fcvt_l_h_sat: ; RV32IZFH: # %bb.0: # %start -; RV32IZFH-NEXT: addi sp, sp, -16 -; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: addi sp, sp, -32 +; RV32IZFH-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: lui 
a0, %hi(.LCPI10_0) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI10_0)(a0) ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 +; RV32IZFH-NEXT: flt.s s0, fa5, fs0 +; RV32IZFH-NEXT: neg s1, s0 ; RV32IZFH-NEXT: lui a0, 913408 ; RV32IZFH-NEXT: fmv.w.x fa5, a0 -; RV32IZFH-NEXT: fle.s s0, fa5, fs0 +; RV32IZFH-NEXT: fle.s s2, fa5, fs0 +; RV32IZFH-NEXT: neg s3, s2 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi +; RV32IZFH-NEXT: and a0, s3, a0 +; RV32IZFH-NEXT: or a0, s1, a0 +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: neg a2, a2 ; RV32IZFH-NEXT: lui a4, 524288 -; RV32IZFH-NEXT: lui a2, 524288 -; RV32IZFH-NEXT: beqz s0, .LBB10_2 +; RV32IZFH-NEXT: li a5, 1 +; RV32IZFH-NEXT: lui a3, 524288 +; RV32IZFH-NEXT: bne s2, a5, .LBB10_2 ; RV32IZFH-NEXT: # %bb.1: # %start -; RV32IZFH-NEXT: mv a2, a1 +; RV32IZFH-NEXT: mv a3, a1 ; RV32IZFH-NEXT: .LBB10_2: # %start -; RV32IZFH-NEXT: lui a1, %hi(.LCPI10_0) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI10_0)(a1) -; RV32IZFH-NEXT: flt.s a3, fa5, fs0 -; RV32IZFH-NEXT: beqz a3, .LBB10_4 +; RV32IZFH-NEXT: and a0, a2, a0 +; RV32IZFH-NEXT: beqz s0, .LBB10_4 ; RV32IZFH-NEXT: # %bb.3: -; RV32IZFH-NEXT: addi a2, a4, -1 +; RV32IZFH-NEXT: addi a3, a4, -1 ; RV32IZFH-NEXT: .LBB10_4: # %start -; RV32IZFH-NEXT: feq.s a1, fs0, fs0 -; RV32IZFH-NEXT: neg a4, a1 -; RV32IZFH-NEXT: and a1, a4, a2 -; RV32IZFH-NEXT: neg a2, a3 -; RV32IZFH-NEXT: neg a3, s0 -; RV32IZFH-NEXT: and a0, a3, a0 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a0, a4, a0 -; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: and a1, a2, a3 +; RV32IZFH-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw 
fs0, 8(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 32 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: fcvt_l_h_sat: @@ -2193,41 +2200,48 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; ; RV32IDZFH-LABEL: fcvt_l_h_sat: ; RV32IDZFH: # %bb.0: # %start -; RV32IDZFH-NEXT: addi sp, sp, -16 -; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IDZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: addi sp, sp, -32 +; RV32IDZFH-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IDZFH-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32IDZFH-NEXT: lui a0, %hi(.LCPI10_0) +; RV32IDZFH-NEXT: flw fa5, %lo(.LCPI10_0)(a0) ; RV32IDZFH-NEXT: fcvt.s.h fs0, fa0 +; RV32IDZFH-NEXT: flt.s s0, fa5, fs0 +; RV32IDZFH-NEXT: neg s1, s0 ; RV32IDZFH-NEXT: lui a0, 913408 ; RV32IDZFH-NEXT: fmv.w.x fa5, a0 -; RV32IDZFH-NEXT: fle.s s0, fa5, fs0 +; RV32IDZFH-NEXT: fle.s s2, fa5, fs0 +; RV32IDZFH-NEXT: neg s3, s2 ; RV32IDZFH-NEXT: fmv.s fa0, fs0 ; RV32IDZFH-NEXT: call __fixsfdi +; RV32IDZFH-NEXT: and a0, s3, a0 +; RV32IDZFH-NEXT: or a0, s1, a0 +; RV32IDZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IDZFH-NEXT: neg a2, a2 ; RV32IDZFH-NEXT: lui a4, 524288 -; RV32IDZFH-NEXT: lui a2, 524288 -; RV32IDZFH-NEXT: beqz s0, .LBB10_2 +; RV32IDZFH-NEXT: li a5, 1 +; RV32IDZFH-NEXT: lui a3, 524288 +; RV32IDZFH-NEXT: bne s2, a5, .LBB10_2 ; RV32IDZFH-NEXT: # %bb.1: # %start -; RV32IDZFH-NEXT: mv a2, a1 +; RV32IDZFH-NEXT: mv a3, a1 ; RV32IDZFH-NEXT: .LBB10_2: # %start -; RV32IDZFH-NEXT: lui a1, %hi(.LCPI10_0) -; RV32IDZFH-NEXT: flw fa5, %lo(.LCPI10_0)(a1) -; RV32IDZFH-NEXT: flt.s a3, fa5, fs0 -; RV32IDZFH-NEXT: beqz a3, .LBB10_4 +; RV32IDZFH-NEXT: and a0, a2, a0 +; RV32IDZFH-NEXT: beqz s0, .LBB10_4 ; RV32IDZFH-NEXT: # %bb.3: -; RV32IDZFH-NEXT: addi a2, a4, -1 +; RV32IDZFH-NEXT: addi a3, 
a4, -1 ; RV32IDZFH-NEXT: .LBB10_4: # %start -; RV32IDZFH-NEXT: feq.s a1, fs0, fs0 -; RV32IDZFH-NEXT: neg a4, a1 -; RV32IDZFH-NEXT: and a1, a4, a2 -; RV32IDZFH-NEXT: neg a2, a3 -; RV32IDZFH-NEXT: neg a3, s0 -; RV32IDZFH-NEXT: and a0, a3, a0 -; RV32IDZFH-NEXT: or a0, a2, a0 -; RV32IDZFH-NEXT: and a0, a4, a0 -; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IDZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IDZFH-NEXT: and a1, a2, a3 +; RV32IDZFH-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IDZFH-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IDZFH-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IDZFH-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IDZFH-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IDZFH-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; RV32IDZFH-NEXT: addi sp, sp, 16 +; RV32IDZFH-NEXT: addi sp, sp, 32 ; RV32IDZFH-NEXT: ret ; ; RV64IDZFH-LABEL: fcvt_l_h_sat: @@ -2263,8 +2277,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; RV32IZHINX-NEXT: feq.s a2, s0, s0 ; RV32IZHINX-NEXT: neg a2, a2 ; RV32IZHINX-NEXT: lui a4, 524288 +; RV32IZHINX-NEXT: li a5, 1 ; RV32IZHINX-NEXT: lui a3, 524288 -; RV32IZHINX-NEXT: beqz s3, .LBB10_2 +; RV32IZHINX-NEXT: bne s3, a5, .LBB10_2 ; RV32IZHINX-NEXT: # %bb.1: # %start ; RV32IZHINX-NEXT: mv a3, a1 ; RV32IZHINX-NEXT: .LBB10_2: # %start @@ -2316,8 +2331,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; RV32IZDINXZHINX-NEXT: feq.s a2, s0, s0 ; RV32IZDINXZHINX-NEXT: neg a2, a2 ; RV32IZDINXZHINX-NEXT: lui a4, 524288 +; RV32IZDINXZHINX-NEXT: li a5, 1 ; RV32IZDINXZHINX-NEXT: lui a3, 524288 -; RV32IZDINXZHINX-NEXT: beqz s3, .LBB10_2 +; RV32IZDINXZHINX-NEXT: bne s3, a5, .LBB10_2 ; RV32IZDINXZHINX-NEXT: # %bb.1: # %start ; RV32IZDINXZHINX-NEXT: mv a3, a1 ; RV32IZDINXZHINX-NEXT: .LBB10_2: # %start @@ -2448,42 +2464,48 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; ; RV32ID-ILP32-LABEL: fcvt_l_h_sat: ; RV32ID-ILP32: # %bb.0: # %start -; RV32ID-ILP32-NEXT: addi sp, sp, -16 -; RV32ID-ILP32-NEXT: sw ra, 
12(sp) # 4-byte Folded Spill -; RV32ID-ILP32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32ID-ILP32-NEXT: addi sp, sp, -32 +; RV32ID-ILP32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32ID-ILP32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32ID-ILP32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32ID-ILP32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32ID-ILP32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32ID-ILP32-NEXT: call __extendhfsf2 +; RV32ID-ILP32-NEXT: lui a1, %hi(.LCPI10_0) +; RV32ID-ILP32-NEXT: flw fa5, %lo(.LCPI10_0)(a1) ; RV32ID-ILP32-NEXT: fmv.w.x fa4, a0 +; RV32ID-ILP32-NEXT: fsw fa4, 8(sp) # 4-byte Folded Spill +; RV32ID-ILP32-NEXT: flt.s s0, fa5, fa4 +; RV32ID-ILP32-NEXT: neg s1, s0 ; RV32ID-ILP32-NEXT: lui a1, 913408 ; RV32ID-ILP32-NEXT: fmv.w.x fa5, a1 -; RV32ID-ILP32-NEXT: fsw fa4, 4(sp) # 4-byte Folded Spill -; RV32ID-ILP32-NEXT: fle.s s0, fa5, fa4 +; RV32ID-ILP32-NEXT: fle.s s2, fa5, fa4 +; RV32ID-ILP32-NEXT: neg s3, s2 ; RV32ID-ILP32-NEXT: call __fixsfdi +; RV32ID-ILP32-NEXT: and a0, s3, a0 +; RV32ID-ILP32-NEXT: or a0, s1, a0 +; RV32ID-ILP32-NEXT: flw fa5, 8(sp) # 4-byte Folded Reload +; RV32ID-ILP32-NEXT: feq.s a2, fa5, fa5 +; RV32ID-ILP32-NEXT: neg a2, a2 ; RV32ID-ILP32-NEXT: lui a4, 524288 -; RV32ID-ILP32-NEXT: lui a2, 524288 -; RV32ID-ILP32-NEXT: beqz s0, .LBB10_2 +; RV32ID-ILP32-NEXT: li a5, 1 +; RV32ID-ILP32-NEXT: lui a3, 524288 +; RV32ID-ILP32-NEXT: bne s2, a5, .LBB10_2 ; RV32ID-ILP32-NEXT: # %bb.1: # %start -; RV32ID-ILP32-NEXT: mv a2, a1 +; RV32ID-ILP32-NEXT: mv a3, a1 ; RV32ID-ILP32-NEXT: .LBB10_2: # %start -; RV32ID-ILP32-NEXT: lui a1, %hi(.LCPI10_0) -; RV32ID-ILP32-NEXT: flw fa5, %lo(.LCPI10_0)(a1) -; RV32ID-ILP32-NEXT: flw fa4, 4(sp) # 4-byte Folded Reload -; RV32ID-ILP32-NEXT: flt.s a3, fa5, fa4 -; RV32ID-ILP32-NEXT: fmv.s fa5, fa4 -; RV32ID-ILP32-NEXT: beqz a3, .LBB10_4 +; RV32ID-ILP32-NEXT: and a0, a2, a0 +; RV32ID-ILP32-NEXT: beqz s0, .LBB10_4 ; RV32ID-ILP32-NEXT: # %bb.3: -; RV32ID-ILP32-NEXT: addi a2, a4, -1 +; 
RV32ID-ILP32-NEXT: addi a3, a4, -1 ; RV32ID-ILP32-NEXT: .LBB10_4: # %start -; RV32ID-ILP32-NEXT: feq.s a1, fa5, fa5 -; RV32ID-ILP32-NEXT: neg a4, a1 -; RV32ID-ILP32-NEXT: and a1, a4, a2 -; RV32ID-ILP32-NEXT: neg a2, a3 -; RV32ID-ILP32-NEXT: neg a3, s0 -; RV32ID-ILP32-NEXT: and a0, a3, a0 -; RV32ID-ILP32-NEXT: or a0, a2, a0 -; RV32ID-ILP32-NEXT: and a0, a4, a0 -; RV32ID-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32ID-ILP32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32ID-ILP32-NEXT: addi sp, sp, 16 +; RV32ID-ILP32-NEXT: and a1, a2, a3 +; RV32ID-ILP32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32ID-ILP32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32ID-ILP32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32ID-ILP32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32ID-ILP32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32ID-ILP32-NEXT: addi sp, sp, 32 ; RV32ID-ILP32-NEXT: ret ; ; RV64ID-LP64-LABEL: fcvt_l_h_sat: @@ -2503,41 +2525,48 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; ; RV32ID-LABEL: fcvt_l_h_sat: ; RV32ID: # %bb.0: # %start -; RV32ID-NEXT: addi sp, sp, -16 -; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32ID-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32ID-NEXT: addi sp, sp, -32 +; RV32ID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32ID-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32ID-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32ID-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32ID-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32ID-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; RV32ID-NEXT: call __extendhfsf2 +; RV32ID-NEXT: lui a0, %hi(.LCPI10_0) +; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a0) ; RV32ID-NEXT: fmv.s fs0, fa0 +; RV32ID-NEXT: flt.s s0, fa5, fa0 +; RV32ID-NEXT: neg s1, s0 ; RV32ID-NEXT: lui a0, 913408 ; RV32ID-NEXT: fmv.w.x fa5, a0 -; RV32ID-NEXT: fle.s s0, fa5, fa0 +; RV32ID-NEXT: fle.s s2, fa5, fa0 +; RV32ID-NEXT: neg s3, s2 ; RV32ID-NEXT: call __fixsfdi +; RV32ID-NEXT: and a0, s3, a0 +; 
RV32ID-NEXT: or a0, s1, a0 +; RV32ID-NEXT: feq.s a2, fs0, fs0 +; RV32ID-NEXT: neg a2, a2 ; RV32ID-NEXT: lui a4, 524288 -; RV32ID-NEXT: lui a2, 524288 -; RV32ID-NEXT: beqz s0, .LBB10_2 +; RV32ID-NEXT: li a5, 1 +; RV32ID-NEXT: lui a3, 524288 +; RV32ID-NEXT: bne s2, a5, .LBB10_2 ; RV32ID-NEXT: # %bb.1: # %start -; RV32ID-NEXT: mv a2, a1 +; RV32ID-NEXT: mv a3, a1 ; RV32ID-NEXT: .LBB10_2: # %start -; RV32ID-NEXT: lui a1, %hi(.LCPI10_0) -; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a1) -; RV32ID-NEXT: flt.s a3, fa5, fs0 -; RV32ID-NEXT: beqz a3, .LBB10_4 +; RV32ID-NEXT: and a0, a2, a0 +; RV32ID-NEXT: beqz s0, .LBB10_4 ; RV32ID-NEXT: # %bb.3: -; RV32ID-NEXT: addi a2, a4, -1 +; RV32ID-NEXT: addi a3, a4, -1 ; RV32ID-NEXT: .LBB10_4: # %start -; RV32ID-NEXT: feq.s a1, fs0, fs0 -; RV32ID-NEXT: neg a4, a1 -; RV32ID-NEXT: and a1, a4, a2 -; RV32ID-NEXT: neg a2, a3 -; RV32ID-NEXT: neg a3, s0 -; RV32ID-NEXT: and a0, a3, a0 -; RV32ID-NEXT: or a0, a2, a0 -; RV32ID-NEXT: and a0, a4, a0 -; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32ID-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32ID-NEXT: and a1, a2, a3 +; RV32ID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32ID-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32ID-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32ID-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32ID-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32ID-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; RV32ID-NEXT: addi sp, sp, 16 +; RV32ID-NEXT: addi sp, sp, 32 ; RV32ID-NEXT: ret ; ; RV64ID-LABEL: fcvt_l_h_sat: @@ -2556,41 +2585,48 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; ; RV32IFZFHMIN-LABEL: fcvt_l_h_sat: ; RV32IFZFHMIN: # %bb.0: # %start -; RV32IFZFHMIN-NEXT: addi sp, sp, -16 -; RV32IFZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IFZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IFZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IFZFHMIN-NEXT: addi sp, sp, -32 +; RV32IFZFHMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; 
RV32IFZFHMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IFZFHMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IFZFHMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IFZFHMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32IFZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill +; RV32IFZFHMIN-NEXT: lui a0, %hi(.LCPI10_0) +; RV32IFZFHMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0) ; RV32IFZFHMIN-NEXT: fcvt.s.h fs0, fa0 +; RV32IFZFHMIN-NEXT: flt.s s0, fa5, fs0 +; RV32IFZFHMIN-NEXT: neg s1, s0 ; RV32IFZFHMIN-NEXT: lui a0, 913408 ; RV32IFZFHMIN-NEXT: fmv.w.x fa5, a0 -; RV32IFZFHMIN-NEXT: fle.s s0, fa5, fs0 +; RV32IFZFHMIN-NEXT: fle.s s2, fa5, fs0 +; RV32IFZFHMIN-NEXT: neg s3, s2 ; RV32IFZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IFZFHMIN-NEXT: call __fixsfdi +; RV32IFZFHMIN-NEXT: and a0, s3, a0 +; RV32IFZFHMIN-NEXT: or a0, s1, a0 +; RV32IFZFHMIN-NEXT: feq.s a2, fs0, fs0 +; RV32IFZFHMIN-NEXT: neg a2, a2 ; RV32IFZFHMIN-NEXT: lui a4, 524288 -; RV32IFZFHMIN-NEXT: lui a2, 524288 -; RV32IFZFHMIN-NEXT: beqz s0, .LBB10_2 +; RV32IFZFHMIN-NEXT: li a5, 1 +; RV32IFZFHMIN-NEXT: lui a3, 524288 +; RV32IFZFHMIN-NEXT: bne s2, a5, .LBB10_2 ; RV32IFZFHMIN-NEXT: # %bb.1: # %start -; RV32IFZFHMIN-NEXT: mv a2, a1 +; RV32IFZFHMIN-NEXT: mv a3, a1 ; RV32IFZFHMIN-NEXT: .LBB10_2: # %start -; RV32IFZFHMIN-NEXT: lui a1, %hi(.LCPI10_0) -; RV32IFZFHMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1) -; RV32IFZFHMIN-NEXT: flt.s a3, fa5, fs0 -; RV32IFZFHMIN-NEXT: beqz a3, .LBB10_4 +; RV32IFZFHMIN-NEXT: and a0, a2, a0 +; RV32IFZFHMIN-NEXT: beqz s0, .LBB10_4 ; RV32IFZFHMIN-NEXT: # %bb.3: -; RV32IFZFHMIN-NEXT: addi a2, a4, -1 +; RV32IFZFHMIN-NEXT: addi a3, a4, -1 ; RV32IFZFHMIN-NEXT: .LBB10_4: # %start -; RV32IFZFHMIN-NEXT: feq.s a1, fs0, fs0 -; RV32IFZFHMIN-NEXT: neg a4, a1 -; RV32IFZFHMIN-NEXT: and a1, a4, a2 -; RV32IFZFHMIN-NEXT: neg a2, a3 -; RV32IFZFHMIN-NEXT: neg a3, s0 -; RV32IFZFHMIN-NEXT: and a0, a3, a0 -; RV32IFZFHMIN-NEXT: or a0, a2, a0 -; RV32IFZFHMIN-NEXT: and a0, a4, a0 -; RV32IFZFHMIN-NEXT: lw ra, 12(sp) # 
4-byte Folded Reload -; RV32IFZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IFZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload -; RV32IFZFHMIN-NEXT: addi sp, sp, 16 +; RV32IFZFHMIN-NEXT: and a1, a2, a3 +; RV32IFZFHMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IFZFHMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IFZFHMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IFZFHMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IFZFHMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32IFZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload +; RV32IFZFHMIN-NEXT: addi sp, sp, 32 ; RV32IFZFHMIN-NEXT: ret ; ; CHECK64-IZFHMIN-LABEL: fcvt_l_h_sat: @@ -2605,41 +2641,48 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; ; RV32IDZFHMIN-LABEL: fcvt_l_h_sat: ; RV32IDZFHMIN: # %bb.0: # %start -; RV32IDZFHMIN-NEXT: addi sp, sp, -16 -; RV32IDZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IDZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IDZFHMIN-NEXT: addi sp, sp, -32 +; RV32IDZFHMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IDZFHMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IDZFHMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IDZFHMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IDZFHMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32IDZFHMIN-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32IDZFHMIN-NEXT: lui a0, %hi(.LCPI10_0) +; RV32IDZFHMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0) ; RV32IDZFHMIN-NEXT: fcvt.s.h fs0, fa0 +; RV32IDZFHMIN-NEXT: flt.s s0, fa5, fs0 +; RV32IDZFHMIN-NEXT: neg s1, s0 ; RV32IDZFHMIN-NEXT: lui a0, 913408 ; RV32IDZFHMIN-NEXT: fmv.w.x fa5, a0 -; RV32IDZFHMIN-NEXT: fle.s s0, fa5, fs0 +; RV32IDZFHMIN-NEXT: fle.s s2, fa5, fs0 +; RV32IDZFHMIN-NEXT: neg s3, s2 ; RV32IDZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IDZFHMIN-NEXT: call __fixsfdi +; RV32IDZFHMIN-NEXT: and a0, s3, a0 +; RV32IDZFHMIN-NEXT: or a0, s1, a0 +; RV32IDZFHMIN-NEXT: feq.s a2, fs0, fs0 +; RV32IDZFHMIN-NEXT: neg a2, a2 ; RV32IDZFHMIN-NEXT: lui a4, 
524288 -; RV32IDZFHMIN-NEXT: lui a2, 524288 -; RV32IDZFHMIN-NEXT: beqz s0, .LBB10_2 +; RV32IDZFHMIN-NEXT: li a5, 1 +; RV32IDZFHMIN-NEXT: lui a3, 524288 +; RV32IDZFHMIN-NEXT: bne s2, a5, .LBB10_2 ; RV32IDZFHMIN-NEXT: # %bb.1: # %start -; RV32IDZFHMIN-NEXT: mv a2, a1 +; RV32IDZFHMIN-NEXT: mv a3, a1 ; RV32IDZFHMIN-NEXT: .LBB10_2: # %start -; RV32IDZFHMIN-NEXT: lui a1, %hi(.LCPI10_0) -; RV32IDZFHMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1) -; RV32IDZFHMIN-NEXT: flt.s a3, fa5, fs0 -; RV32IDZFHMIN-NEXT: beqz a3, .LBB10_4 +; RV32IDZFHMIN-NEXT: and a0, a2, a0 +; RV32IDZFHMIN-NEXT: beqz s0, .LBB10_4 ; RV32IDZFHMIN-NEXT: # %bb.3: -; RV32IDZFHMIN-NEXT: addi a2, a4, -1 +; RV32IDZFHMIN-NEXT: addi a3, a4, -1 ; RV32IDZFHMIN-NEXT: .LBB10_4: # %start -; RV32IDZFHMIN-NEXT: feq.s a1, fs0, fs0 -; RV32IDZFHMIN-NEXT: neg a4, a1 -; RV32IDZFHMIN-NEXT: and a1, a4, a2 -; RV32IDZFHMIN-NEXT: neg a2, a3 -; RV32IDZFHMIN-NEXT: neg a3, s0 -; RV32IDZFHMIN-NEXT: and a0, a3, a0 -; RV32IDZFHMIN-NEXT: or a0, a2, a0 -; RV32IDZFHMIN-NEXT: and a0, a4, a0 -; RV32IDZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IDZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IDZFHMIN-NEXT: and a1, a2, a3 +; RV32IDZFHMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IDZFHMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IDZFHMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IDZFHMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IDZFHMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IDZFHMIN-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; RV32IDZFHMIN-NEXT: addi sp, sp, 16 +; RV32IDZFHMIN-NEXT: addi sp, sp, 32 ; RV32IDZFHMIN-NEXT: ret ; ; CHECK32-IZHINXMIN-LABEL: fcvt_l_h_sat: @@ -2666,8 +2709,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; CHECK32-IZHINXMIN-NEXT: feq.s a2, s0, s0 ; CHECK32-IZHINXMIN-NEXT: neg a2, a2 ; CHECK32-IZHINXMIN-NEXT: lui a4, 524288 +; CHECK32-IZHINXMIN-NEXT: li a5, 1 ; CHECK32-IZHINXMIN-NEXT: lui a3, 524288 -; CHECK32-IZHINXMIN-NEXT: beqz s3, .LBB10_2 +; 
CHECK32-IZHINXMIN-NEXT: bne s3, a5, .LBB10_2 ; CHECK32-IZHINXMIN-NEXT: # %bb.1: # %start ; CHECK32-IZHINXMIN-NEXT: mv a3, a1 ; CHECK32-IZHINXMIN-NEXT: .LBB10_2: # %start @@ -2720,8 +2764,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; CHECK32-IZDINXZHINXMIN-NEXT: feq.s a2, s0, s0 ; CHECK32-IZDINXZHINXMIN-NEXT: neg a2, a2 ; CHECK32-IZDINXZHINXMIN-NEXT: lui a4, 524288 +; CHECK32-IZDINXZHINXMIN-NEXT: li a5, 1 ; CHECK32-IZDINXZHINXMIN-NEXT: lui a3, 524288 -; CHECK32-IZDINXZHINXMIN-NEXT: beqz s3, .LBB10_2 +; CHECK32-IZDINXZHINXMIN-NEXT: bne s3, a5, .LBB10_2 ; CHECK32-IZDINXZHINXMIN-NEXT: # %bb.1: # %start ; CHECK32-IZDINXZHINXMIN-NEXT: mv a3, a1 ; CHECK32-IZDINXZHINXMIN-NEXT: .LBB10_2: # %start @@ -2939,7 +2984,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.w.x fa5, zero ; RV32IZFH-NEXT: fle.s a0, fa5, fa0 -; RV32IZFH-NEXT: neg s1, a0 +; RV32IZFH-NEXT: xori a0, a0, 1 +; RV32IZFH-NEXT: addi s1, a0, -1 ; RV32IZFH-NEXT: call __fixunssfdi ; RV32IZFH-NEXT: and a0, s1, a0 ; RV32IZFH-NEXT: or a0, s0, a0 @@ -2973,7 +3019,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV32IDZFH-NEXT: neg s0, a0 ; RV32IDZFH-NEXT: fmv.w.x fa5, zero ; RV32IDZFH-NEXT: fle.s a0, fa5, fa0 -; RV32IDZFH-NEXT: neg s1, a0 +; RV32IDZFH-NEXT: xori a0, a0, 1 +; RV32IDZFH-NEXT: addi s1, a0, -1 ; RV32IDZFH-NEXT: call __fixunssfdi ; RV32IDZFH-NEXT: and a0, s1, a0 ; RV32IDZFH-NEXT: or a0, s0, a0 @@ -3006,7 +3053,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV32IZHINX-NEXT: flt.s a1, a1, a0 ; RV32IZHINX-NEXT: neg s0, a1 ; RV32IZHINX-NEXT: fle.s a1, zero, a0 -; RV32IZHINX-NEXT: neg s1, a1 +; RV32IZHINX-NEXT: xori a1, a1, 1 +; RV32IZHINX-NEXT: addi s1, a1, -1 ; RV32IZHINX-NEXT: call __fixunssfdi ; RV32IZHINX-NEXT: and a0, s1, a0 ; RV32IZHINX-NEXT: or a0, s0, a0 @@ -3039,7 +3087,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV32IZDINXZHINX-NEXT: flt.s a1, a1, a0 ; RV32IZDINXZHINX-NEXT: neg s0, a1 ; RV32IZDINXZHINX-NEXT: fle.s a1, zero, a0 
-; RV32IZDINXZHINX-NEXT: neg s1, a1 +; RV32IZDINXZHINX-NEXT: xori a1, a1, 1 +; RV32IZDINXZHINX-NEXT: addi s1, a1, -1 ; RV32IZDINXZHINX-NEXT: call __fixunssfdi ; RV32IZDINXZHINX-NEXT: and a0, s1, a0 ; RV32IZDINXZHINX-NEXT: or a0, s0, a0 @@ -3138,7 +3187,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV32ID-ILP32-NEXT: neg s0, a1 ; RV32ID-ILP32-NEXT: fmv.w.x fa5, zero ; RV32ID-ILP32-NEXT: fle.s a1, fa5, fa4 -; RV32ID-ILP32-NEXT: neg s1, a1 +; RV32ID-ILP32-NEXT: xori a1, a1, 1 +; RV32ID-ILP32-NEXT: addi s1, a1, -1 ; RV32ID-ILP32-NEXT: call __fixunssfdi ; RV32ID-ILP32-NEXT: and a0, s1, a0 ; RV32ID-ILP32-NEXT: or a0, s0, a0 @@ -3178,7 +3228,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV32ID-NEXT: neg s0, a0 ; RV32ID-NEXT: fmv.w.x fa5, zero ; RV32ID-NEXT: fle.s a0, fa5, fa0 -; RV32ID-NEXT: neg s1, a0 +; RV32ID-NEXT: xori a0, a0, 1 +; RV32ID-NEXT: addi s1, a0, -1 ; RV32ID-NEXT: call __fixunssfdi ; RV32ID-NEXT: and a0, s1, a0 ; RV32ID-NEXT: or a0, s0, a0 @@ -3217,7 +3268,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; CHECK32-IZFHMIN-NEXT: neg s0, a0 ; CHECK32-IZFHMIN-NEXT: fmv.w.x fa5, zero ; CHECK32-IZFHMIN-NEXT: fle.s a0, fa5, fa0 -; CHECK32-IZFHMIN-NEXT: neg s1, a0 +; CHECK32-IZFHMIN-NEXT: xori a0, a0, 1 +; CHECK32-IZFHMIN-NEXT: addi s1, a0, -1 ; CHECK32-IZFHMIN-NEXT: call __fixunssfdi ; CHECK32-IZFHMIN-NEXT: and a0, s1, a0 ; CHECK32-IZFHMIN-NEXT: or a0, s0, a0 @@ -3251,7 +3303,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; CHECK32-IZHINXMIN-NEXT: flt.s a1, a1, a0 ; CHECK32-IZHINXMIN-NEXT: neg s0, a1 ; CHECK32-IZHINXMIN-NEXT: fle.s a1, zero, a0 -; CHECK32-IZHINXMIN-NEXT: neg s1, a1 +; CHECK32-IZHINXMIN-NEXT: xori a1, a1, 1 +; CHECK32-IZHINXMIN-NEXT: addi s1, a1, -1 ; CHECK32-IZHINXMIN-NEXT: call __fixunssfdi ; CHECK32-IZHINXMIN-NEXT: and a0, s1, a0 ; CHECK32-IZHINXMIN-NEXT: or a0, s0, a0 @@ -3285,7 +3338,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; CHECK32-IZDINXZHINXMIN-NEXT: flt.s a1, a1, a0 ; CHECK32-IZDINXZHINXMIN-NEXT: neg s0, a1 
; CHECK32-IZDINXZHINXMIN-NEXT: fle.s a1, zero, a0 -; CHECK32-IZDINXZHINXMIN-NEXT: neg s1, a1 +; CHECK32-IZDINXZHINXMIN-NEXT: xori a1, a1, 1 +; CHECK32-IZDINXZHINXMIN-NEXT: addi s1, a1, -1 ; CHECK32-IZDINXZHINXMIN-NEXT: call __fixunssfdi ; CHECK32-IZDINXZHINXMIN-NEXT: and a0, s1, a0 ; CHECK32-IZDINXZHINXMIN-NEXT: or a0, s0, a0 diff --git a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll index 3f385909b0b510..647af5f5b87438 100644 --- a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll +++ b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll @@ -108,38 +108,41 @@ define i64 @test_floor_si64(half %x) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: lui a0, 913408 ; RV32IZFH-NEXT: fmv.w.x fa5, a0 ; RV32IZFH-NEXT: fle.s s0, fa5, fs0 +; RV32IZFH-NEXT: neg s1, s0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi +; RV32IZFH-NEXT: lui a2, %hi(.LCPI1_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI1_1)(a2) +; RV32IZFH-NEXT: and a0, s1, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a2, a3 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: lui a5, 524288 +; RV32IZFH-NEXT: li a6, 1 ; RV32IZFH-NEXT: lui a4, 524288 -; RV32IZFH-NEXT: lui a2, 524288 -; RV32IZFH-NEXT: beqz s0, .LBB1_4 +; RV32IZFH-NEXT: bne s0, a6, .LBB1_4 ; RV32IZFH-NEXT: # %bb.3: -; RV32IZFH-NEXT: mv a2, a1 +; RV32IZFH-NEXT: mv a4, a1 ; RV32IZFH-NEXT: .LBB1_4: -; RV32IZFH-NEXT: lui a1, %hi(.LCPI1_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI1_1)(a1) -; RV32IZFH-NEXT: flt.s a3, fa5, fs0 -; RV32IZFH-NEXT: beqz a3, .LBB1_6 -; RV32IZFH-NEXT: # %bb.5: -; RV32IZFH-NEXT: addi a2, a4, -1 -; 
RV32IZFH-NEXT: .LBB1_6: -; RV32IZFH-NEXT: feq.s a1, fs0, fs0 -; RV32IZFH-NEXT: neg a4, a1 -; RV32IZFH-NEXT: and a1, a4, a2 -; RV32IZFH-NEXT: neg a2, s0 ; RV32IZFH-NEXT: and a0, a2, a0 -; RV32IZFH-NEXT: neg a2, a3 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: beqz a3, .LBB1_6 +; RV32IZFH-NEXT: # %bb.5: +; RV32IZFH-NEXT: addi a4, a5, -1 +; RV32IZFH-NEXT: .LBB1_6: +; RV32IZFH-NEXT: and a1, a2, a4 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: test_floor_si64: @@ -177,16 +180,17 @@ define i64 @test_floor_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lui a2, %hi(.LCPI1_1) ; RV32IZHINX-NEXT: lw a2, %lo(.LCPI1_1)(a2) ; RV32IZHINX-NEXT: and a0, s2, a0 -; RV32IZHINX-NEXT: flt.s a4, a2, s0 -; RV32IZHINX-NEXT: neg a2, a4 +; RV32IZHINX-NEXT: flt.s a3, a2, s0 +; RV32IZHINX-NEXT: neg a2, a3 ; RV32IZHINX-NEXT: or a0, a2, a0 ; RV32IZHINX-NEXT: feq.s a2, s0, s0 ; RV32IZHINX-NEXT: neg a2, a2 ; RV32IZHINX-NEXT: lui a5, 524288 -; RV32IZHINX-NEXT: lui a3, 524288 -; RV32IZHINX-NEXT: beqz s1, .LBB1_4 +; RV32IZHINX-NEXT: li a6, 1 +; RV32IZHINX-NEXT: lui a4, 524288 +; RV32IZHINX-NEXT: bne s1, a6, .LBB1_4 ; RV32IZHINX-NEXT: # %bb.3: -; RV32IZHINX-NEXT: mv a3, a1 +; RV32IZHINX-NEXT: mv a4, a1 ; RV32IZHINX-NEXT: .LBB1_4: ; RV32IZHINX-NEXT: and a0, a2, a0 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -194,11 +198,11 @@ define i64 @test_floor_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 -; RV32IZHINX-NEXT: beqz a4, .LBB1_6 +; RV32IZHINX-NEXT: beqz a3, .LBB1_6 ; RV32IZHINX-NEXT: # %bb.5: -; RV32IZHINX-NEXT: addi a3, a5, -1 +; 
RV32IZHINX-NEXT: addi a4, a5, -1 ; RV32IZHINX-NEXT: .LBB1_6: -; RV32IZHINX-NEXT: and a1, a2, a3 +; RV32IZHINX-NEXT: and a1, a2, a4 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: test_floor_si64: @@ -236,39 +240,42 @@ define i64 @test_floor_si64(half %x) nounwind { ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 ; RV32IZFHMIN-NEXT: lui a0, 913408 ; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0 ; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0 +; RV32IZFHMIN-NEXT: neg s1, s0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixsfdi +; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI1_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI1_0)(a2) +; RV32IZFHMIN-NEXT: and a0, s1, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a2, a3 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: feq.s a2, fs0, fs0 +; RV32IZFHMIN-NEXT: neg a2, a2 +; RV32IZFHMIN-NEXT: lui a5, 524288 +; RV32IZFHMIN-NEXT: li a6, 1 ; RV32IZFHMIN-NEXT: lui a4, 524288 -; RV32IZFHMIN-NEXT: lui a2, 524288 -; RV32IZFHMIN-NEXT: beqz s0, .LBB1_4 +; RV32IZFHMIN-NEXT: bne s0, a6, .LBB1_4 ; RV32IZFHMIN-NEXT: # %bb.3: -; RV32IZFHMIN-NEXT: mv a2, a1 +; RV32IZFHMIN-NEXT: mv a4, a1 ; RV32IZFHMIN-NEXT: .LBB1_4: -; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI1_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI1_0)(a1) -; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 -; RV32IZFHMIN-NEXT: beqz a3, .LBB1_6 -; RV32IZFHMIN-NEXT: # %bb.5: -; RV32IZFHMIN-NEXT: addi a2, a4, -1 -; RV32IZFHMIN-NEXT: .LBB1_6: -; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0 -; RV32IZFHMIN-NEXT: neg a4, a1 -; RV32IZFHMIN-NEXT: and a1, a4, a2 -; RV32IZFHMIN-NEXT: neg a2, s0 ; RV32IZFHMIN-NEXT: and a0, a2, a0 -; RV32IZFHMIN-NEXT: neg a2, a3 -; 
RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a0, a4, a0 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: beqz a3, .LBB1_6 +; RV32IZFHMIN-NEXT: # %bb.5: +; RV32IZFHMIN-NEXT: addi a4, a5, -1 +; RV32IZFHMIN-NEXT: .LBB1_6: +; RV32IZFHMIN-NEXT: and a1, a2, a4 ; RV32IZFHMIN-NEXT: ret ; ; RV64IZFHMIN-LABEL: test_floor_si64: @@ -320,16 +327,17 @@ define i64 @test_floor_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI1_0) ; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI1_0)(a2) ; RV32IZHINXMIN-NEXT: and a0, s2, a0 -; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a4 +; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0 +; RV32IZHINXMIN-NEXT: neg a2, a3 ; RV32IZHINXMIN-NEXT: or a0, a2, a0 ; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0 ; RV32IZHINXMIN-NEXT: neg a2, a2 ; RV32IZHINXMIN-NEXT: lui a5, 524288 -; RV32IZHINXMIN-NEXT: lui a3, 524288 -; RV32IZHINXMIN-NEXT: beqz s1, .LBB1_4 +; RV32IZHINXMIN-NEXT: li a6, 1 +; RV32IZHINXMIN-NEXT: lui a4, 524288 +; RV32IZHINXMIN-NEXT: bne s1, a6, .LBB1_4 ; RV32IZHINXMIN-NEXT: # %bb.3: -; RV32IZHINXMIN-NEXT: mv a3, a1 +; RV32IZHINXMIN-NEXT: mv a4, a1 ; RV32IZHINXMIN-NEXT: .LBB1_4: ; RV32IZHINXMIN-NEXT: and a0, a2, a0 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -337,11 +345,11 @@ define i64 @test_floor_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 -; RV32IZHINXMIN-NEXT: beqz a4, .LBB1_6 +; RV32IZHINXMIN-NEXT: beqz a3, .LBB1_6 ; RV32IZHINXMIN-NEXT: # %bb.5: -; RV32IZHINXMIN-NEXT: addi a3, a5, -1 +; RV32IZHINXMIN-NEXT: addi a4, a5, -1 ; RV32IZHINXMIN-NEXT: .LBB1_6: -; RV32IZHINXMIN-NEXT: 
and a1, a2, a3 +; RV32IZHINXMIN-NEXT: and a1, a2, a4 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: test_floor_si64: @@ -413,7 +421,7 @@ define signext i32 @test_floor_ui32(half %x) { ; RV64IZHINX-NEXT: fcvt.wu.h a1, a0, rtz ; RV64IZHINX-NEXT: feq.h a0, a0, a0 ; RV64IZHINX-NEXT: seqz a0, a0 -; RV64IZHINX-NEXT: addi a0, a0, -1 +; RV64IZHINX-NEXT: addiw a0, a0, -1 ; RV64IZHINX-NEXT: and a0, a1, a0 ; RV64IZHINX-NEXT: ret ; @@ -457,7 +465,7 @@ define signext i32 @test_floor_ui32(half %x) { ; RV64IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz ; RV64IZFHMIN-NEXT: feq.s a1, fa5, fa5 ; RV64IZFHMIN-NEXT: seqz a1, a1 -; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: addiw a1, a1, -1 ; RV64IZFHMIN-NEXT: and a0, a0, a1 ; RV64IZFHMIN-NEXT: ret ; @@ -499,7 +507,7 @@ define signext i32 @test_floor_ui32(half %x) { ; RV64IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz ; RV64IZHINXMIN-NEXT: feq.s a0, a0, a0 ; RV64IZHINXMIN-NEXT: seqz a0, a0 -; RV64IZHINXMIN-NEXT: addi a0, a0, -1 +; RV64IZHINXMIN-NEXT: addiw a0, a0, -1 ; RV64IZHINXMIN-NEXT: and a0, a1, a0 ; RV64IZHINXMIN-NEXT: ret %a = call half @llvm.floor.f16(half %x) @@ -522,25 +530,24 @@ define i64 @test_floor_ui64(half %x) nounwind { ; RV32IZFH-NEXT: .LBB3_2: ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 -; RV32IZFH-NEXT: fmv.w.x fa5, zero -; RV32IZFH-NEXT: fle.s a0, fa5, fs0 -; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi -; RV32IZFH-NEXT: lui a2, %hi(.LCPI3_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI3_1)(a2) -; RV32IZFH-NEXT: and a0, s0, a0 -; RV32IZFH-NEXT: flt.s a2, fa5, fs0 -; RV32IZFH-NEXT: neg a2, a2 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a1, s0, a1 -; RV32IZFH-NEXT: or a1, a2, a1 +; RV32IZFH-NEXT: fmv.w.x fa5, zero +; RV32IZFH-NEXT: 
fle.s a2, fa5, fs0 +; RV32IZFH-NEXT: lui a3, %hi(.LCPI3_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI3_1)(a3) +; RV32IZFH-NEXT: xori a2, a2, 1 +; RV32IZFH-NEXT: addi a2, a2, -1 +; RV32IZFH-NEXT: and a0, a2, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a3, a3 +; RV32IZFH-NEXT: or a0, a3, a0 +; RV32IZFH-NEXT: and a1, a2, a1 +; RV32IZFH-NEXT: or a1, a3, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; @@ -568,23 +575,22 @@ define i64 @test_floor_ui64(half %x) nounwind { ; RV32IZHINX-NEXT: addi sp, sp, -16 ; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: fcvt.s.h s0, a0 -; RV32IZHINX-NEXT: fle.s a0, zero, s0 -; RV32IZHINX-NEXT: neg s1, a0 ; RV32IZHINX-NEXT: mv a0, s0 ; RV32IZHINX-NEXT: call __fixunssfdi -; RV32IZHINX-NEXT: lui a2, %hi(.LCPI3_1) -; RV32IZHINX-NEXT: lw a2, %lo(.LCPI3_1)(a2) -; RV32IZHINX-NEXT: and a0, s1, a0 -; RV32IZHINX-NEXT: flt.s a2, a2, s0 -; RV32IZHINX-NEXT: neg a2, a2 -; RV32IZHINX-NEXT: or a0, a2, a0 -; RV32IZHINX-NEXT: and a1, s1, a1 -; RV32IZHINX-NEXT: or a1, a2, a1 +; RV32IZHINX-NEXT: fle.s a2, zero, s0 +; RV32IZHINX-NEXT: lui a3, %hi(.LCPI3_1) +; RV32IZHINX-NEXT: lw a3, %lo(.LCPI3_1)(a3) +; RV32IZHINX-NEXT: xori a2, a2, 1 +; RV32IZHINX-NEXT: addi a2, a2, -1 +; RV32IZHINX-NEXT: and a0, a2, a0 +; RV32IZHINX-NEXT: flt.s a3, a3, s0 +; RV32IZHINX-NEXT: neg a3, a3 +; RV32IZHINX-NEXT: or a0, a3, a0 +; RV32IZHINX-NEXT: and a1, a2, a1 +; RV32IZHINX-NEXT: or a1, a3, a1 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 ; 
RV32IZHINX-NEXT: ret ; @@ -622,26 +628,25 @@ define i64 @test_floor_ui64(half %x) nounwind { ; RV32IZFHMIN-NEXT: .LBB3_2: ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 -; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero -; RV32IZFHMIN-NEXT: fle.s a0, fa5, fs0 -; RV32IZFHMIN-NEXT: neg s0, a0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixunssfdi -; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI3_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI3_0)(a2) -; RV32IZFHMIN-NEXT: and a0, s0, a0 -; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0 -; RV32IZFHMIN-NEXT: neg a2, a2 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a1, s0, a1 -; RV32IZFHMIN-NEXT: or a1, a2, a1 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero +; RV32IZFHMIN-NEXT: fle.s a2, fa5, fs0 +; RV32IZFHMIN-NEXT: lui a3, %hi(.LCPI3_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI3_0)(a3) +; RV32IZFHMIN-NEXT: xori a2, a2, 1 +; RV32IZFHMIN-NEXT: addi a2, a2, -1 +; RV32IZFHMIN-NEXT: and a0, a2, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a3, a3 +; RV32IZFHMIN-NEXT: or a0, a3, a0 +; RV32IZFHMIN-NEXT: and a1, a2, a1 +; RV32IZFHMIN-NEXT: or a1, a3, a1 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 ; RV32IZFHMIN-NEXT: ret ; @@ -682,24 +687,23 @@ define i64 @test_floor_ui64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: addi sp, sp, -16 ; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; 
RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0 ; RV32IZHINXMIN-NEXT: fcvt.s.h s0, a0 -; RV32IZHINXMIN-NEXT: fle.s a0, zero, s0 -; RV32IZHINXMIN-NEXT: neg s1, a0 ; RV32IZHINXMIN-NEXT: mv a0, s0 ; RV32IZHINXMIN-NEXT: call __fixunssfdi -; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI3_0) -; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI3_0)(a2) -; RV32IZHINXMIN-NEXT: and a0, s1, a0 -; RV32IZHINXMIN-NEXT: flt.s a2, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a2 -; RV32IZHINXMIN-NEXT: or a0, a2, a0 -; RV32IZHINXMIN-NEXT: and a1, s1, a1 -; RV32IZHINXMIN-NEXT: or a1, a2, a1 +; RV32IZHINXMIN-NEXT: fle.s a2, zero, s0 +; RV32IZHINXMIN-NEXT: lui a3, %hi(.LCPI3_0) +; RV32IZHINXMIN-NEXT: lw a3, %lo(.LCPI3_0)(a3) +; RV32IZHINXMIN-NEXT: xori a2, a2, 1 +; RV32IZHINXMIN-NEXT: addi a2, a2, -1 +; RV32IZHINXMIN-NEXT: and a0, a2, a0 +; RV32IZHINXMIN-NEXT: flt.s a3, a3, s0 +; RV32IZHINXMIN-NEXT: neg a3, a3 +; RV32IZHINXMIN-NEXT: or a0, a3, a0 +; RV32IZHINXMIN-NEXT: and a1, a2, a1 +; RV32IZHINXMIN-NEXT: or a1, a3, a1 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 ; RV32IZHINXMIN-NEXT: ret ; @@ -820,38 +824,41 @@ define i64 @test_ceil_si64(half %x) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: lui a0, 913408 ; RV32IZFH-NEXT: fmv.w.x fa5, a0 ; RV32IZFH-NEXT: fle.s s0, fa5, fs0 +; RV32IZFH-NEXT: neg s1, s0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi +; RV32IZFH-NEXT: lui a2, %hi(.LCPI5_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI5_1)(a2) +; RV32IZFH-NEXT: and a0, s1, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a2, 
a3 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: lui a5, 524288 +; RV32IZFH-NEXT: li a6, 1 ; RV32IZFH-NEXT: lui a4, 524288 -; RV32IZFH-NEXT: lui a2, 524288 -; RV32IZFH-NEXT: beqz s0, .LBB5_4 +; RV32IZFH-NEXT: bne s0, a6, .LBB5_4 ; RV32IZFH-NEXT: # %bb.3: -; RV32IZFH-NEXT: mv a2, a1 +; RV32IZFH-NEXT: mv a4, a1 ; RV32IZFH-NEXT: .LBB5_4: -; RV32IZFH-NEXT: lui a1, %hi(.LCPI5_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI5_1)(a1) -; RV32IZFH-NEXT: flt.s a3, fa5, fs0 -; RV32IZFH-NEXT: beqz a3, .LBB5_6 -; RV32IZFH-NEXT: # %bb.5: -; RV32IZFH-NEXT: addi a2, a4, -1 -; RV32IZFH-NEXT: .LBB5_6: -; RV32IZFH-NEXT: feq.s a1, fs0, fs0 -; RV32IZFH-NEXT: neg a4, a1 -; RV32IZFH-NEXT: and a1, a4, a2 -; RV32IZFH-NEXT: neg a2, s0 ; RV32IZFH-NEXT: and a0, a2, a0 -; RV32IZFH-NEXT: neg a2, a3 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: beqz a3, .LBB5_6 +; RV32IZFH-NEXT: # %bb.5: +; RV32IZFH-NEXT: addi a4, a5, -1 +; RV32IZFH-NEXT: .LBB5_6: +; RV32IZFH-NEXT: and a1, a2, a4 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: test_ceil_si64: @@ -889,16 +896,17 @@ define i64 @test_ceil_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lui a2, %hi(.LCPI5_1) ; RV32IZHINX-NEXT: lw a2, %lo(.LCPI5_1)(a2) ; RV32IZHINX-NEXT: and a0, s2, a0 -; RV32IZHINX-NEXT: flt.s a4, a2, s0 -; RV32IZHINX-NEXT: neg a2, a4 +; RV32IZHINX-NEXT: flt.s a3, a2, s0 +; RV32IZHINX-NEXT: neg a2, a3 ; RV32IZHINX-NEXT: or a0, a2, a0 ; RV32IZHINX-NEXT: feq.s a2, s0, s0 ; RV32IZHINX-NEXT: neg a2, a2 ; RV32IZHINX-NEXT: lui a5, 524288 -; RV32IZHINX-NEXT: lui a3, 524288 -; RV32IZHINX-NEXT: beqz s1, .LBB5_4 +; RV32IZHINX-NEXT: li a6, 1 +; 
RV32IZHINX-NEXT: lui a4, 524288 +; RV32IZHINX-NEXT: bne s1, a6, .LBB5_4 ; RV32IZHINX-NEXT: # %bb.3: -; RV32IZHINX-NEXT: mv a3, a1 +; RV32IZHINX-NEXT: mv a4, a1 ; RV32IZHINX-NEXT: .LBB5_4: ; RV32IZHINX-NEXT: and a0, a2, a0 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -906,11 +914,11 @@ define i64 @test_ceil_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 -; RV32IZHINX-NEXT: beqz a4, .LBB5_6 +; RV32IZHINX-NEXT: beqz a3, .LBB5_6 ; RV32IZHINX-NEXT: # %bb.5: -; RV32IZHINX-NEXT: addi a3, a5, -1 +; RV32IZHINX-NEXT: addi a4, a5, -1 ; RV32IZHINX-NEXT: .LBB5_6: -; RV32IZHINX-NEXT: and a1, a2, a3 +; RV32IZHINX-NEXT: and a1, a2, a4 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: test_ceil_si64: @@ -948,39 +956,42 @@ define i64 @test_ceil_si64(half %x) nounwind { ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 ; RV32IZFHMIN-NEXT: lui a0, 913408 ; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0 ; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0 +; RV32IZFHMIN-NEXT: neg s1, s0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixsfdi +; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI5_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI5_0)(a2) +; RV32IZFHMIN-NEXT: and a0, s1, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a2, a3 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: feq.s a2, fs0, fs0 +; RV32IZFHMIN-NEXT: neg a2, a2 +; RV32IZFHMIN-NEXT: lui a5, 524288 +; RV32IZFHMIN-NEXT: li a6, 1 ; RV32IZFHMIN-NEXT: lui a4, 524288 -; RV32IZFHMIN-NEXT: lui a2, 524288 -; RV32IZFHMIN-NEXT: beqz s0, .LBB5_4 +; 
RV32IZFHMIN-NEXT: bne s0, a6, .LBB5_4 ; RV32IZFHMIN-NEXT: # %bb.3: -; RV32IZFHMIN-NEXT: mv a2, a1 +; RV32IZFHMIN-NEXT: mv a4, a1 ; RV32IZFHMIN-NEXT: .LBB5_4: -; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI5_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI5_0)(a1) -; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 -; RV32IZFHMIN-NEXT: beqz a3, .LBB5_6 -; RV32IZFHMIN-NEXT: # %bb.5: -; RV32IZFHMIN-NEXT: addi a2, a4, -1 -; RV32IZFHMIN-NEXT: .LBB5_6: -; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0 -; RV32IZFHMIN-NEXT: neg a4, a1 -; RV32IZFHMIN-NEXT: and a1, a4, a2 -; RV32IZFHMIN-NEXT: neg a2, s0 ; RV32IZFHMIN-NEXT: and a0, a2, a0 -; RV32IZFHMIN-NEXT: neg a2, a3 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a0, a4, a0 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: beqz a3, .LBB5_6 +; RV32IZFHMIN-NEXT: # %bb.5: +; RV32IZFHMIN-NEXT: addi a4, a5, -1 +; RV32IZFHMIN-NEXT: .LBB5_6: +; RV32IZFHMIN-NEXT: and a1, a2, a4 ; RV32IZFHMIN-NEXT: ret ; ; RV64IZFHMIN-LABEL: test_ceil_si64: @@ -1032,16 +1043,17 @@ define i64 @test_ceil_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI5_0) ; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI5_0)(a2) ; RV32IZHINXMIN-NEXT: and a0, s2, a0 -; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a4 +; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0 +; RV32IZHINXMIN-NEXT: neg a2, a3 ; RV32IZHINXMIN-NEXT: or a0, a2, a0 ; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0 ; RV32IZHINXMIN-NEXT: neg a2, a2 ; RV32IZHINXMIN-NEXT: lui a5, 524288 -; RV32IZHINXMIN-NEXT: lui a3, 524288 -; RV32IZHINXMIN-NEXT: beqz s1, .LBB5_4 +; RV32IZHINXMIN-NEXT: li a6, 1 +; RV32IZHINXMIN-NEXT: lui a4, 524288 +; RV32IZHINXMIN-NEXT: bne s1, a6, .LBB5_4 ; RV32IZHINXMIN-NEXT: # %bb.3: -; RV32IZHINXMIN-NEXT: mv 
a3, a1 +; RV32IZHINXMIN-NEXT: mv a4, a1 ; RV32IZHINXMIN-NEXT: .LBB5_4: ; RV32IZHINXMIN-NEXT: and a0, a2, a0 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -1049,11 +1061,11 @@ define i64 @test_ceil_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 -; RV32IZHINXMIN-NEXT: beqz a4, .LBB5_6 +; RV32IZHINXMIN-NEXT: beqz a3, .LBB5_6 ; RV32IZHINXMIN-NEXT: # %bb.5: -; RV32IZHINXMIN-NEXT: addi a3, a5, -1 +; RV32IZHINXMIN-NEXT: addi a4, a5, -1 ; RV32IZHINXMIN-NEXT: .LBB5_6: -; RV32IZHINXMIN-NEXT: and a1, a2, a3 +; RV32IZHINXMIN-NEXT: and a1, a2, a4 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: test_ceil_si64: @@ -1125,7 +1137,7 @@ define signext i32 @test_ceil_ui32(half %x) { ; RV64IZHINX-NEXT: fcvt.wu.h a1, a0, rtz ; RV64IZHINX-NEXT: feq.h a0, a0, a0 ; RV64IZHINX-NEXT: seqz a0, a0 -; RV64IZHINX-NEXT: addi a0, a0, -1 +; RV64IZHINX-NEXT: addiw a0, a0, -1 ; RV64IZHINX-NEXT: and a0, a1, a0 ; RV64IZHINX-NEXT: ret ; @@ -1169,7 +1181,7 @@ define signext i32 @test_ceil_ui32(half %x) { ; RV64IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz ; RV64IZFHMIN-NEXT: feq.s a1, fa5, fa5 ; RV64IZFHMIN-NEXT: seqz a1, a1 -; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: addiw a1, a1, -1 ; RV64IZFHMIN-NEXT: and a0, a0, a1 ; RV64IZFHMIN-NEXT: ret ; @@ -1211,7 +1223,7 @@ define signext i32 @test_ceil_ui32(half %x) { ; RV64IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz ; RV64IZHINXMIN-NEXT: feq.s a0, a0, a0 ; RV64IZHINXMIN-NEXT: seqz a0, a0 -; RV64IZHINXMIN-NEXT: addi a0, a0, -1 +; RV64IZHINXMIN-NEXT: addiw a0, a0, -1 ; RV64IZHINXMIN-NEXT: and a0, a1, a0 ; RV64IZHINXMIN-NEXT: ret %a = call half @llvm.ceil.f16(half %x) @@ -1234,25 +1246,24 @@ define i64 @test_ceil_ui64(half %x) nounwind { ; RV32IZFH-NEXT: .LBB7_2: ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; 
RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 -; RV32IZFH-NEXT: fmv.w.x fa5, zero -; RV32IZFH-NEXT: fle.s a0, fa5, fs0 -; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi -; RV32IZFH-NEXT: lui a2, %hi(.LCPI7_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI7_1)(a2) -; RV32IZFH-NEXT: and a0, s0, a0 -; RV32IZFH-NEXT: flt.s a2, fa5, fs0 -; RV32IZFH-NEXT: neg a2, a2 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a1, s0, a1 -; RV32IZFH-NEXT: or a1, a2, a1 +; RV32IZFH-NEXT: fmv.w.x fa5, zero +; RV32IZFH-NEXT: fle.s a2, fa5, fs0 +; RV32IZFH-NEXT: lui a3, %hi(.LCPI7_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI7_1)(a3) +; RV32IZFH-NEXT: xori a2, a2, 1 +; RV32IZFH-NEXT: addi a2, a2, -1 +; RV32IZFH-NEXT: and a0, a2, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a3, a3 +; RV32IZFH-NEXT: or a0, a3, a0 +; RV32IZFH-NEXT: and a1, a2, a1 +; RV32IZFH-NEXT: or a1, a3, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; @@ -1280,23 +1291,22 @@ define i64 @test_ceil_ui64(half %x) nounwind { ; RV32IZHINX-NEXT: addi sp, sp, -16 ; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: fcvt.s.h s0, a0 -; RV32IZHINX-NEXT: fle.s a0, zero, s0 -; RV32IZHINX-NEXT: neg s1, a0 ; RV32IZHINX-NEXT: mv a0, s0 ; RV32IZHINX-NEXT: call __fixunssfdi -; RV32IZHINX-NEXT: lui a2, %hi(.LCPI7_1) -; RV32IZHINX-NEXT: lw a2, %lo(.LCPI7_1)(a2) -; RV32IZHINX-NEXT: and a0, s1, a0 -; RV32IZHINX-NEXT: flt.s a2, a2, s0 -; RV32IZHINX-NEXT: neg a2, a2 -; RV32IZHINX-NEXT: or a0, a2, a0 -; RV32IZHINX-NEXT: and a1, s1, a1 -; 
RV32IZHINX-NEXT: or a1, a2, a1 +; RV32IZHINX-NEXT: fle.s a2, zero, s0 +; RV32IZHINX-NEXT: lui a3, %hi(.LCPI7_1) +; RV32IZHINX-NEXT: lw a3, %lo(.LCPI7_1)(a3) +; RV32IZHINX-NEXT: xori a2, a2, 1 +; RV32IZHINX-NEXT: addi a2, a2, -1 +; RV32IZHINX-NEXT: and a0, a2, a0 +; RV32IZHINX-NEXT: flt.s a3, a3, s0 +; RV32IZHINX-NEXT: neg a3, a3 +; RV32IZHINX-NEXT: or a0, a3, a0 +; RV32IZHINX-NEXT: and a1, a2, a1 +; RV32IZHINX-NEXT: or a1, a3, a1 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 ; RV32IZHINX-NEXT: ret ; @@ -1334,26 +1344,25 @@ define i64 @test_ceil_ui64(half %x) nounwind { ; RV32IZFHMIN-NEXT: .LBB7_2: ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 -; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero -; RV32IZFHMIN-NEXT: fle.s a0, fa5, fs0 -; RV32IZFHMIN-NEXT: neg s0, a0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixunssfdi -; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI7_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI7_0)(a2) -; RV32IZFHMIN-NEXT: and a0, s0, a0 -; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0 -; RV32IZFHMIN-NEXT: neg a2, a2 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a1, s0, a1 -; RV32IZFHMIN-NEXT: or a1, a2, a1 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero +; RV32IZFHMIN-NEXT: fle.s a2, fa5, fs0 +; RV32IZFHMIN-NEXT: lui a3, %hi(.LCPI7_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI7_0)(a3) +; RV32IZFHMIN-NEXT: xori a2, a2, 1 +; RV32IZFHMIN-NEXT: addi a2, a2, -1 +; RV32IZFHMIN-NEXT: and a0, a2, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a3, a3 +; RV32IZFHMIN-NEXT: or a0, a3, a0 +; RV32IZFHMIN-NEXT: 
and a1, a2, a1 +; RV32IZFHMIN-NEXT: or a1, a3, a1 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 ; RV32IZFHMIN-NEXT: ret ; @@ -1394,24 +1403,23 @@ define i64 @test_ceil_ui64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: addi sp, sp, -16 ; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0 ; RV32IZHINXMIN-NEXT: fcvt.s.h s0, a0 -; RV32IZHINXMIN-NEXT: fle.s a0, zero, s0 -; RV32IZHINXMIN-NEXT: neg s1, a0 ; RV32IZHINXMIN-NEXT: mv a0, s0 ; RV32IZHINXMIN-NEXT: call __fixunssfdi -; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI7_0) -; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI7_0)(a2) -; RV32IZHINXMIN-NEXT: and a0, s1, a0 -; RV32IZHINXMIN-NEXT: flt.s a2, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a2 -; RV32IZHINXMIN-NEXT: or a0, a2, a0 -; RV32IZHINXMIN-NEXT: and a1, s1, a1 -; RV32IZHINXMIN-NEXT: or a1, a2, a1 +; RV32IZHINXMIN-NEXT: fle.s a2, zero, s0 +; RV32IZHINXMIN-NEXT: lui a3, %hi(.LCPI7_0) +; RV32IZHINXMIN-NEXT: lw a3, %lo(.LCPI7_0)(a3) +; RV32IZHINXMIN-NEXT: xori a2, a2, 1 +; RV32IZHINXMIN-NEXT: addi a2, a2, -1 +; RV32IZHINXMIN-NEXT: and a0, a2, a0 +; RV32IZHINXMIN-NEXT: flt.s a3, a3, s0 +; RV32IZHINXMIN-NEXT: neg a3, a3 +; RV32IZHINXMIN-NEXT: or a0, a3, a0 +; RV32IZHINXMIN-NEXT: and a1, a2, a1 +; RV32IZHINXMIN-NEXT: or a1, a3, a1 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 ; RV32IZHINXMIN-NEXT: ret ; @@ -1532,38 +1540,41 @@ define i64 @test_trunc_si64(half %x) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded 
Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: lui a0, 913408 ; RV32IZFH-NEXT: fmv.w.x fa5, a0 ; RV32IZFH-NEXT: fle.s s0, fa5, fs0 +; RV32IZFH-NEXT: neg s1, s0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi +; RV32IZFH-NEXT: lui a2, %hi(.LCPI9_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI9_1)(a2) +; RV32IZFH-NEXT: and a0, s1, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a2, a3 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: lui a5, 524288 +; RV32IZFH-NEXT: li a6, 1 ; RV32IZFH-NEXT: lui a4, 524288 -; RV32IZFH-NEXT: lui a2, 524288 -; RV32IZFH-NEXT: beqz s0, .LBB9_4 +; RV32IZFH-NEXT: bne s0, a6, .LBB9_4 ; RV32IZFH-NEXT: # %bb.3: -; RV32IZFH-NEXT: mv a2, a1 +; RV32IZFH-NEXT: mv a4, a1 ; RV32IZFH-NEXT: .LBB9_4: -; RV32IZFH-NEXT: lui a1, %hi(.LCPI9_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI9_1)(a1) -; RV32IZFH-NEXT: flt.s a3, fa5, fs0 -; RV32IZFH-NEXT: beqz a3, .LBB9_6 -; RV32IZFH-NEXT: # %bb.5: -; RV32IZFH-NEXT: addi a2, a4, -1 -; RV32IZFH-NEXT: .LBB9_6: -; RV32IZFH-NEXT: feq.s a1, fs0, fs0 -; RV32IZFH-NEXT: neg a4, a1 -; RV32IZFH-NEXT: and a1, a4, a2 -; RV32IZFH-NEXT: neg a2, s0 ; RV32IZFH-NEXT: and a0, a2, a0 -; RV32IZFH-NEXT: neg a2, a3 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: beqz a3, .LBB9_6 +; RV32IZFH-NEXT: # %bb.5: +; RV32IZFH-NEXT: addi a4, a5, -1 +; RV32IZFH-NEXT: .LBB9_6: +; RV32IZFH-NEXT: and a1, a2, a4 
; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: test_trunc_si64: @@ -1601,16 +1612,17 @@ define i64 @test_trunc_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lui a2, %hi(.LCPI9_1) ; RV32IZHINX-NEXT: lw a2, %lo(.LCPI9_1)(a2) ; RV32IZHINX-NEXT: and a0, s2, a0 -; RV32IZHINX-NEXT: flt.s a4, a2, s0 -; RV32IZHINX-NEXT: neg a2, a4 +; RV32IZHINX-NEXT: flt.s a3, a2, s0 +; RV32IZHINX-NEXT: neg a2, a3 ; RV32IZHINX-NEXT: or a0, a2, a0 ; RV32IZHINX-NEXT: feq.s a2, s0, s0 ; RV32IZHINX-NEXT: neg a2, a2 ; RV32IZHINX-NEXT: lui a5, 524288 -; RV32IZHINX-NEXT: lui a3, 524288 -; RV32IZHINX-NEXT: beqz s1, .LBB9_4 +; RV32IZHINX-NEXT: li a6, 1 +; RV32IZHINX-NEXT: lui a4, 524288 +; RV32IZHINX-NEXT: bne s1, a6, .LBB9_4 ; RV32IZHINX-NEXT: # %bb.3: -; RV32IZHINX-NEXT: mv a3, a1 +; RV32IZHINX-NEXT: mv a4, a1 ; RV32IZHINX-NEXT: .LBB9_4: ; RV32IZHINX-NEXT: and a0, a2, a0 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -1618,11 +1630,11 @@ define i64 @test_trunc_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 -; RV32IZHINX-NEXT: beqz a4, .LBB9_6 +; RV32IZHINX-NEXT: beqz a3, .LBB9_6 ; RV32IZHINX-NEXT: # %bb.5: -; RV32IZHINX-NEXT: addi a3, a5, -1 +; RV32IZHINX-NEXT: addi a4, a5, -1 ; RV32IZHINX-NEXT: .LBB9_6: -; RV32IZHINX-NEXT: and a1, a2, a3 +; RV32IZHINX-NEXT: and a1, a2, a4 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: test_trunc_si64: @@ -1660,39 +1672,42 @@ define i64 @test_trunc_si64(half %x) nounwind { ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 ; RV32IZFHMIN-NEXT: lui a0, 913408 ; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0 ; 
RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0 +; RV32IZFHMIN-NEXT: neg s1, s0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixsfdi +; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI9_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI9_0)(a2) +; RV32IZFHMIN-NEXT: and a0, s1, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a2, a3 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: feq.s a2, fs0, fs0 +; RV32IZFHMIN-NEXT: neg a2, a2 +; RV32IZFHMIN-NEXT: lui a5, 524288 +; RV32IZFHMIN-NEXT: li a6, 1 ; RV32IZFHMIN-NEXT: lui a4, 524288 -; RV32IZFHMIN-NEXT: lui a2, 524288 -; RV32IZFHMIN-NEXT: beqz s0, .LBB9_4 +; RV32IZFHMIN-NEXT: bne s0, a6, .LBB9_4 ; RV32IZFHMIN-NEXT: # %bb.3: -; RV32IZFHMIN-NEXT: mv a2, a1 +; RV32IZFHMIN-NEXT: mv a4, a1 ; RV32IZFHMIN-NEXT: .LBB9_4: -; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI9_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI9_0)(a1) -; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 -; RV32IZFHMIN-NEXT: beqz a3, .LBB9_6 -; RV32IZFHMIN-NEXT: # %bb.5: -; RV32IZFHMIN-NEXT: addi a2, a4, -1 -; RV32IZFHMIN-NEXT: .LBB9_6: -; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0 -; RV32IZFHMIN-NEXT: neg a4, a1 -; RV32IZFHMIN-NEXT: and a1, a4, a2 -; RV32IZFHMIN-NEXT: neg a2, s0 ; RV32IZFHMIN-NEXT: and a0, a2, a0 -; RV32IZFHMIN-NEXT: neg a2, a3 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a0, a4, a0 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: beqz a3, .LBB9_6 +; RV32IZFHMIN-NEXT: # %bb.5: +; RV32IZFHMIN-NEXT: addi a4, a5, -1 +; RV32IZFHMIN-NEXT: .LBB9_6: +; RV32IZFHMIN-NEXT: and a1, a2, a4 ; RV32IZFHMIN-NEXT: ret ; ; RV64IZFHMIN-LABEL: test_trunc_si64: @@ -1744,16 +1759,17 @@ define i64 @test_trunc_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI9_0) 
; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI9_0)(a2) ; RV32IZHINXMIN-NEXT: and a0, s2, a0 -; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a4 +; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0 +; RV32IZHINXMIN-NEXT: neg a2, a3 ; RV32IZHINXMIN-NEXT: or a0, a2, a0 ; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0 ; RV32IZHINXMIN-NEXT: neg a2, a2 ; RV32IZHINXMIN-NEXT: lui a5, 524288 -; RV32IZHINXMIN-NEXT: lui a3, 524288 -; RV32IZHINXMIN-NEXT: beqz s1, .LBB9_4 +; RV32IZHINXMIN-NEXT: li a6, 1 +; RV32IZHINXMIN-NEXT: lui a4, 524288 +; RV32IZHINXMIN-NEXT: bne s1, a6, .LBB9_4 ; RV32IZHINXMIN-NEXT: # %bb.3: -; RV32IZHINXMIN-NEXT: mv a3, a1 +; RV32IZHINXMIN-NEXT: mv a4, a1 ; RV32IZHINXMIN-NEXT: .LBB9_4: ; RV32IZHINXMIN-NEXT: and a0, a2, a0 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -1761,11 +1777,11 @@ define i64 @test_trunc_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 -; RV32IZHINXMIN-NEXT: beqz a4, .LBB9_6 +; RV32IZHINXMIN-NEXT: beqz a3, .LBB9_6 ; RV32IZHINXMIN-NEXT: # %bb.5: -; RV32IZHINXMIN-NEXT: addi a3, a5, -1 +; RV32IZHINXMIN-NEXT: addi a4, a5, -1 ; RV32IZHINXMIN-NEXT: .LBB9_6: -; RV32IZHINXMIN-NEXT: and a1, a2, a3 +; RV32IZHINXMIN-NEXT: and a1, a2, a4 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: test_trunc_si64: @@ -1837,7 +1853,7 @@ define signext i32 @test_trunc_ui32(half %x) { ; RV64IZHINX-NEXT: fcvt.wu.h a1, a0, rtz ; RV64IZHINX-NEXT: feq.h a0, a0, a0 ; RV64IZHINX-NEXT: seqz a0, a0 -; RV64IZHINX-NEXT: addi a0, a0, -1 +; RV64IZHINX-NEXT: addiw a0, a0, -1 ; RV64IZHINX-NEXT: and a0, a1, a0 ; RV64IZHINX-NEXT: ret ; @@ -1881,7 +1897,7 @@ define signext i32 @test_trunc_ui32(half %x) { ; RV64IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz ; RV64IZFHMIN-NEXT: feq.s a1, fa5, fa5 ; RV64IZFHMIN-NEXT: seqz a1, a1 -; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: addiw a1, a1, -1 ; RV64IZFHMIN-NEXT: and a0, a0, a1 ; 
RV64IZFHMIN-NEXT: ret ; @@ -1923,7 +1939,7 @@ define signext i32 @test_trunc_ui32(half %x) { ; RV64IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz ; RV64IZHINXMIN-NEXT: feq.s a0, a0, a0 ; RV64IZHINXMIN-NEXT: seqz a0, a0 -; RV64IZHINXMIN-NEXT: addi a0, a0, -1 +; RV64IZHINXMIN-NEXT: addiw a0, a0, -1 ; RV64IZHINXMIN-NEXT: and a0, a1, a0 ; RV64IZHINXMIN-NEXT: ret %a = call half @llvm.trunc.f16(half %x) @@ -1946,25 +1962,24 @@ define i64 @test_trunc_ui64(half %x) nounwind { ; RV32IZFH-NEXT: .LBB11_2: ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 -; RV32IZFH-NEXT: fmv.w.x fa5, zero -; RV32IZFH-NEXT: fle.s a0, fa5, fs0 -; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi -; RV32IZFH-NEXT: lui a2, %hi(.LCPI11_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI11_1)(a2) -; RV32IZFH-NEXT: and a0, s0, a0 -; RV32IZFH-NEXT: flt.s a2, fa5, fs0 -; RV32IZFH-NEXT: neg a2, a2 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a1, s0, a1 -; RV32IZFH-NEXT: or a1, a2, a1 +; RV32IZFH-NEXT: fmv.w.x fa5, zero +; RV32IZFH-NEXT: fle.s a2, fa5, fs0 +; RV32IZFH-NEXT: lui a3, %hi(.LCPI11_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI11_1)(a3) +; RV32IZFH-NEXT: xori a2, a2, 1 +; RV32IZFH-NEXT: addi a2, a2, -1 +; RV32IZFH-NEXT: and a0, a2, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a3, a3 +; RV32IZFH-NEXT: or a0, a3, a0 +; RV32IZFH-NEXT: and a1, a2, a1 +; RV32IZFH-NEXT: or a1, a3, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; @@ -1992,23 +2007,22 @@ define i64 @test_trunc_ui64(half %x) nounwind { 
; RV32IZHINX-NEXT: addi sp, sp, -16 ; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: fcvt.s.h s0, a0 -; RV32IZHINX-NEXT: fle.s a0, zero, s0 -; RV32IZHINX-NEXT: neg s1, a0 ; RV32IZHINX-NEXT: mv a0, s0 ; RV32IZHINX-NEXT: call __fixunssfdi -; RV32IZHINX-NEXT: lui a2, %hi(.LCPI11_1) -; RV32IZHINX-NEXT: lw a2, %lo(.LCPI11_1)(a2) -; RV32IZHINX-NEXT: and a0, s1, a0 -; RV32IZHINX-NEXT: flt.s a2, a2, s0 -; RV32IZHINX-NEXT: neg a2, a2 -; RV32IZHINX-NEXT: or a0, a2, a0 -; RV32IZHINX-NEXT: and a1, s1, a1 -; RV32IZHINX-NEXT: or a1, a2, a1 +; RV32IZHINX-NEXT: fle.s a2, zero, s0 +; RV32IZHINX-NEXT: lui a3, %hi(.LCPI11_1) +; RV32IZHINX-NEXT: lw a3, %lo(.LCPI11_1)(a3) +; RV32IZHINX-NEXT: xori a2, a2, 1 +; RV32IZHINX-NEXT: addi a2, a2, -1 +; RV32IZHINX-NEXT: and a0, a2, a0 +; RV32IZHINX-NEXT: flt.s a3, a3, s0 +; RV32IZHINX-NEXT: neg a3, a3 +; RV32IZHINX-NEXT: or a0, a3, a0 +; RV32IZHINX-NEXT: and a1, a2, a1 +; RV32IZHINX-NEXT: or a1, a3, a1 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 ; RV32IZHINX-NEXT: ret ; @@ -2046,26 +2060,25 @@ define i64 @test_trunc_ui64(half %x) nounwind { ; RV32IZFHMIN-NEXT: .LBB11_2: ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 -; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero -; RV32IZFHMIN-NEXT: fle.s a0, fa5, fs0 -; RV32IZFHMIN-NEXT: neg s0, a0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixunssfdi -; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI11_0) -; RV32IZFHMIN-NEXT: flw fa5, 
%lo(.LCPI11_0)(a2) -; RV32IZFHMIN-NEXT: and a0, s0, a0 -; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0 -; RV32IZFHMIN-NEXT: neg a2, a2 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a1, s0, a1 -; RV32IZFHMIN-NEXT: or a1, a2, a1 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero +; RV32IZFHMIN-NEXT: fle.s a2, fa5, fs0 +; RV32IZFHMIN-NEXT: lui a3, %hi(.LCPI11_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI11_0)(a3) +; RV32IZFHMIN-NEXT: xori a2, a2, 1 +; RV32IZFHMIN-NEXT: addi a2, a2, -1 +; RV32IZFHMIN-NEXT: and a0, a2, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a3, a3 +; RV32IZFHMIN-NEXT: or a0, a3, a0 +; RV32IZFHMIN-NEXT: and a1, a2, a1 +; RV32IZFHMIN-NEXT: or a1, a3, a1 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 ; RV32IZFHMIN-NEXT: ret ; @@ -2106,24 +2119,23 @@ define i64 @test_trunc_ui64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: addi sp, sp, -16 ; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0 ; RV32IZHINXMIN-NEXT: fcvt.s.h s0, a0 -; RV32IZHINXMIN-NEXT: fle.s a0, zero, s0 -; RV32IZHINXMIN-NEXT: neg s1, a0 ; RV32IZHINXMIN-NEXT: mv a0, s0 ; RV32IZHINXMIN-NEXT: call __fixunssfdi -; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI11_0) -; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI11_0)(a2) -; RV32IZHINXMIN-NEXT: and a0, s1, a0 -; RV32IZHINXMIN-NEXT: flt.s a2, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a2 -; RV32IZHINXMIN-NEXT: or a0, a2, a0 -; RV32IZHINXMIN-NEXT: and a1, s1, a1 -; RV32IZHINXMIN-NEXT: or a1, a2, a1 +; RV32IZHINXMIN-NEXT: fle.s a2, zero, s0 +; RV32IZHINXMIN-NEXT: lui a3, %hi(.LCPI11_0) +; RV32IZHINXMIN-NEXT: lw a3, %lo(.LCPI11_0)(a3) +; RV32IZHINXMIN-NEXT: xori a2, a2, 1 +; 
RV32IZHINXMIN-NEXT: addi a2, a2, -1 +; RV32IZHINXMIN-NEXT: and a0, a2, a0 +; RV32IZHINXMIN-NEXT: flt.s a3, a3, s0 +; RV32IZHINXMIN-NEXT: neg a3, a3 +; RV32IZHINXMIN-NEXT: or a0, a3, a0 +; RV32IZHINXMIN-NEXT: and a1, a2, a1 +; RV32IZHINXMIN-NEXT: or a1, a3, a1 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 ; RV32IZHINXMIN-NEXT: ret ; @@ -2244,38 +2256,41 @@ define i64 @test_round_si64(half %x) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: lui a0, 913408 ; RV32IZFH-NEXT: fmv.w.x fa5, a0 ; RV32IZFH-NEXT: fle.s s0, fa5, fs0 +; RV32IZFH-NEXT: neg s1, s0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi +; RV32IZFH-NEXT: lui a2, %hi(.LCPI13_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI13_1)(a2) +; RV32IZFH-NEXT: and a0, s1, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a2, a3 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: lui a5, 524288 +; RV32IZFH-NEXT: li a6, 1 ; RV32IZFH-NEXT: lui a4, 524288 -; RV32IZFH-NEXT: lui a2, 524288 -; RV32IZFH-NEXT: beqz s0, .LBB13_4 +; RV32IZFH-NEXT: bne s0, a6, .LBB13_4 ; RV32IZFH-NEXT: # %bb.3: -; RV32IZFH-NEXT: mv a2, a1 +; RV32IZFH-NEXT: mv a4, a1 ; RV32IZFH-NEXT: .LBB13_4: -; RV32IZFH-NEXT: lui a1, %hi(.LCPI13_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI13_1)(a1) -; RV32IZFH-NEXT: flt.s a3, fa5, fs0 -; RV32IZFH-NEXT: beqz a3, .LBB13_6 -; RV32IZFH-NEXT: # %bb.5: -; RV32IZFH-NEXT: addi a2, a4, -1 -; RV32IZFH-NEXT: .LBB13_6: -; RV32IZFH-NEXT: feq.s a1, fs0, fs0 -; 
RV32IZFH-NEXT: neg a4, a1 -; RV32IZFH-NEXT: and a1, a4, a2 -; RV32IZFH-NEXT: neg a2, s0 ; RV32IZFH-NEXT: and a0, a2, a0 -; RV32IZFH-NEXT: neg a2, a3 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: beqz a3, .LBB13_6 +; RV32IZFH-NEXT: # %bb.5: +; RV32IZFH-NEXT: addi a4, a5, -1 +; RV32IZFH-NEXT: .LBB13_6: +; RV32IZFH-NEXT: and a1, a2, a4 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: test_round_si64: @@ -2313,16 +2328,17 @@ define i64 @test_round_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lui a2, %hi(.LCPI13_1) ; RV32IZHINX-NEXT: lw a2, %lo(.LCPI13_1)(a2) ; RV32IZHINX-NEXT: and a0, s2, a0 -; RV32IZHINX-NEXT: flt.s a4, a2, s0 -; RV32IZHINX-NEXT: neg a2, a4 +; RV32IZHINX-NEXT: flt.s a3, a2, s0 +; RV32IZHINX-NEXT: neg a2, a3 ; RV32IZHINX-NEXT: or a0, a2, a0 ; RV32IZHINX-NEXT: feq.s a2, s0, s0 ; RV32IZHINX-NEXT: neg a2, a2 ; RV32IZHINX-NEXT: lui a5, 524288 -; RV32IZHINX-NEXT: lui a3, 524288 -; RV32IZHINX-NEXT: beqz s1, .LBB13_4 +; RV32IZHINX-NEXT: li a6, 1 +; RV32IZHINX-NEXT: lui a4, 524288 +; RV32IZHINX-NEXT: bne s1, a6, .LBB13_4 ; RV32IZHINX-NEXT: # %bb.3: -; RV32IZHINX-NEXT: mv a3, a1 +; RV32IZHINX-NEXT: mv a4, a1 ; RV32IZHINX-NEXT: .LBB13_4: ; RV32IZHINX-NEXT: and a0, a2, a0 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -2330,11 +2346,11 @@ define i64 @test_round_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 -; RV32IZHINX-NEXT: beqz a4, .LBB13_6 +; RV32IZHINX-NEXT: beqz a3, .LBB13_6 ; RV32IZHINX-NEXT: # %bb.5: -; RV32IZHINX-NEXT: addi a3, a5, -1 +; RV32IZHINX-NEXT: addi a4, a5, -1 ; RV32IZHINX-NEXT: 
.LBB13_6: -; RV32IZHINX-NEXT: and a1, a2, a3 +; RV32IZHINX-NEXT: and a1, a2, a4 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: test_round_si64: @@ -2372,39 +2388,42 @@ define i64 @test_round_si64(half %x) nounwind { ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 ; RV32IZFHMIN-NEXT: lui a0, 913408 ; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0 ; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0 +; RV32IZFHMIN-NEXT: neg s1, s0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixsfdi +; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI13_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI13_0)(a2) +; RV32IZFHMIN-NEXT: and a0, s1, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a2, a3 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: feq.s a2, fs0, fs0 +; RV32IZFHMIN-NEXT: neg a2, a2 +; RV32IZFHMIN-NEXT: lui a5, 524288 +; RV32IZFHMIN-NEXT: li a6, 1 ; RV32IZFHMIN-NEXT: lui a4, 524288 -; RV32IZFHMIN-NEXT: lui a2, 524288 -; RV32IZFHMIN-NEXT: beqz s0, .LBB13_4 +; RV32IZFHMIN-NEXT: bne s0, a6, .LBB13_4 ; RV32IZFHMIN-NEXT: # %bb.3: -; RV32IZFHMIN-NEXT: mv a2, a1 +; RV32IZFHMIN-NEXT: mv a4, a1 ; RV32IZFHMIN-NEXT: .LBB13_4: -; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI13_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI13_0)(a1) -; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 -; RV32IZFHMIN-NEXT: beqz a3, .LBB13_6 -; RV32IZFHMIN-NEXT: # %bb.5: -; RV32IZFHMIN-NEXT: addi a2, a4, -1 -; RV32IZFHMIN-NEXT: .LBB13_6: -; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0 -; RV32IZFHMIN-NEXT: neg a4, a1 -; RV32IZFHMIN-NEXT: and a1, a4, a2 -; RV32IZFHMIN-NEXT: neg a2, s0 ; RV32IZFHMIN-NEXT: and a0, a2, a0 -; RV32IZFHMIN-NEXT: neg a2, a3 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; 
RV32IZFHMIN-NEXT: and a0, a4, a0 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: beqz a3, .LBB13_6 +; RV32IZFHMIN-NEXT: # %bb.5: +; RV32IZFHMIN-NEXT: addi a4, a5, -1 +; RV32IZFHMIN-NEXT: .LBB13_6: +; RV32IZFHMIN-NEXT: and a1, a2, a4 ; RV32IZFHMIN-NEXT: ret ; ; RV64IZFHMIN-LABEL: test_round_si64: @@ -2456,16 +2475,17 @@ define i64 @test_round_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI13_0) ; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI13_0)(a2) ; RV32IZHINXMIN-NEXT: and a0, s2, a0 -; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a4 +; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0 +; RV32IZHINXMIN-NEXT: neg a2, a3 ; RV32IZHINXMIN-NEXT: or a0, a2, a0 ; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0 ; RV32IZHINXMIN-NEXT: neg a2, a2 ; RV32IZHINXMIN-NEXT: lui a5, 524288 -; RV32IZHINXMIN-NEXT: lui a3, 524288 -; RV32IZHINXMIN-NEXT: beqz s1, .LBB13_4 +; RV32IZHINXMIN-NEXT: li a6, 1 +; RV32IZHINXMIN-NEXT: lui a4, 524288 +; RV32IZHINXMIN-NEXT: bne s1, a6, .LBB13_4 ; RV32IZHINXMIN-NEXT: # %bb.3: -; RV32IZHINXMIN-NEXT: mv a3, a1 +; RV32IZHINXMIN-NEXT: mv a4, a1 ; RV32IZHINXMIN-NEXT: .LBB13_4: ; RV32IZHINXMIN-NEXT: and a0, a2, a0 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -2473,11 +2493,11 @@ define i64 @test_round_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 -; RV32IZHINXMIN-NEXT: beqz a4, .LBB13_6 +; RV32IZHINXMIN-NEXT: beqz a3, .LBB13_6 ; RV32IZHINXMIN-NEXT: # %bb.5: -; RV32IZHINXMIN-NEXT: addi a3, a5, -1 +; RV32IZHINXMIN-NEXT: addi a4, a5, -1 ; RV32IZHINXMIN-NEXT: .LBB13_6: -; RV32IZHINXMIN-NEXT: and a1, a2, a3 +; 
RV32IZHINXMIN-NEXT: and a1, a2, a4 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: test_round_si64: @@ -2549,7 +2569,7 @@ define signext i32 @test_round_ui32(half %x) { ; RV64IZHINX-NEXT: fcvt.wu.h a1, a0, rtz ; RV64IZHINX-NEXT: feq.h a0, a0, a0 ; RV64IZHINX-NEXT: seqz a0, a0 -; RV64IZHINX-NEXT: addi a0, a0, -1 +; RV64IZHINX-NEXT: addiw a0, a0, -1 ; RV64IZHINX-NEXT: and a0, a1, a0 ; RV64IZHINX-NEXT: ret ; @@ -2593,7 +2613,7 @@ define signext i32 @test_round_ui32(half %x) { ; RV64IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz ; RV64IZFHMIN-NEXT: feq.s a1, fa5, fa5 ; RV64IZFHMIN-NEXT: seqz a1, a1 -; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: addiw a1, a1, -1 ; RV64IZFHMIN-NEXT: and a0, a0, a1 ; RV64IZFHMIN-NEXT: ret ; @@ -2635,7 +2655,7 @@ define signext i32 @test_round_ui32(half %x) { ; RV64IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz ; RV64IZHINXMIN-NEXT: feq.s a0, a0, a0 ; RV64IZHINXMIN-NEXT: seqz a0, a0 -; RV64IZHINXMIN-NEXT: addi a0, a0, -1 +; RV64IZHINXMIN-NEXT: addiw a0, a0, -1 ; RV64IZHINXMIN-NEXT: and a0, a1, a0 ; RV64IZHINXMIN-NEXT: ret %a = call half @llvm.round.f16(half %x) @@ -2658,25 +2678,24 @@ define i64 @test_round_ui64(half %x) nounwind { ; RV32IZFH-NEXT: .LBB15_2: ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 -; RV32IZFH-NEXT: fmv.w.x fa5, zero -; RV32IZFH-NEXT: fle.s a0, fa5, fs0 -; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi -; RV32IZFH-NEXT: lui a2, %hi(.LCPI15_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI15_1)(a2) -; RV32IZFH-NEXT: and a0, s0, a0 -; RV32IZFH-NEXT: flt.s a2, fa5, fs0 -; RV32IZFH-NEXT: neg a2, a2 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a1, s0, a1 -; RV32IZFH-NEXT: or a1, a2, a1 +; RV32IZFH-NEXT: fmv.w.x fa5, zero +; RV32IZFH-NEXT: fle.s a2, 
fa5, fs0 +; RV32IZFH-NEXT: lui a3, %hi(.LCPI15_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI15_1)(a3) +; RV32IZFH-NEXT: xori a2, a2, 1 +; RV32IZFH-NEXT: addi a2, a2, -1 +; RV32IZFH-NEXT: and a0, a2, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a3, a3 +; RV32IZFH-NEXT: or a0, a3, a0 +; RV32IZFH-NEXT: and a1, a2, a1 +; RV32IZFH-NEXT: or a1, a3, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; @@ -2704,23 +2723,22 @@ define i64 @test_round_ui64(half %x) nounwind { ; RV32IZHINX-NEXT: addi sp, sp, -16 ; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: fcvt.s.h s0, a0 -; RV32IZHINX-NEXT: fle.s a0, zero, s0 -; RV32IZHINX-NEXT: neg s1, a0 ; RV32IZHINX-NEXT: mv a0, s0 ; RV32IZHINX-NEXT: call __fixunssfdi -; RV32IZHINX-NEXT: lui a2, %hi(.LCPI15_1) -; RV32IZHINX-NEXT: lw a2, %lo(.LCPI15_1)(a2) -; RV32IZHINX-NEXT: and a0, s1, a0 -; RV32IZHINX-NEXT: flt.s a2, a2, s0 -; RV32IZHINX-NEXT: neg a2, a2 -; RV32IZHINX-NEXT: or a0, a2, a0 -; RV32IZHINX-NEXT: and a1, s1, a1 -; RV32IZHINX-NEXT: or a1, a2, a1 +; RV32IZHINX-NEXT: fle.s a2, zero, s0 +; RV32IZHINX-NEXT: lui a3, %hi(.LCPI15_1) +; RV32IZHINX-NEXT: lw a3, %lo(.LCPI15_1)(a3) +; RV32IZHINX-NEXT: xori a2, a2, 1 +; RV32IZHINX-NEXT: addi a2, a2, -1 +; RV32IZHINX-NEXT: and a0, a2, a0 +; RV32IZHINX-NEXT: flt.s a3, a3, s0 +; RV32IZHINX-NEXT: neg a3, a3 +; RV32IZHINX-NEXT: or a0, a3, a0 +; RV32IZHINX-NEXT: and a1, a2, a1 +; RV32IZHINX-NEXT: or a1, a3, a1 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 ; RV32IZHINX-NEXT: 
ret ; @@ -2758,26 +2776,25 @@ define i64 @test_round_ui64(half %x) nounwind { ; RV32IZFHMIN-NEXT: .LBB15_2: ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 -; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero -; RV32IZFHMIN-NEXT: fle.s a0, fa5, fs0 -; RV32IZFHMIN-NEXT: neg s0, a0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixunssfdi -; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI15_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI15_0)(a2) -; RV32IZFHMIN-NEXT: and a0, s0, a0 -; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0 -; RV32IZFHMIN-NEXT: neg a2, a2 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a1, s0, a1 -; RV32IZFHMIN-NEXT: or a1, a2, a1 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero +; RV32IZFHMIN-NEXT: fle.s a2, fa5, fs0 +; RV32IZFHMIN-NEXT: lui a3, %hi(.LCPI15_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI15_0)(a3) +; RV32IZFHMIN-NEXT: xori a2, a2, 1 +; RV32IZFHMIN-NEXT: addi a2, a2, -1 +; RV32IZFHMIN-NEXT: and a0, a2, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a3, a3 +; RV32IZFHMIN-NEXT: or a0, a3, a0 +; RV32IZFHMIN-NEXT: and a1, a2, a1 +; RV32IZFHMIN-NEXT: or a1, a3, a1 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 ; RV32IZFHMIN-NEXT: ret ; @@ -2818,24 +2835,23 @@ define i64 @test_round_ui64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: addi sp, sp, -16 ; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: 
fcvt.h.s a0, a0 ; RV32IZHINXMIN-NEXT: fcvt.s.h s0, a0 -; RV32IZHINXMIN-NEXT: fle.s a0, zero, s0 -; RV32IZHINXMIN-NEXT: neg s1, a0 ; RV32IZHINXMIN-NEXT: mv a0, s0 ; RV32IZHINXMIN-NEXT: call __fixunssfdi -; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI15_0) -; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI15_0)(a2) -; RV32IZHINXMIN-NEXT: and a0, s1, a0 -; RV32IZHINXMIN-NEXT: flt.s a2, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a2 -; RV32IZHINXMIN-NEXT: or a0, a2, a0 -; RV32IZHINXMIN-NEXT: and a1, s1, a1 -; RV32IZHINXMIN-NEXT: or a1, a2, a1 +; RV32IZHINXMIN-NEXT: fle.s a2, zero, s0 +; RV32IZHINXMIN-NEXT: lui a3, %hi(.LCPI15_0) +; RV32IZHINXMIN-NEXT: lw a3, %lo(.LCPI15_0)(a3) +; RV32IZHINXMIN-NEXT: xori a2, a2, 1 +; RV32IZHINXMIN-NEXT: addi a2, a2, -1 +; RV32IZHINXMIN-NEXT: and a0, a2, a0 +; RV32IZHINXMIN-NEXT: flt.s a3, a3, s0 +; RV32IZHINXMIN-NEXT: neg a3, a3 +; RV32IZHINXMIN-NEXT: or a0, a3, a0 +; RV32IZHINXMIN-NEXT: and a1, a2, a1 +; RV32IZHINXMIN-NEXT: or a1, a3, a1 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 ; RV32IZHINXMIN-NEXT: ret ; @@ -2956,38 +2972,41 @@ define i64 @test_roundeven_si64(half %x) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: lui a0, 913408 ; RV32IZFH-NEXT: fmv.w.x fa5, a0 ; RV32IZFH-NEXT: fle.s s0, fa5, fs0 +; RV32IZFH-NEXT: neg s1, s0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi +; RV32IZFH-NEXT: lui a2, %hi(.LCPI17_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI17_1)(a2) +; RV32IZFH-NEXT: and a0, s1, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a2, a3 +; 
RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: lui a5, 524288 +; RV32IZFH-NEXT: li a6, 1 ; RV32IZFH-NEXT: lui a4, 524288 -; RV32IZFH-NEXT: lui a2, 524288 -; RV32IZFH-NEXT: beqz s0, .LBB17_4 +; RV32IZFH-NEXT: bne s0, a6, .LBB17_4 ; RV32IZFH-NEXT: # %bb.3: -; RV32IZFH-NEXT: mv a2, a1 +; RV32IZFH-NEXT: mv a4, a1 ; RV32IZFH-NEXT: .LBB17_4: -; RV32IZFH-NEXT: lui a1, %hi(.LCPI17_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI17_1)(a1) -; RV32IZFH-NEXT: flt.s a3, fa5, fs0 -; RV32IZFH-NEXT: beqz a3, .LBB17_6 -; RV32IZFH-NEXT: # %bb.5: -; RV32IZFH-NEXT: addi a2, a4, -1 -; RV32IZFH-NEXT: .LBB17_6: -; RV32IZFH-NEXT: feq.s a1, fs0, fs0 -; RV32IZFH-NEXT: neg a4, a1 -; RV32IZFH-NEXT: and a1, a4, a2 -; RV32IZFH-NEXT: neg a2, s0 ; RV32IZFH-NEXT: and a0, a2, a0 -; RV32IZFH-NEXT: neg a2, a3 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: beqz a3, .LBB17_6 +; RV32IZFH-NEXT: # %bb.5: +; RV32IZFH-NEXT: addi a4, a5, -1 +; RV32IZFH-NEXT: .LBB17_6: +; RV32IZFH-NEXT: and a1, a2, a4 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: test_roundeven_si64: @@ -3025,16 +3044,17 @@ define i64 @test_roundeven_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lui a2, %hi(.LCPI17_1) ; RV32IZHINX-NEXT: lw a2, %lo(.LCPI17_1)(a2) ; RV32IZHINX-NEXT: and a0, s2, a0 -; RV32IZHINX-NEXT: flt.s a4, a2, s0 -; RV32IZHINX-NEXT: neg a2, a4 +; RV32IZHINX-NEXT: flt.s a3, a2, s0 +; RV32IZHINX-NEXT: neg a2, a3 ; RV32IZHINX-NEXT: or a0, a2, a0 ; RV32IZHINX-NEXT: feq.s a2, s0, s0 ; RV32IZHINX-NEXT: neg a2, a2 ; RV32IZHINX-NEXT: lui a5, 524288 -; RV32IZHINX-NEXT: lui a3, 524288 -; RV32IZHINX-NEXT: beqz s1, .LBB17_4 +; RV32IZHINX-NEXT: li a6, 
1 +; RV32IZHINX-NEXT: lui a4, 524288 +; RV32IZHINX-NEXT: bne s1, a6, .LBB17_4 ; RV32IZHINX-NEXT: # %bb.3: -; RV32IZHINX-NEXT: mv a3, a1 +; RV32IZHINX-NEXT: mv a4, a1 ; RV32IZHINX-NEXT: .LBB17_4: ; RV32IZHINX-NEXT: and a0, a2, a0 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -3042,11 +3062,11 @@ define i64 @test_roundeven_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 -; RV32IZHINX-NEXT: beqz a4, .LBB17_6 +; RV32IZHINX-NEXT: beqz a3, .LBB17_6 ; RV32IZHINX-NEXT: # %bb.5: -; RV32IZHINX-NEXT: addi a3, a5, -1 +; RV32IZHINX-NEXT: addi a4, a5, -1 ; RV32IZHINX-NEXT: .LBB17_6: -; RV32IZHINX-NEXT: and a1, a2, a3 +; RV32IZHINX-NEXT: and a1, a2, a4 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: test_roundeven_si64: @@ -3084,39 +3104,42 @@ define i64 @test_roundeven_si64(half %x) nounwind { ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 ; RV32IZFHMIN-NEXT: lui a0, 913408 ; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0 ; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0 +; RV32IZFHMIN-NEXT: neg s1, s0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixsfdi +; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI17_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI17_0)(a2) +; RV32IZFHMIN-NEXT: and a0, s1, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a2, a3 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: feq.s a2, fs0, fs0 +; RV32IZFHMIN-NEXT: neg a2, a2 +; RV32IZFHMIN-NEXT: lui a5, 524288 +; RV32IZFHMIN-NEXT: li a6, 1 ; RV32IZFHMIN-NEXT: lui a4, 524288 -; RV32IZFHMIN-NEXT: lui a2, 524288 -; RV32IZFHMIN-NEXT: 
beqz s0, .LBB17_4 +; RV32IZFHMIN-NEXT: bne s0, a6, .LBB17_4 ; RV32IZFHMIN-NEXT: # %bb.3: -; RV32IZFHMIN-NEXT: mv a2, a1 +; RV32IZFHMIN-NEXT: mv a4, a1 ; RV32IZFHMIN-NEXT: .LBB17_4: -; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI17_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI17_0)(a1) -; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 -; RV32IZFHMIN-NEXT: beqz a3, .LBB17_6 -; RV32IZFHMIN-NEXT: # %bb.5: -; RV32IZFHMIN-NEXT: addi a2, a4, -1 -; RV32IZFHMIN-NEXT: .LBB17_6: -; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0 -; RV32IZFHMIN-NEXT: neg a4, a1 -; RV32IZFHMIN-NEXT: and a1, a4, a2 -; RV32IZFHMIN-NEXT: neg a2, s0 ; RV32IZFHMIN-NEXT: and a0, a2, a0 -; RV32IZFHMIN-NEXT: neg a2, a3 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a0, a4, a0 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: beqz a3, .LBB17_6 +; RV32IZFHMIN-NEXT: # %bb.5: +; RV32IZFHMIN-NEXT: addi a4, a5, -1 +; RV32IZFHMIN-NEXT: .LBB17_6: +; RV32IZFHMIN-NEXT: and a1, a2, a4 ; RV32IZFHMIN-NEXT: ret ; ; RV64IZFHMIN-LABEL: test_roundeven_si64: @@ -3168,16 +3191,17 @@ define i64 @test_roundeven_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI17_0) ; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI17_0)(a2) ; RV32IZHINXMIN-NEXT: and a0, s2, a0 -; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a4 +; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0 +; RV32IZHINXMIN-NEXT: neg a2, a3 ; RV32IZHINXMIN-NEXT: or a0, a2, a0 ; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0 ; RV32IZHINXMIN-NEXT: neg a2, a2 ; RV32IZHINXMIN-NEXT: lui a5, 524288 -; RV32IZHINXMIN-NEXT: lui a3, 524288 -; RV32IZHINXMIN-NEXT: beqz s1, .LBB17_4 +; RV32IZHINXMIN-NEXT: li a6, 1 +; RV32IZHINXMIN-NEXT: lui a4, 524288 +; RV32IZHINXMIN-NEXT: bne s1, a6, .LBB17_4 ; 
RV32IZHINXMIN-NEXT: # %bb.3: -; RV32IZHINXMIN-NEXT: mv a3, a1 +; RV32IZHINXMIN-NEXT: mv a4, a1 ; RV32IZHINXMIN-NEXT: .LBB17_4: ; RV32IZHINXMIN-NEXT: and a0, a2, a0 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -3185,11 +3209,11 @@ define i64 @test_roundeven_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 -; RV32IZHINXMIN-NEXT: beqz a4, .LBB17_6 +; RV32IZHINXMIN-NEXT: beqz a3, .LBB17_6 ; RV32IZHINXMIN-NEXT: # %bb.5: -; RV32IZHINXMIN-NEXT: addi a3, a5, -1 +; RV32IZHINXMIN-NEXT: addi a4, a5, -1 ; RV32IZHINXMIN-NEXT: .LBB17_6: -; RV32IZHINXMIN-NEXT: and a1, a2, a3 +; RV32IZHINXMIN-NEXT: and a1, a2, a4 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: test_roundeven_si64: @@ -3261,7 +3285,7 @@ define signext i32 @test_roundeven_ui32(half %x) { ; RV64IZHINX-NEXT: fcvt.wu.h a1, a0, rtz ; RV64IZHINX-NEXT: feq.h a0, a0, a0 ; RV64IZHINX-NEXT: seqz a0, a0 -; RV64IZHINX-NEXT: addi a0, a0, -1 +; RV64IZHINX-NEXT: addiw a0, a0, -1 ; RV64IZHINX-NEXT: and a0, a1, a0 ; RV64IZHINX-NEXT: ret ; @@ -3305,7 +3329,7 @@ define signext i32 @test_roundeven_ui32(half %x) { ; RV64IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz ; RV64IZFHMIN-NEXT: feq.s a1, fa5, fa5 ; RV64IZFHMIN-NEXT: seqz a1, a1 -; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: addiw a1, a1, -1 ; RV64IZFHMIN-NEXT: and a0, a0, a1 ; RV64IZFHMIN-NEXT: ret ; @@ -3347,7 +3371,7 @@ define signext i32 @test_roundeven_ui32(half %x) { ; RV64IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz ; RV64IZHINXMIN-NEXT: feq.s a0, a0, a0 ; RV64IZHINXMIN-NEXT: seqz a0, a0 -; RV64IZHINXMIN-NEXT: addi a0, a0, -1 +; RV64IZHINXMIN-NEXT: addiw a0, a0, -1 ; RV64IZHINXMIN-NEXT: and a0, a1, a0 ; RV64IZHINXMIN-NEXT: ret %a = call half @llvm.roundeven.f16(half %x) @@ -3370,25 +3394,24 @@ define i64 @test_roundeven_ui64(half %x) nounwind { ; RV32IZFH-NEXT: .LBB19_2: ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 
12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 -; RV32IZFH-NEXT: fmv.w.x fa5, zero -; RV32IZFH-NEXT: fle.s a0, fa5, fs0 -; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi -; RV32IZFH-NEXT: lui a2, %hi(.LCPI19_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI19_1)(a2) -; RV32IZFH-NEXT: and a0, s0, a0 -; RV32IZFH-NEXT: flt.s a2, fa5, fs0 -; RV32IZFH-NEXT: neg a2, a2 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a1, s0, a1 -; RV32IZFH-NEXT: or a1, a2, a1 +; RV32IZFH-NEXT: fmv.w.x fa5, zero +; RV32IZFH-NEXT: fle.s a2, fa5, fs0 +; RV32IZFH-NEXT: lui a3, %hi(.LCPI19_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI19_1)(a3) +; RV32IZFH-NEXT: xori a2, a2, 1 +; RV32IZFH-NEXT: addi a2, a2, -1 +; RV32IZFH-NEXT: and a0, a2, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a3, a3 +; RV32IZFH-NEXT: or a0, a3, a0 +; RV32IZFH-NEXT: and a1, a2, a1 +; RV32IZFH-NEXT: or a1, a3, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; @@ -3416,23 +3439,22 @@ define i64 @test_roundeven_ui64(half %x) nounwind { ; RV32IZHINX-NEXT: addi sp, sp, -16 ; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: fcvt.s.h s0, a0 -; RV32IZHINX-NEXT: fle.s a0, zero, s0 -; RV32IZHINX-NEXT: neg s1, a0 ; RV32IZHINX-NEXT: mv a0, s0 ; RV32IZHINX-NEXT: call __fixunssfdi -; RV32IZHINX-NEXT: lui a2, %hi(.LCPI19_1) -; RV32IZHINX-NEXT: lw a2, %lo(.LCPI19_1)(a2) -; RV32IZHINX-NEXT: and a0, s1, a0 -; RV32IZHINX-NEXT: flt.s a2, a2, s0 -; 
RV32IZHINX-NEXT: neg a2, a2 -; RV32IZHINX-NEXT: or a0, a2, a0 -; RV32IZHINX-NEXT: and a1, s1, a1 -; RV32IZHINX-NEXT: or a1, a2, a1 +; RV32IZHINX-NEXT: fle.s a2, zero, s0 +; RV32IZHINX-NEXT: lui a3, %hi(.LCPI19_1) +; RV32IZHINX-NEXT: lw a3, %lo(.LCPI19_1)(a3) +; RV32IZHINX-NEXT: xori a2, a2, 1 +; RV32IZHINX-NEXT: addi a2, a2, -1 +; RV32IZHINX-NEXT: and a0, a2, a0 +; RV32IZHINX-NEXT: flt.s a3, a3, s0 +; RV32IZHINX-NEXT: neg a3, a3 +; RV32IZHINX-NEXT: or a0, a3, a0 +; RV32IZHINX-NEXT: and a1, a2, a1 +; RV32IZHINX-NEXT: or a1, a3, a1 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 ; RV32IZHINX-NEXT: ret ; @@ -3470,26 +3492,25 @@ define i64 @test_roundeven_ui64(half %x) nounwind { ; RV32IZFHMIN-NEXT: .LBB19_2: ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 -; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero -; RV32IZFHMIN-NEXT: fle.s a0, fa5, fs0 -; RV32IZFHMIN-NEXT: neg s0, a0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixunssfdi -; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI19_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI19_0)(a2) -; RV32IZFHMIN-NEXT: and a0, s0, a0 -; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0 -; RV32IZFHMIN-NEXT: neg a2, a2 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a1, s0, a1 -; RV32IZFHMIN-NEXT: or a1, a2, a1 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero +; RV32IZFHMIN-NEXT: fle.s a2, fa5, fs0 +; RV32IZFHMIN-NEXT: lui a3, %hi(.LCPI19_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI19_0)(a3) +; RV32IZFHMIN-NEXT: xori a2, a2, 1 +; RV32IZFHMIN-NEXT: addi a2, a2, -1 +; RV32IZFHMIN-NEXT: and a0, a2, a0 +; 
RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a3, a3 +; RV32IZFHMIN-NEXT: or a0, a3, a0 +; RV32IZFHMIN-NEXT: and a1, a2, a1 +; RV32IZFHMIN-NEXT: or a1, a3, a1 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 ; RV32IZFHMIN-NEXT: ret ; @@ -3530,24 +3551,23 @@ define i64 @test_roundeven_ui64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: addi sp, sp, -16 ; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0 ; RV32IZHINXMIN-NEXT: fcvt.s.h s0, a0 -; RV32IZHINXMIN-NEXT: fle.s a0, zero, s0 -; RV32IZHINXMIN-NEXT: neg s1, a0 ; RV32IZHINXMIN-NEXT: mv a0, s0 ; RV32IZHINXMIN-NEXT: call __fixunssfdi -; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI19_0) -; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI19_0)(a2) -; RV32IZHINXMIN-NEXT: and a0, s1, a0 -; RV32IZHINXMIN-NEXT: flt.s a2, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a2 -; RV32IZHINXMIN-NEXT: or a0, a2, a0 -; RV32IZHINXMIN-NEXT: and a1, s1, a1 -; RV32IZHINXMIN-NEXT: or a1, a2, a1 +; RV32IZHINXMIN-NEXT: fle.s a2, zero, s0 +; RV32IZHINXMIN-NEXT: lui a3, %hi(.LCPI19_0) +; RV32IZHINXMIN-NEXT: lw a3, %lo(.LCPI19_0)(a3) +; RV32IZHINXMIN-NEXT: xori a2, a2, 1 +; RV32IZHINXMIN-NEXT: addi a2, a2, -1 +; RV32IZHINXMIN-NEXT: and a0, a2, a0 +; RV32IZHINXMIN-NEXT: flt.s a3, a3, s0 +; RV32IZHINXMIN-NEXT: neg a3, a3 +; RV32IZHINXMIN-NEXT: or a0, a3, a0 +; RV32IZHINXMIN-NEXT: and a1, a2, a1 +; RV32IZHINXMIN-NEXT: or a1, a3, a1 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 ; RV32IZHINXMIN-NEXT: ret ; @@ -3668,38 
+3688,41 @@ define i64 @test_rint_si64(half %x) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: lui a0, 913408 ; RV32IZFH-NEXT: fmv.w.x fa5, a0 ; RV32IZFH-NEXT: fle.s s0, fa5, fs0 +; RV32IZFH-NEXT: neg s1, s0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi +; RV32IZFH-NEXT: lui a2, %hi(.LCPI21_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI21_1)(a2) +; RV32IZFH-NEXT: and a0, s1, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a2, a3 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: lui a5, 524288 +; RV32IZFH-NEXT: li a6, 1 ; RV32IZFH-NEXT: lui a4, 524288 -; RV32IZFH-NEXT: lui a2, 524288 -; RV32IZFH-NEXT: beqz s0, .LBB21_4 +; RV32IZFH-NEXT: bne s0, a6, .LBB21_4 ; RV32IZFH-NEXT: # %bb.3: -; RV32IZFH-NEXT: mv a2, a1 +; RV32IZFH-NEXT: mv a4, a1 ; RV32IZFH-NEXT: .LBB21_4: -; RV32IZFH-NEXT: lui a1, %hi(.LCPI21_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI21_1)(a1) -; RV32IZFH-NEXT: flt.s a3, fa5, fs0 -; RV32IZFH-NEXT: beqz a3, .LBB21_6 -; RV32IZFH-NEXT: # %bb.5: -; RV32IZFH-NEXT: addi a2, a4, -1 -; RV32IZFH-NEXT: .LBB21_6: -; RV32IZFH-NEXT: feq.s a1, fs0, fs0 -; RV32IZFH-NEXT: neg a4, a1 -; RV32IZFH-NEXT: and a1, a4, a2 -; RV32IZFH-NEXT: neg a2, s0 ; RV32IZFH-NEXT: and a0, a2, a0 -; RV32IZFH-NEXT: neg a2, a3 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 +; 
RV32IZFH-NEXT: beqz a3, .LBB21_6 +; RV32IZFH-NEXT: # %bb.5: +; RV32IZFH-NEXT: addi a4, a5, -1 +; RV32IZFH-NEXT: .LBB21_6: +; RV32IZFH-NEXT: and a1, a2, a4 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: test_rint_si64: @@ -3737,16 +3760,17 @@ define i64 @test_rint_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lui a2, %hi(.LCPI21_1) ; RV32IZHINX-NEXT: lw a2, %lo(.LCPI21_1)(a2) ; RV32IZHINX-NEXT: and a0, s2, a0 -; RV32IZHINX-NEXT: flt.s a4, a2, s0 -; RV32IZHINX-NEXT: neg a2, a4 +; RV32IZHINX-NEXT: flt.s a3, a2, s0 +; RV32IZHINX-NEXT: neg a2, a3 ; RV32IZHINX-NEXT: or a0, a2, a0 ; RV32IZHINX-NEXT: feq.s a2, s0, s0 ; RV32IZHINX-NEXT: neg a2, a2 ; RV32IZHINX-NEXT: lui a5, 524288 -; RV32IZHINX-NEXT: lui a3, 524288 -; RV32IZHINX-NEXT: beqz s1, .LBB21_4 +; RV32IZHINX-NEXT: li a6, 1 +; RV32IZHINX-NEXT: lui a4, 524288 +; RV32IZHINX-NEXT: bne s1, a6, .LBB21_4 ; RV32IZHINX-NEXT: # %bb.3: -; RV32IZHINX-NEXT: mv a3, a1 +; RV32IZHINX-NEXT: mv a4, a1 ; RV32IZHINX-NEXT: .LBB21_4: ; RV32IZHINX-NEXT: and a0, a2, a0 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -3754,11 +3778,11 @@ define i64 @test_rint_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 -; RV32IZHINX-NEXT: beqz a4, .LBB21_6 +; RV32IZHINX-NEXT: beqz a3, .LBB21_6 ; RV32IZHINX-NEXT: # %bb.5: -; RV32IZHINX-NEXT: addi a3, a5, -1 +; RV32IZHINX-NEXT: addi a4, a5, -1 ; RV32IZHINX-NEXT: .LBB21_6: -; RV32IZHINX-NEXT: and a1, a2, a3 +; RV32IZHINX-NEXT: and a1, a2, a4 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: test_rint_si64: @@ -3796,39 +3820,42 @@ define i64 @test_rint_si64(half %x) nounwind { ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded 
Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 ; RV32IZFHMIN-NEXT: lui a0, 913408 ; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0 ; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0 +; RV32IZFHMIN-NEXT: neg s1, s0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixsfdi +; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI21_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI21_0)(a2) +; RV32IZFHMIN-NEXT: and a0, s1, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a2, a3 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: feq.s a2, fs0, fs0 +; RV32IZFHMIN-NEXT: neg a2, a2 +; RV32IZFHMIN-NEXT: lui a5, 524288 +; RV32IZFHMIN-NEXT: li a6, 1 ; RV32IZFHMIN-NEXT: lui a4, 524288 -; RV32IZFHMIN-NEXT: lui a2, 524288 -; RV32IZFHMIN-NEXT: beqz s0, .LBB21_4 +; RV32IZFHMIN-NEXT: bne s0, a6, .LBB21_4 ; RV32IZFHMIN-NEXT: # %bb.3: -; RV32IZFHMIN-NEXT: mv a2, a1 +; RV32IZFHMIN-NEXT: mv a4, a1 ; RV32IZFHMIN-NEXT: .LBB21_4: -; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI21_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI21_0)(a1) -; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 -; RV32IZFHMIN-NEXT: beqz a3, .LBB21_6 -; RV32IZFHMIN-NEXT: # %bb.5: -; RV32IZFHMIN-NEXT: addi a2, a4, -1 -; RV32IZFHMIN-NEXT: .LBB21_6: -; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0 -; RV32IZFHMIN-NEXT: neg a4, a1 -; RV32IZFHMIN-NEXT: and a1, a4, a2 -; RV32IZFHMIN-NEXT: neg a2, s0 ; RV32IZFHMIN-NEXT: and a0, a2, a0 -; RV32IZFHMIN-NEXT: neg a2, a3 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a0, a4, a0 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: beqz a3, .LBB21_6 +; RV32IZFHMIN-NEXT: # %bb.5: +; RV32IZFHMIN-NEXT: addi a4, a5, -1 +; RV32IZFHMIN-NEXT: .LBB21_6: +; RV32IZFHMIN-NEXT: and a1, a2, a4 ; 
RV32IZFHMIN-NEXT: ret ; ; RV64IZFHMIN-LABEL: test_rint_si64: @@ -3880,16 +3907,17 @@ define i64 @test_rint_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI21_0) ; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI21_0)(a2) ; RV32IZHINXMIN-NEXT: and a0, s2, a0 -; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a4 +; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0 +; RV32IZHINXMIN-NEXT: neg a2, a3 ; RV32IZHINXMIN-NEXT: or a0, a2, a0 ; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0 ; RV32IZHINXMIN-NEXT: neg a2, a2 ; RV32IZHINXMIN-NEXT: lui a5, 524288 -; RV32IZHINXMIN-NEXT: lui a3, 524288 -; RV32IZHINXMIN-NEXT: beqz s1, .LBB21_4 +; RV32IZHINXMIN-NEXT: li a6, 1 +; RV32IZHINXMIN-NEXT: lui a4, 524288 +; RV32IZHINXMIN-NEXT: bne s1, a6, .LBB21_4 ; RV32IZHINXMIN-NEXT: # %bb.3: -; RV32IZHINXMIN-NEXT: mv a3, a1 +; RV32IZHINXMIN-NEXT: mv a4, a1 ; RV32IZHINXMIN-NEXT: .LBB21_4: ; RV32IZHINXMIN-NEXT: and a0, a2, a0 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -3897,11 +3925,11 @@ define i64 @test_rint_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 -; RV32IZHINXMIN-NEXT: beqz a4, .LBB21_6 +; RV32IZHINXMIN-NEXT: beqz a3, .LBB21_6 ; RV32IZHINXMIN-NEXT: # %bb.5: -; RV32IZHINXMIN-NEXT: addi a3, a5, -1 +; RV32IZHINXMIN-NEXT: addi a4, a5, -1 ; RV32IZHINXMIN-NEXT: .LBB21_6: -; RV32IZHINXMIN-NEXT: and a1, a2, a3 +; RV32IZHINXMIN-NEXT: and a1, a2, a4 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: test_rint_si64: @@ -3973,7 +4001,7 @@ define signext i32 @test_rint_ui32(half %x) { ; RV64IZHINX-NEXT: fcvt.wu.h a1, a0, rtz ; RV64IZHINX-NEXT: feq.h a0, a0, a0 ; RV64IZHINX-NEXT: seqz a0, a0 -; RV64IZHINX-NEXT: addi a0, a0, -1 +; RV64IZHINX-NEXT: addiw a0, a0, -1 ; RV64IZHINX-NEXT: and a0, a1, a0 ; RV64IZHINX-NEXT: ret ; @@ -4017,7 +4045,7 @@ define signext i32 @test_rint_ui32(half %x) { ; RV64IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz ; 
RV64IZFHMIN-NEXT: feq.s a1, fa5, fa5 ; RV64IZFHMIN-NEXT: seqz a1, a1 -; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: addiw a1, a1, -1 ; RV64IZFHMIN-NEXT: and a0, a0, a1 ; RV64IZFHMIN-NEXT: ret ; @@ -4059,7 +4087,7 @@ define signext i32 @test_rint_ui32(half %x) { ; RV64IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz ; RV64IZHINXMIN-NEXT: feq.s a0, a0, a0 ; RV64IZHINXMIN-NEXT: seqz a0, a0 -; RV64IZHINXMIN-NEXT: addi a0, a0, -1 +; RV64IZHINXMIN-NEXT: addiw a0, a0, -1 ; RV64IZHINXMIN-NEXT: and a0, a1, a0 ; RV64IZHINXMIN-NEXT: ret %a = call half @llvm.rint.f16(half %x) @@ -4082,25 +4110,24 @@ define i64 @test_rint_ui64(half %x) nounwind { ; RV32IZFH-NEXT: .LBB23_2: ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 -; RV32IZFH-NEXT: fmv.w.x fa5, zero -; RV32IZFH-NEXT: fle.s a0, fa5, fs0 -; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi -; RV32IZFH-NEXT: lui a2, %hi(.LCPI23_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI23_1)(a2) -; RV32IZFH-NEXT: and a0, s0, a0 -; RV32IZFH-NEXT: flt.s a2, fa5, fs0 -; RV32IZFH-NEXT: neg a2, a2 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a1, s0, a1 -; RV32IZFH-NEXT: or a1, a2, a1 +; RV32IZFH-NEXT: fmv.w.x fa5, zero +; RV32IZFH-NEXT: fle.s a2, fa5, fs0 +; RV32IZFH-NEXT: lui a3, %hi(.LCPI23_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI23_1)(a3) +; RV32IZFH-NEXT: xori a2, a2, 1 +; RV32IZFH-NEXT: addi a2, a2, -1 +; RV32IZFH-NEXT: and a0, a2, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a3, a3 +; RV32IZFH-NEXT: or a0, a3, a0 +; RV32IZFH-NEXT: and a1, a2, a1 +; RV32IZFH-NEXT: or a1, a3, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; 
RV32IZFH-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; @@ -4128,23 +4155,22 @@ define i64 @test_rint_ui64(half %x) nounwind { ; RV32IZHINX-NEXT: addi sp, sp, -16 ; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: fcvt.s.h s0, a0 -; RV32IZHINX-NEXT: fle.s a0, zero, s0 -; RV32IZHINX-NEXT: neg s1, a0 ; RV32IZHINX-NEXT: mv a0, s0 ; RV32IZHINX-NEXT: call __fixunssfdi -; RV32IZHINX-NEXT: lui a2, %hi(.LCPI23_1) -; RV32IZHINX-NEXT: lw a2, %lo(.LCPI23_1)(a2) -; RV32IZHINX-NEXT: and a0, s1, a0 -; RV32IZHINX-NEXT: flt.s a2, a2, s0 -; RV32IZHINX-NEXT: neg a2, a2 -; RV32IZHINX-NEXT: or a0, a2, a0 -; RV32IZHINX-NEXT: and a1, s1, a1 -; RV32IZHINX-NEXT: or a1, a2, a1 +; RV32IZHINX-NEXT: fle.s a2, zero, s0 +; RV32IZHINX-NEXT: lui a3, %hi(.LCPI23_1) +; RV32IZHINX-NEXT: lw a3, %lo(.LCPI23_1)(a3) +; RV32IZHINX-NEXT: xori a2, a2, 1 +; RV32IZHINX-NEXT: addi a2, a2, -1 +; RV32IZHINX-NEXT: and a0, a2, a0 +; RV32IZHINX-NEXT: flt.s a3, a3, s0 +; RV32IZHINX-NEXT: neg a3, a3 +; RV32IZHINX-NEXT: or a0, a3, a0 +; RV32IZHINX-NEXT: and a1, a2, a1 +; RV32IZHINX-NEXT: or a1, a3, a1 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 ; RV32IZHINX-NEXT: ret ; @@ -4182,26 +4208,25 @@ define i64 @test_rint_ui64(half %x) nounwind { ; RV32IZFHMIN-NEXT: .LBB23_2: ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 -; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero -; RV32IZFHMIN-NEXT: fle.s a0, fa5, fs0 
-; RV32IZFHMIN-NEXT: neg s0, a0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixunssfdi -; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI23_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI23_0)(a2) -; RV32IZFHMIN-NEXT: and a0, s0, a0 -; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0 -; RV32IZFHMIN-NEXT: neg a2, a2 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a1, s0, a1 -; RV32IZFHMIN-NEXT: or a1, a2, a1 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero +; RV32IZFHMIN-NEXT: fle.s a2, fa5, fs0 +; RV32IZFHMIN-NEXT: lui a3, %hi(.LCPI23_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI23_0)(a3) +; RV32IZFHMIN-NEXT: xori a2, a2, 1 +; RV32IZFHMIN-NEXT: addi a2, a2, -1 +; RV32IZFHMIN-NEXT: and a0, a2, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a3, a3 +; RV32IZFHMIN-NEXT: or a0, a3, a0 +; RV32IZFHMIN-NEXT: and a1, a2, a1 +; RV32IZFHMIN-NEXT: or a1, a3, a1 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 ; RV32IZFHMIN-NEXT: ret ; @@ -4242,24 +4267,23 @@ define i64 @test_rint_ui64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: addi sp, sp, -16 ; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0 ; RV32IZHINXMIN-NEXT: fcvt.s.h s0, a0 -; RV32IZHINXMIN-NEXT: fle.s a0, zero, s0 -; RV32IZHINXMIN-NEXT: neg s1, a0 ; RV32IZHINXMIN-NEXT: mv a0, s0 ; RV32IZHINXMIN-NEXT: call __fixunssfdi -; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI23_0) -; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI23_0)(a2) -; RV32IZHINXMIN-NEXT: and a0, s1, a0 -; RV32IZHINXMIN-NEXT: flt.s a2, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a2 -; RV32IZHINXMIN-NEXT: or a0, a2, a0 -; RV32IZHINXMIN-NEXT: and a1, s1, a1 -; RV32IZHINXMIN-NEXT: or a1, a2, a1 +; 
RV32IZHINXMIN-NEXT: fle.s a2, zero, s0 +; RV32IZHINXMIN-NEXT: lui a3, %hi(.LCPI23_0) +; RV32IZHINXMIN-NEXT: lw a3, %lo(.LCPI23_0)(a3) +; RV32IZHINXMIN-NEXT: xori a2, a2, 1 +; RV32IZHINXMIN-NEXT: addi a2, a2, -1 +; RV32IZHINXMIN-NEXT: and a0, a2, a0 +; RV32IZHINXMIN-NEXT: flt.s a3, a3, s0 +; RV32IZHINXMIN-NEXT: neg a3, a3 +; RV32IZHINXMIN-NEXT: or a0, a3, a0 +; RV32IZHINXMIN-NEXT: and a1, a2, a1 +; RV32IZHINXMIN-NEXT: or a1, a3, a1 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 ; RV32IZHINXMIN-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/iabs.ll b/llvm/test/CodeGen/RISCV/iabs.ll index cb64e24128b5e3..98c886333d69a0 100644 --- a/llvm/test/CodeGen/RISCV/iabs.ll +++ b/llvm/test/CodeGen/RISCV/iabs.ll @@ -302,56 +302,56 @@ define i128 @abs128(i128 %x) { ; RV32I-LABEL: abs128: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a2, 12(a1) -; RV32I-NEXT: lw a3, 4(a1) -; RV32I-NEXT: lw a4, 0(a1) +; RV32I-NEXT: lw a3, 0(a1) +; RV32I-NEXT: lw a4, 4(a1) ; RV32I-NEXT: lw a1, 8(a1) ; RV32I-NEXT: bgez a2, .LBB8_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: neg a5, a1 -; RV32I-NEXT: or a6, a4, a3 -; RV32I-NEXT: snez a6, a6 -; RV32I-NEXT: sltu a7, a5, a6 +; RV32I-NEXT: snez a6, a4 +; RV32I-NEXT: snez a7, a3 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: sltu t0, a5, a6 ; RV32I-NEXT: snez a1, a1 ; RV32I-NEXT: add a1, a2, a1 ; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sub a2, a1, a7 +; RV32I-NEXT: sub a2, a1, t0 ; RV32I-NEXT: sub a1, a5, a6 -; RV32I-NEXT: snez a5, a4 -; RV32I-NEXT: neg a3, a3 -; RV32I-NEXT: sub a3, a3, a5 ; RV32I-NEXT: neg a4, a4 +; RV32I-NEXT: sub a4, a4, a7 +; RV32I-NEXT: neg a3, a3 ; RV32I-NEXT: .LBB8_2: -; RV32I-NEXT: sw a4, 0(a0) +; RV32I-NEXT: sw a3, 0(a0) +; RV32I-NEXT: sw a4, 4(a0) ; RV32I-NEXT: sw a1, 8(a0) -; RV32I-NEXT: sw a3, 4(a0) ; RV32I-NEXT: sw a2, 12(a0) ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: abs128: ; RV32ZBB: # 
%bb.0: ; RV32ZBB-NEXT: lw a2, 12(a1) -; RV32ZBB-NEXT: lw a3, 4(a1) -; RV32ZBB-NEXT: lw a4, 0(a1) +; RV32ZBB-NEXT: lw a3, 0(a1) +; RV32ZBB-NEXT: lw a4, 4(a1) ; RV32ZBB-NEXT: lw a1, 8(a1) ; RV32ZBB-NEXT: bgez a2, .LBB8_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: neg a5, a1 -; RV32ZBB-NEXT: or a6, a4, a3 -; RV32ZBB-NEXT: snez a6, a6 -; RV32ZBB-NEXT: sltu a7, a5, a6 +; RV32ZBB-NEXT: snez a6, a4 +; RV32ZBB-NEXT: snez a7, a3 +; RV32ZBB-NEXT: or a6, a7, a6 +; RV32ZBB-NEXT: sltu t0, a5, a6 ; RV32ZBB-NEXT: snez a1, a1 ; RV32ZBB-NEXT: add a1, a2, a1 ; RV32ZBB-NEXT: neg a1, a1 -; RV32ZBB-NEXT: sub a2, a1, a7 +; RV32ZBB-NEXT: sub a2, a1, t0 ; RV32ZBB-NEXT: sub a1, a5, a6 -; RV32ZBB-NEXT: snez a5, a4 -; RV32ZBB-NEXT: neg a3, a3 -; RV32ZBB-NEXT: sub a3, a3, a5 ; RV32ZBB-NEXT: neg a4, a4 +; RV32ZBB-NEXT: sub a4, a4, a7 +; RV32ZBB-NEXT: neg a3, a3 ; RV32ZBB-NEXT: .LBB8_2: -; RV32ZBB-NEXT: sw a4, 0(a0) +; RV32ZBB-NEXT: sw a3, 0(a0) +; RV32ZBB-NEXT: sw a4, 4(a0) ; RV32ZBB-NEXT: sw a1, 8(a0) -; RV32ZBB-NEXT: sw a3, 4(a0) ; RV32ZBB-NEXT: sw a2, 12(a0) ; RV32ZBB-NEXT: ret ; @@ -384,56 +384,56 @@ define i128 @select_abs128(i128 %x) { ; RV32I-LABEL: select_abs128: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a2, 12(a1) -; RV32I-NEXT: lw a3, 4(a1) -; RV32I-NEXT: lw a4, 0(a1) +; RV32I-NEXT: lw a3, 0(a1) +; RV32I-NEXT: lw a4, 4(a1) ; RV32I-NEXT: lw a1, 8(a1) ; RV32I-NEXT: bgez a2, .LBB9_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: neg a5, a1 -; RV32I-NEXT: or a6, a4, a3 -; RV32I-NEXT: snez a6, a6 -; RV32I-NEXT: sltu a7, a5, a6 +; RV32I-NEXT: snez a6, a4 +; RV32I-NEXT: snez a7, a3 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: sltu t0, a5, a6 ; RV32I-NEXT: snez a1, a1 ; RV32I-NEXT: add a1, a2, a1 ; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sub a2, a1, a7 +; RV32I-NEXT: sub a2, a1, t0 ; RV32I-NEXT: sub a1, a5, a6 -; RV32I-NEXT: snez a5, a4 -; RV32I-NEXT: neg a3, a3 -; RV32I-NEXT: sub a3, a3, a5 ; RV32I-NEXT: neg a4, a4 +; RV32I-NEXT: sub a4, a4, a7 +; RV32I-NEXT: neg a3, a3 ; RV32I-NEXT: .LBB9_2: -; RV32I-NEXT: sw 
a4, 0(a0) +; RV32I-NEXT: sw a3, 0(a0) +; RV32I-NEXT: sw a4, 4(a0) ; RV32I-NEXT: sw a1, 8(a0) -; RV32I-NEXT: sw a3, 4(a0) ; RV32I-NEXT: sw a2, 12(a0) ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: select_abs128: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a2, 12(a1) -; RV32ZBB-NEXT: lw a3, 4(a1) -; RV32ZBB-NEXT: lw a4, 0(a1) +; RV32ZBB-NEXT: lw a3, 0(a1) +; RV32ZBB-NEXT: lw a4, 4(a1) ; RV32ZBB-NEXT: lw a1, 8(a1) ; RV32ZBB-NEXT: bgez a2, .LBB9_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: neg a5, a1 -; RV32ZBB-NEXT: or a6, a4, a3 -; RV32ZBB-NEXT: snez a6, a6 -; RV32ZBB-NEXT: sltu a7, a5, a6 +; RV32ZBB-NEXT: snez a6, a4 +; RV32ZBB-NEXT: snez a7, a3 +; RV32ZBB-NEXT: or a6, a7, a6 +; RV32ZBB-NEXT: sltu t0, a5, a6 ; RV32ZBB-NEXT: snez a1, a1 ; RV32ZBB-NEXT: add a1, a2, a1 ; RV32ZBB-NEXT: neg a1, a1 -; RV32ZBB-NEXT: sub a2, a1, a7 +; RV32ZBB-NEXT: sub a2, a1, t0 ; RV32ZBB-NEXT: sub a1, a5, a6 -; RV32ZBB-NEXT: snez a5, a4 -; RV32ZBB-NEXT: neg a3, a3 -; RV32ZBB-NEXT: sub a3, a3, a5 ; RV32ZBB-NEXT: neg a4, a4 +; RV32ZBB-NEXT: sub a4, a4, a7 +; RV32ZBB-NEXT: neg a3, a3 ; RV32ZBB-NEXT: .LBB9_2: -; RV32ZBB-NEXT: sw a4, 0(a0) +; RV32ZBB-NEXT: sw a3, 0(a0) +; RV32ZBB-NEXT: sw a4, 4(a0) ; RV32ZBB-NEXT: sw a1, 8(a0) -; RV32ZBB-NEXT: sw a3, 4(a0) ; RV32ZBB-NEXT: sw a2, 12(a0) ; RV32ZBB-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/pr84200.ll b/llvm/test/CodeGen/RISCV/pr84200.ll new file mode 100644 index 00000000000000..19a102b84ed062 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/pr84200.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc < %s -mtriple=riscv64 | FileCheck %s + +; The sub nuw produces poison if the input is not 0 or 1. We must insert a +; freeze before converting the sub to AND so that we don't propagate poison. 
+define i64 @foo(i64 %1) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: sub a1, a1, a0 +; CHECK-NEXT: sltiu a0, a0, 2 +; CHECK-NEXT: xori a1, a1, 1 +; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: ret +entry: + %.urem.i = sub nuw i64 1, %1 + %.cmp.i = icmp ugt i64 %1, 1 + %2 = xor i64 %.urem.i, 1 + %3 = select i1 %.cmp.i, i64 0, i64 %2 + ret i64 %3 +} diff --git a/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll index 71040bf2646d2c..4e958f5699adbf 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll @@ -298,14 +298,14 @@ define i32 @not_shl_one_i32(i32 %x) { define i64 @not_shl_one_i64(i64 %x) { ; CHECK-LABEL: not_shl_one_i64: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 -; CHECK-NEXT: sll a1, a1, a0 -; CHECK-NEXT: addi a0, a0, -32 -; CHECK-NEXT: slti a0, a0, 0 -; CHECK-NEXT: neg a2, a0 -; CHECK-NEXT: and a2, a2, a1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a1, a0, a1 +; CHECK-NEXT: addi a1, a0, -32 +; CHECK-NEXT: slti a1, a1, 0 +; CHECK-NEXT: neg a2, a1 +; CHECK-NEXT: li a3, 1 +; CHECK-NEXT: sll a0, a3, a0 +; CHECK-NEXT: and a2, a2, a0 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a1, a1, a0 ; CHECK-NEXT: not a0, a2 ; CHECK-NEXT: not a1, a1 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rv32zbs.ll b/llvm/test/CodeGen/RISCV/rv32zbs.ll index ccda8f4e5dd059..30aba61ba47469 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbs.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbs.ll @@ -48,20 +48,20 @@ define i32 @bclr_i32_no_mask(i32 %a, i32 %b) nounwind { define i64 @bclr_i64(i64 %a, i64 %b) nounwind { ; RV32I-LABEL: bclr_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: li a3, 1 -; RV32I-NEXT: sll a4, a3, a2 -; RV32I-NEXT: andi a2, a2, 63 -; RV32I-NEXT: addi a5, a2, -32 -; RV32I-NEXT: slti a5, a5, 0 -; RV32I-NEXT: neg a6, a5 -; RV32I-NEXT: and a4, a6, a4 -; RV32I-NEXT: sll a2, a3, a2 -; RV32I-NEXT: addi a5, a5, -1 +; RV32I-NEXT: andi a3, 
a2, 63 +; RV32I-NEXT: addi a4, a3, -32 +; RV32I-NEXT: slti a4, a4, 0 +; RV32I-NEXT: neg a5, a4 +; RV32I-NEXT: li a6, 1 +; RV32I-NEXT: sll a2, a6, a2 ; RV32I-NEXT: and a2, a5, a2 -; RV32I-NEXT: not a3, a4 +; RV32I-NEXT: sll a3, a6, a3 +; RV32I-NEXT: addi a4, a4, -1 +; RV32I-NEXT: and a3, a4, a3 ; RV32I-NEXT: not a2, a2 -; RV32I-NEXT: and a0, a3, a0 -; RV32I-NEXT: and a1, a2, a1 +; RV32I-NEXT: not a3, a3 +; RV32I-NEXT: and a0, a2, a0 +; RV32I-NEXT: and a1, a3, a1 ; RV32I-NEXT: ret ; ; RV32ZBSNOZBB-LABEL: bclr_i64: @@ -186,14 +186,14 @@ define i64 @bset_i64(i64 %a, i64 %b) nounwind { define signext i64 @bset_i64_zero(i64 signext %a) nounwind { ; RV32I-LABEL: bset_i64_zero: ; RV32I: # %bb.0: -; RV32I-NEXT: li a1, 1 -; RV32I-NEXT: sll a1, a1, a0 -; RV32I-NEXT: addi a0, a0, -32 -; RV32I-NEXT: slti a2, a0, 0 -; RV32I-NEXT: neg a0, a2 -; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: addi a2, a2, -1 -; RV32I-NEXT: and a1, a2, a1 +; RV32I-NEXT: addi a1, a0, -32 +; RV32I-NEXT: slti a1, a1, 0 +; RV32I-NEXT: neg a2, a1 +; RV32I-NEXT: li a3, 1 +; RV32I-NEXT: sll a3, a3, a0 +; RV32I-NEXT: and a0, a2, a3 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a1, a1, a3 ; RV32I-NEXT: ret ; ; RV32ZBS-LABEL: bset_i64_zero: diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll index 4ec7f2660b2a35..73bfc6480b4d75 100644 --- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll @@ -489,7 +489,7 @@ define signext i32 @findFirstSet_i32(i32 signext %a) nounwind { ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: snez a1, s0 -; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: addiw a1, a1, -1 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload @@ -513,7 +513,7 @@ define signext i32 @findFirstSet_i32(i32 signext %a) nounwind { ; RV64XTHEADBB-NEXT: add a0, a1, a0 ; 
RV64XTHEADBB-NEXT: lbu a0, 0(a0) ; RV64XTHEADBB-NEXT: snez a1, s0 -; RV64XTHEADBB-NEXT: addi a1, a1, -1 +; RV64XTHEADBB-NEXT: addiw a1, a1, -1 ; RV64XTHEADBB-NEXT: or a0, a1, a0 ; RV64XTHEADBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64XTHEADBB-NEXT: ld s0, 0(sp) # 8-byte Folded Reload @@ -542,12 +542,10 @@ define signext i32 @ffs_i32(i32 signext %a) nounwind { ; RV64I-NEXT: addi a1, a1, %lo(.LCPI9_0) ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: addi a0, a0, 1 +; RV64I-NEXT: addiw a0, a0, 1 ; RV64I-NEXT: seqz a1, s0 -; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: addiw a1, a1, -1 ; RV64I-NEXT: and a0, a1, a0 -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 @@ -569,12 +567,10 @@ define signext i32 @ffs_i32(i32 signext %a) nounwind { ; RV64XTHEADBB-NEXT: addi a1, a1, %lo(.LCPI9_0) ; RV64XTHEADBB-NEXT: add a0, a1, a0 ; RV64XTHEADBB-NEXT: lbu a0, 0(a0) -; RV64XTHEADBB-NEXT: addi a0, a0, 1 +; RV64XTHEADBB-NEXT: addiw a0, a0, 1 ; RV64XTHEADBB-NEXT: seqz a1, s0 -; RV64XTHEADBB-NEXT: addi a1, a1, -1 +; RV64XTHEADBB-NEXT: addiw a1, a1, -1 ; RV64XTHEADBB-NEXT: and a0, a1, a0 -; RV64XTHEADBB-NEXT: slli a0, a0, 32 -; RV64XTHEADBB-NEXT: srli a0, a0, 32 ; RV64XTHEADBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64XTHEADBB-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64XTHEADBB-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll index 68ce66cbe8537d..7feef4dad4116a 100644 --- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll @@ -444,7 +444,7 @@ define signext i32 @findFirstSet_i32(i32 signext %a) nounwind { ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: snez a1, s0 -; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: addiw a1, a1, -1 ; RV64I-NEXT: or 
a0, a1, a0 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload @@ -481,12 +481,10 @@ define signext i32 @ffs_i32(i32 signext %a) nounwind { ; RV64I-NEXT: addi a1, a1, %lo(.LCPI9_0) ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: addi a0, a0, 1 +; RV64I-NEXT: addiw a0, a0, 1 ; RV64I-NEXT: seqz a1, s0 -; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: addiw a1, a1, -1 ; RV64I-NEXT: and a0, a1, a0 -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 @@ -495,11 +493,10 @@ define signext i32 @ffs_i32(i32 signext %a) nounwind { ; RV64ZBB-LABEL: ffs_i32: ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: ctzw a1, a0 -; RV64ZBB-NEXT: addi a1, a1, 1 +; RV64ZBB-NEXT: addiw a1, a1, 1 ; RV64ZBB-NEXT: seqz a0, a0 -; RV64ZBB-NEXT: addi a0, a0, -1 +; RV64ZBB-NEXT: addiw a0, a0, -1 ; RV64ZBB-NEXT: and a0, a0, a1 -; RV64ZBB-NEXT: zext.h a0, a0 ; RV64ZBB-NEXT: ret %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true) %2 = add i32 %1, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll index 87d95d7596d4fa..139579b3d2a361 100644 --- a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll @@ -161,72 +161,71 @@ define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) { define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) { ; CHECK-LABEL: fv128: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: lui a0, %hi(.LCPI10_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_0) +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v0, v16, a2 -; CHECK-NEXT: vsext.vf8 v16, v8 -; CHECK-NEXT: vsaddu.vx v8, v16, a1 -; CHECK-NEXT: vmsltu.vx v16, v8, a2 -; CHECK-NEXT: vsetivli zero, 4, 
e8, m1, tu, ma -; CHECK-NEXT: vslideup.vi v0, v16, 2 ; CHECK-NEXT: lui a0, %hi(.LCPI10_1) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_1) -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v9, (a0) ; CHECK-NEXT: vsext.vf8 v16, v8 -; CHECK-NEXT: vsaddu.vx v8, v16, a1 -; CHECK-NEXT: vmsltu.vx v16, v8, a2 -; CHECK-NEXT: vsetivli zero, 6, e8, m1, tu, ma -; CHECK-NEXT: vslideup.vi v0, v16, 4 +; CHECK-NEXT: vsaddu.vx v16, v16, a1 +; CHECK-NEXT: vmsltu.vx v10, v16, a2 +; CHECK-NEXT: vsext.vf8 v16, v9 +; CHECK-NEXT: vsaddu.vx v16, v16, a1 +; CHECK-NEXT: vmsltu.vx v8, v16, a2 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v10, 2 ; CHECK-NEXT: lui a0, %hi(.LCPI10_2) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_2) ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsext.vf8 v16, v8 -; CHECK-NEXT: vsaddu.vx v8, v16, a1 -; CHECK-NEXT: vmsltu.vx v16, v8, a2 -; CHECK-NEXT: vsetivli zero, 8, e8, m1, tu, ma -; CHECK-NEXT: vslideup.vi v0, v16, 6 +; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vsext.vf8 v16, v9 +; CHECK-NEXT: vsaddu.vx v16, v16, a1 +; CHECK-NEXT: vmsltu.vx v9, v16, a2 +; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v9, 4 ; CHECK-NEXT: lui a0, %hi(.LCPI10_3) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_3) ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsext.vf8 v16, v8 -; CHECK-NEXT: vsaddu.vx v8, v16, a1 -; CHECK-NEXT: vmsltu.vx v16, v8, a2 -; CHECK-NEXT: vsetivli zero, 10, e8, m1, tu, ma -; CHECK-NEXT: vslideup.vi v0, v16, 8 +; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vsext.vf8 v16, v9 +; CHECK-NEXT: vsaddu.vx v16, v16, a1 +; CHECK-NEXT: vmsltu.vx v9, v16, a2 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v9, 6 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: lui a0, %hi(.LCPI10_4) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_4) -; 
CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsext.vf8 v16, v8 -; CHECK-NEXT: vsaddu.vx v8, v16, a1 -; CHECK-NEXT: vmsltu.vx v16, v8, a2 -; CHECK-NEXT: vsetivli zero, 12, e8, m1, tu, ma -; CHECK-NEXT: vslideup.vi v0, v16, 10 +; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vsaddu.vx v16, v16, a1 +; CHECK-NEXT: vmsltu.vx v0, v16, a2 +; CHECK-NEXT: vsext.vf8 v16, v9 +; CHECK-NEXT: vsaddu.vx v16, v16, a1 +; CHECK-NEXT: vmsltu.vx v9, v16, a2 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v0, v9, 2 ; CHECK-NEXT: lui a0, %hi(.LCPI10_5) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_5) ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsext.vf8 v16, v8 -; CHECK-NEXT: vsaddu.vx v8, v16, a1 -; CHECK-NEXT: vmsltu.vx v16, v8, a2 -; CHECK-NEXT: vsetivli zero, 14, e8, m1, tu, ma -; CHECK-NEXT: vslideup.vi v0, v16, 12 +; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vsext.vf8 v16, v9 +; CHECK-NEXT: vsaddu.vx v16, v16, a1 +; CHECK-NEXT: vmsltu.vx v9, v16, a2 +; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v0, v9, 4 ; CHECK-NEXT: lui a0, %hi(.LCPI10_6) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_6) ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsext.vf8 v16, v8 -; CHECK-NEXT: vsaddu.vx v8, v16, a1 -; CHECK-NEXT: vmsltu.vx v16, v8, a2 -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vi v0, v16, 14 +; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vsext.vf8 v16, v9 +; CHECK-NEXT: vsaddu.vx v16, v16, a1 +; CHECK-NEXT: vmsltu.vx v9, v16, a2 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v0, v9, 6 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vslideup.vi v0, v8, 8 ; CHECK-NEXT: ret %mask = call <128 x i1> @llvm.get.active.lane.mask.v128i1.i64(i64 %index, i64 %tc) ret <128 x i1> %mask diff --git 
a/llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll b/llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll index c64216180c2af7..ed434deea1a837 100644 --- a/llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll +++ b/llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll @@ -19,7 +19,7 @@ define void @test(ptr %ref_array, ptr %sad_array) { ; RV32-NEXT: th.swia a0, (a1), 4, 0 ; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; RV32-NEXT: vle8.v v10, (a3) -; RV32-NEXT: vsetivli zero, 8, e8, m1, tu, ma +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-NEXT: vslideup.vi v10, v9, 4 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vzext.vf4 v12, v10 @@ -42,7 +42,7 @@ define void @test(ptr %ref_array, ptr %sad_array) { ; RV64-NEXT: th.swia a0, (a1), 4, 0 ; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; RV64-NEXT: vle8.v v10, (a3) -; RV64-NEXT: vsetivli zero, 8, e8, m1, tu, ma +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64-NEXT: vslideup.vi v10, v9, 4 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vzext.vf4 v12, v10 diff --git a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll index 76aa2b913c6525..e15e6452163b1c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll @@ -469,9 +469,8 @@ define @extract_nxv6f16_nxv12f16_6( %in) ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v13, v10, a0 ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vslidedown.vx v13, v10, a0 ; CHECK-NEXT: vslidedown.vx v12, v9, a0 ; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll index e5bef20fd9e24d..8474f95edd813f 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll @@ -5,6 +5,59 @@ ; RUN: llc < %s -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-max=128 -verify-machineinstrs | FileCheck -check-prefixes=CHECK,VLS %s ; RUN: llc < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -verify-machineinstrs | FileCheck -check-prefixes=CHECK,VLS %s +define <8 x i16> @concat_2xv4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: concat_2xv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vi v8, v9, 4 +; CHECK-NEXT: ret + %ab = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> + ret <8 x i16> %ab +} + +define <8 x i16> @concat_4xv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i16> %d) { +; CHECK-LABEL: concat_4xv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v10, v11, 2 +; CHECK-NEXT: vslideup.vi v8, v9, 2 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: ret + %ab = shufflevector <2 x i16> %a, <2 x i16> %b, <4 x i32> + %cd = shufflevector <2 x i16> %c, <2 x i16> %d, <4 x i32> + %abcd = shufflevector <4 x i16> %ab, <4 x i16> %cd, <8 x i32> + ret <8 x i16> %abcd +} + +define <8 x i16> @concat_8xv1i16(<1 x i16> %a, <1 x i16> %b, <1 x i16> %c, <1 x i16> %d, <1 x i16> %e, <1 x i16> %f, <1 x i16> %g, <1 x i16> %h) { +; CHECK-LABEL: concat_8xv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v12, v13, 1 +; CHECK-NEXT: vsetivli zero, 3, e16, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v12, v14, 2 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v12, v15, 3 +; CHECK-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: vsetivli zero, 3, e16, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v10, 2 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; 
CHECK-NEXT: vslideup.vi v8, v11, 3 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vi v8, v12, 4 +; CHECK-NEXT: ret + %ab = shufflevector <1 x i16> %a, <1 x i16> %b, <2 x i32> + %cd = shufflevector <1 x i16> %c, <1 x i16> %d, <2 x i32> + %abcd = shufflevector <2 x i16> %ab, <2 x i16> %cd, <4 x i32> + %ef = shufflevector <1 x i16> %e, <1 x i16> %f, <2 x i32> + %gh = shufflevector <1 x i16> %g, <1 x i16> %h, <2 x i32> + %efgh = shufflevector <2 x i16> %ef, <2 x i16> %gh, <4 x i32> + %abcdefgh = shufflevector <4 x i16> %abcd, <4 x i16> %efgh, <8 x i32> + ret <8 x i16> %abcdefgh +} + define <8 x i32> @concat_2xv4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: concat_2xv4i32: ; CHECK: # %bb.0: @@ -19,14 +72,11 @@ define <8 x i32> @concat_2xv4i32(<4 x i32> %a, <4 x i32> %b) { define <8 x i32> @concat_4xv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d) { ; CHECK-LABEL: concat_4xv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v11 -; CHECK-NEXT: vmv1r.v v14, v9 -; CHECK-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v14, 2 -; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vslideup.vi v10, v11, 2 +; CHECK-NEXT: vslideup.vi v8, v9, 2 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v12, 6 +; CHECK-NEXT: vslideup.vi v8, v10, 4 ; CHECK-NEXT: ret %ab = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> %cd = shufflevector <2 x i32> %c, <2 x i32> %d, <4 x i32> @@ -37,24 +87,18 @@ define <8 x i32> @concat_4xv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x define <8 x i32> @concat_8xv1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %c, <1 x i32> %d, <1 x i32> %e, <1 x i32> %f, <1 x i32> %g, <1 x i32> %h) { ; CHECK-LABEL: concat_8xv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v15 -; CHECK-NEXT: vmv1r.v v18, v13 -; CHECK-NEXT: vmv1r.v v20, v11 -; CHECK-NEXT: vmv1r.v v22, v9 -; 
CHECK-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v22, 1 -; CHECK-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v14, v15, 1 +; CHECK-NEXT: vslideup.vi v12, v13, 1 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vslideup.vi v12, v14, 2 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v10, v11, 1 +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vi v8, v10, 2 -; CHECK-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v20, 3 -; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v12, 4 -; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v18, 5 -; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v14, 6 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v16, 7 +; CHECK-NEXT: vslideup.vi v8, v12, 4 ; CHECK-NEXT: ret %ab = shufflevector <1 x i32> %a, <1 x i32> %b, <2 x i32> %cd = shufflevector <1 x i32> %c, <1 x i32> %d, <2 x i32> @@ -80,15 +124,14 @@ define <16 x i32> @concat_2xv8i32(<8 x i32> %a, <8 x i32> %b) { define <16 x i32> @concat_4xv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) { ; CHECK-LABEL: concat_4xv4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v11 -; CHECK-NEXT: vmv1r.v v16, v10 -; CHECK-NEXT: vmv1r.v v20, v9 -; CHECK-NEXT: vsetivli zero, 8, e32, m4, tu, ma -; CHECK-NEXT: vslideup.vi v8, v20, 4 -; CHECK-NEXT: vsetivli zero, 12, e32, m4, tu, ma -; CHECK-NEXT: vslideup.vi v8, v16, 8 +; CHECK-NEXT: vmv1r.v v14, v11 +; CHECK-NEXT: vmv1r.v v12, v10 +; CHECK-NEXT: vmv1r.v v10, v9 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vslideup.vi v12, v14, 4 +; CHECK-NEXT: vslideup.vi v8, v10, 4 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vslideup.vi v8, v12, 12 +; CHECK-NEXT: vslideup.vi 
v8, v12, 8 ; CHECK-NEXT: ret %ab = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> %cd = shufflevector <4 x i32> %c, <4 x i32> %d, <8 x i32> @@ -99,26 +142,18 @@ define <16 x i32> @concat_4xv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x define <16 x i32> @concat_8xv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d, <2 x i32> %e, <2 x i32> %f, <2 x i32> %g, <2 x i32> %h) { ; CHECK-LABEL: concat_8xv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v15 -; CHECK-NEXT: vmv1r.v v20, v14 -; CHECK-NEXT: vmv1r.v v24, v13 -; CHECK-NEXT: vmv1r.v v28, v11 -; CHECK-NEXT: vmv1r.v v4, v10 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vsetivli zero, 4, e32, m4, tu, ma -; CHECK-NEXT: vslideup.vi v8, v0, 2 -; CHECK-NEXT: vsetivli zero, 6, e32, m4, tu, ma -; CHECK-NEXT: vslideup.vi v8, v4, 4 -; CHECK-NEXT: vsetivli zero, 8, e32, m4, tu, ma -; CHECK-NEXT: vslideup.vi v8, v28, 6 -; CHECK-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; CHECK-NEXT: vslideup.vi v8, v12, 8 -; CHECK-NEXT: vsetivli zero, 12, e32, m4, tu, ma -; CHECK-NEXT: vslideup.vi v8, v24, 10 -; CHECK-NEXT: vsetivli zero, 14, e32, m4, tu, ma -; CHECK-NEXT: vslideup.vi v8, v20, 12 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vslideup.vi v14, v15, 2 +; CHECK-NEXT: vslideup.vi v12, v13, 2 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vslideup.vi v12, v14, 4 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vslideup.vi v10, v11, 2 +; CHECK-NEXT: vslideup.vi v8, v9, 2 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v10, 4 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vslideup.vi v8, v16, 14 +; CHECK-NEXT: vslideup.vi v8, v12, 8 ; CHECK-NEXT: ret %ab = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> %cd = shufflevector <2 x i32> %c, <2 x i32> %d, <4 x i32> @@ -152,29 +187,27 @@ define <32 x i32> @concat_2xv16i32(<16 x i32> %a, <16 x i32> %b) { define <32 x i32> @concat_4xv8i32(<8 x i32> %a, <8 x i32> %b, <8 
x i32> %c, <8 x i32> %d) { ; VLA-LABEL: concat_4xv8i32: ; VLA: # %bb.0: -; VLA-NEXT: vmv2r.v v16, v14 -; VLA-NEXT: vmv2r.v v24, v12 -; VLA-NEXT: vmv2r.v v0, v10 -; VLA-NEXT: vsetivli zero, 16, e32, m8, tu, ma -; VLA-NEXT: vslideup.vi v8, v0, 8 -; VLA-NEXT: vsetivli zero, 24, e32, m8, tu, ma -; VLA-NEXT: vslideup.vi v8, v24, 16 +; VLA-NEXT: vmv2r.v v20, v14 +; VLA-NEXT: vmv2r.v v16, v12 +; VLA-NEXT: vmv2r.v v12, v10 +; VLA-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; VLA-NEXT: vslideup.vi v16, v20, 8 +; VLA-NEXT: vslideup.vi v8, v12, 8 ; VLA-NEXT: li a0, 32 ; VLA-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; VLA-NEXT: vslideup.vi v8, v16, 24 +; VLA-NEXT: vslideup.vi v8, v16, 16 ; VLA-NEXT: ret ; ; VLS-LABEL: concat_4xv8i32: ; VLS: # %bb.0: -; VLS-NEXT: vmv2r.v v16, v14 -; VLS-NEXT: vmv2r.v v24, v12 -; VLS-NEXT: vmv2r.v v0, v10 -; VLS-NEXT: vsetivli zero, 16, e32, m8, tu, ma -; VLS-NEXT: vslideup.vi v8, v0, 8 -; VLS-NEXT: vsetivli zero, 24, e32, m8, tu, ma -; VLS-NEXT: vslideup.vi v8, v24, 16 +; VLS-NEXT: vmv2r.v v20, v14 +; VLS-NEXT: vmv2r.v v16, v12 +; VLS-NEXT: vmv2r.v v12, v10 +; VLS-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; VLS-NEXT: vslideup.vi v16, v20, 8 +; VLS-NEXT: vslideup.vi v8, v12, 8 ; VLS-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; VLS-NEXT: vslideup.vi v8, v16, 24 +; VLS-NEXT: vslideup.vi v8, v16, 16 ; VLS-NEXT: ret %ab = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> %cd = shufflevector <8 x i32> %c, <8 x i32> %d, <16 x i32> @@ -185,123 +218,49 @@ define <32 x i32> @concat_4xv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x define <32 x i32> @concat_8xv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f, <4 x i32> %g, <4 x i32> %h) { ; VLA-LABEL: concat_8xv4i32: ; VLA: # %bb.0: -; VLA-NEXT: addi sp, sp, -16 -; VLA-NEXT: .cfi_def_cfa_offset 16 -; VLA-NEXT: csrr a0, vlenb -; VLA-NEXT: slli a0, a0, 5 -; VLA-NEXT: sub sp, sp, a0 -; VLA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 
0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; VLA-NEXT: vmv1r.v v16, v15 -; VLA-NEXT: csrr a0, vlenb -; VLA-NEXT: slli a0, a0, 3 -; VLA-NEXT: mv a1, a0 -; VLA-NEXT: slli a0, a0, 1 -; VLA-NEXT: add a0, a0, a1 -; VLA-NEXT: add a0, sp, a0 -; VLA-NEXT: addi a0, a0, 16 -; VLA-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; VLA-NEXT: vmv1r.v v16, v14 -; VLA-NEXT: csrr a0, vlenb -; VLA-NEXT: slli a0, a0, 4 -; VLA-NEXT: add a0, sp, a0 -; VLA-NEXT: addi a0, a0, 16 -; VLA-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; VLA-NEXT: vmv1r.v v16, v13 -; VLA-NEXT: csrr a0, vlenb -; VLA-NEXT: slli a0, a0, 3 -; VLA-NEXT: add a0, sp, a0 -; VLA-NEXT: addi a0, a0, 16 -; VLA-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; VLA-NEXT: vmv1r.v v18, v15 +; VLA-NEXT: vmv1r.v v20, v14 +; VLA-NEXT: vmv1r.v v22, v13 ; VLA-NEXT: vmv1r.v v16, v12 -; VLA-NEXT: addi a0, sp, 16 -; VLA-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; VLA-NEXT: vmv1r.v v0, v11 -; VLA-NEXT: vmv1r.v v24, v10 -; VLA-NEXT: vmv1r.v v16, v9 -; VLA-NEXT: vsetivli zero, 8, e32, m8, tu, ma -; VLA-NEXT: vslideup.vi v8, v16, 4 -; VLA-NEXT: vsetivli zero, 12, e32, m8, tu, ma -; VLA-NEXT: vslideup.vi v8, v24, 8 -; VLA-NEXT: vsetivli zero, 16, e32, m8, tu, ma -; VLA-NEXT: vslideup.vi v8, v0, 12 -; VLA-NEXT: vsetivli zero, 20, e32, m8, tu, ma -; VLA-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; VLA-NEXT: vslideup.vi v8, v16, 16 -; VLA-NEXT: vsetivli zero, 24, e32, m8, tu, ma -; VLA-NEXT: csrr a0, vlenb -; VLA-NEXT: slli a0, a0, 3 -; VLA-NEXT: add a0, sp, a0 -; VLA-NEXT: addi a0, a0, 16 -; VLA-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; VLA-NEXT: vslideup.vi v8, v16, 20 -; VLA-NEXT: vsetivli zero, 28, e32, m8, tu, ma -; VLA-NEXT: csrr a0, vlenb -; VLA-NEXT: slli a0, a0, 4 -; VLA-NEXT: add a0, sp, a0 -; VLA-NEXT: addi a0, a0, 16 -; VLA-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; VLA-NEXT: vslideup.vi v8, v16, 24 +; VLA-NEXT: vmv1r.v v14, v11 +; VLA-NEXT: vmv1r.v v12, v10 +; 
VLA-NEXT: vmv1r.v v10, v9 +; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; VLA-NEXT: vslideup.vi v20, v18, 4 +; VLA-NEXT: vslideup.vi v16, v22, 4 +; VLA-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; VLA-NEXT: vslideup.vi v16, v20, 8 +; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; VLA-NEXT: vslideup.vi v12, v14, 4 +; VLA-NEXT: vslideup.vi v8, v10, 4 +; VLA-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; VLA-NEXT: vslideup.vi v8, v12, 8 ; VLA-NEXT: li a0, 32 ; VLA-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; VLA-NEXT: csrr a0, vlenb -; VLA-NEXT: slli a0, a0, 3 -; VLA-NEXT: mv a1, a0 -; VLA-NEXT: slli a0, a0, 1 -; VLA-NEXT: add a0, a0, a1 -; VLA-NEXT: add a0, sp, a0 -; VLA-NEXT: addi a0, a0, 16 -; VLA-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; VLA-NEXT: vslideup.vi v8, v16, 28 -; VLA-NEXT: csrr a0, vlenb -; VLA-NEXT: slli a0, a0, 5 -; VLA-NEXT: add sp, sp, a0 -; VLA-NEXT: addi sp, sp, 16 +; VLA-NEXT: vslideup.vi v8, v16, 16 ; VLA-NEXT: ret ; ; VLS-LABEL: concat_8xv4i32: ; VLS: # %bb.0: -; VLS-NEXT: addi sp, sp, -16 -; VLS-NEXT: .cfi_def_cfa_offset 16 -; VLS-NEXT: addi sp, sp, -512 -; VLS-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; VLS-NEXT: vmv1r.v v16, v15 -; VLS-NEXT: addi a0, sp, 400 -; VLS-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; VLS-NEXT: vmv1r.v v16, v14 -; VLS-NEXT: addi a0, sp, 272 -; VLS-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; VLS-NEXT: vmv1r.v v16, v13 -; VLS-NEXT: addi a0, sp, 144 -; VLS-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; VLS-NEXT: vmv1r.v v18, v15 +; VLS-NEXT: vmv1r.v v20, v14 +; VLS-NEXT: vmv1r.v v22, v13 ; VLS-NEXT: vmv1r.v v16, v12 -; VLS-NEXT: addi a0, sp, 16 -; VLS-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; VLS-NEXT: vmv1r.v v0, v11 -; VLS-NEXT: vmv1r.v v24, v10 -; VLS-NEXT: vmv1r.v v16, v9 -; VLS-NEXT: vsetivli zero, 8, e32, m8, tu, ma -; VLS-NEXT: vslideup.vi v8, v16, 4 -; VLS-NEXT: vsetivli zero, 
12, e32, m8, tu, ma -; VLS-NEXT: vslideup.vi v8, v24, 8 -; VLS-NEXT: vsetivli zero, 16, e32, m8, tu, ma -; VLS-NEXT: vslideup.vi v8, v0, 12 -; VLS-NEXT: vsetivli zero, 20, e32, m8, tu, ma -; VLS-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; VLS-NEXT: vslideup.vi v8, v16, 16 -; VLS-NEXT: vsetivli zero, 24, e32, m8, tu, ma -; VLS-NEXT: addi a0, sp, 144 -; VLS-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; VLS-NEXT: vslideup.vi v8, v16, 20 -; VLS-NEXT: vsetivli zero, 28, e32, m8, tu, ma -; VLS-NEXT: addi a0, sp, 272 -; VLS-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; VLS-NEXT: vslideup.vi v8, v16, 24 +; VLS-NEXT: vmv1r.v v14, v11 +; VLS-NEXT: vmv1r.v v12, v10 +; VLS-NEXT: vmv1r.v v10, v9 +; VLS-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; VLS-NEXT: vslideup.vi v20, v18, 4 +; VLS-NEXT: vslideup.vi v16, v22, 4 +; VLS-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; VLS-NEXT: vslideup.vi v16, v20, 8 +; VLS-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; VLS-NEXT: vslideup.vi v12, v14, 4 +; VLS-NEXT: vslideup.vi v8, v10, 4 +; VLS-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; VLS-NEXT: vslideup.vi v8, v12, 8 ; VLS-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; VLS-NEXT: addi a0, sp, 400 -; VLS-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; VLS-NEXT: vslideup.vi v8, v16, 28 -; VLS-NEXT: addi sp, sp, 512 -; VLS-NEXT: addi sp, sp, 16 +; VLS-NEXT: vslideup.vi v8, v16, 16 ; VLS-NEXT: ret %ab = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> %cd = shufflevector <4 x i32> %c, <4 x i32> %d, <8 x i32> diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll index ba5db552b8544f..37902aa1873215 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll @@ -24,17 +24,15 @@ define void @widen_2xv4i16(ptr %x, ptr %z) { define void @widen_3xv4i16(ptr %x, ptr %z) { ; CHECK-LABEL: 
widen_3xv4i16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, a0, 16 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: addi a2, a0, 8 -; CHECK-NEXT: vle16.v v10, (a2) -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vle16.v v12, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v10, 4 -; CHECK-NEXT: vsetivli zero, 12, e16, m2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v12, 8 -; CHECK-NEXT: vse16.v v8, (a1) +; CHECK-NEXT: vle16.v v8, (a2) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v10, (a0) +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vslideup.vi v10, v8, 8 +; CHECK-NEXT: vsetivli zero, 12, e16, m2, ta, ma +; CHECK-NEXT: vse16.v v10, (a1) ; CHECK-NEXT: ret %a = load <4 x i16>, ptr %x %b.gep = getelementptr i8, ptr %x, i64 8 @@ -72,20 +70,18 @@ define void @widen_4xv4i16(ptr %x, ptr %z) { define void @widen_4xv4i16_unaligned(ptr %x, ptr %z) { ; CHECK-NO-MISALIGN-LABEL: widen_4xv4i16_unaligned: ; CHECK-NO-MISALIGN: # %bb.0: -; CHECK-NO-MISALIGN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NO-MISALIGN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NO-MISALIGN-NEXT: vle8.v v8, (a0) -; CHECK-NO-MISALIGN-NEXT: addi a2, a0, 8 -; CHECK-NO-MISALIGN-NEXT: vle8.v v10, (a2) ; CHECK-NO-MISALIGN-NEXT: addi a2, a0, 16 -; CHECK-NO-MISALIGN-NEXT: vle8.v v12, (a2) +; CHECK-NO-MISALIGN-NEXT: vle8.v v10, (a2) +; CHECK-NO-MISALIGN-NEXT: addi a2, a0, 8 ; CHECK-NO-MISALIGN-NEXT: addi a0, a0, 24 -; CHECK-NO-MISALIGN-NEXT: vle8.v v14, (a0) -; CHECK-NO-MISALIGN-NEXT: vsetivli zero, 8, e16, m2, tu, ma -; CHECK-NO-MISALIGN-NEXT: vslideup.vi v8, v10, 4 -; CHECK-NO-MISALIGN-NEXT: vsetivli zero, 12, e16, m2, tu, ma -; CHECK-NO-MISALIGN-NEXT: vslideup.vi v8, v12, 8 +; CHECK-NO-MISALIGN-NEXT: vle8.v v9, (a0) +; CHECK-NO-MISALIGN-NEXT: vle8.v v11, (a2) +; CHECK-NO-MISALIGN-NEXT: vslideup.vi v10, v9, 4 +; CHECK-NO-MISALIGN-NEXT: vslideup.vi v8, v11, 4 ; CHECK-NO-MISALIGN-NEXT: 
vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NO-MISALIGN-NEXT: vslideup.vi v8, v14, 12 +; CHECK-NO-MISALIGN-NEXT: vslideup.vi v8, v10, 8 ; CHECK-NO-MISALIGN-NEXT: vse16.v v8, (a1) ; CHECK-NO-MISALIGN-NEXT: ret ; @@ -185,21 +181,14 @@ define void @strided_constant_0(ptr %x, ptr %z) { define void @strided_constant_mismatch_4xv4i16(ptr %x, ptr %z) { ; CHECK-LABEL: strided_constant_mismatch_4xv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: addi a2, a0, 2 -; CHECK-NEXT: vle16.v v10, (a2) ; CHECK-NEXT: addi a2, a0, 6 -; CHECK-NEXT: vle16.v v12, (a2) -; CHECK-NEXT: addi a0, a0, 8 -; CHECK-NEXT: vle16.v v14, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v10, 4 -; CHECK-NEXT: vsetivli zero, 12, e16, m2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v12, 8 -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v14, 12 -; CHECK-NEXT: vse16.v v8, (a1) +; CHECK-NEXT: li a3, 2 +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vlse64.v v8, (a0), a3 +; CHECK-NEXT: vlse64.v v10, (a2), a3 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v10, 2 +; CHECK-NEXT: vse64.v v8, (a1) ; CHECK-NEXT: ret %a = load <4 x i16>, ptr %x %b.gep = getelementptr i8, ptr %x, i64 2 @@ -255,59 +244,38 @@ define void @strided_runtime_4xv4i16(ptr %x, ptr %z, i64 %s) { define void @strided_runtime_mismatch_4xv4i16(ptr %x, ptr %z, i64 %s, i64 %t) { ; RV32-LABEL: strided_runtime_mismatch_4xv4i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: add a0, a0, a2 -; RV32-NEXT: vle16.v v10, (a0) -; RV32-NEXT: add a0, a0, a4 -; RV32-NEXT: vle16.v v12, (a0) -; RV32-NEXT: add a0, a0, a2 -; RV32-NEXT: vle16.v v14, (a0) -; RV32-NEXT: vsetivli zero, 8, e16, m2, tu, ma -; RV32-NEXT: vslideup.vi v8, v10, 4 -; RV32-NEXT: vsetivli zero, 12, e16, m2, tu, ma -; RV32-NEXT: vslideup.vi v8, v12, 8 
-; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV32-NEXT: vslideup.vi v8, v14, 12 -; RV32-NEXT: vse16.v v8, (a1) +; RV32-NEXT: add a3, a0, a2 +; RV32-NEXT: add a3, a3, a4 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v8, (a0), a2 +; RV32-NEXT: vlse64.v v10, (a3), a2 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vslideup.vi v8, v10, 2 +; RV32-NEXT: vse64.v v8, (a1) ; RV32-NEXT: ret ; ; RV64-LABEL: strided_runtime_mismatch_4xv4i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: add a0, a0, a2 -; RV64-NEXT: vle16.v v10, (a0) -; RV64-NEXT: add a0, a0, a3 -; RV64-NEXT: vle16.v v12, (a0) -; RV64-NEXT: add a0, a0, a2 -; RV64-NEXT: vle16.v v14, (a0) -; RV64-NEXT: vsetivli zero, 8, e16, m2, tu, ma -; RV64-NEXT: vslideup.vi v8, v10, 4 -; RV64-NEXT: vsetivli zero, 12, e16, m2, tu, ma -; RV64-NEXT: vslideup.vi v8, v12, 8 -; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: vslideup.vi v8, v14, 12 -; RV64-NEXT: vse16.v v8, (a1) +; RV64-NEXT: add a4, a0, a2 +; RV64-NEXT: add a3, a4, a3 +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vlse64.v v8, (a0), a2 +; RV64-NEXT: vlse64.v v10, (a3), a2 +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vslideup.vi v8, v10, 2 +; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: ret ; ; ZVE64F-LABEL: strided_runtime_mismatch_4xv4i16: ; ZVE64F: # %bb.0: -; ZVE64F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVE64F-NEXT: vle16.v v8, (a0) -; ZVE64F-NEXT: add a0, a0, a2 -; ZVE64F-NEXT: vle16.v v10, (a0) -; ZVE64F-NEXT: add a0, a0, a3 -; ZVE64F-NEXT: vle16.v v12, (a0) -; ZVE64F-NEXT: add a0, a0, a2 -; ZVE64F-NEXT: vle16.v v14, (a0) -; ZVE64F-NEXT: vsetivli zero, 8, e16, m2, tu, ma -; ZVE64F-NEXT: vslideup.vi v8, v10, 4 -; ZVE64F-NEXT: vsetivli zero, 12, e16, m2, tu, ma -; ZVE64F-NEXT: vslideup.vi v8, v12, 8 -; ZVE64F-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVE64F-NEXT: vslideup.vi v8, v14, 12 -; ZVE64F-NEXT: 
vse16.v v8, (a1) +; ZVE64F-NEXT: add a4, a0, a2 +; ZVE64F-NEXT: add a3, a4, a3 +; ZVE64F-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZVE64F-NEXT: vlse64.v v8, (a0), a2 +; ZVE64F-NEXT: vlse64.v v10, (a3), a2 +; ZVE64F-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZVE64F-NEXT: vslideup.vi v8, v10, 2 +; ZVE64F-NEXT: vse64.v v8, (a1) ; ZVE64F-NEXT: ret %a = load <4 x i16>, ptr %x %b.gep = getelementptr i8, ptr %x, i64 %s diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll index 2ae031798f5bd6..2ae058128eaa00 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll @@ -549,36 +549,36 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask ; CHECK-RV32-NEXT: # %bb.1: ; CHECK-RV32-NEXT: li a3, 32 ; CHECK-RV32-NEXT: .LBB42_2: -; CHECK-RV32-NEXT: mul a5, a3, a2 -; CHECK-RV32-NEXT: addi a6, a4, -32 -; CHECK-RV32-NEXT: sltu a4, a4, a6 -; CHECK-RV32-NEXT: addi a4, a4, -1 -; CHECK-RV32-NEXT: and a6, a4, a6 -; CHECK-RV32-NEXT: li a4, 16 -; CHECK-RV32-NEXT: add a5, a1, a5 -; CHECK-RV32-NEXT: bltu a6, a4, .LBB42_4 +; CHECK-RV32-NEXT: mul a6, a3, a2 +; CHECK-RV32-NEXT: addi a5, a4, -32 +; CHECK-RV32-NEXT: sltu a7, a4, a5 +; CHECK-RV32-NEXT: addi a7, a7, -1 +; CHECK-RV32-NEXT: and a7, a7, a5 +; CHECK-RV32-NEXT: li a5, 16 +; CHECK-RV32-NEXT: add a6, a1, a6 +; CHECK-RV32-NEXT: bltu a7, a5, .LBB42_4 ; CHECK-RV32-NEXT: # %bb.3: -; CHECK-RV32-NEXT: li a6, 16 +; CHECK-RV32-NEXT: li a7, 16 ; CHECK-RV32-NEXT: .LBB42_4: ; CHECK-RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 4 -; CHECK-RV32-NEXT: vsetvli zero, a6, e64, m8, ta, ma -; CHECK-RV32-NEXT: vlse64.v v16, (a5), a2, v0.t -; CHECK-RV32-NEXT: addi a5, a3, -16 -; CHECK-RV32-NEXT: sltu a6, a3, a5 -; CHECK-RV32-NEXT: addi a6, a6, -1 -; CHECK-RV32-NEXT: and a5, a6, a5 -; CHECK-RV32-NEXT: bltu a3, a4, .LBB42_6 +; 
CHECK-RV32-NEXT: vsetvli zero, a7, e64, m8, ta, ma +; CHECK-RV32-NEXT: vlse64.v v16, (a6), a2, v0.t +; CHECK-RV32-NEXT: addi a6, a3, -16 +; CHECK-RV32-NEXT: sltu a3, a3, a6 +; CHECK-RV32-NEXT: addi a3, a3, -1 +; CHECK-RV32-NEXT: and a3, a3, a6 +; CHECK-RV32-NEXT: bltu a4, a5, .LBB42_6 ; CHECK-RV32-NEXT: # %bb.5: -; CHECK-RV32-NEXT: li a3, 16 +; CHECK-RV32-NEXT: li a4, 16 ; CHECK-RV32-NEXT: .LBB42_6: -; CHECK-RV32-NEXT: mul a4, a3, a2 -; CHECK-RV32-NEXT: add a4, a1, a4 +; CHECK-RV32-NEXT: mul a5, a4, a2 +; CHECK-RV32-NEXT: add a5, a1, a5 ; CHECK-RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 2 -; CHECK-RV32-NEXT: vsetvli zero, a5, e64, m8, ta, ma -; CHECK-RV32-NEXT: vlse64.v v24, (a4), a2, v0.t ; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-RV32-NEXT: vlse64.v v24, (a5), a2, v0.t +; CHECK-RV32-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-RV32-NEXT: vmv1r.v v0, v8 ; CHECK-RV32-NEXT: vlse64.v v8, (a1), a2, v0.t ; CHECK-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma @@ -599,36 +599,36 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask ; CHECK-RV64-NEXT: # %bb.1: ; CHECK-RV64-NEXT: li a4, 32 ; CHECK-RV64-NEXT: .LBB42_2: -; CHECK-RV64-NEXT: mul a5, a4, a2 -; CHECK-RV64-NEXT: addi a6, a3, -32 -; CHECK-RV64-NEXT: sltu a3, a3, a6 -; CHECK-RV64-NEXT: addi a3, a3, -1 -; CHECK-RV64-NEXT: and a6, a3, a6 -; CHECK-RV64-NEXT: li a3, 16 -; CHECK-RV64-NEXT: add a5, a1, a5 -; CHECK-RV64-NEXT: bltu a6, a3, .LBB42_4 +; CHECK-RV64-NEXT: mul a6, a4, a2 +; CHECK-RV64-NEXT: addi a5, a3, -32 +; CHECK-RV64-NEXT: sltu a7, a3, a5 +; CHECK-RV64-NEXT: addi a7, a7, -1 +; CHECK-RV64-NEXT: and a7, a7, a5 +; CHECK-RV64-NEXT: li a5, 16 +; CHECK-RV64-NEXT: add a6, a1, a6 +; CHECK-RV64-NEXT: bltu a7, a5, .LBB42_4 ; CHECK-RV64-NEXT: # %bb.3: -; CHECK-RV64-NEXT: li a6, 16 +; CHECK-RV64-NEXT: li a7, 16 ; CHECK-RV64-NEXT: .LBB42_4: ; CHECK-RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-RV64-NEXT: vslidedown.vi 
v0, v8, 4 -; CHECK-RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma -; CHECK-RV64-NEXT: vlse64.v v16, (a5), a2, v0.t -; CHECK-RV64-NEXT: addi a5, a4, -16 -; CHECK-RV64-NEXT: sltu a6, a4, a5 -; CHECK-RV64-NEXT: addi a6, a6, -1 -; CHECK-RV64-NEXT: and a5, a6, a5 -; CHECK-RV64-NEXT: bltu a4, a3, .LBB42_6 +; CHECK-RV64-NEXT: vsetvli zero, a7, e64, m8, ta, ma +; CHECK-RV64-NEXT: vlse64.v v16, (a6), a2, v0.t +; CHECK-RV64-NEXT: addi a6, a4, -16 +; CHECK-RV64-NEXT: sltu a4, a4, a6 +; CHECK-RV64-NEXT: addi a4, a4, -1 +; CHECK-RV64-NEXT: and a4, a4, a6 +; CHECK-RV64-NEXT: bltu a3, a5, .LBB42_6 ; CHECK-RV64-NEXT: # %bb.5: -; CHECK-RV64-NEXT: li a4, 16 +; CHECK-RV64-NEXT: li a3, 16 ; CHECK-RV64-NEXT: .LBB42_6: -; CHECK-RV64-NEXT: mul a3, a4, a2 -; CHECK-RV64-NEXT: add a3, a1, a3 +; CHECK-RV64-NEXT: mul a5, a3, a2 +; CHECK-RV64-NEXT: add a5, a1, a5 ; CHECK-RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 2 -; CHECK-RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma -; CHECK-RV64-NEXT: vlse64.v v24, (a3), a2, v0.t ; CHECK-RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma +; CHECK-RV64-NEXT: vlse64.v v24, (a5), a2, v0.t +; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-RV64-NEXT: vmv1r.v v0, v8 ; CHECK-RV64-NEXT: vlse64.v v8, (a1), a2, v0.t ; CHECK-RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll index e7b74737239154..4f16ce28bbb7e8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll @@ -310,23 +310,24 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 ; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: mv a6, a7 ; CHECK-NEXT: bltu a7, a3, .LBB16_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a7, 64 +; CHECK-NEXT: li a6, 64 ; CHECK-NEXT: 
.LBB16_4: ; CHECK-NEXT: addi a5, a1, 384 ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a1) -; CHECK-NEXT: csrr a6, vlenb -; CHECK-NEXT: slli a6, a6, 3 -; CHECK-NEXT: add a6, sp, a6 -; CHECK-NEXT: addi a6, a6, 16 -; CHECK-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill -; CHECK-NEXT: addi a6, a7, -32 -; CHECK-NEXT: sltu t0, a7, a6 -; CHECK-NEXT: addi t0, t0, -1 -; CHECK-NEXT: and a6, t0, a6 +; CHECK-NEXT: csrr t0, vlenb +; CHECK-NEXT: slli t0, t0, 3 +; CHECK-NEXT: add t0, sp, t0 +; CHECK-NEXT: addi t0, t0, 16 +; CHECK-NEXT: vs8r.v v8, (t0) # Unknown-size Folded Spill +; CHECK-NEXT: addi t0, a6, -32 +; CHECK-NEXT: sltu a6, a6, t0 +; CHECK-NEXT: addi a6, a6, -1 +; CHECK-NEXT: and a6, a6, t0 ; CHECK-NEXT: addi t0, a6, -16 ; CHECK-NEXT: sltu t1, a6, t0 ; CHECK-NEXT: addi t1, t1, -1 @@ -364,14 +365,15 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 ; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: mv a5, a4 ; CHECK-NEXT: bltu a4, a3, .LBB16_8 ; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: li a4, 32 +; CHECK-NEXT: li a5, 32 ; CHECK-NEXT: .LBB16_8: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: addi a1, a4, -16 -; CHECK-NEXT: sltu a5, a4, a1 +; CHECK-NEXT: addi a1, a5, -16 +; CHECK-NEXT: sltu a5, a5, a1 ; CHECK-NEXT: addi a5, a5, -1 ; CHECK-NEXT: and a1, a5, a1 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma @@ -387,62 +389,63 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t ; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: mv a1, a7 ; CHECK-NEXT: bltu a7, a3, .LBB16_12 ; CHECK-NEXT: # %bb.11: -; CHECK-NEXT: li a7, 32 +; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: .LBB16_12: ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 
24 -; CHECK-NEXT: mul a1, a1, a4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: li a5, 24 +; CHECK-NEXT: mul a4, a4, a5 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload ; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 56 -; CHECK-NEXT: mul a1, a1, a4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: li a5, 56 +; CHECK-NEXT: mul a4, a4, a5 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload ; CHECK-NEXT: vslideup.vi v8, v24, 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 56 -; CHECK-NEXT: mul a1, a1, a4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: li a5, 56 +; CHECK-NEXT: mul a4, a4, a5 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: slli a4, a4, 4 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload ; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 48 -; CHECK-NEXT: mul a1, a1, a4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: li a5, 48 +; CHECK-NEXT: mul a4, a4, a5 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; 
CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload ; CHECK-NEXT: vslideup.vi v8, v24, 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 48 -; CHECK-NEXT: mul a1, a1, a4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: li a5, 48 +; CHECK-NEXT: mul a4, a4, a5 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v8, v0 ; CHECK-NEXT: vslideup.vi v8, v16, 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 24 -; CHECK-NEXT: mul a1, a1, a4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a1, a7, -16 -; CHECK-NEXT: sltu a4, a7, a1 -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: and a1, a4, a1 +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: li a5, 24 +; CHECK-NEXT: mul a4, a4, a5 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: addi a4, a1, -16 +; CHECK-NEXT: sltu a1, a1, a4 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a1, a1, a4 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: csrr a1, vlenb diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll index bb213c9276a3a9..618b875be56651 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll @@ -418,20 +418,20 @@ define <33 x double> @vpload_v33f64(ptr %ptr, <33 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: .LBB32_2: ; CHECK-NEXT: addi a4, a3, -16 -; CHECK-NEXT: sltu a5, a3, a4 -; CHECK-NEXT: addi a5, a5, -1 -; CHECK-NEXT: and a4, a5, a4 -; CHECK-NEXT: addi a5, a1, 128 +; CHECK-NEXT: sltu a3, a3, a4 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: 
and a3, a3, a4 +; CHECK-NEXT: addi a4, a1, 128 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v8, 2 -; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a5), v0.t -; CHECK-NEXT: addi a4, a2, -32 -; CHECK-NEXT: sltu a2, a2, a4 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a4, a2, a4 -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: bltu a4, a2, .LBB32_4 +; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v16, (a4), v0.t +; CHECK-NEXT: addi a3, a2, -32 +; CHECK-NEXT: sltu a4, a2, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a4, a4, a3 +; CHECK-NEXT: li a3, 16 +; CHECK-NEXT: bltu a4, a3, .LBB32_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a4, 16 ; CHECK-NEXT: .LBB32_4: @@ -440,11 +440,11 @@ define <33 x double> @vpload_v33f64(ptr %ptr, <33 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vslidedown.vi v0, v8, 4 ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a5), v0.t -; CHECK-NEXT: bltu a3, a2, .LBB32_6 +; CHECK-NEXT: bltu a2, a3, .LBB32_6 ; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: li a3, 16 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: .LBB32_6: -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vle64.v v8, (a1), v0.t ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll index 48ce7d623475cb..3ada24bd9846a1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll @@ -89,17 +89,17 @@ entry: define <2 x i32> @ustest_f64i32(<2 x double> %x) { ; CHECK-NOV-LABEL: ustest_f64i32: ; CHECK-NOV: # %bb.0: # %entry -; CHECK-NOV-NEXT: fcvt.l.d a0, fa0, rtz +; CHECK-NOV-NEXT: fcvt.l.d a1, fa1, rtz ; CHECK-NOV-NEXT: li a2, -1 ; CHECK-NOV-NEXT: srli a2, a2, 32 -; CHECK-NOV-NEXT: fcvt.l.d a1, fa1, rtz -; CHECK-NOV-NEXT: blt a0, a2, .LBB2_2 +; 
CHECK-NOV-NEXT: fcvt.l.d a0, fa0, rtz +; CHECK-NOV-NEXT: blt a1, a2, .LBB2_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: mv a0, a2 +; CHECK-NOV-NEXT: mv a1, a2 ; CHECK-NOV-NEXT: .LBB2_2: # %entry -; CHECK-NOV-NEXT: blt a1, a2, .LBB2_4 +; CHECK-NOV-NEXT: blt a0, a2, .LBB2_4 ; CHECK-NOV-NEXT: # %bb.3: # %entry -; CHECK-NOV-NEXT: mv a1, a2 +; CHECK-NOV-NEXT: mv a0, a2 ; CHECK-NOV-NEXT: .LBB2_4: # %entry ; CHECK-NOV-NEXT: sgtz a2, a1 ; CHECK-NOV-NEXT: sgtz a3, a0 @@ -257,46 +257,46 @@ entry: define <4 x i32> @ustest_f32i32(<4 x float> %x) { ; CHECK-NOV-LABEL: ustest_f32i32: ; CHECK-NOV: # %bb.0: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a1, fa0, rtz +; CHECK-NOV-NEXT: fcvt.l.s a1, fa3, rtz ; CHECK-NOV-NEXT: li a4, -1 ; CHECK-NOV-NEXT: srli a4, a4, 32 -; CHECK-NOV-NEXT: fcvt.l.s a2, fa1, rtz +; CHECK-NOV-NEXT: fcvt.l.s a2, fa2, rtz ; CHECK-NOV-NEXT: bge a1, a4, .LBB5_6 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a3, fa2, rtz +; CHECK-NOV-NEXT: fcvt.l.s a3, fa1, rtz ; CHECK-NOV-NEXT: bge a2, a4, .LBB5_7 ; CHECK-NOV-NEXT: .LBB5_2: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a5, fa3, rtz +; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz ; CHECK-NOV-NEXT: bge a3, a4, .LBB5_8 ; CHECK-NOV-NEXT: .LBB5_3: # %entry ; CHECK-NOV-NEXT: blt a5, a4, .LBB5_5 ; CHECK-NOV-NEXT: .LBB5_4: # %entry ; CHECK-NOV-NEXT: mv a5, a4 ; CHECK-NOV-NEXT: .LBB5_5: # %entry -; CHECK-NOV-NEXT: sgtz a4, a5 -; CHECK-NOV-NEXT: sgtz a6, a3 -; CHECK-NOV-NEXT: sgtz a7, a2 -; CHECK-NOV-NEXT: sgtz t0, a1 +; CHECK-NOV-NEXT: sgtz a4, a1 +; CHECK-NOV-NEXT: sgtz a6, a2 +; CHECK-NOV-NEXT: sgtz a7, a3 +; CHECK-NOV-NEXT: sgtz t0, a5 ; CHECK-NOV-NEXT: negw t0, t0 -; CHECK-NOV-NEXT: and a1, t0, a1 +; CHECK-NOV-NEXT: and a5, t0, a5 ; CHECK-NOV-NEXT: negw a7, a7 -; CHECK-NOV-NEXT: and a2, a7, a2 +; CHECK-NOV-NEXT: and a3, a7, a3 ; CHECK-NOV-NEXT: negw a6, a6 -; CHECK-NOV-NEXT: and a3, a6, a3 +; CHECK-NOV-NEXT: and a2, a6, a2 ; CHECK-NOV-NEXT: negw a4, a4 -; CHECK-NOV-NEXT: and a4, a4, a5 -; CHECK-NOV-NEXT: sw 
a4, 12(a0) -; CHECK-NOV-NEXT: sw a3, 8(a0) -; CHECK-NOV-NEXT: sw a2, 4(a0) -; CHECK-NOV-NEXT: sw a1, 0(a0) +; CHECK-NOV-NEXT: and a1, a4, a1 +; CHECK-NOV-NEXT: sw a1, 12(a0) +; CHECK-NOV-NEXT: sw a2, 8(a0) +; CHECK-NOV-NEXT: sw a3, 4(a0) +; CHECK-NOV-NEXT: sw a5, 0(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB5_6: # %entry ; CHECK-NOV-NEXT: mv a1, a4 -; CHECK-NOV-NEXT: fcvt.l.s a3, fa2, rtz +; CHECK-NOV-NEXT: fcvt.l.s a3, fa1, rtz ; CHECK-NOV-NEXT: blt a2, a4, .LBB5_2 ; CHECK-NOV-NEXT: .LBB5_7: # %entry ; CHECK-NOV-NEXT: mv a2, a4 -; CHECK-NOV-NEXT: fcvt.l.s a5, fa3, rtz +; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz ; CHECK-NOV-NEXT: blt a3, a4, .LBB5_3 ; CHECK-NOV-NEXT: .LBB5_8: # %entry ; CHECK-NOV-NEXT: mv a3, a4 @@ -441,57 +441,50 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 ; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb -; CHECK-V-NEXT: lhu s0, 24(a0) -; CHECK-V-NEXT: lhu s1, 16(a0) -; CHECK-V-NEXT: lhu s2, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: lhu s0, 0(a0) +; CHECK-V-NEXT: lhu s1, 8(a0) +; CHECK-V-NEXT: lhu s2, 16(a0) +; CHECK-V-NEXT: lhu a0, 24(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; 
CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz +; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v10, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 1 ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 3 +; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 2 ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnclip.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb @@ -609,57 +602,50 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { 
; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 ; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb -; CHECK-V-NEXT: lhu s0, 24(a0) -; CHECK-V-NEXT: lhu s1, 16(a0) -; CHECK-V-NEXT: lhu s2, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: lhu s0, 0(a0) +; CHECK-V-NEXT: lhu s1, 8(a0) +; CHECK-V-NEXT: lhu s2, 16(a0) +; CHECK-V-NEXT: lhu a0, 24(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, 
sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz +; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v10, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 1 ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 3 +; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 2 ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnclipu.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb @@ -700,10 +686,10 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs0, -48 ; CHECK-NOV-NEXT: .cfi_offset fs1, -56 ; CHECK-NOV-NEXT: .cfi_offset fs2, -64 -; CHECK-NOV-NEXT: lhu s1, 0(a1) -; CHECK-NOV-NEXT: lhu s2, 24(a1) -; CHECK-NOV-NEXT: lhu s3, 16(a1) -; CHECK-NOV-NEXT: lhu a1, 8(a1) +; CHECK-NOV-NEXT: lhu s1, 24(a1) +; CHECK-NOV-NEXT: lhu s2, 0(a1) +; CHECK-NOV-NEXT: lhu s3, 8(a1) +; CHECK-NOV-NEXT: lhu a1, 16(a1) ; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: fmv.w.x fa0, a1 ; CHECK-NOV-NEXT: call __extendhfsf2 @@ -732,22 +718,22 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-NOV-NEXT: .LBB8_4: # %entry ; CHECK-NOV-NEXT: mv a3, a2 ; CHECK-NOV-NEXT: .LBB8_5: # %entry -; CHECK-NOV-NEXT: sgtz a2, a3 -; CHECK-NOV-NEXT: sgtz a4, a1 -; CHECK-NOV-NEXT: sgtz a5, s1 -; CHECK-NOV-NEXT: sgtz a6, a0 +; 
CHECK-NOV-NEXT: sgtz a2, a0 +; CHECK-NOV-NEXT: sgtz a4, s1 +; CHECK-NOV-NEXT: sgtz a5, a1 +; CHECK-NOV-NEXT: sgtz a6, a3 ; CHECK-NOV-NEXT: negw a6, a6 -; CHECK-NOV-NEXT: and a0, a6, a0 +; CHECK-NOV-NEXT: and a3, a6, a3 ; CHECK-NOV-NEXT: negw a5, a5 -; CHECK-NOV-NEXT: and a5, a5, s1 +; CHECK-NOV-NEXT: and a1, a5, a1 ; CHECK-NOV-NEXT: negw a4, a4 -; CHECK-NOV-NEXT: and a1, a4, a1 +; CHECK-NOV-NEXT: and a4, a4, s1 ; CHECK-NOV-NEXT: negw a2, a2 -; CHECK-NOV-NEXT: and a2, a2, a3 -; CHECK-NOV-NEXT: sw a2, 12(s0) -; CHECK-NOV-NEXT: sw a1, 8(s0) -; CHECK-NOV-NEXT: sw a5, 4(s0) -; CHECK-NOV-NEXT: sw a0, 0(s0) +; CHECK-NOV-NEXT: and a0, a2, a0 +; CHECK-NOV-NEXT: sw a0, 12(s0) +; CHECK-NOV-NEXT: sw a4, 8(s0) +; CHECK-NOV-NEXT: sw a1, 4(s0) +; CHECK-NOV-NEXT: sw a3, 0(s0) ; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 40(sp) # 8-byte Folded Reload @@ -787,60 +773,53 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 ; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb -; CHECK-V-NEXT: lhu s0, 24(a0) -; CHECK-V-NEXT: lhu s1, 16(a0) -; CHECK-V-NEXT: lhu s2, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: lhu s0, 0(a0) +; CHECK-V-NEXT: lhu s1, 8(a0) +; CHECK-V-NEXT: lhu s2, 16(a0) +; CHECK-V-NEXT: lhu a0, 24(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma +; 
CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 3 +; CHECK-V-NEXT: vslideup.vi v8, v10, 2 ; CHECK-V-NEXT: li 
a0, -1 ; CHECK-V-NEXT: srli a0, a0, 32 -; CHECK-V-NEXT: vmin.vx v8, v10, a0 +; CHECK-V-NEXT: vmin.vx v8, v8, a0 ; CHECK-V-NEXT: vmax.vx v10, v8, zero ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 @@ -950,17 +929,17 @@ entry: define <2 x i16> @ustest_f64i16(<2 x double> %x) { ; CHECK-NOV-LABEL: ustest_f64i16: ; CHECK-NOV: # %bb.0: # %entry -; CHECK-NOV-NEXT: fcvt.w.d a0, fa0, rtz +; CHECK-NOV-NEXT: fcvt.w.d a1, fa1, rtz ; CHECK-NOV-NEXT: lui a2, 16 ; CHECK-NOV-NEXT: addiw a2, a2, -1 -; CHECK-NOV-NEXT: fcvt.w.d a1, fa1, rtz -; CHECK-NOV-NEXT: blt a0, a2, .LBB11_2 +; CHECK-NOV-NEXT: fcvt.w.d a0, fa0, rtz +; CHECK-NOV-NEXT: blt a1, a2, .LBB11_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: mv a0, a2 +; CHECK-NOV-NEXT: mv a1, a2 ; CHECK-NOV-NEXT: .LBB11_2: # %entry -; CHECK-NOV-NEXT: blt a1, a2, .LBB11_4 +; CHECK-NOV-NEXT: blt a0, a2, .LBB11_4 ; CHECK-NOV-NEXT: # %bb.3: # %entry -; CHECK-NOV-NEXT: mv a1, a2 +; CHECK-NOV-NEXT: mv a0, a2 ; CHECK-NOV-NEXT: .LBB11_4: # %entry ; CHECK-NOV-NEXT: sgtz a2, a1 ; CHECK-NOV-NEXT: sgtz a3, a0 @@ -1122,46 +1101,46 @@ entry: define <4 x i16> @ustest_f32i16(<4 x float> %x) { ; CHECK-NOV-LABEL: ustest_f32i16: ; CHECK-NOV: # %bb.0: # %entry -; CHECK-NOV-NEXT: fcvt.w.s a1, fa0, rtz +; CHECK-NOV-NEXT: fcvt.w.s a1, fa3, rtz ; CHECK-NOV-NEXT: lui a4, 16 ; CHECK-NOV-NEXT: addiw a4, a4, -1 -; CHECK-NOV-NEXT: fcvt.w.s a2, fa1, rtz +; CHECK-NOV-NEXT: fcvt.w.s a2, fa2, rtz ; CHECK-NOV-NEXT: bge a1, a4, .LBB14_6 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: fcvt.w.s a3, fa2, rtz +; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz ; CHECK-NOV-NEXT: bge a2, a4, .LBB14_7 ; CHECK-NOV-NEXT: .LBB14_2: # %entry -; CHECK-NOV-NEXT: fcvt.w.s a5, fa3, rtz +; CHECK-NOV-NEXT: fcvt.w.s a5, fa0, rtz ; CHECK-NOV-NEXT: bge a3, a4, .LBB14_8 ; CHECK-NOV-NEXT: .LBB14_3: # %entry ; CHECK-NOV-NEXT: blt a5, a4, .LBB14_5 ; CHECK-NOV-NEXT: .LBB14_4: # %entry ; CHECK-NOV-NEXT: mv a5, a4 ; CHECK-NOV-NEXT: .LBB14_5: # %entry 
-; CHECK-NOV-NEXT: sgtz a4, a5 -; CHECK-NOV-NEXT: sgtz a6, a3 -; CHECK-NOV-NEXT: sgtz a7, a2 -; CHECK-NOV-NEXT: sgtz t0, a1 +; CHECK-NOV-NEXT: sgtz a4, a1 +; CHECK-NOV-NEXT: sgtz a6, a2 +; CHECK-NOV-NEXT: sgtz a7, a3 +; CHECK-NOV-NEXT: sgtz t0, a5 ; CHECK-NOV-NEXT: negw t0, t0 -; CHECK-NOV-NEXT: and a1, t0, a1 +; CHECK-NOV-NEXT: and a5, t0, a5 ; CHECK-NOV-NEXT: negw a7, a7 -; CHECK-NOV-NEXT: and a2, a7, a2 +; CHECK-NOV-NEXT: and a3, a7, a3 ; CHECK-NOV-NEXT: negw a6, a6 -; CHECK-NOV-NEXT: and a3, a6, a3 +; CHECK-NOV-NEXT: and a2, a6, a2 ; CHECK-NOV-NEXT: negw a4, a4 -; CHECK-NOV-NEXT: and a4, a4, a5 -; CHECK-NOV-NEXT: sh a4, 6(a0) -; CHECK-NOV-NEXT: sh a3, 4(a0) -; CHECK-NOV-NEXT: sh a2, 2(a0) -; CHECK-NOV-NEXT: sh a1, 0(a0) +; CHECK-NOV-NEXT: and a1, a4, a1 +; CHECK-NOV-NEXT: sh a1, 6(a0) +; CHECK-NOV-NEXT: sh a2, 4(a0) +; CHECK-NOV-NEXT: sh a3, 2(a0) +; CHECK-NOV-NEXT: sh a5, 0(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB14_6: # %entry ; CHECK-NOV-NEXT: mv a1, a4 -; CHECK-NOV-NEXT: fcvt.w.s a3, fa2, rtz +; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz ; CHECK-NOV-NEXT: blt a2, a4, .LBB14_2 ; CHECK-NOV-NEXT: .LBB14_7: # %entry ; CHECK-NOV-NEXT: mv a2, a4 -; CHECK-NOV-NEXT: fcvt.w.s a5, fa3, rtz +; CHECK-NOV-NEXT: fcvt.w.s a5, fa0, rtz ; CHECK-NOV-NEXT: blt a3, a4, .LBB14_3 ; CHECK-NOV-NEXT: .LBB14_8: # %entry ; CHECK-NOV-NEXT: mv a3, a4 @@ -1404,90 +1383,125 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s5, -56 ; CHECK-V-NEXT: .cfi_offset s6, -64 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb -; CHECK-V-NEXT: lhu s0, 56(a0) -; CHECK-V-NEXT: lhu s1, 48(a0) -; CHECK-V-NEXT: lhu s2, 40(a0) -; CHECK-V-NEXT: lhu s3, 32(a0) -; CHECK-V-NEXT: lhu s4, 24(a0) -; CHECK-V-NEXT: lhu s5, 16(a0) -; CHECK-V-NEXT: lhu s6, 0(a0) -; 
CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 4 * vlenb +; CHECK-V-NEXT: lhu s0, 0(a0) +; CHECK-V-NEXT: lhu s1, 8(a0) +; CHECK-V-NEXT: lhu s2, 16(a0) +; CHECK-V-NEXT: lhu s3, 24(a0) +; CHECK-V-NEXT: lhu s4, 32(a0) +; CHECK-V-NEXT: lhu s5, 40(a0) +; CHECK-V-NEXT: lhu s6, 48(a0) +; CHECK-V-NEXT: lhu a0, 56(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s6 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v 
v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s4 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 3 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 4 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill -; 
CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 5 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 6 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v10, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 7 +; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 1 +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded 
Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 4 ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnclip.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload @@ -1682,90 +1696,125 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s5, -56 ; CHECK-V-NEXT: .cfi_offset s6, -64 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb -; CHECK-V-NEXT: lhu s0, 56(a0) -; CHECK-V-NEXT: lhu s1, 48(a0) -; CHECK-V-NEXT: lhu s2, 40(a0) -; CHECK-V-NEXT: lhu s3, 32(a0) -; CHECK-V-NEXT: lhu s4, 24(a0) -; CHECK-V-NEXT: lhu s5, 16(a0) -; CHECK-V-NEXT: lhu s6, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 4 * vlenb +; CHECK-V-NEXT: lhu s0, 0(a0) +; CHECK-V-NEXT: lhu s1, 8(a0) +; CHECK-V-NEXT: lhu s2, 16(a0) +; CHECK-V-NEXT: lhu s3, 24(a0) +; CHECK-V-NEXT: lhu s4, 32(a0) +; CHECK-V-NEXT: lhu s5, 40(a0) +; CHECK-V-NEXT: lhu s6, 48(a0) +; CHECK-V-NEXT: lhu a0, 56(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded 
Spill ; CHECK-V-NEXT: fmv.w.x fa0, s6 -; CHECK-V-NEXT: call __extendhfsf2 -; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s5 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s5 +; CHECK-V-NEXT: call __extendhfsf2 +; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s4 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, 
ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 3 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 4 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 5 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, 
vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 6 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v10, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 7 +; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 1 +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 4 ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnclipu.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 
64(sp) # 8-byte Folded Reload @@ -1822,14 +1871,14 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs4, -112 ; CHECK-NOV-NEXT: .cfi_offset fs5, -120 ; CHECK-NOV-NEXT: .cfi_offset fs6, -128 -; CHECK-NOV-NEXT: lhu s1, 0(a1) -; CHECK-NOV-NEXT: lhu s2, 56(a1) -; CHECK-NOV-NEXT: lhu s3, 48(a1) -; CHECK-NOV-NEXT: lhu s4, 40(a1) -; CHECK-NOV-NEXT: lhu s5, 32(a1) -; CHECK-NOV-NEXT: lhu s6, 24(a1) -; CHECK-NOV-NEXT: lhu s7, 16(a1) -; CHECK-NOV-NEXT: lhu a1, 8(a1) +; CHECK-NOV-NEXT: lhu s1, 56(a1) +; CHECK-NOV-NEXT: lhu s2, 0(a1) +; CHECK-NOV-NEXT: lhu s3, 8(a1) +; CHECK-NOV-NEXT: lhu s4, 16(a1) +; CHECK-NOV-NEXT: lhu s5, 24(a1) +; CHECK-NOV-NEXT: lhu s6, 32(a1) +; CHECK-NOV-NEXT: lhu s7, 40(a1) +; CHECK-NOV-NEXT: lhu a1, 48(a1) ; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: fmv.w.x fa0, a1 ; CHECK-NOV-NEXT: call __extendhfsf2 @@ -1882,38 +1931,38 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: .LBB17_8: # %entry ; CHECK-NOV-NEXT: mv a7, a3 ; CHECK-NOV-NEXT: .LBB17_9: # %entry -; CHECK-NOV-NEXT: sgtz a3, a7 -; CHECK-NOV-NEXT: sgtz t0, a6 -; CHECK-NOV-NEXT: sgtz t1, a5 -; CHECK-NOV-NEXT: sgtz t2, a4 -; CHECK-NOV-NEXT: sgtz t3, a2 -; CHECK-NOV-NEXT: sgtz t4, a1 -; CHECK-NOV-NEXT: sgtz t5, s1 -; CHECK-NOV-NEXT: sgtz t6, a0 +; CHECK-NOV-NEXT: sgtz a3, a0 +; CHECK-NOV-NEXT: sgtz t0, s1 +; CHECK-NOV-NEXT: sgtz t1, a1 +; CHECK-NOV-NEXT: sgtz t2, a2 +; CHECK-NOV-NEXT: sgtz t3, a4 +; CHECK-NOV-NEXT: sgtz t4, a5 +; CHECK-NOV-NEXT: sgtz t5, a6 +; CHECK-NOV-NEXT: sgtz t6, a7 ; CHECK-NOV-NEXT: negw t6, t6 -; CHECK-NOV-NEXT: and a0, t6, a0 +; CHECK-NOV-NEXT: and a7, t6, a7 ; CHECK-NOV-NEXT: negw t5, t5 -; CHECK-NOV-NEXT: and t5, t5, s1 +; CHECK-NOV-NEXT: and a6, t5, a6 ; CHECK-NOV-NEXT: negw t4, t4 -; CHECK-NOV-NEXT: and a1, t4, a1 +; CHECK-NOV-NEXT: and a5, t4, a5 ; CHECK-NOV-NEXT: negw t3, t3 -; CHECK-NOV-NEXT: and a2, t3, a2 +; CHECK-NOV-NEXT: and a4, t3, a4 ; CHECK-NOV-NEXT: negw t2, t2 -; CHECK-NOV-NEXT: and a4, t2, a4 +; 
CHECK-NOV-NEXT: and a2, t2, a2 ; CHECK-NOV-NEXT: negw t1, t1 -; CHECK-NOV-NEXT: and a5, t1, a5 +; CHECK-NOV-NEXT: and a1, t1, a1 ; CHECK-NOV-NEXT: negw t0, t0 -; CHECK-NOV-NEXT: and a6, t0, a6 +; CHECK-NOV-NEXT: and t0, t0, s1 ; CHECK-NOV-NEXT: negw a3, a3 -; CHECK-NOV-NEXT: and a3, a3, a7 -; CHECK-NOV-NEXT: sh a3, 14(s0) -; CHECK-NOV-NEXT: sh a6, 12(s0) -; CHECK-NOV-NEXT: sh a5, 10(s0) -; CHECK-NOV-NEXT: sh a4, 8(s0) -; CHECK-NOV-NEXT: sh a2, 6(s0) -; CHECK-NOV-NEXT: sh a1, 4(s0) -; CHECK-NOV-NEXT: sh t5, 2(s0) -; CHECK-NOV-NEXT: sh a0, 0(s0) +; CHECK-NOV-NEXT: and a0, a3, a0 +; CHECK-NOV-NEXT: sh a0, 14(s0) +; CHECK-NOV-NEXT: sh t0, 12(s0) +; CHECK-NOV-NEXT: sh a1, 10(s0) +; CHECK-NOV-NEXT: sh a2, 8(s0) +; CHECK-NOV-NEXT: sh a4, 6(s0) +; CHECK-NOV-NEXT: sh a5, 4(s0) +; CHECK-NOV-NEXT: sh a6, 2(s0) +; CHECK-NOV-NEXT: sh a7, 0(s0) ; CHECK-NOV-NEXT: ld ra, 120(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 112(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 104(sp) # 8-byte Folded Reload @@ -1982,94 +2031,129 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s5, -56 ; CHECK-V-NEXT: .cfi_offset s6, -64 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb -; CHECK-V-NEXT: lhu s0, 56(a0) -; CHECK-V-NEXT: lhu s1, 48(a0) -; CHECK-V-NEXT: lhu s2, 40(a0) -; CHECK-V-NEXT: lhu s3, 32(a0) -; CHECK-V-NEXT: lhu s4, 24(a0) -; CHECK-V-NEXT: lhu s5, 16(a0) -; CHECK-V-NEXT: lhu s6, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 4 * vlenb +; CHECK-V-NEXT: lhu s0, 0(a0) +; CHECK-V-NEXT: lhu s1, 8(a0) +; CHECK-V-NEXT: lhu s2, 16(a0) +; CHECK-V-NEXT: lhu s3, 24(a0) +; CHECK-V-NEXT: lhu s4, 32(a0) +; 
CHECK-V-NEXT: lhu s5, 40(a0) +; CHECK-V-NEXT: lhu s6, 48(a0) +; CHECK-V-NEXT: lhu a0, 56(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s6 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s4 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; 
CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 3 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 4 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 
-; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 5 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 6 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 2 +; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 7 +; CHECK-V-NEXT: vslideup.vi v8, v10, 4 ; CHECK-V-NEXT: lui a0, 16 ; CHECK-V-NEXT: addi a0, a0, -1 -; CHECK-V-NEXT: vmin.vx v8, v10, a0 +; 
CHECK-V-NEXT: vmin.vx v8, v8, a0 ; CHECK-V-NEXT: vmax.vx v10, v8, zero ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload @@ -2106,66 +2190,65 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-NOV-NEXT: .cfi_offset s0, -16 ; CHECK-NOV-NEXT: .cfi_offset s1, -24 ; CHECK-NOV-NEXT: .cfi_offset fs0, -32 -; CHECK-NOV-NEXT: fmv.d fs0, fa1 +; CHECK-NOV-NEXT: fmv.d fs0, fa0 +; CHECK-NOV-NEXT: fmv.d fa0, fa1 ; CHECK-NOV-NEXT: call __fixdfti ; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.d fa0, fs0 ; CHECK-NOV-NEXT: call __fixdfti -; CHECK-NOV-NEXT: mv a2, a0 -; CHECK-NOV-NEXT: li a0, -1 -; CHECK-NOV-NEXT: srli a3, a0, 1 -; CHECK-NOV-NEXT: beqz a1, .LBB18_3 +; CHECK-NOV-NEXT: li a2, -1 +; CHECK-NOV-NEXT: srli a3, a2, 1 +; CHECK-NOV-NEXT: beqz s1, .LBB18_3 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: slti a4, a1, 0 -; CHECK-NOV-NEXT: bnez s1, .LBB18_4 +; CHECK-NOV-NEXT: slti a4, s1, 0 +; CHECK-NOV-NEXT: bnez a1, .LBB18_4 ; CHECK-NOV-NEXT: .LBB18_2: -; CHECK-NOV-NEXT: sltu a5, s0, a3 +; CHECK-NOV-NEXT: sltu a5, a0, a3 ; CHECK-NOV-NEXT: beqz a5, .LBB18_5 ; CHECK-NOV-NEXT: j .LBB18_6 ; CHECK-NOV-NEXT: .LBB18_3: -; CHECK-NOV-NEXT: sltu a4, a2, a3 -; CHECK-NOV-NEXT: beqz s1, .LBB18_2 +; CHECK-NOV-NEXT: sltu a4, s0, a3 +; CHECK-NOV-NEXT: beqz a1, .LBB18_2 ; CHECK-NOV-NEXT: .LBB18_4: # %entry -; CHECK-NOV-NEXT: slti a5, s1, 0 +; CHECK-NOV-NEXT: slti a5, a1, 0 ; CHECK-NOV-NEXT: bnez a5, .LBB18_6 ; CHECK-NOV-NEXT: .LBB18_5: # %entry -; CHECK-NOV-NEXT: mv s0, a3 +; CHECK-NOV-NEXT: mv a0, a3 ; CHECK-NOV-NEXT: .LBB18_6: # %entry ; CHECK-NOV-NEXT: neg a6, a5 ; CHECK-NOV-NEXT: neg a5, a4 -; CHECK-NOV-NEXT: and a5, a5, a1 +; CHECK-NOV-NEXT: and a5, a5, s1 ; CHECK-NOV-NEXT: 
bnez a4, .LBB18_8 ; CHECK-NOV-NEXT: # %bb.7: # %entry -; CHECK-NOV-NEXT: mv a2, a3 +; CHECK-NOV-NEXT: mv s0, a3 ; CHECK-NOV-NEXT: .LBB18_8: # %entry -; CHECK-NOV-NEXT: and a4, a6, s1 -; CHECK-NOV-NEXT: slli a1, a0, 63 -; CHECK-NOV-NEXT: beq a5, a0, .LBB18_11 +; CHECK-NOV-NEXT: and a4, a6, a1 +; CHECK-NOV-NEXT: slli a1, a2, 63 +; CHECK-NOV-NEXT: beq a5, a2, .LBB18_11 ; CHECK-NOV-NEXT: # %bb.9: # %entry ; CHECK-NOV-NEXT: slti a3, a5, 0 ; CHECK-NOV-NEXT: xori a3, a3, 1 -; CHECK-NOV-NEXT: bne a4, a0, .LBB18_12 +; CHECK-NOV-NEXT: bne a4, a2, .LBB18_12 ; CHECK-NOV-NEXT: .LBB18_10: -; CHECK-NOV-NEXT: sltu a0, a1, s0 -; CHECK-NOV-NEXT: beqz a0, .LBB18_13 +; CHECK-NOV-NEXT: sltu a2, a1, a0 +; CHECK-NOV-NEXT: beqz a2, .LBB18_13 ; CHECK-NOV-NEXT: j .LBB18_14 ; CHECK-NOV-NEXT: .LBB18_11: -; CHECK-NOV-NEXT: sltu a3, a1, a2 -; CHECK-NOV-NEXT: beq a4, a0, .LBB18_10 +; CHECK-NOV-NEXT: sltu a3, a1, s0 +; CHECK-NOV-NEXT: beq a4, a2, .LBB18_10 ; CHECK-NOV-NEXT: .LBB18_12: # %entry -; CHECK-NOV-NEXT: slti a0, a4, 0 -; CHECK-NOV-NEXT: xori a0, a0, 1 -; CHECK-NOV-NEXT: bnez a0, .LBB18_14 +; CHECK-NOV-NEXT: slti a2, a4, 0 +; CHECK-NOV-NEXT: xori a2, a2, 1 +; CHECK-NOV-NEXT: bnez a2, .LBB18_14 ; CHECK-NOV-NEXT: .LBB18_13: # %entry -; CHECK-NOV-NEXT: mv s0, a1 +; CHECK-NOV-NEXT: mv a0, a1 ; CHECK-NOV-NEXT: .LBB18_14: # %entry ; CHECK-NOV-NEXT: bnez a3, .LBB18_16 ; CHECK-NOV-NEXT: # %bb.15: # %entry -; CHECK-NOV-NEXT: mv a2, a1 +; CHECK-NOV-NEXT: mv s0, a1 ; CHECK-NOV-NEXT: .LBB18_16: # %entry -; CHECK-NOV-NEXT: mv a0, s0 -; CHECK-NOV-NEXT: mv a1, a2 +; CHECK-NOV-NEXT: mv a1, s0 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -2190,43 +2273,43 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vslidedown.vi v9, v8, 
1 -; CHECK-V-NEXT: vfmv.f.s fa0, v9 +; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 -; CHECK-V-NEXT: beqz a1, .LBB18_3 +; CHECK-V-NEXT: beqz s1, .LBB18_3 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: slti a4, a1, 0 -; CHECK-V-NEXT: bnez s1, .LBB18_4 +; CHECK-V-NEXT: slti a4, s1, 0 +; CHECK-V-NEXT: bnez a1, .LBB18_4 ; CHECK-V-NEXT: .LBB18_2: -; CHECK-V-NEXT: sltu a5, s0, a3 +; CHECK-V-NEXT: sltu a5, a0, a3 ; CHECK-V-NEXT: beqz a5, .LBB18_5 ; CHECK-V-NEXT: j .LBB18_6 ; CHECK-V-NEXT: .LBB18_3: -; CHECK-V-NEXT: sltu a4, a0, a3 -; CHECK-V-NEXT: beqz s1, .LBB18_2 +; CHECK-V-NEXT: sltu a4, s0, a3 +; CHECK-V-NEXT: beqz a1, .LBB18_2 ; CHECK-V-NEXT: .LBB18_4: # %entry -; CHECK-V-NEXT: slti a5, s1, 0 +; CHECK-V-NEXT: slti a5, a1, 0 ; CHECK-V-NEXT: bnez a5, .LBB18_6 ; CHECK-V-NEXT: .LBB18_5: # %entry -; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: mv a0, a3 ; CHECK-V-NEXT: .LBB18_6: # %entry ; CHECK-V-NEXT: neg a6, a5 ; CHECK-V-NEXT: neg a5, a4 -; CHECK-V-NEXT: and a5, a5, a1 +; CHECK-V-NEXT: and a5, a5, s1 ; CHECK-V-NEXT: bnez a4, .LBB18_8 ; CHECK-V-NEXT: # %bb.7: # %entry -; CHECK-V-NEXT: mv a0, a3 +; CHECK-V-NEXT: mv s0, a3 ; CHECK-V-NEXT: .LBB18_8: # %entry -; CHECK-V-NEXT: and a4, a6, s1 +; CHECK-V-NEXT: and a4, a6, a1 ; CHECK-V-NEXT: slli a1, a2, 63 ; CHECK-V-NEXT: beq a5, a2, .LBB18_11 ; CHECK-V-NEXT: # %bb.9: # %entry @@ -2234,26 +2317,26 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: xori a3, a3, 1 ; CHECK-V-NEXT: bne a4, a2, .LBB18_12 ; CHECK-V-NEXT: .LBB18_10: -; CHECK-V-NEXT: sltu a2, a1, s0 +; CHECK-V-NEXT: sltu a2, a1, a0 ; CHECK-V-NEXT: beqz a2, .LBB18_13 ; CHECK-V-NEXT: j 
.LBB18_14 ; CHECK-V-NEXT: .LBB18_11: -; CHECK-V-NEXT: sltu a3, a1, a0 +; CHECK-V-NEXT: sltu a3, a1, s0 ; CHECK-V-NEXT: beq a4, a2, .LBB18_10 ; CHECK-V-NEXT: .LBB18_12: # %entry ; CHECK-V-NEXT: slti a2, a4, 0 ; CHECK-V-NEXT: xori a2, a2, 1 ; CHECK-V-NEXT: bnez a2, .LBB18_14 ; CHECK-V-NEXT: .LBB18_13: # %entry -; CHECK-V-NEXT: mv s0, a1 +; CHECK-V-NEXT: mv a0, a1 ; CHECK-V-NEXT: .LBB18_14: # %entry ; CHECK-V-NEXT: bnez a3, .LBB18_16 ; CHECK-V-NEXT: # %bb.15: # %entry -; CHECK-V-NEXT: mv a0, a1 +; CHECK-V-NEXT: mv s0, a1 ; CHECK-V-NEXT: .LBB18_16: # %entry ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: vmv.s.x v9, s0 +; CHECK-V-NEXT: vmv.s.x v8, s0 +; CHECK-V-NEXT: vmv.s.x v9, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -2286,19 +2369,19 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-NOV-NEXT: .cfi_offset s0, -16 ; CHECK-NOV-NEXT: .cfi_offset s1, -24 ; CHECK-NOV-NEXT: .cfi_offset fs0, -32 -; CHECK-NOV-NEXT: fmv.d fs0, fa1 +; CHECK-NOV-NEXT: fmv.d fs0, fa0 +; CHECK-NOV-NEXT: fmv.d fa0, fa1 ; CHECK-NOV-NEXT: call __fixunsdfti ; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.d fa0, fs0 ; CHECK-NOV-NEXT: call __fixunsdfti -; CHECK-NOV-NEXT: snez a1, a1 ; CHECK-NOV-NEXT: snez a2, s1 -; CHECK-NOV-NEXT: addi a2, a2, -1 -; CHECK-NOV-NEXT: and a2, a2, s0 +; CHECK-NOV-NEXT: snez a1, a1 ; CHECK-NOV-NEXT: addi a1, a1, -1 -; CHECK-NOV-NEXT: and a1, a1, a0 -; CHECK-NOV-NEXT: mv a0, a2 +; CHECK-NOV-NEXT: and a0, a1, a0 +; CHECK-NOV-NEXT: addi a1, a2, -1 +; CHECK-NOV-NEXT: and a1, a1, s0 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -2323,25 +2406,25 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: 
vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 -; CHECK-V-NEXT: vfmv.f.s fa0, v9 +; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunsdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunsdfti -; CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: snez a2, s1 -; CHECK-V-NEXT: addi a2, a2, -1 -; CHECK-V-NEXT: and a2, a2, s0 +; CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 +; CHECK-V-NEXT: addi a2, a2, -1 +; CHECK-V-NEXT: and a2, a2, s0 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: vmv.s.x v9, a2 +; CHECK-V-NEXT: vmv.s.x v8, a2 +; CHECK-V-NEXT: vmv.s.x v9, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -2383,32 +2466,32 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: li a2, 1 ; CHECK-NOV-NEXT: .LBB20_2: # %entry -; CHECK-NOV-NEXT: slti a3, a1, 1 ; CHECK-NOV-NEXT: slti a4, s1, 1 +; CHECK-NOV-NEXT: slti a3, a1, 1 ; CHECK-NOV-NEXT: blez a1, .LBB20_4 ; CHECK-NOV-NEXT: # %bb.3: # %entry ; CHECK-NOV-NEXT: li a1, 1 ; CHECK-NOV-NEXT: .LBB20_4: # %entry -; CHECK-NOV-NEXT: neg a4, a4 ; CHECK-NOV-NEXT: neg a3, a3 ; CHECK-NOV-NEXT: and a3, a3, a0 +; CHECK-NOV-NEXT: neg a0, a4 ; CHECK-NOV-NEXT: beqz a1, .LBB20_7 ; CHECK-NOV-NEXT: # %bb.5: # %entry ; CHECK-NOV-NEXT: sgtz a1, a1 -; CHECK-NOV-NEXT: and a4, a4, s0 +; CHECK-NOV-NEXT: and a0, a0, s0 ; CHECK-NOV-NEXT: bnez a2, .LBB20_8 ; CHECK-NOV-NEXT: .LBB20_6: -; CHECK-NOV-NEXT: snez a0, a4 +; CHECK-NOV-NEXT: snez a2, a0 ; CHECK-NOV-NEXT: j .LBB20_9 ; CHECK-NOV-NEXT: .LBB20_7: ; CHECK-NOV-NEXT: snez a1, a3 -; CHECK-NOV-NEXT: and a4, a4, s0 +; 
CHECK-NOV-NEXT: and a0, a0, s0 ; CHECK-NOV-NEXT: beqz a2, .LBB20_6 ; CHECK-NOV-NEXT: .LBB20_8: # %entry -; CHECK-NOV-NEXT: sgtz a0, a2 +; CHECK-NOV-NEXT: sgtz a2, a2 ; CHECK-NOV-NEXT: .LBB20_9: # %entry -; CHECK-NOV-NEXT: neg a0, a0 -; CHECK-NOV-NEXT: and a0, a0, a4 +; CHECK-NOV-NEXT: neg a2, a2 +; CHECK-NOV-NEXT: and a0, a2, a0 ; CHECK-NOV-NEXT: neg a1, a1 ; CHECK-NOV-NEXT: and a1, a1, a3 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload @@ -2450,15 +2533,15 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: # %bb.1: # %entry ; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB20_2: # %entry -; CHECK-V-NEXT: slti a4, a1, 1 ; CHECK-V-NEXT: slti a3, s1, 1 +; CHECK-V-NEXT: slti a4, a1, 1 ; CHECK-V-NEXT: blez a1, .LBB20_4 ; CHECK-V-NEXT: # %bb.3: # %entry ; CHECK-V-NEXT: li a1, 1 ; CHECK-V-NEXT: .LBB20_4: # %entry -; CHECK-V-NEXT: neg a3, a3 ; CHECK-V-NEXT: neg a4, a4 ; CHECK-V-NEXT: and a0, a4, a0 +; CHECK-V-NEXT: neg a3, a3 ; CHECK-V-NEXT: beqz a1, .LBB20_7 ; CHECK-V-NEXT: # %bb.5: # %entry ; CHECK-V-NEXT: sgtz a1, a1 @@ -2513,66 +2596,65 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-NOV-NEXT: .cfi_offset s0, -16 ; CHECK-NOV-NEXT: .cfi_offset s1, -24 ; CHECK-NOV-NEXT: .cfi_offset fs0, -32 -; CHECK-NOV-NEXT: fmv.s fs0, fa1 +; CHECK-NOV-NEXT: fmv.s fs0, fa0 +; CHECK-NOV-NEXT: fmv.s fa0, fa1 ; CHECK-NOV-NEXT: call __fixsfti ; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.s fa0, fs0 ; CHECK-NOV-NEXT: call __fixsfti -; CHECK-NOV-NEXT: mv a2, a0 -; CHECK-NOV-NEXT: li a0, -1 -; CHECK-NOV-NEXT: srli a3, a0, 1 -; CHECK-NOV-NEXT: beqz a1, .LBB21_3 +; CHECK-NOV-NEXT: li a2, -1 +; CHECK-NOV-NEXT: srli a3, a2, 1 +; CHECK-NOV-NEXT: beqz s1, .LBB21_3 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: slti a4, a1, 0 -; CHECK-NOV-NEXT: bnez s1, .LBB21_4 +; CHECK-NOV-NEXT: slti a4, s1, 0 +; CHECK-NOV-NEXT: bnez a1, .LBB21_4 ; CHECK-NOV-NEXT: .LBB21_2: -; CHECK-NOV-NEXT: sltu a5, s0, a3 +; CHECK-NOV-NEXT: sltu a5, a0, a3 
; CHECK-NOV-NEXT: beqz a5, .LBB21_5 ; CHECK-NOV-NEXT: j .LBB21_6 ; CHECK-NOV-NEXT: .LBB21_3: -; CHECK-NOV-NEXT: sltu a4, a2, a3 -; CHECK-NOV-NEXT: beqz s1, .LBB21_2 +; CHECK-NOV-NEXT: sltu a4, s0, a3 +; CHECK-NOV-NEXT: beqz a1, .LBB21_2 ; CHECK-NOV-NEXT: .LBB21_4: # %entry -; CHECK-NOV-NEXT: slti a5, s1, 0 +; CHECK-NOV-NEXT: slti a5, a1, 0 ; CHECK-NOV-NEXT: bnez a5, .LBB21_6 ; CHECK-NOV-NEXT: .LBB21_5: # %entry -; CHECK-NOV-NEXT: mv s0, a3 +; CHECK-NOV-NEXT: mv a0, a3 ; CHECK-NOV-NEXT: .LBB21_6: # %entry ; CHECK-NOV-NEXT: neg a6, a5 ; CHECK-NOV-NEXT: neg a5, a4 -; CHECK-NOV-NEXT: and a5, a5, a1 +; CHECK-NOV-NEXT: and a5, a5, s1 ; CHECK-NOV-NEXT: bnez a4, .LBB21_8 ; CHECK-NOV-NEXT: # %bb.7: # %entry -; CHECK-NOV-NEXT: mv a2, a3 +; CHECK-NOV-NEXT: mv s0, a3 ; CHECK-NOV-NEXT: .LBB21_8: # %entry -; CHECK-NOV-NEXT: and a4, a6, s1 -; CHECK-NOV-NEXT: slli a1, a0, 63 -; CHECK-NOV-NEXT: beq a5, a0, .LBB21_11 +; CHECK-NOV-NEXT: and a4, a6, a1 +; CHECK-NOV-NEXT: slli a1, a2, 63 +; CHECK-NOV-NEXT: beq a5, a2, .LBB21_11 ; CHECK-NOV-NEXT: # %bb.9: # %entry ; CHECK-NOV-NEXT: slti a3, a5, 0 ; CHECK-NOV-NEXT: xori a3, a3, 1 -; CHECK-NOV-NEXT: bne a4, a0, .LBB21_12 +; CHECK-NOV-NEXT: bne a4, a2, .LBB21_12 ; CHECK-NOV-NEXT: .LBB21_10: -; CHECK-NOV-NEXT: sltu a0, a1, s0 -; CHECK-NOV-NEXT: beqz a0, .LBB21_13 +; CHECK-NOV-NEXT: sltu a2, a1, a0 +; CHECK-NOV-NEXT: beqz a2, .LBB21_13 ; CHECK-NOV-NEXT: j .LBB21_14 ; CHECK-NOV-NEXT: .LBB21_11: -; CHECK-NOV-NEXT: sltu a3, a1, a2 -; CHECK-NOV-NEXT: beq a4, a0, .LBB21_10 +; CHECK-NOV-NEXT: sltu a3, a1, s0 +; CHECK-NOV-NEXT: beq a4, a2, .LBB21_10 ; CHECK-NOV-NEXT: .LBB21_12: # %entry -; CHECK-NOV-NEXT: slti a0, a4, 0 -; CHECK-NOV-NEXT: xori a0, a0, 1 -; CHECK-NOV-NEXT: bnez a0, .LBB21_14 +; CHECK-NOV-NEXT: slti a2, a4, 0 +; CHECK-NOV-NEXT: xori a2, a2, 1 +; CHECK-NOV-NEXT: bnez a2, .LBB21_14 ; CHECK-NOV-NEXT: .LBB21_13: # %entry -; CHECK-NOV-NEXT: mv s0, a1 +; CHECK-NOV-NEXT: mv a0, a1 ; CHECK-NOV-NEXT: .LBB21_14: # %entry ; CHECK-NOV-NEXT: bnez 
a3, .LBB21_16 ; CHECK-NOV-NEXT: # %bb.15: # %entry -; CHECK-NOV-NEXT: mv a2, a1 +; CHECK-NOV-NEXT: mv s0, a1 ; CHECK-NOV-NEXT: .LBB21_16: # %entry -; CHECK-NOV-NEXT: mv a0, s0 -; CHECK-NOV-NEXT: mv a1, a2 +; CHECK-NOV-NEXT: mv a1, s0 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -2597,43 +2679,43 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 -; CHECK-V-NEXT: vfmv.f.s fa0, v9 +; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 -; CHECK-V-NEXT: beqz a1, .LBB21_3 +; CHECK-V-NEXT: beqz s1, .LBB21_3 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: slti a4, a1, 0 -; CHECK-V-NEXT: bnez s1, .LBB21_4 +; CHECK-V-NEXT: slti a4, s1, 0 +; CHECK-V-NEXT: bnez a1, .LBB21_4 ; CHECK-V-NEXT: .LBB21_2: -; CHECK-V-NEXT: sltu a5, s0, a3 +; CHECK-V-NEXT: sltu a5, a0, a3 ; CHECK-V-NEXT: beqz a5, .LBB21_5 ; CHECK-V-NEXT: j .LBB21_6 ; CHECK-V-NEXT: .LBB21_3: -; CHECK-V-NEXT: sltu a4, a0, a3 -; CHECK-V-NEXT: beqz s1, .LBB21_2 +; CHECK-V-NEXT: sltu a4, s0, a3 +; CHECK-V-NEXT: beqz a1, .LBB21_2 ; CHECK-V-NEXT: .LBB21_4: # %entry -; CHECK-V-NEXT: slti a5, s1, 0 +; CHECK-V-NEXT: slti a5, a1, 0 ; CHECK-V-NEXT: bnez a5, .LBB21_6 ; CHECK-V-NEXT: .LBB21_5: # %entry -; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: mv a0, a3 ; CHECK-V-NEXT: .LBB21_6: # %entry ; CHECK-V-NEXT: neg a6, a5 ; CHECK-V-NEXT: neg a5, a4 -; CHECK-V-NEXT: and 
a5, a5, a1 +; CHECK-V-NEXT: and a5, a5, s1 ; CHECK-V-NEXT: bnez a4, .LBB21_8 ; CHECK-V-NEXT: # %bb.7: # %entry -; CHECK-V-NEXT: mv a0, a3 +; CHECK-V-NEXT: mv s0, a3 ; CHECK-V-NEXT: .LBB21_8: # %entry -; CHECK-V-NEXT: and a4, a6, s1 +; CHECK-V-NEXT: and a4, a6, a1 ; CHECK-V-NEXT: slli a1, a2, 63 ; CHECK-V-NEXT: beq a5, a2, .LBB21_11 ; CHECK-V-NEXT: # %bb.9: # %entry @@ -2641,26 +2723,26 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: xori a3, a3, 1 ; CHECK-V-NEXT: bne a4, a2, .LBB21_12 ; CHECK-V-NEXT: .LBB21_10: -; CHECK-V-NEXT: sltu a2, a1, s0 +; CHECK-V-NEXT: sltu a2, a1, a0 ; CHECK-V-NEXT: beqz a2, .LBB21_13 ; CHECK-V-NEXT: j .LBB21_14 ; CHECK-V-NEXT: .LBB21_11: -; CHECK-V-NEXT: sltu a3, a1, a0 +; CHECK-V-NEXT: sltu a3, a1, s0 ; CHECK-V-NEXT: beq a4, a2, .LBB21_10 ; CHECK-V-NEXT: .LBB21_12: # %entry ; CHECK-V-NEXT: slti a2, a4, 0 ; CHECK-V-NEXT: xori a2, a2, 1 ; CHECK-V-NEXT: bnez a2, .LBB21_14 ; CHECK-V-NEXT: .LBB21_13: # %entry -; CHECK-V-NEXT: mv s0, a1 +; CHECK-V-NEXT: mv a0, a1 ; CHECK-V-NEXT: .LBB21_14: # %entry ; CHECK-V-NEXT: bnez a3, .LBB21_16 ; CHECK-V-NEXT: # %bb.15: # %entry -; CHECK-V-NEXT: mv a0, a1 +; CHECK-V-NEXT: mv s0, a1 ; CHECK-V-NEXT: .LBB21_16: # %entry ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: vmv.s.x v9, s0 +; CHECK-V-NEXT: vmv.s.x v8, s0 +; CHECK-V-NEXT: vmv.s.x v9, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -2693,19 +2775,19 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-NOV-NEXT: .cfi_offset s0, -16 ; CHECK-NOV-NEXT: .cfi_offset s1, -24 ; CHECK-NOV-NEXT: .cfi_offset fs0, -32 -; CHECK-NOV-NEXT: fmv.s fs0, fa1 +; CHECK-NOV-NEXT: fmv.s fs0, fa0 +; CHECK-NOV-NEXT: fmv.s fa0, fa1 ; CHECK-NOV-NEXT: call __fixunssfti ; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.s fa0, fs0 ; CHECK-NOV-NEXT: call __fixunssfti -; CHECK-NOV-NEXT: snez a1, a1 ; CHECK-NOV-NEXT: snez 
a2, s1 -; CHECK-NOV-NEXT: addi a2, a2, -1 -; CHECK-NOV-NEXT: and a2, a2, s0 +; CHECK-NOV-NEXT: snez a1, a1 ; CHECK-NOV-NEXT: addi a1, a1, -1 -; CHECK-NOV-NEXT: and a1, a1, a0 -; CHECK-NOV-NEXT: mv a0, a2 +; CHECK-NOV-NEXT: and a0, a1, a0 +; CHECK-NOV-NEXT: addi a1, a2, -1 +; CHECK-NOV-NEXT: and a1, a1, s0 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -2730,25 +2812,25 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 -; CHECK-V-NEXT: vfmv.f.s fa0, v9 +; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunssfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunssfti -; CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: snez a2, s1 -; CHECK-V-NEXT: addi a2, a2, -1 -; CHECK-V-NEXT: and a2, a2, s0 +; CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 +; CHECK-V-NEXT: addi a2, a2, -1 +; CHECK-V-NEXT: and a2, a2, s0 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: vmv.s.x v9, a2 +; CHECK-V-NEXT: vmv.s.x v8, a2 +; CHECK-V-NEXT: vmv.s.x v9, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -2790,32 +2872,32 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: li a2, 1 ; CHECK-NOV-NEXT: .LBB23_2: # %entry -; CHECK-NOV-NEXT: slti a3, a1, 1 ; CHECK-NOV-NEXT: slti a4, s1, 1 +; CHECK-NOV-NEXT: slti a3, a1, 1 ; CHECK-NOV-NEXT: blez a1, 
.LBB23_4 ; CHECK-NOV-NEXT: # %bb.3: # %entry ; CHECK-NOV-NEXT: li a1, 1 ; CHECK-NOV-NEXT: .LBB23_4: # %entry -; CHECK-NOV-NEXT: neg a4, a4 ; CHECK-NOV-NEXT: neg a3, a3 ; CHECK-NOV-NEXT: and a3, a3, a0 +; CHECK-NOV-NEXT: neg a0, a4 ; CHECK-NOV-NEXT: beqz a1, .LBB23_7 ; CHECK-NOV-NEXT: # %bb.5: # %entry ; CHECK-NOV-NEXT: sgtz a1, a1 -; CHECK-NOV-NEXT: and a4, a4, s0 +; CHECK-NOV-NEXT: and a0, a0, s0 ; CHECK-NOV-NEXT: bnez a2, .LBB23_8 ; CHECK-NOV-NEXT: .LBB23_6: -; CHECK-NOV-NEXT: snez a0, a4 +; CHECK-NOV-NEXT: snez a2, a0 ; CHECK-NOV-NEXT: j .LBB23_9 ; CHECK-NOV-NEXT: .LBB23_7: ; CHECK-NOV-NEXT: snez a1, a3 -; CHECK-NOV-NEXT: and a4, a4, s0 +; CHECK-NOV-NEXT: and a0, a0, s0 ; CHECK-NOV-NEXT: beqz a2, .LBB23_6 ; CHECK-NOV-NEXT: .LBB23_8: # %entry -; CHECK-NOV-NEXT: sgtz a0, a2 +; CHECK-NOV-NEXT: sgtz a2, a2 ; CHECK-NOV-NEXT: .LBB23_9: # %entry -; CHECK-NOV-NEXT: neg a0, a0 -; CHECK-NOV-NEXT: and a0, a0, a4 +; CHECK-NOV-NEXT: neg a2, a2 +; CHECK-NOV-NEXT: and a0, a2, a0 ; CHECK-NOV-NEXT: neg a1, a1 ; CHECK-NOV-NEXT: and a1, a1, a3 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload @@ -2857,15 +2939,15 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: # %bb.1: # %entry ; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB23_2: # %entry -; CHECK-V-NEXT: slti a4, a1, 1 ; CHECK-V-NEXT: slti a3, s1, 1 +; CHECK-V-NEXT: slti a4, a1, 1 ; CHECK-V-NEXT: blez a1, .LBB23_4 ; CHECK-V-NEXT: # %bb.3: # %entry ; CHECK-V-NEXT: li a1, 1 ; CHECK-V-NEXT: .LBB23_4: # %entry -; CHECK-V-NEXT: neg a3, a3 ; CHECK-V-NEXT: neg a4, a4 ; CHECK-V-NEXT: and a0, a4, a0 +; CHECK-V-NEXT: neg a3, a3 ; CHECK-V-NEXT: beqz a1, .LBB23_7 ; CHECK-V-NEXT: # %bb.5: # %entry ; CHECK-V-NEXT: sgtz a1, a1 @@ -2920,8 +3002,8 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset s0, -16 ; CHECK-NOV-NEXT: .cfi_offset s1, -24 ; CHECK-NOV-NEXT: .cfi_offset s2, -32 -; CHECK-NOV-NEXT: mv s2, a1 -; CHECK-NOV-NEXT: fmv.w.x fa0, a0 +; CHECK-NOV-NEXT: mv s2, a0 +; CHECK-NOV-NEXT: 
fmv.w.x fa0, a1 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: call __fixsfti ; CHECK-NOV-NEXT: mv s0, a0 @@ -2929,60 +3011,58 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-NOV-NEXT: fmv.w.x fa0, s2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: call __fixsfti -; CHECK-NOV-NEXT: mv a2, a0 -; CHECK-NOV-NEXT: li a0, -1 -; CHECK-NOV-NEXT: srli a3, a0, 1 -; CHECK-NOV-NEXT: beqz a1, .LBB24_3 +; CHECK-NOV-NEXT: li a2, -1 +; CHECK-NOV-NEXT: srli a3, a2, 1 +; CHECK-NOV-NEXT: beqz s1, .LBB24_3 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: slti a4, a1, 0 -; CHECK-NOV-NEXT: bnez s1, .LBB24_4 +; CHECK-NOV-NEXT: slti a4, s1, 0 +; CHECK-NOV-NEXT: bnez a1, .LBB24_4 ; CHECK-NOV-NEXT: .LBB24_2: -; CHECK-NOV-NEXT: sltu a5, s0, a3 +; CHECK-NOV-NEXT: sltu a5, a0, a3 ; CHECK-NOV-NEXT: beqz a5, .LBB24_5 ; CHECK-NOV-NEXT: j .LBB24_6 ; CHECK-NOV-NEXT: .LBB24_3: -; CHECK-NOV-NEXT: sltu a4, a2, a3 -; CHECK-NOV-NEXT: beqz s1, .LBB24_2 +; CHECK-NOV-NEXT: sltu a4, s0, a3 +; CHECK-NOV-NEXT: beqz a1, .LBB24_2 ; CHECK-NOV-NEXT: .LBB24_4: # %entry -; CHECK-NOV-NEXT: slti a5, s1, 0 +; CHECK-NOV-NEXT: slti a5, a1, 0 ; CHECK-NOV-NEXT: bnez a5, .LBB24_6 ; CHECK-NOV-NEXT: .LBB24_5: # %entry -; CHECK-NOV-NEXT: mv s0, a3 +; CHECK-NOV-NEXT: mv a0, a3 ; CHECK-NOV-NEXT: .LBB24_6: # %entry ; CHECK-NOV-NEXT: neg a6, a5 ; CHECK-NOV-NEXT: neg a5, a4 -; CHECK-NOV-NEXT: and a5, a5, a1 +; CHECK-NOV-NEXT: and a5, a5, s1 ; CHECK-NOV-NEXT: bnez a4, .LBB24_8 ; CHECK-NOV-NEXT: # %bb.7: # %entry -; CHECK-NOV-NEXT: mv a2, a3 +; CHECK-NOV-NEXT: mv s0, a3 ; CHECK-NOV-NEXT: .LBB24_8: # %entry -; CHECK-NOV-NEXT: and a4, a6, s1 -; CHECK-NOV-NEXT: slli a1, a0, 63 -; CHECK-NOV-NEXT: beq a5, a0, .LBB24_11 +; CHECK-NOV-NEXT: and a4, a6, a1 +; CHECK-NOV-NEXT: slli a1, a2, 63 +; CHECK-NOV-NEXT: beq a5, a2, .LBB24_11 ; CHECK-NOV-NEXT: # %bb.9: # %entry ; CHECK-NOV-NEXT: slti a3, a5, 0 ; CHECK-NOV-NEXT: xori a3, a3, 1 -; CHECK-NOV-NEXT: bne a4, a0, .LBB24_12 +; CHECK-NOV-NEXT: bne a4, a2, 
.LBB24_12 ; CHECK-NOV-NEXT: .LBB24_10: -; CHECK-NOV-NEXT: sltu a0, a1, s0 -; CHECK-NOV-NEXT: beqz a0, .LBB24_13 +; CHECK-NOV-NEXT: sltu a2, a1, a0 +; CHECK-NOV-NEXT: beqz a2, .LBB24_13 ; CHECK-NOV-NEXT: j .LBB24_14 ; CHECK-NOV-NEXT: .LBB24_11: -; CHECK-NOV-NEXT: sltu a3, a1, a2 -; CHECK-NOV-NEXT: beq a4, a0, .LBB24_10 +; CHECK-NOV-NEXT: sltu a3, a1, s0 +; CHECK-NOV-NEXT: beq a4, a2, .LBB24_10 ; CHECK-NOV-NEXT: .LBB24_12: # %entry -; CHECK-NOV-NEXT: slti a0, a4, 0 -; CHECK-NOV-NEXT: xori a0, a0, 1 -; CHECK-NOV-NEXT: bnez a0, .LBB24_14 +; CHECK-NOV-NEXT: slti a2, a4, 0 +; CHECK-NOV-NEXT: xori a2, a2, 1 +; CHECK-NOV-NEXT: bnez a2, .LBB24_14 ; CHECK-NOV-NEXT: .LBB24_13: # %entry -; CHECK-NOV-NEXT: mv s0, a1 +; CHECK-NOV-NEXT: mv a0, a1 ; CHECK-NOV-NEXT: .LBB24_14: # %entry ; CHECK-NOV-NEXT: bnez a3, .LBB24_16 ; CHECK-NOV-NEXT: # %bb.15: # %entry -; CHECK-NOV-NEXT: mv a2, a1 +; CHECK-NOV-NEXT: mv s0, a1 ; CHECK-NOV-NEXT: .LBB24_16: # %entry -; CHECK-NOV-NEXT: mv a0, s0 -; CHECK-NOV-NEXT: mv a1, a2 +; CHECK-NOV-NEXT: mv a1, s0 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -3002,8 +3082,8 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 -; CHECK-V-NEXT: mv s2, a1 -; CHECK-V-NEXT: fmv.w.x fa0, a0 +; CHECK-V-NEXT: mv s2, a0 +; CHECK-V-NEXT: fmv.w.x fa0, a1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: mv s0, a0 @@ -3013,31 +3093,31 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 -; CHECK-V-NEXT: beqz a1, .LBB24_3 +; CHECK-V-NEXT: beqz s1, .LBB24_3 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: slti a4, a1, 0 -; CHECK-V-NEXT: bnez s1, .LBB24_4 +; CHECK-V-NEXT: slti a4, s1, 0 +; CHECK-V-NEXT: bnez a1, .LBB24_4 ; 
CHECK-V-NEXT: .LBB24_2: -; CHECK-V-NEXT: sltu a5, s0, a3 +; CHECK-V-NEXT: sltu a5, a0, a3 ; CHECK-V-NEXT: beqz a5, .LBB24_5 ; CHECK-V-NEXT: j .LBB24_6 ; CHECK-V-NEXT: .LBB24_3: -; CHECK-V-NEXT: sltu a4, a0, a3 -; CHECK-V-NEXT: beqz s1, .LBB24_2 +; CHECK-V-NEXT: sltu a4, s0, a3 +; CHECK-V-NEXT: beqz a1, .LBB24_2 ; CHECK-V-NEXT: .LBB24_4: # %entry -; CHECK-V-NEXT: slti a5, s1, 0 +; CHECK-V-NEXT: slti a5, a1, 0 ; CHECK-V-NEXT: bnez a5, .LBB24_6 ; CHECK-V-NEXT: .LBB24_5: # %entry -; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: mv a0, a3 ; CHECK-V-NEXT: .LBB24_6: # %entry ; CHECK-V-NEXT: neg a6, a5 ; CHECK-V-NEXT: neg a5, a4 -; CHECK-V-NEXT: and a5, a5, a1 +; CHECK-V-NEXT: and a5, a5, s1 ; CHECK-V-NEXT: bnez a4, .LBB24_8 ; CHECK-V-NEXT: # %bb.7: # %entry -; CHECK-V-NEXT: mv a0, a3 +; CHECK-V-NEXT: mv s0, a3 ; CHECK-V-NEXT: .LBB24_8: # %entry -; CHECK-V-NEXT: and a4, a6, s1 +; CHECK-V-NEXT: and a4, a6, a1 ; CHECK-V-NEXT: slli a1, a2, 63 ; CHECK-V-NEXT: beq a5, a2, .LBB24_11 ; CHECK-V-NEXT: # %bb.9: # %entry @@ -3045,26 +3125,26 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-V-NEXT: xori a3, a3, 1 ; CHECK-V-NEXT: bne a4, a2, .LBB24_12 ; CHECK-V-NEXT: .LBB24_10: -; CHECK-V-NEXT: sltu a2, a1, s0 +; CHECK-V-NEXT: sltu a2, a1, a0 ; CHECK-V-NEXT: beqz a2, .LBB24_13 ; CHECK-V-NEXT: j .LBB24_14 ; CHECK-V-NEXT: .LBB24_11: -; CHECK-V-NEXT: sltu a3, a1, a0 +; CHECK-V-NEXT: sltu a3, a1, s0 ; CHECK-V-NEXT: beq a4, a2, .LBB24_10 ; CHECK-V-NEXT: .LBB24_12: # %entry ; CHECK-V-NEXT: slti a2, a4, 0 ; CHECK-V-NEXT: xori a2, a2, 1 ; CHECK-V-NEXT: bnez a2, .LBB24_14 ; CHECK-V-NEXT: .LBB24_13: # %entry -; CHECK-V-NEXT: mv s0, a1 +; CHECK-V-NEXT: mv a0, a1 ; CHECK-V-NEXT: .LBB24_14: # %entry ; CHECK-V-NEXT: bnez a3, .LBB24_16 ; CHECK-V-NEXT: # %bb.15: # %entry -; CHECK-V-NEXT: mv a0, a1 +; CHECK-V-NEXT: mv s0, a1 ; CHECK-V-NEXT: .LBB24_16: # %entry ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v9, a0 -; CHECK-V-NEXT: vmv.s.x v8, s0 +; CHECK-V-NEXT: vmv.s.x 
v9, s0 +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -3095,8 +3175,8 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset s0, -16 ; CHECK-NOV-NEXT: .cfi_offset s1, -24 ; CHECK-NOV-NEXT: .cfi_offset s2, -32 -; CHECK-NOV-NEXT: mv s0, a1 -; CHECK-NOV-NEXT: fmv.w.x fa0, a0 +; CHECK-NOV-NEXT: mv s0, a0 +; CHECK-NOV-NEXT: fmv.w.x fa0, a1 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: call __fixunssfti ; CHECK-NOV-NEXT: mv s1, a0 @@ -3104,13 +3184,12 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-NOV-NEXT: fmv.w.x fa0, s0 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: call __fixunssfti -; CHECK-NOV-NEXT: snez a1, a1 ; CHECK-NOV-NEXT: snez a2, s2 -; CHECK-NOV-NEXT: addi a2, a2, -1 -; CHECK-NOV-NEXT: and a2, a2, s1 +; CHECK-NOV-NEXT: snez a1, a1 ; CHECK-NOV-NEXT: addi a1, a1, -1 -; CHECK-NOV-NEXT: and a1, a1, a0 -; CHECK-NOV-NEXT: mv a0, a2 +; CHECK-NOV-NEXT: and a0, a1, a0 +; CHECK-NOV-NEXT: addi a1, a2, -1 +; CHECK-NOV-NEXT: and a1, a1, s1 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -3130,8 +3209,8 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 -; CHECK-V-NEXT: mv s0, a1 -; CHECK-V-NEXT: fmv.w.x fa0, a0 +; CHECK-V-NEXT: mv s0, a0 +; CHECK-V-NEXT: fmv.w.x fa0, a1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: call __fixunssfti ; CHECK-V-NEXT: mv s1, a0 @@ -3139,15 +3218,15 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: call __fixunssfti -; CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: snez a2, s2 -; CHECK-V-NEXT: addi a2, a2, -1 -; CHECK-V-NEXT: and a2, a2, s1 +; 
CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 +; CHECK-V-NEXT: addi a2, a2, -1 +; CHECK-V-NEXT: and a2, a2, s1 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v9, a0 -; CHECK-V-NEXT: vmv.s.x v8, a2 +; CHECK-V-NEXT: vmv.s.x v9, a2 +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -3190,32 +3269,32 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: li a2, 1 ; CHECK-NOV-NEXT: .LBB26_2: # %entry -; CHECK-NOV-NEXT: slti a3, a1, 1 ; CHECK-NOV-NEXT: slti a4, s1, 1 +; CHECK-NOV-NEXT: slti a3, a1, 1 ; CHECK-NOV-NEXT: blez a1, .LBB26_4 ; CHECK-NOV-NEXT: # %bb.3: # %entry ; CHECK-NOV-NEXT: li a1, 1 ; CHECK-NOV-NEXT: .LBB26_4: # %entry -; CHECK-NOV-NEXT: neg a4, a4 ; CHECK-NOV-NEXT: neg a3, a3 ; CHECK-NOV-NEXT: and a3, a3, a0 +; CHECK-NOV-NEXT: neg a0, a4 ; CHECK-NOV-NEXT: beqz a1, .LBB26_7 ; CHECK-NOV-NEXT: # %bb.5: # %entry ; CHECK-NOV-NEXT: sgtz a1, a1 -; CHECK-NOV-NEXT: and a4, a4, s0 +; CHECK-NOV-NEXT: and a0, a0, s0 ; CHECK-NOV-NEXT: bnez a2, .LBB26_8 ; CHECK-NOV-NEXT: .LBB26_6: -; CHECK-NOV-NEXT: snez a0, a4 +; CHECK-NOV-NEXT: snez a2, a0 ; CHECK-NOV-NEXT: j .LBB26_9 ; CHECK-NOV-NEXT: .LBB26_7: ; CHECK-NOV-NEXT: snez a1, a3 -; CHECK-NOV-NEXT: and a4, a4, s0 +; CHECK-NOV-NEXT: and a0, a0, s0 ; CHECK-NOV-NEXT: beqz a2, .LBB26_6 ; CHECK-NOV-NEXT: .LBB26_8: # %entry -; CHECK-NOV-NEXT: sgtz a0, a2 +; CHECK-NOV-NEXT: sgtz a2, a2 ; CHECK-NOV-NEXT: .LBB26_9: # %entry -; CHECK-NOV-NEXT: neg a0, a0 -; CHECK-NOV-NEXT: and a0, a0, a4 +; CHECK-NOV-NEXT: neg a2, a2 +; CHECK-NOV-NEXT: and a0, a2, a0 ; CHECK-NOV-NEXT: neg a1, a1 ; CHECK-NOV-NEXT: and a1, a1, a3 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload @@ -3251,15 +3330,15 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-V-NEXT: # %bb.1: # %entry ; CHECK-V-NEXT: 
li a2, 1 ; CHECK-V-NEXT: .LBB26_2: # %entry -; CHECK-V-NEXT: slti a4, a1, 1 ; CHECK-V-NEXT: slti a3, s1, 1 +; CHECK-V-NEXT: slti a4, a1, 1 ; CHECK-V-NEXT: blez a1, .LBB26_4 ; CHECK-V-NEXT: # %bb.3: # %entry ; CHECK-V-NEXT: li a1, 1 ; CHECK-V-NEXT: .LBB26_4: # %entry -; CHECK-V-NEXT: neg a3, a3 ; CHECK-V-NEXT: neg a4, a4 ; CHECK-V-NEXT: and a0, a4, a0 +; CHECK-V-NEXT: neg a3, a3 ; CHECK-V-NEXT: beqz a1, .LBB26_7 ; CHECK-V-NEXT: # %bb.5: # %entry ; CHECK-V-NEXT: sgtz a1, a1 @@ -3728,57 +3807,50 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 ; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb -; CHECK-V-NEXT: lhu s0, 24(a0) -; CHECK-V-NEXT: lhu s1, 16(a0) -; CHECK-V-NEXT: lhu s2, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: lhu s0, 0(a0) +; CHECK-V-NEXT: lhu s1, 8(a0) +; CHECK-V-NEXT: lhu s2, 16(a0) +; CHECK-V-NEXT: lhu a0, 24(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: 
vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz +; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v10, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 1 ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 3 +; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 2 ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnclip.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb @@ -3894,57 +3966,50 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 ; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb -; CHECK-V-NEXT: lhu s0, 24(a0) 
-; CHECK-V-NEXT: lhu s1, 16(a0) -; CHECK-V-NEXT: lhu s2, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: lhu s0, 0(a0) +; CHECK-V-NEXT: lhu s1, 8(a0) +; CHECK-V-NEXT: lhu s2, 16(a0) +; CHECK-V-NEXT: lhu a0, 24(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: 
addi a0, sp, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz +; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v10, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 1 ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 3 +; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 2 ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnclipu.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb @@ -4071,60 +4136,53 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 ; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb -; CHECK-V-NEXT: lhu s0, 24(a0) -; CHECK-V-NEXT: lhu s1, 16(a0) -; CHECK-V-NEXT: lhu s2, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: lhu s0, 0(a0) +; CHECK-V-NEXT: lhu s1, 8(a0) +; CHECK-V-NEXT: lhu s2, 16(a0) +; CHECK-V-NEXT: lhu a0, 24(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, 
e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 3 +; CHECK-V-NEXT: vslideup.vi v8, v10, 2 ; CHECK-V-NEXT: li a0, -1 ; CHECK-V-NEXT: srli a0, 
a0, 32 -; CHECK-V-NEXT: vmin.vx v8, v10, a0 +; CHECK-V-NEXT: vmin.vx v8, v8, a0 ; CHECK-V-NEXT: vmax.vx v10, v8, zero ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 @@ -4676,90 +4734,125 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s5, -56 ; CHECK-V-NEXT: .cfi_offset s6, -64 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb -; CHECK-V-NEXT: lhu s0, 56(a0) -; CHECK-V-NEXT: lhu s1, 48(a0) -; CHECK-V-NEXT: lhu s2, 40(a0) -; CHECK-V-NEXT: lhu s3, 32(a0) -; CHECK-V-NEXT: lhu s4, 24(a0) -; CHECK-V-NEXT: lhu s5, 16(a0) -; CHECK-V-NEXT: lhu s6, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 4 * vlenb +; CHECK-V-NEXT: lhu s0, 0(a0) +; CHECK-V-NEXT: lhu s1, 8(a0) +; CHECK-V-NEXT: lhu s2, 16(a0) +; CHECK-V-NEXT: lhu s3, 24(a0) +; CHECK-V-NEXT: lhu s4, 32(a0) +; CHECK-V-NEXT: lhu s5, 40(a0) +; CHECK-V-NEXT: lhu s6, 48(a0) +; CHECK-V-NEXT: lhu a0, 56(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s6 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; 
CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s4 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 3 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; 
CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 4 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 5 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: 
vslideup.vi v10, v8, 6 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v10, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 7 +; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 1 +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 4 ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnclip.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload @@ -4952,90 +5045,125 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s5, -56 ; CHECK-V-NEXT: .cfi_offset s6, -64 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb -; CHECK-V-NEXT: lhu s0, 56(a0) -; CHECK-V-NEXT: lhu s1, 48(a0) -; 
CHECK-V-NEXT: lhu s2, 40(a0) -; CHECK-V-NEXT: lhu s3, 32(a0) -; CHECK-V-NEXT: lhu s4, 24(a0) -; CHECK-V-NEXT: lhu s5, 16(a0) -; CHECK-V-NEXT: lhu s6, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 4 * vlenb +; CHECK-V-NEXT: lhu s0, 0(a0) +; CHECK-V-NEXT: lhu s1, 8(a0) +; CHECK-V-NEXT: lhu s2, 16(a0) +; CHECK-V-NEXT: lhu s3, 24(a0) +; CHECK-V-NEXT: lhu s4, 32(a0) +; CHECK-V-NEXT: lhu s5, 40(a0) +; CHECK-V-NEXT: lhu s6, 48(a0) +; CHECK-V-NEXT: lhu a0, 56(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s6 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: 
fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s4 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 3 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; 
CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 4 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 5 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 6 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v10, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 7 +; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 1 +; CHECK-V-NEXT: vsetivli zero, 4, 
e32, m1, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 4 ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnclipu.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload @@ -5251,94 +5379,129 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s5, -56 ; CHECK-V-NEXT: .cfi_offset s6, -64 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb -; CHECK-V-NEXT: lhu s0, 56(a0) -; CHECK-V-NEXT: lhu s1, 48(a0) -; CHECK-V-NEXT: lhu s2, 40(a0) -; CHECK-V-NEXT: lhu s3, 32(a0) -; CHECK-V-NEXT: lhu s4, 24(a0) -; CHECK-V-NEXT: lhu s5, 16(a0) -; CHECK-V-NEXT: lhu s6, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 4 * vlenb +; CHECK-V-NEXT: lhu s0, 0(a0) +; CHECK-V-NEXT: lhu s1, 8(a0) +; CHECK-V-NEXT: lhu s2, 16(a0) +; CHECK-V-NEXT: lhu s3, 24(a0) +; CHECK-V-NEXT: lhu s4, 32(a0) +; CHECK-V-NEXT: lhu s5, 40(a0) +; CHECK-V-NEXT: lhu s6, 48(a0) +; CHECK-V-NEXT: lhu a0, 56(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz 
; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s6 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s4 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; 
CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 3 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 4 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 5 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: vl1r.v v9, (a0) # 
Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 6 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 2 +; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v10, v8, 7 +; CHECK-V-NEXT: vslideup.vi v8, v10, 4 ; CHECK-V-NEXT: lui a0, 16 ; CHECK-V-NEXT: addi a0, a0, -1 -; CHECK-V-NEXT: vmin.vx v8, v10, a0 +; CHECK-V-NEXT: vmin.vx v8, v8, a0 ; CHECK-V-NEXT: vmax.vx v10, v8, zero ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; 
CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll index f3ae03af7c7868..0b236f6d3ff388 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll @@ -2136,17 +2136,18 @@ define @mgather_baseidx_nxv32i8(ptr %base, ; RV64-NEXT: vluxei64.v v13, (a0), v24, v0.t ; RV64-NEXT: srli a1, a1, 2 ; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma -; RV64-NEXT: vslidedown.vx v0, v16, a1 -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v16, v10 -; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; RV64-NEXT: vluxei64.v v14, (a0), v16, v0.t +; RV64-NEXT: vslidedown.vx v8, v16, a1 ; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vx v0, v0, a2 +; RV64-NEXT: vslidedown.vx v0, v8, a2 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v11 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64-NEXT: vluxei64.v v15, (a0), v16, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v10 +; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vluxei64.v v14, (a0), v16, v0.t ; RV64-NEXT: vmv4r.v v8, v12 ; RV64-NEXT: ret %ptrs = getelementptr inbounds i8, ptr %base, %idxs diff --git a/llvm/test/CodeGen/RISCV/rvv/pr63596.ll b/llvm/test/CodeGen/RISCV/rvv/pr63596.ll index c27488b18a017a..d13d67fd0a8824 100644 --- a/llvm/test/CodeGen/RISCV/rvv/pr63596.ll +++ b/llvm/test/CodeGen/RISCV/rvv/pr63596.ll @@ -9,39 +9,38 @@ define <4 x float> @foo(ptr %0) nounwind { ; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: lhu s0, 6(a0) -; CHECK-NEXT: lhu s1, 4(a0) -; 
CHECK-NEXT: lhu s2, 0(a0) -; CHECK-NEXT: lhu a0, 2(a0) +; CHECK-NEXT: lhu s0, 0(a0) +; CHECK-NEXT: lhu s1, 2(a0) +; CHECK-NEXT: lhu s2, 4(a0) +; CHECK-NEXT: lhu a0, 6(a0) ; CHECK-NEXT: fmv.w.x fa0, a0 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: fsw fa0, 8(sp) +; CHECK-NEXT: fsw fa0, 4(sp) ; CHECK-NEXT: fmv.w.x fa0, s2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: fsw fa0, 0(sp) +; CHECK-NEXT: fsw fa0, 12(sp) ; CHECK-NEXT: fmv.w.x fa0, s1 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: fsw fa0, 12(sp) +; CHECK-NEXT: fsw fa0, 8(sp) ; CHECK-NEXT: fmv.w.x fa0, s0 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: fsw fa0, 4(sp) -; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: fsw fa0, 0(sp) +; CHECK-NEXT: addi a0, sp, 4 ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-NEXT: vle32.v v9, (a0) -; CHECK-NEXT: mv a0, sp ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma -; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: addi a0, sp, 12 -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v9, (a0) -; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma -; CHECK-NEXT: vslideup.vi v8, v9, 2 -; CHECK-NEXT: addi a0, sp, 4 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v9, v8, 1 +; CHECK-NEXT: addi a0, sp, 8 ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-NEXT: vle32.v v9, (a0) +; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: mv a0, sp +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v10, 1 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 3 +; CHECK-NEXT: vslideup.vi v8, v9, 2 ; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 24(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll b/llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll index 81076e41a7cb76..122ac13cb25731 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll @@ -13,18 +13,18 @@ define void @vec3_setcc_crash(ptr %in, ptr %out) { ; RV32: # %bb.0: ; RV32-NEXT: lw a0, 0(a0) ; RV32-NEXT: srli a2, a0, 16 -; RV32-NEXT: srli a3, a0, 8 -; RV32-NEXT: slli a4, a0, 16 -; RV32-NEXT: srai a4, a4, 24 +; RV32-NEXT: slli a3, a0, 16 +; RV32-NEXT: srli a4, a3, 24 +; RV32-NEXT: srai a3, a3, 24 ; RV32-NEXT: slli a5, a0, 24 ; RV32-NEXT: srai a5, a5, 24 ; RV32-NEXT: slli a6, a0, 8 ; RV32-NEXT: srai a6, a6, 24 ; RV32-NEXT: sgtz a6, a6 ; RV32-NEXT: sgtz a5, a5 -; RV32-NEXT: sgtz a4, a4 -; RV32-NEXT: neg a4, a4 -; RV32-NEXT: and a3, a4, a3 +; RV32-NEXT: sgtz a3, a3 +; RV32-NEXT: neg a3, a3 +; RV32-NEXT: and a3, a3, a4 ; RV32-NEXT: slli a3, a3, 8 ; RV32-NEXT: neg a4, a5 ; RV32-NEXT: and a0, a4, a0 @@ -39,19 +39,19 @@ define void @vec3_setcc_crash(ptr %in, ptr %out) { ; RV64-LABEL: vec3_setcc_crash: ; RV64: # %bb.0: ; RV64-NEXT: lw a0, 0(a0) -; RV64-NEXT: srli a2, a0, 16 -; RV64-NEXT: srli a3, a0, 8 -; RV64-NEXT: slli a4, a0, 48 -; RV64-NEXT: srai a4, a4, 56 +; RV64-NEXT: srliw a2, a0, 16 +; RV64-NEXT: slli a3, a0, 48 +; RV64-NEXT: srli a4, a3, 56 +; RV64-NEXT: srai a3, a3, 56 ; RV64-NEXT: slli a5, a0, 56 ; RV64-NEXT: srai a5, a5, 56 ; RV64-NEXT: slli a6, a0, 40 ; RV64-NEXT: srai a6, a6, 56 ; RV64-NEXT: sgtz a6, a6 ; RV64-NEXT: sgtz a5, a5 -; RV64-NEXT: sgtz a4, a4 -; RV64-NEXT: negw a4, a4 -; RV64-NEXT: and a3, a4, a3 +; RV64-NEXT: sgtz a3, a3 +; RV64-NEXT: negw a3, a3 +; RV64-NEXT: and a3, a3, a4 ; RV64-NEXT: slli a3, a3, 8 ; RV64-NEXT: negw a4, a5 ; RV64-NEXT: and a0, a4, a0 diff --git a/llvm/test/CodeGen/RISCV/signed-truncation-check.ll b/llvm/test/CodeGen/RISCV/signed-truncation-check.ll index de36bcdb910609..069b2febc334d2 100644 --- a/llvm/test/CodeGen/RISCV/signed-truncation-check.ll +++ b/llvm/test/CodeGen/RISCV/signed-truncation-check.ll @@ -422,7 +422,8 @@ define i1 @add_ugecmp_i64_i16(i64 %x) nounwind { ; RV32I-NEXT: lui a1, 1048560 
; RV32I-NEXT: addi a1, a1, -1 ; RV32I-NEXT: sltu a1, a1, a2 -; RV32I-NEXT: seqz a0, a0 +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: addi a0, a0, -1 ; RV32I-NEXT: and a0, a0, a1 ; RV32I-NEXT: ret ; @@ -462,7 +463,8 @@ define i1 @add_ugecmp_i64_i8(i64 %x) nounwind { ; RV32I-NEXT: addi a2, a0, -128 ; RV32I-NEXT: sltu a0, a2, a0 ; RV32I-NEXT: add a0, a1, a0 -; RV32I-NEXT: seqz a0, a0 +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: addi a0, a0, -1 ; RV32I-NEXT: sltiu a1, a2, -256 ; RV32I-NEXT: xori a1, a1, 1 ; RV32I-NEXT: and a0, a0, a1 @@ -691,7 +693,8 @@ define i1 @add_ultcmp_i64_i8(i64 %x) nounwind { ; RV32I-NEXT: addi a2, a0, 128 ; RV32I-NEXT: sltu a0, a2, a0 ; RV32I-NEXT: add a0, a1, a0 -; RV32I-NEXT: seqz a0, a0 +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: addi a0, a0, -1 ; RV32I-NEXT: sltiu a1, a2, 256 ; RV32I-NEXT: and a0, a0, a1 ; RV32I-NEXT: ret diff --git a/llvm/test/CodeGen/Thumb/PR35481.ll b/llvm/test/CodeGen/Thumb/PR35481.ll index e48d1547782caf..ad3215ecb94952 100644 --- a/llvm/test/CodeGen/Thumb/PR35481.ll +++ b/llvm/test/CodeGen/Thumb/PR35481.ll @@ -18,10 +18,11 @@ define <4 x i32> @f() local_unnamed_addr #0 { ; CHECK-V4T-NEXT: movs r2, #3 ; CHECK-V4T-NEXT: movs r3, #4 ; CHECK-V4T-NEXT: bl g -; CHECK-V4T-NEXT: ldr r7, [sp, #4] -; CHECK-V4T-NEXT: mov lr, r7 ; CHECK-V4T-NEXT: pop {r7} -; CHECK-V4T-NEXT: add sp, #4 +; CHECK-V4T-NEXT: mov r12, r0 +; CHECK-V4T-NEXT: pop {r0} +; CHECK-V4T-NEXT: mov lr, r0 +; CHECK-V4T-NEXT: mov r0, r12 ; CHECK-V4T-NEXT: bx lr ; ; CHECK-V8M-LABEL: f: @@ -35,10 +36,11 @@ define <4 x i32> @f() local_unnamed_addr #0 { ; CHECK-V8M-NEXT: movs r1, #2 ; CHECK-V8M-NEXT: movs r2, #3 ; CHECK-V8M-NEXT: movs r3, #4 -; CHECK-V8M-NEXT: ldr r7, [sp, #4] -; CHECK-V8M-NEXT: mov lr, r7 ; CHECK-V8M-NEXT: pop {r7} -; CHECK-V8M-NEXT: add sp, #4 +; CHECK-V8M-NEXT: mov r12, r0 +; CHECK-V8M-NEXT: pop {r0} +; CHECK-V8M-NEXT: mov lr, r0 +; CHECK-V8M-NEXT: mov r0, r12 ; CHECK-V8M-NEXT: b g entry: %call = tail call i32 @h(i32 1) diff --git 
a/llvm/test/CodeGen/X86/GlobalISel/legalize-sdiv.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-sdiv.mir new file mode 100644 index 00000000000000..95c69209a2c354 --- /dev/null +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-sdiv.mir @@ -0,0 +1,127 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,X64 +# RUN: llc -mtriple=i686-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,X86 + +... +--- +name: test_sdiv_i8 +tracksRegLiveness: true +body: | + bb.1: + liveins: $edi, $esi + + ; CHECK-LABEL: name: test_sdiv_i8 + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[SDIV:%[0-9]+]]:_(s8) = G_SDIV [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: $al = COPY [[SDIV]](s8) + ; CHECK-NEXT: RET 0, implicit $al + %2:_(s32) = COPY $edi + %0:_(s8) = G_TRUNC %2(s32) + %3:_(s32) = COPY $esi + %1:_(s8) = G_TRUNC %3(s32) + %4:_(s8) = G_SDIV %0, %1 + $al = COPY %4(s8) + RET 0, implicit $al + +... 
+--- +name: test_sdiv_i16 +tracksRegLiveness: true +body: | + bb.1: + liveins: $edi, $esi + + ; CHECK-LABEL: name: test_sdiv_i16 + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[SDIV:%[0-9]+]]:_(s16) = G_SDIV [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: $ax = COPY [[SDIV]](s16) + ; CHECK-NEXT: RET 0, implicit $ax + %2:_(s32) = COPY $edi + %0:_(s16) = G_TRUNC %2(s32) + %3:_(s32) = COPY $esi + %1:_(s16) = G_TRUNC %3(s32) + %4:_(s16) = G_SDIV %0, %1 + $ax = COPY %4(s16) + RET 0, implicit $ax + +... +--- +name: test_sdiv_i32 +tracksRegLiveness: true +body: | + bb.1: + liveins: $edi, $esi + + ; CHECK-LABEL: name: test_sdiv_i32 + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi + ; CHECK-NEXT: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[COPY]], [[COPY1]] + ; CHECK-NEXT: $eax = COPY [[SDIV]](s32) + ; CHECK-NEXT: RET 0, implicit $eax + %0:_(s32) = COPY $edi + %1:_(s32) = COPY $esi + %2:_(s32) = G_SDIV %0, %1 + $eax = COPY %2(s32) + RET 0, implicit $eax + +... 
+--- +name: test_sdiv_i64 +tracksRegLiveness: true +body: | + bb.1: + ; X64-LABEL: name: test_sdiv_i64 + ; X64: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X64-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X64-NEXT: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[DEF]], [[DEF1]] + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[SDIV]](s64) + ; X64-NEXT: RET 0, implicit [[COPY]](s64) + ; + ; X86-LABEL: name: test_sdiv_i64 + ; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X86-NEXT: ADJCALLSTACKDOWN32 16, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp + ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) + ; X86-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; X86-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) + ; X86-NEXT: G_STORE [[UV]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1) + ; X86-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; X86-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s32) + ; X86-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4, align 1) + ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) + ; X86-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; X86-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C2]](s32) + ; X86-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD2]](p0) :: (store (s32) into stack + 8, align 1) + ; X86-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; X86-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY3]], [[C3]](s32) + ; X86-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD3]](p0) :: (store (s32) into stack + 12, align 1) + ; X86-NEXT: CALLpcrel32 &__divdi3, csr_32, implicit $esp, implicit $ssp, 
implicit-def $eax, implicit-def $edx + ; X86-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $eax + ; X86-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $edx + ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; X86-NEXT: ADJCALLSTACKUP32 16, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp + ; X86-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY [[MV]](s64) + ; X86-NEXT: RET 0, implicit [[COPY6]](s64) + %0:_(s64) = IMPLICIT_DEF + %1:_(s64) = IMPLICIT_DEF + %2:_(s64) = G_SDIV %0, %1 + %3:_(s64) = COPY %2(s64) + RET 0, implicit %3 + +... diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-srem.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-srem.mir new file mode 100644 index 00000000000000..ab7d89de5aa0d4 --- /dev/null +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-srem.mir @@ -0,0 +1,127 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,X64 +# RUN: llc -mtriple=i686-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,X86 + +... +--- +name: test_srem_i8 +tracksRegLiveness: true +body: | + bb.1: + liveins: $edi, $esi + + ; CHECK-LABEL: name: test_srem_i8 + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[SREM:%[0-9]+]]:_(s8) = G_SREM [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: $al = COPY [[SREM]](s8) + ; CHECK-NEXT: RET 0, implicit $al + %2:_(s32) = COPY $edi + %0:_(s8) = G_TRUNC %2(s32) + %3:_(s32) = COPY $esi + %1:_(s8) = G_TRUNC %3(s32) + %4:_(s8) = G_SREM %0, %1 + $al = COPY %4(s8) + RET 0, implicit $al + +... 
+--- +name: test_srem_i16 +tracksRegLiveness: true +body: | + bb.1: + liveins: $edi, $esi + + ; CHECK-LABEL: name: test_srem_i16 + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[SREM:%[0-9]+]]:_(s16) = G_SREM [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: $ax = COPY [[SREM]](s16) + ; CHECK-NEXT: RET 0, implicit $ax + %2:_(s32) = COPY $edi + %0:_(s16) = G_TRUNC %2(s32) + %3:_(s32) = COPY $esi + %1:_(s16) = G_TRUNC %3(s32) + %4:_(s16) = G_SREM %0, %1 + $ax = COPY %4(s16) + RET 0, implicit $ax + +... +--- +name: test_srem_i32 +tracksRegLiveness: true +body: | + bb.1: + liveins: $edi, $esi + + ; CHECK-LABEL: name: test_srem_i32 + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi + ; CHECK-NEXT: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[COPY]], [[COPY1]] + ; CHECK-NEXT: $eax = COPY [[SREM]](s32) + ; CHECK-NEXT: RET 0, implicit $eax + %0:_(s32) = COPY $edi + %1:_(s32) = COPY $esi + %2:_(s32) = G_SREM %0, %1 + $eax = COPY %2(s32) + RET 0, implicit $eax + +... 
+--- +name: test_srem_i64 +tracksRegLiveness: true +body: | + bb.1: + ; X64-LABEL: name: test_srem_i64 + ; X64: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X64-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X64-NEXT: [[SREM:%[0-9]+]]:_(s64) = G_SREM [[DEF]], [[DEF1]] + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[SREM]](s64) + ; X64-NEXT: RET 0, implicit [[COPY]](s64) + ; + ; X86-LABEL: name: test_srem_i64 + ; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X86-NEXT: ADJCALLSTACKDOWN32 16, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp + ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) + ; X86-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; X86-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) + ; X86-NEXT: G_STORE [[UV]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1) + ; X86-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; X86-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s32) + ; X86-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4, align 1) + ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) + ; X86-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; X86-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C2]](s32) + ; X86-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD2]](p0) :: (store (s32) into stack + 8, align 1) + ; X86-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; X86-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY3]], [[C3]](s32) + ; X86-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD3]](p0) :: (store (s32) into stack + 12, align 1) + ; X86-NEXT: CALLpcrel32 &__moddi3, csr_32, implicit $esp, implicit $ssp, 
implicit-def $eax, implicit-def $edx + ; X86-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $eax + ; X86-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $edx + ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; X86-NEXT: ADJCALLSTACKUP32 16, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp + ; X86-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY [[MV]](s64) + ; X86-NEXT: RET 0, implicit [[COPY6]](s64) + %0:_(s64) = IMPLICIT_DEF + %1:_(s64) = IMPLICIT_DEF + %2:_(s64) = G_SREM %0, %1 + %3:_(s64) = COPY %2(s64) + RET 0, implicit %3 + +... diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-udiv.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-udiv.mir new file mode 100644 index 00000000000000..233fada9c6c892 --- /dev/null +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-udiv.mir @@ -0,0 +1,127 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,X64 +# RUN: llc -mtriple=i686-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,X86 + +... +--- +name: test_udiv_i8 +tracksRegLiveness: true +body: | + bb.1: + liveins: $edi, $esi + + ; CHECK-LABEL: name: test_udiv_i8 + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s8) = G_UDIV [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: $al = COPY [[UDIV]](s8) + ; CHECK-NEXT: RET 0, implicit $al + %2:_(s32) = COPY $edi + %0:_(s8) = G_TRUNC %2(s32) + %3:_(s32) = COPY $esi + %1:_(s8) = G_TRUNC %3(s32) + %4:_(s8) = G_UDIV %0, %1 + $al = COPY %4(s8) + RET 0, implicit $al + +... 
+--- +name: test_udiv_i16 +tracksRegLiveness: true +body: | + bb.1: + liveins: $edi, $esi + + ; CHECK-LABEL: name: test_udiv_i16 + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s16) = G_UDIV [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: $ax = COPY [[UDIV]](s16) + ; CHECK-NEXT: RET 0, implicit $ax + %2:_(s32) = COPY $edi + %0:_(s16) = G_TRUNC %2(s32) + %3:_(s32) = COPY $esi + %1:_(s16) = G_TRUNC %3(s32) + %4:_(s16) = G_UDIV %0, %1 + $ax = COPY %4(s16) + RET 0, implicit $ax + +... +--- +name: test_udiv_i32 +tracksRegLiveness: true +body: | + bb.1: + liveins: $edi, $esi + + ; CHECK-LABEL: name: test_udiv_i32 + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi + ; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[COPY]], [[COPY1]] + ; CHECK-NEXT: $eax = COPY [[UDIV]](s32) + ; CHECK-NEXT: RET 0, implicit $eax + %0:_(s32) = COPY $edi + %1:_(s32) = COPY $esi + %2:_(s32) = G_UDIV %0, %1 + $eax = COPY %2(s32) + RET 0, implicit $eax + +... 
+--- +name: test_udiv_i64 +tracksRegLiveness: true +body: | + bb.1: + ; X64-LABEL: name: test_udiv_i64 + ; X64: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X64-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X64-NEXT: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[DEF]], [[DEF1]] + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[UDIV]](s64) + ; X64-NEXT: RET 0, implicit [[COPY]](s64) + ; + ; X86-LABEL: name: test_udiv_i64 + ; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X86-NEXT: ADJCALLSTACKDOWN32 16, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp + ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) + ; X86-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; X86-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) + ; X86-NEXT: G_STORE [[UV]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1) + ; X86-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; X86-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s32) + ; X86-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4, align 1) + ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) + ; X86-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; X86-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C2]](s32) + ; X86-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD2]](p0) :: (store (s32) into stack + 8, align 1) + ; X86-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; X86-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY3]], [[C3]](s32) + ; X86-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD3]](p0) :: (store (s32) into stack + 12, align 1) + ; X86-NEXT: CALLpcrel32 &__udivdi3, csr_32, implicit $esp, implicit $ssp, 
implicit-def $eax, implicit-def $edx + ; X86-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $eax + ; X86-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $edx + ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; X86-NEXT: ADJCALLSTACKUP32 16, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp + ; X86-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY [[MV]](s64) + ; X86-NEXT: RET 0, implicit [[COPY6]](s64) + %0:_(s64) = IMPLICIT_DEF + %1:_(s64) = IMPLICIT_DEF + %2:_(s64) = G_UDIV %0, %1 + %3:_(s64) = COPY %2(s64) + RET 0, implicit %3 + +... diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-urem.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-urem.mir new file mode 100644 index 00000000000000..85f6063dbd1e70 --- /dev/null +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-urem.mir @@ -0,0 +1,127 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,X64 +# RUN: llc -mtriple=i686-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,X86 + +... +--- +name: test_urem_i8 +tracksRegLiveness: true +body: | + bb.1: + liveins: $edi, $esi + + ; CHECK-LABEL: name: test_urem_i8 + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[UREM:%[0-9]+]]:_(s8) = G_UREM [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: $al = COPY [[UREM]](s8) + ; CHECK-NEXT: RET 0, implicit $al + %2:_(s32) = COPY $edi + %0:_(s8) = G_TRUNC %2(s32) + %3:_(s32) = COPY $esi + %1:_(s8) = G_TRUNC %3(s32) + %4:_(s8) = G_UREM %0, %1 + $al = COPY %4(s8) + RET 0, implicit $al + +... 
+--- +name: test_urem_i16 +tracksRegLiveness: true +body: | + bb.1: + liveins: $edi, $esi + + ; CHECK-LABEL: name: test_urem_i16 + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[UREM:%[0-9]+]]:_(s16) = G_UREM [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: $ax = COPY [[UREM]](s16) + ; CHECK-NEXT: RET 0, implicit $ax + %2:_(s32) = COPY $edi + %0:_(s16) = G_TRUNC %2(s32) + %3:_(s32) = COPY $esi + %1:_(s16) = G_TRUNC %3(s32) + %4:_(s16) = G_UREM %0, %1 + $ax = COPY %4(s16) + RET 0, implicit $ax + +... +--- +name: test_urem_i32 +tracksRegLiveness: true +body: | + bb.1: + liveins: $edi, $esi + + ; CHECK-LABEL: name: test_urem_i32 + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi + ; CHECK-NEXT: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[COPY]], [[COPY1]] + ; CHECK-NEXT: $eax = COPY [[UREM]](s32) + ; CHECK-NEXT: RET 0, implicit $eax + %0:_(s32) = COPY $edi + %1:_(s32) = COPY $esi + %2:_(s32) = G_UREM %0, %1 + $eax = COPY %2(s32) + RET 0, implicit $eax + +... 
+--- +name: test_urem_i64 +tracksRegLiveness: true +body: | + bb.1: + ; X64-LABEL: name: test_urem_i64 + ; X64: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X64-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X64-NEXT: [[UREM:%[0-9]+]]:_(s64) = G_UREM [[DEF]], [[DEF1]] + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[UREM]](s64) + ; X64-NEXT: RET 0, implicit [[COPY]](s64) + ; + ; X86-LABEL: name: test_urem_i64 + ; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X86-NEXT: ADJCALLSTACKDOWN32 16, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp + ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) + ; X86-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; X86-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) + ; X86-NEXT: G_STORE [[UV]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1) + ; X86-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; X86-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s32) + ; X86-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4, align 1) + ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) + ; X86-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; X86-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C2]](s32) + ; X86-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD2]](p0) :: (store (s32) into stack + 8, align 1) + ; X86-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $esp + ; X86-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; X86-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY3]], [[C3]](s32) + ; X86-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD3]](p0) :: (store (s32) into stack + 12, align 1) + ; X86-NEXT: CALLpcrel32 &__umoddi3, csr_32, implicit $esp, implicit $ssp, 
implicit-def $eax, implicit-def $edx + ; X86-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $eax + ; X86-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $edx + ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; X86-NEXT: ADJCALLSTACKUP32 16, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp + ; X86-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY [[MV]](s64) + ; X86-NEXT: RET 0, implicit [[COPY6]](s64) + %0:_(s64) = IMPLICIT_DEF + %1:_(s64) = IMPLICIT_DEF + %2:_(s64) = G_UREM %0, %1 + %3:_(s64) = COPY %2(s64) + RET 0, implicit %3 + +... diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-sdiv.mir b/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-sdiv.mir deleted file mode 100644 index 80382db942722c..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-sdiv.mir +++ /dev/null @@ -1,114 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=i686-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'sdiv.ll' - source_filename = "sdiv.ll" - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - - define i8 @test_sdiv_i8(i8 %arg1, i8 %arg2) { - %res = sdiv i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_sdiv_i16(i16 %arg1, i16 %arg2) { - %res = sdiv i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_sdiv_i32(i32 %arg1, i32 %arg2) { - %res = sdiv i32 %arg1, %arg2 - ret i32 %res - } - -... 
---- -name: test_sdiv_i8 -alignment: 16 -tracksRegLiveness: true -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } - - { id: 3, class: _ } - - { id: 4, class: _ } -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_sdiv_i8 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[SDIV:%[0-9]+]]:_(s8) = G_SDIV [[TRUNC]], [[TRUNC1]] - ; CHECK: $al = COPY [[SDIV]](s8) - ; CHECK: RET 0, implicit $al - %2:_(s32) = COPY $edi - %0:_(s8) = G_TRUNC %2(s32) - %3:_(s32) = COPY $esi - %1:_(s8) = G_TRUNC %3(s32) - %4:_(s8) = G_SDIV %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... ---- -name: test_sdiv_i16 -alignment: 16 -tracksRegLiveness: true -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } - - { id: 3, class: _ } - - { id: 4, class: _ } -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_sdiv_i16 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[SDIV:%[0-9]+]]:_(s16) = G_SDIV [[TRUNC]], [[TRUNC1]] - ; CHECK: $ax = COPY [[SDIV]](s16) - ; CHECK: RET 0, implicit $ax - %2:_(s32) = COPY $edi - %0:_(s16) = G_TRUNC %2(s32) - %3:_(s32) = COPY $esi - %1:_(s16) = G_TRUNC %3(s32) - %4:_(s16) = G_SDIV %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... 
---- -name: test_sdiv_i32 -alignment: 16 -tracksRegLiveness: true -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_sdiv_i32 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[COPY]], [[COPY1]] - ; CHECK: $eax = COPY [[SDIV]](s32) - ; CHECK: RET 0, implicit $eax - %0:_(s32) = COPY $edi - %1:_(s32) = COPY $esi - %2:_(s32) = G_SDIV %0, %1 - $eax = COPY %2(s32) - RET 0, implicit $eax - -... diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-srem.mir b/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-srem.mir deleted file mode 100644 index 965bf635d6feb8..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-srem.mir +++ /dev/null @@ -1,211 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=i686-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'srem.ll' - source_filename = "srem.ll" - target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" - target triple = "i386--linux-gnu" - - define i8 @test_srem_i8(i8 %arg1, i8 %arg2) { - %res = srem i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_srem_i16(i16 %arg1, i16 %arg2) { - %res = srem i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_srem_i32(i32 %arg1, i32 %arg2) { - %res = srem i32 %arg1, %arg2 - ret i32 %res - } - -... 
---- -name: test_srem_i8 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } - - { id: 3, class: _, preferred-register: '' } - - { id: 4, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 1, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 1, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_srem_i8 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s8) from %fixed-stack.0, align 16) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s8) from %fixed-stack.1, align 4) - ; CHECK: [[SREM:%[0-9]+]]:_(s8) = G_SREM [[LOAD]], [[LOAD1]] - ; CHECK: $al = COPY [[SREM]](s8) - ; CHECK: RET 0, implicit $al - %2:_(p0) = G_FRAME_INDEX %fixed-stack.1 
- %0:_(s8) = G_LOAD %2(p0) :: (invariant load (s8) from %fixed-stack.1, align 16) - %3:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:_(s8) = G_LOAD %3(p0) :: (invariant load (s8) from %fixed-stack.0, align 4) - %4:_(s8) = G_SREM %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... ---- -name: test_srem_i16 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } - - { id: 3, class: _, preferred-register: '' } - - { id: 4, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 2, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 2, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_srem_i16 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s16) from %fixed-stack.0, align 16) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; 
CHECK: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s16) from %fixed-stack.1, align 4) - ; CHECK: [[SREM:%[0-9]+]]:_(s16) = G_SREM [[LOAD]], [[LOAD1]] - ; CHECK: $ax = COPY [[SREM]](s16) - ; CHECK: RET 0, implicit $ax - %2:_(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:_(s16) = G_LOAD %2(p0) :: (invariant load (s16) from %fixed-stack.1, align 16) - %3:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:_(s16) = G_LOAD %3(p0) :: (invariant load (s16) from %fixed-stack.0, align 4) - %4:_(s16) = G_SREM %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... ---- -name: test_srem_i32 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } - - { id: 3, class: _, preferred-register: '' } - - { id: 4, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 4, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 4, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; 
CHECK-LABEL: name: test_srem_i32 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.0, align 16) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.1) - ; CHECK: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[LOAD]], [[LOAD1]] - ; CHECK: $eax = COPY [[SREM]](s32) - ; CHECK: RET 0, implicit $eax - %2:_(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:_(s32) = G_LOAD %2(p0) :: (invariant load (s32) from %fixed-stack.1, align 16) - %3:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:_(s32) = G_LOAD %3(p0) :: (invariant load (s32) from %fixed-stack.0, align 4) - %4:_(s32) = G_SREM %0, %1 - $eax = COPY %4(s32) - RET 0, implicit $eax - -... diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-udiv.mir b/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-udiv.mir deleted file mode 100644 index 85c9b6d9e86bfd..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-udiv.mir +++ /dev/null @@ -1,195 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=i686-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'udiv.ll' - source_filename = "udiv.ll" - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - - define i8 @test_udiv_i8(i8 %arg1, i8 %arg2) { - %res = udiv i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_udiv_i16(i16 %arg1, i16 %arg2) { - %res = udiv i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_udiv_i32(i32 %arg1, i32 %arg2) { - %res = udiv i32 %arg1, %arg2 - ret i32 %res - } - -... 
---- -name: test_udiv_i8 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } - - { id: 3, class: _, preferred-register: '' } - - { id: 4, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_udiv_i8 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[UDIV:%[0-9]+]]:_(s8) = G_UDIV [[TRUNC]], [[TRUNC1]] - ; CHECK: $al = COPY [[UDIV]](s8) - ; CHECK: RET 0, implicit $al - %2:_(s32) = COPY $edi - %0:_(s8) = G_TRUNC %2(s32) - %3:_(s32) = COPY $esi - %1:_(s8) = G_TRUNC %3(s32) - %4:_(s8) = G_UDIV %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... 
---- -name: test_udiv_i16 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } - - { id: 3, class: _, preferred-register: '' } - - { id: 4, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_udiv_i16 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[UDIV:%[0-9]+]]:_(s16) = G_UDIV [[TRUNC]], [[TRUNC1]] - ; CHECK: $ax = COPY [[UDIV]](s16) - ; CHECK: RET 0, implicit $ax - %2:_(s32) = COPY $edi - %0:_(s16) = G_TRUNC %2(s32) - %3:_(s32) = COPY $esi - %1:_(s16) = G_TRUNC %3(s32) - %4:_(s16) = G_UDIV %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... 
---- -name: test_udiv_i32 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_udiv_i32 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[COPY]], [[COPY1]] - ; CHECK: $eax = COPY [[UDIV]](s32) - ; CHECK: RET 0, implicit $eax - %0:_(s32) = COPY $edi - %1:_(s32) = COPY $esi - %2:_(s32) = G_UDIV %0, %1 - $eax = COPY %2(s32) - RET 0, implicit $eax - -... 
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-urem.mir b/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-urem.mir deleted file mode 100644 index b6496216ac56da..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-urem.mir +++ /dev/null @@ -1,211 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=i686-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'urem.ll' - source_filename = "urem.ll" - target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" - target triple = "i386--linux-gnu" - - define i8 @test_urem_i8(i8 %arg1, i8 %arg2) { - %res = urem i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_urem_i16(i16 %arg1, i16 %arg2) { - %res = urem i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_urem_i32(i32 %arg1, i32 %arg2) { - %res = urem i32 %arg1, %arg2 - ret i32 %res - } - -... ---- -name: test_urem_i8 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } - - { id: 3, class: _, preferred-register: '' } - - { id: 4, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 1, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', 
debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 1, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_urem_i8 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s8) from %fixed-stack.0, align 16) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s8) from %fixed-stack.1, align 4) - ; CHECK: [[UREM:%[0-9]+]]:_(s8) = G_UREM [[LOAD]], [[LOAD1]] - ; CHECK: $al = COPY [[UREM]](s8) - ; CHECK: RET 0, implicit $al - %2:_(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:_(s8) = G_LOAD %2(p0) :: (invariant load (s8) from %fixed-stack.1, align 16) - %3:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:_(s8) = G_LOAD %3(p0) :: (invariant load (s8) from %fixed-stack.0, align 4) - %4:_(s8) = G_UREM %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... 
---- -name: test_urem_i16 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } - - { id: 3, class: _, preferred-register: '' } - - { id: 4, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 2, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 2, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_urem_i16 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s16) from %fixed-stack.0, align 16) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s16) from %fixed-stack.1, align 4) - ; CHECK: [[UREM:%[0-9]+]]:_(s16) = G_UREM [[LOAD]], [[LOAD1]] - ; CHECK: $ax = COPY [[UREM]](s16) - ; CHECK: RET 0, implicit $ax - %2:_(p0) = G_FRAME_INDEX 
%fixed-stack.1 - %0:_(s16) = G_LOAD %2(p0) :: (invariant load (s16) from %fixed-stack.1, align 16) - %3:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:_(s16) = G_LOAD %3(p0) :: (invariant load (s16) from %fixed-stack.0, align 4) - %4:_(s16) = G_UREM %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... ---- -name: test_urem_i32 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } - - { id: 3, class: _, preferred-register: '' } - - { id: 4, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 4, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 4, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_urem_i32 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.0, align 16) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = 
G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.1) - ; CHECK: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[LOAD]], [[LOAD1]] - ; CHECK: $eax = COPY [[UREM]](s32) - ; CHECK: RET 0, implicit $eax - %2:_(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:_(s32) = G_LOAD %2(p0) :: (invariant load (s32) from %fixed-stack.1, align 16) - %3:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:_(s32) = G_LOAD %3(p0) :: (invariant load (s32) from %fixed-stack.0, align 4) - %4:_(s32) = G_UREM %0, %1 - $eax = COPY %4(s32) - RET 0, implicit $eax - -... diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86-select-sdiv.mir b/llvm/test/CodeGen/X86/GlobalISel/x86-select-sdiv.mir deleted file mode 100644 index 653d867492dc11..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86-select-sdiv.mir +++ /dev/null @@ -1,130 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=i386-linux-gnu -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'sdiv.ll' - source_filename = "sdiv.ll" - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - - define i8 @test_sdiv_i8(i8 %arg1, i8 %arg2) { - %res = sdiv i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_sdiv_i16(i16 %arg1, i16 %arg2) { - %res = sdiv i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_sdiv_i32(i32 %arg1, i32 %arg2) { - %res = sdiv i32 %arg1, %arg2 - ret i32 %res - } - -... 
---- -name: test_sdiv_i8 -alignment: 16 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: gpr } - - { id: 1, class: gpr } - - { id: 2, class: gpr } - - { id: 3, class: gpr } - - { id: 4, class: gpr } -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_sdiv_i8 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:gr32_abcd = COPY [[COPY]] - ; CHECK: [[COPY2:%[0-9]+]]:gr8_abcd_l = COPY [[COPY1]].sub_8bit - ; CHECK: [[COPY3:%[0-9]+]]:gr32 = COPY $esi - ; CHECK: [[COPY4:%[0-9]+]]:gr32_abcd = COPY [[COPY3]] - ; CHECK: [[COPY5:%[0-9]+]]:gr8_abcd_l = COPY [[COPY4]].sub_8bit - ; CHECK: $ax = MOVSX16rr8 [[COPY2]] - ; CHECK: IDIV8r [[COPY5]], implicit-def $al, implicit-def $ah, implicit-def $eflags, implicit $ax - ; CHECK: [[COPY6:%[0-9]+]]:gr8 = COPY $al - ; CHECK: $al = COPY [[COPY6]] - ; CHECK: RET 0, implicit $al - %2:gpr(s32) = COPY $edi - %0:gpr(s8) = G_TRUNC %2(s32) - %3:gpr(s32) = COPY $esi - %1:gpr(s8) = G_TRUNC %3(s32) - %4:gpr(s8) = G_SDIV %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... 
---- -name: test_sdiv_i16 -alignment: 16 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: gpr } - - { id: 1, class: gpr } - - { id: 2, class: gpr } - - { id: 3, class: gpr } - - { id: 4, class: gpr } -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_sdiv_i16 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:gr16 = COPY [[COPY]].sub_16bit - ; CHECK: [[COPY2:%[0-9]+]]:gr32 = COPY $esi - ; CHECK: [[COPY3:%[0-9]+]]:gr16 = COPY [[COPY2]].sub_16bit - ; CHECK: $ax = COPY [[COPY1]] - ; CHECK: CWD implicit-def $ax, implicit-def $dx, implicit $ax - ; CHECK: IDIV16r [[COPY3]], implicit-def $ax, implicit-def $dx, implicit-def $eflags, implicit $ax, implicit $dx - ; CHECK: [[COPY4:%[0-9]+]]:gr16 = COPY $ax - ; CHECK: $ax = COPY [[COPY4]] - ; CHECK: RET 0, implicit $ax - %2:gpr(s32) = COPY $edi - %0:gpr(s16) = G_TRUNC %2(s32) - %3:gpr(s32) = COPY $esi - %1:gpr(s16) = G_TRUNC %3(s32) - %4:gpr(s16) = G_SDIV %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... ---- -name: test_sdiv_i32 -alignment: 16 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: gpr } - - { id: 1, class: gpr } - - { id: 2, class: gpr } -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_sdiv_i32 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:gr32 = COPY $esi - ; CHECK: $eax = COPY [[COPY]] - ; CHECK: CDQ implicit-def $eax, implicit-def $edx, implicit $eax - ; CHECK: IDIV32r [[COPY1]], implicit-def $eax, implicit-def $edx, implicit-def $eflags, implicit $eax, implicit $edx - ; CHECK: [[COPY2:%[0-9]+]]:gr32 = COPY $eax - ; CHECK: $eax = COPY [[COPY2]] - ; CHECK: RET 0, implicit $eax - %0:gpr(s32) = COPY $edi - %1:gpr(s32) = COPY $esi - %2:gpr(s32) = G_SDIV %0, %1 - $eax = COPY %2(s32) - RET 0, implicit $eax - -... 
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86-select-srem.mir b/llvm/test/CodeGen/X86/GlobalISel/x86-select-srem.mir deleted file mode 100644 index a7f5badcdef061..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86-select-srem.mir +++ /dev/null @@ -1,213 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=i386-linux-gnu -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'srem.ll' - source_filename = "srem.ll" - target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" - target triple = "i386--linux-gnu" - - define i8 @test_srem_i8(i8 %arg1, i8 %arg2) { - %res = srem i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_srem_i16(i16 %arg1, i16 %arg2) { - %res = srem i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_srem_i32(i32 %arg1, i32 %arg2) { - %res = srem i32 %arg1, %arg2 - ret i32 %res - } - -... ---- -name: test_srem_i8 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 1, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: 
'', debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 1, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_srem_i8 - ; CHECK: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s8) from %fixed-stack.0, align 16) - ; CHECK: [[MOV8rm1:%[0-9]+]]:gr8 = MOV8rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load (s8) from %fixed-stack.1, align 4) - ; CHECK: $ax = MOVSX16rr8 [[MOV8rm]] - ; CHECK: IDIV8r [[MOV8rm1]], implicit-def $al, implicit-def $ah, implicit-def $eflags, implicit $ax - ; CHECK: [[COPY:%[0-9]+]]:gr8 = COPY $ah - ; CHECK: $al = COPY [[COPY]] - ; CHECK: RET 0, implicit $al - %2:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:gpr(s8) = G_LOAD %2(p0) :: (invariant load (s8) from %fixed-stack.1, align 16) - %3:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:gpr(s8) = G_LOAD %3(p0) :: (invariant load (s8) from %fixed-stack.0, align 4) - %4:gpr(s8) = G_SREM %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... 
---- -name: test_srem_i16 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 2, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 2, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_srem_i16 - ; CHECK: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s16) from %fixed-stack.0, align 16) - ; CHECK: [[MOV16rm1:%[0-9]+]]:gr16 = MOV16rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load (s16) from %fixed-stack.1, align 4) - ; CHECK: $ax = COPY [[MOV16rm]] - ; CHECK: CWD implicit-def $ax, implicit-def $dx, implicit $ax - ; CHECK: IDIV16r [[MOV16rm1]], implicit-def $ax, implicit-def $dx, implicit-def $eflags, implicit $ax, implicit $dx - ; CHECK: [[COPY:%[0-9]+]]:gr16 = COPY $dx - ; 
CHECK: $ax = COPY [[COPY]] - ; CHECK: RET 0, implicit $ax - %2:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:gpr(s16) = G_LOAD %2(p0) :: (invariant load (s16) from %fixed-stack.1, align 16) - %3:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:gpr(s16) = G_LOAD %3(p0) :: (invariant load (s16) from %fixed-stack.0, align 4) - %4:gpr(s16) = G_SREM %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... ---- -name: test_srem_i32 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 4, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 4, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_srem_i32 - ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s32) from %fixed-stack.0, align 16) - ; 
CHECK: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load (s32) from %fixed-stack.1) - ; CHECK: $eax = COPY [[MOV32rm]] - ; CHECK: CDQ implicit-def $eax, implicit-def $edx, implicit $eax - ; CHECK: IDIV32r [[MOV32rm1]], implicit-def $eax, implicit-def $edx, implicit-def $eflags, implicit $eax, implicit $edx - ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edx - ; CHECK: $eax = COPY [[COPY]] - ; CHECK: RET 0, implicit $eax - %2:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:gpr(s32) = G_LOAD %2(p0) :: (invariant load (s32) from %fixed-stack.1, align 16) - %3:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:gpr(s32) = G_LOAD %3(p0) :: (invariant load (s32) from %fixed-stack.0, align 4) - %4:gpr(s32) = G_SREM %0, %1 - $eax = COPY %4(s32) - RET 0, implicit $eax - -... diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86-select-udiv.mir b/llvm/test/CodeGen/X86/GlobalISel/x86-select-udiv.mir deleted file mode 100644 index 1a960f9ad9e2c6..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86-select-udiv.mir +++ /dev/null @@ -1,215 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=i386-linux-gnu -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'udiv.ll' - source_filename = "udiv.ll" - target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" - target triple = "i386--linux-gnu" - - define i8 @test_udiv_i8(i8 %arg1, i8 %arg2) { - %res = udiv i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_udiv_i16(i16 %arg1, i16 %arg2) { - %res = udiv i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_udiv_i32(i32 %arg1, i32 %arg2) { - %res = udiv i32 %arg1, %arg2 - ret i32 %res - } - -... 
---- -name: test_udiv_i8 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 1, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 1, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_udiv_i8 - ; CHECK: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s8) from %fixed-stack.0, align 16) - ; CHECK: [[MOV8rm1:%[0-9]+]]:gr8 = MOV8rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load (s8) from %fixed-stack.1, align 4) - ; CHECK: $ax = MOVZX16rr8 [[MOV8rm]] - ; CHECK: DIV8r [[MOV8rm1]], implicit-def $al, implicit-def $ah, implicit-def $eflags, implicit $ax - ; CHECK: [[COPY:%[0-9]+]]:gr8 = COPY $al - ; CHECK: $al = COPY [[COPY]] - ; CHECK: RET 0, implicit $al - %2:gpr(p0) = G_FRAME_INDEX 
%fixed-stack.1 - %0:gpr(s8) = G_LOAD %2(p0) :: (invariant load (s8) from %fixed-stack.1, align 16) - %3:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:gpr(s8) = G_LOAD %3(p0) :: (invariant load (s8) from %fixed-stack.0, align 4) - %4:gpr(s8) = G_UDIV %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... ---- -name: test_udiv_i16 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 2, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 2, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_udiv_i16 - ; CHECK: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s16) from %fixed-stack.0, align 16) - ; CHECK: [[MOV16rm1:%[0-9]+]]:gr16 = MOV16rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant 
load (s16) from %fixed-stack.1, align 4) - ; CHECK: $ax = COPY [[MOV16rm]] - ; CHECK: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags - ; CHECK: $dx = COPY [[MOV32r0_]].sub_16bit - ; CHECK: DIV16r [[MOV16rm1]], implicit-def $ax, implicit-def $dx, implicit-def $eflags, implicit $ax, implicit $dx - ; CHECK: [[COPY:%[0-9]+]]:gr16 = COPY $ax - ; CHECK: $ax = COPY [[COPY]] - ; CHECK: RET 0, implicit $ax - %2:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:gpr(s16) = G_LOAD %2(p0) :: (invariant load (s16) from %fixed-stack.1, align 16) - %3:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:gpr(s16) = G_LOAD %3(p0) :: (invariant load (s16) from %fixed-stack.0, align 4) - %4:gpr(s16) = G_UDIV %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... ---- -name: test_udiv_i32 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 4, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 4, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, 
callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_udiv_i32 - ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s32) from %fixed-stack.0) - ; CHECK: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load (s32) from %fixed-stack.1, align 16) - ; CHECK: $eax = COPY [[MOV32rm]] - ; CHECK: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags - ; CHECK: $edx = COPY [[MOV32r0_]] - ; CHECK: DIV32r [[MOV32rm1]], implicit-def $eax, implicit-def $edx, implicit-def $eflags, implicit $eax, implicit $edx - ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $eax - ; CHECK: $eax = COPY [[COPY]] - ; CHECK: RET 0, implicit $eax - %2:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:gpr(s32) = G_LOAD %2(p0) :: (invariant load (s32) from %fixed-stack.1, align 4) - %3:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:gpr(s32) = G_LOAD %3(p0) :: (invariant load (s32) from %fixed-stack.0, align 16) - %4:gpr(s32) = G_UDIV %0, %1 - $eax = COPY %4(s32) - RET 0, implicit $eax - -... 
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86-select-urem.mir b/llvm/test/CodeGen/X86/GlobalISel/x86-select-urem.mir deleted file mode 100644 index 23d2892ad91104..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86-select-urem.mir +++ /dev/null @@ -1,215 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=i386-linux-gnu -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'urem.ll' - source_filename = "urem.ll" - target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" - target triple = "i386--linux-gnu" - - define i8 @test_urem_i8(i8 %arg1, i8 %arg2) { - %res = urem i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_urem_i16(i16 %arg1, i16 %arg2) { - %res = urem i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_urem_i32(i32 %arg1, i32 %arg2) { - %res = urem i32 %arg1, %arg2 - ret i32 %res - } - -... ---- -name: test_urem_i8 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 1, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: 
'', debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 1, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_urem_i8 - ; CHECK: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s8) from %fixed-stack.0, align 16) - ; CHECK: [[MOV8rm1:%[0-9]+]]:gr8 = MOV8rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load (s8) from %fixed-stack.1, align 4) - ; CHECK: $ax = MOVZX16rr8 [[MOV8rm]] - ; CHECK: DIV8r [[MOV8rm1]], implicit-def $al, implicit-def $ah, implicit-def $eflags, implicit $ax - ; CHECK: [[COPY:%[0-9]+]]:gr8 = COPY $ah - ; CHECK: $al = COPY [[COPY]] - ; CHECK: RET 0, implicit $al - %2:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:gpr(s8) = G_LOAD %2(p0) :: (invariant load (s8) from %fixed-stack.1, align 16) - %3:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:gpr(s8) = G_LOAD %3(p0) :: (invariant load (s8) from %fixed-stack.0, align 4) - %4:gpr(s8) = G_UREM %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... 
---- -name: test_urem_i16 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 2, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 2, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_urem_i16 - ; CHECK: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s16) from %fixed-stack.0, align 16) - ; CHECK: [[MOV16rm1:%[0-9]+]]:gr16 = MOV16rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load (s16) from %fixed-stack.1, align 4) - ; CHECK: $ax = COPY [[MOV16rm]] - ; CHECK: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags - ; CHECK: $dx = COPY [[MOV32r0_]].sub_16bit - ; CHECK: DIV16r [[MOV16rm1]], implicit-def $ax, implicit-def $dx, implicit-def $eflags, implicit $ax, implicit $dx 
- ; CHECK: [[COPY:%[0-9]+]]:gr16 = COPY $dx - ; CHECK: $ax = COPY [[COPY]] - ; CHECK: RET 0, implicit $ax - %2:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:gpr(s16) = G_LOAD %2(p0) :: (invariant load (s16) from %fixed-stack.1, align 16) - %3:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:gpr(s16) = G_LOAD %3(p0) :: (invariant load (s16) from %fixed-stack.0, align 4) - %4:gpr(s16) = G_UREM %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... ---- -name: test_urem_i32 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 4, size: 4, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, type: default, offset: 0, size: 4, alignment: 16, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: -constants: -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_urem_i32 - ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant 
load (s32) from %fixed-stack.0, align 16) - ; CHECK: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load (s32) from %fixed-stack.1) - ; CHECK: $eax = COPY [[MOV32rm]] - ; CHECK: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags - ; CHECK: $edx = COPY [[MOV32r0_]] - ; CHECK: DIV32r [[MOV32rm1]], implicit-def $eax, implicit-def $edx, implicit-def $eflags, implicit $eax, implicit $edx - ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edx - ; CHECK: $eax = COPY [[COPY]] - ; CHECK: RET 0, implicit $eax - %2:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:gpr(s32) = G_LOAD %2(p0) :: (invariant load (s32) from %fixed-stack.1, align 16) - %3:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:gpr(s32) = G_LOAD %3(p0) :: (invariant load (s32) from %fixed-stack.0, align 4) - %4:gpr(s32) = G_UREM %0, %1 - $eax = COPY %4(s32) - RET 0, implicit $eax - -... diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-sdiv.mir b/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-sdiv.mir deleted file mode 100644 index faccc3750c806e..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-sdiv.mir +++ /dev/null @@ -1,145 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'sdiv.ll' - source_filename = "sdiv.ll" - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - - define i8 @test_sdiv_i8(i8 %arg1, i8 %arg2) { - %res = sdiv i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_sdiv_i16(i16 %arg1, i16 %arg2) { - %res = sdiv i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_sdiv_i32(i32 %arg1, i32 %arg2) { - %res = sdiv i32 %arg1, %arg2 - ret i32 %res - } - - define i64 @test_sdiv_i64(i64 %arg1, i64 %arg2) { - %res = sdiv i64 %arg1, %arg2 - ret i64 %res - } - -... 
---- -name: test_sdiv_i8 -alignment: 16 -tracksRegLiveness: true -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } - - { id: 3, class: _ } - - { id: 4, class: _ } -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_sdiv_i8 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[SDIV:%[0-9]+]]:_(s8) = G_SDIV [[TRUNC]], [[TRUNC1]] - ; CHECK: $al = COPY [[SDIV]](s8) - ; CHECK: RET 0, implicit $al - %2:_(s32) = COPY $edi - %0:_(s8) = G_TRUNC %2(s32) - %3:_(s32) = COPY $esi - %1:_(s8) = G_TRUNC %3(s32) - %4:_(s8) = G_SDIV %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... ---- -name: test_sdiv_i16 -alignment: 16 -tracksRegLiveness: true -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } - - { id: 3, class: _ } - - { id: 4, class: _ } -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_sdiv_i16 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[SDIV:%[0-9]+]]:_(s16) = G_SDIV [[TRUNC]], [[TRUNC1]] - ; CHECK: $ax = COPY [[SDIV]](s16) - ; CHECK: RET 0, implicit $ax - %2:_(s32) = COPY $edi - %0:_(s16) = G_TRUNC %2(s32) - %3:_(s32) = COPY $esi - %1:_(s16) = G_TRUNC %3(s32) - %4:_(s16) = G_SDIV %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... 
---- -name: test_sdiv_i32 -alignment: 16 -tracksRegLiveness: true -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_sdiv_i32 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[COPY]], [[COPY1]] - ; CHECK: $eax = COPY [[SDIV]](s32) - ; CHECK: RET 0, implicit $eax - %0:_(s32) = COPY $edi - %1:_(s32) = COPY $esi - %2:_(s32) = G_SDIV %0, %1 - $eax = COPY %2(s32) - RET 0, implicit $eax - -... ---- -name: test_sdiv_i64 -alignment: 16 -tracksRegLiveness: true -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } -body: | - bb.1 (%ir-block.0): - liveins: $rdi, $rsi - - ; CHECK-LABEL: name: test_sdiv_i64 - ; CHECK: liveins: $rdi, $rsi - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $rdi - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $rsi - ; CHECK: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[COPY]], [[COPY1]] - ; CHECK: $rax = COPY [[SDIV]](s64) - ; CHECK: RET 0, implicit $rax - %0:_(s64) = COPY $rdi - %1:_(s64) = COPY $rsi - %2:_(s64) = G_SDIV %0, %1 - $rax = COPY %2(s64) - RET 0, implicit $rax - -... 
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-srem.mir b/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-srem.mir deleted file mode 100644 index f02442f2b8501e..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-srem.mir +++ /dev/null @@ -1,253 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'srem.ll' - source_filename = "srem.ll" - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - - define i8 @test_srem_i8(i8 %arg1, i8 %arg2) { - %res = srem i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_srem_i16(i16 %arg1, i16 %arg2) { - %res = srem i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_srem_i32(i32 %arg1, i32 %arg2) { - %res = srem i32 %arg1, %arg2 - ret i32 %res - } - - define i64 @test_srem_i64(i64 %arg1, i64 %arg2) { - %res = srem i64 %arg1, %arg2 - ret i64 %res - } - -... 
---- -name: test_srem_i8 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } - - { id: 3, class: _, preferred-register: '' } - - { id: 4, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_srem_i8 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[SREM:%[0-9]+]]:_(s8) = G_SREM [[TRUNC]], [[TRUNC1]] - ; CHECK: $al = COPY [[SREM]](s8) - ; CHECK: RET 0, implicit $al - %2:_(s32) = COPY $edi - %0:_(s8) = G_TRUNC %2(s32) - %3:_(s32) = COPY $esi - %1:_(s8) = G_TRUNC %3(s32) - %4:_(s8) = G_SREM %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... 
---- -name: test_srem_i16 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } - - { id: 3, class: _, preferred-register: '' } - - { id: 4, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_srem_i16 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[SREM:%[0-9]+]]:_(s16) = G_SREM [[TRUNC]], [[TRUNC1]] - ; CHECK: $ax = COPY [[SREM]](s16) - ; CHECK: RET 0, implicit $ax - %2:_(s32) = COPY $edi - %0:_(s16) = G_TRUNC %2(s32) - %3:_(s32) = COPY $esi - %1:_(s16) = G_TRUNC %3(s32) - %4:_(s16) = G_SREM %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... 
---- -name: test_srem_i32 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_srem_i32 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[COPY]], [[COPY1]] - ; CHECK: $eax = COPY [[SREM]](s32) - ; CHECK: RET 0, implicit $eax - %0:_(s32) = COPY $edi - %1:_(s32) = COPY $esi - %2:_(s32) = G_SREM %0, %1 - $eax = COPY %2(s32) - RET 0, implicit $eax - -... 
---- -name: test_srem_i64 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $rdi, $rsi - - ; CHECK-LABEL: name: test_srem_i64 - ; CHECK: liveins: $rdi, $rsi - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $rdi - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $rsi - ; CHECK: [[SREM:%[0-9]+]]:_(s64) = G_SREM [[COPY]], [[COPY1]] - ; CHECK: $rax = COPY [[SREM]](s64) - ; CHECK: RET 0, implicit $rax - %0:_(s64) = COPY $rdi - %1:_(s64) = COPY $rsi - %2:_(s64) = G_SREM %0, %1 - $rax = COPY %2(s64) - RET 0, implicit $rax - -... 
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-udiv.mir b/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-udiv.mir deleted file mode 100644 index 35073e2bcb1b1c..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-udiv.mir +++ /dev/null @@ -1,253 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'udiv.ll' - source_filename = "udiv.ll" - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - - define i8 @test_udiv_i8(i8 %arg1, i8 %arg2) { - %res = udiv i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_udiv_i16(i16 %arg1, i16 %arg2) { - %res = udiv i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_udiv_i32(i32 %arg1, i32 %arg2) { - %res = udiv i32 %arg1, %arg2 - ret i32 %res - } - - define i64 @test_udiv_i64(i64 %arg1, i64 %arg2) { - %res = udiv i64 %arg1, %arg2 - ret i64 %res - } - -... 
---- -name: test_udiv_i8 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } - - { id: 3, class: _, preferred-register: '' } - - { id: 4, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_udiv_i8 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[UDIV:%[0-9]+]]:_(s8) = G_UDIV [[TRUNC]], [[TRUNC1]] - ; CHECK: $al = COPY [[UDIV]](s8) - ; CHECK: RET 0, implicit $al - %2:_(s32) = COPY $edi - %0:_(s8) = G_TRUNC %2(s32) - %3:_(s32) = COPY $esi - %1:_(s8) = G_TRUNC %3(s32) - %4:_(s8) = G_UDIV %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... 
---- -name: test_udiv_i16 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } - - { id: 3, class: _, preferred-register: '' } - - { id: 4, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_udiv_i16 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[UDIV:%[0-9]+]]:_(s16) = G_UDIV [[TRUNC]], [[TRUNC1]] - ; CHECK: $ax = COPY [[UDIV]](s16) - ; CHECK: RET 0, implicit $ax - %2:_(s32) = COPY $edi - %0:_(s16) = G_TRUNC %2(s32) - %3:_(s32) = COPY $esi - %1:_(s16) = G_TRUNC %3(s32) - %4:_(s16) = G_UDIV %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... 
---- -name: test_udiv_i32 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_udiv_i32 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[COPY]], [[COPY1]] - ; CHECK: $eax = COPY [[UDIV]](s32) - ; CHECK: RET 0, implicit $eax - %0:_(s32) = COPY $edi - %1:_(s32) = COPY $esi - %2:_(s32) = G_UDIV %0, %1 - $eax = COPY %2(s32) - RET 0, implicit $eax - -... 
---- -name: test_udiv_i64 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $rdi, $rsi - - ; CHECK-LABEL: name: test_udiv_i64 - ; CHECK: liveins: $rdi, $rsi - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $rdi - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $rsi - ; CHECK: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[COPY]], [[COPY1]] - ; CHECK: $rax = COPY [[UDIV]](s64) - ; CHECK: RET 0, implicit $rax - %0:_(s64) = COPY $rdi - %1:_(s64) = COPY $rsi - %2:_(s64) = G_UDIV %0, %1 - $rax = COPY %2(s64) - RET 0, implicit $rax - -... 
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-urem.mir b/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-urem.mir deleted file mode 100644 index c0ca5ae74fc31f..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-urem.mir +++ /dev/null @@ -1,253 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'urem.ll' - source_filename = "urem.ll" - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - - define i8 @test_urem_i8(i8 %arg1, i8 %arg2) { - %res = urem i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_urem_i16(i16 %arg1, i16 %arg2) { - %res = urem i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_urem_i32(i32 %arg1, i32 %arg2) { - %res = urem i32 %arg1, %arg2 - ret i32 %res - } - - define i64 @test_urem_i64(i64 %arg1, i64 %arg2) { - %res = urem i64 %arg1, %arg2 - ret i64 %res - } - -... 
---- -name: test_urem_i8 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } - - { id: 3, class: _, preferred-register: '' } - - { id: 4, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_urem_i8 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[UREM:%[0-9]+]]:_(s8) = G_UREM [[TRUNC]], [[TRUNC1]] - ; CHECK: $al = COPY [[UREM]](s8) - ; CHECK: RET 0, implicit $al - %2:_(s32) = COPY $edi - %0:_(s8) = G_TRUNC %2(s32) - %3:_(s32) = COPY $esi - %1:_(s8) = G_TRUNC %3(s32) - %4:_(s8) = G_UREM %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... 
---- -name: test_urem_i16 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } - - { id: 3, class: _, preferred-register: '' } - - { id: 4, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_urem_i16 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[UREM:%[0-9]+]]:_(s16) = G_UREM [[TRUNC]], [[TRUNC1]] - ; CHECK: $ax = COPY [[UREM]](s16) - ; CHECK: RET 0, implicit $ax - %2:_(s32) = COPY $edi - %0:_(s16) = G_TRUNC %2(s32) - %3:_(s32) = COPY $esi - %1:_(s16) = G_TRUNC %3(s32) - %4:_(s16) = G_UREM %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... 
---- -name: test_urem_i32 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_urem_i32 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[COPY]], [[COPY1]] - ; CHECK: $eax = COPY [[UREM]](s32) - ; CHECK: RET 0, implicit $eax - %0:_(s32) = COPY $edi - %1:_(s32) = COPY $esi - %2:_(s32) = G_UREM %0, %1 - $eax = COPY %2(s32) - RET 0, implicit $eax - -... 
---- -name: test_urem_i64 -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: _, preferred-register: '' } - - { id: 1, class: _, preferred-register: '' } - - { id: 2, class: _, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $rdi, $rsi - - ; CHECK-LABEL: name: test_urem_i64 - ; CHECK: liveins: $rdi, $rsi - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $rdi - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $rsi - ; CHECK: [[UREM:%[0-9]+]]:_(s64) = G_UREM [[COPY]], [[COPY1]] - ; CHECK: $rax = COPY [[UREM]](s64) - ; CHECK: RET 0, implicit $rax - %0:_(s64) = COPY $rdi - %1:_(s64) = COPY $rsi - %2:_(s64) = G_UREM %0, %1 - $rax = COPY %2(s64) - RET 0, implicit $rax - -... 
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-sdiv.mir b/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-sdiv.mir deleted file mode 100644 index d3a1608be52a1b..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-sdiv.mir +++ /dev/null @@ -1,164 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'sdiv.ll' - source_filename = "sdiv.ll" - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - - define i8 @test_sdiv_i8(i8 %arg1, i8 %arg2) { - %res = sdiv i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_sdiv_i16(i16 %arg1, i16 %arg2) { - %res = sdiv i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_sdiv_i32(i32 %arg1, i32 %arg2) { - %res = sdiv i32 %arg1, %arg2 - ret i32 %res - } - - define i64 @test_sdiv_i64(i64 %arg1, i64 %arg2) { - %res = sdiv i64 %arg1, %arg2 - ret i64 %res - } - -... 
---- -name: test_sdiv_i8 -alignment: 16 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: gpr } - - { id: 1, class: gpr } - - { id: 2, class: gpr } - - { id: 3, class: gpr } - - { id: 4, class: gpr } -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_sdiv_i8 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:gr8 = COPY [[COPY]].sub_8bit - ; CHECK: [[COPY2:%[0-9]+]]:gr32 = COPY $esi - ; CHECK: [[COPY3:%[0-9]+]]:gr8 = COPY [[COPY2]].sub_8bit - ; CHECK: $ax = MOVSX16rr8 [[COPY1]] - ; CHECK: IDIV8r [[COPY3]], implicit-def $al, implicit-def $ah, implicit-def $eflags, implicit $ax - ; CHECK: [[COPY4:%[0-9]+]]:gr8 = COPY $al - ; CHECK: $al = COPY [[COPY4]] - ; CHECK: RET 0, implicit $al - %2:gpr(s32) = COPY $edi - %0:gpr(s8) = G_TRUNC %2(s32) - %3:gpr(s32) = COPY $esi - %1:gpr(s8) = G_TRUNC %3(s32) - %4:gpr(s8) = G_SDIV %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... 
---- -name: test_sdiv_i16 -alignment: 16 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: gpr } - - { id: 1, class: gpr } - - { id: 2, class: gpr } - - { id: 3, class: gpr } - - { id: 4, class: gpr } -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_sdiv_i16 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:gr16 = COPY [[COPY]].sub_16bit - ; CHECK: [[COPY2:%[0-9]+]]:gr32 = COPY $esi - ; CHECK: [[COPY3:%[0-9]+]]:gr16 = COPY [[COPY2]].sub_16bit - ; CHECK: $ax = COPY [[COPY1]] - ; CHECK: CWD implicit-def $ax, implicit-def $dx, implicit $ax - ; CHECK: IDIV16r [[COPY3]], implicit-def $ax, implicit-def $dx, implicit-def $eflags, implicit $ax, implicit $dx - ; CHECK: [[COPY4:%[0-9]+]]:gr16 = COPY $ax - ; CHECK: $ax = COPY [[COPY4]] - ; CHECK: RET 0, implicit $ax - %2:gpr(s32) = COPY $edi - %0:gpr(s16) = G_TRUNC %2(s32) - %3:gpr(s32) = COPY $esi - %1:gpr(s16) = G_TRUNC %3(s32) - %4:gpr(s16) = G_SDIV %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... ---- -name: test_sdiv_i32 -alignment: 16 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: gpr } - - { id: 1, class: gpr } - - { id: 2, class: gpr } -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_sdiv_i32 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:gr32 = COPY $esi - ; CHECK: $eax = COPY [[COPY]] - ; CHECK: CDQ implicit-def $eax, implicit-def $edx, implicit $eax - ; CHECK: IDIV32r [[COPY1]], implicit-def $eax, implicit-def $edx, implicit-def $eflags, implicit $eax, implicit $edx - ; CHECK: [[COPY2:%[0-9]+]]:gr32 = COPY $eax - ; CHECK: $eax = COPY [[COPY2]] - ; CHECK: RET 0, implicit $eax - %0:gpr(s32) = COPY $edi - %1:gpr(s32) = COPY $esi - %2:gpr(s32) = G_SDIV %0, %1 - $eax = COPY %2(s32) - RET 0, implicit $eax - -... 
---- -name: test_sdiv_i64 -alignment: 16 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: gpr } - - { id: 1, class: gpr } - - { id: 2, class: gpr } -body: | - bb.1 (%ir-block.0): - liveins: $rdi, $rsi - - ; CHECK-LABEL: name: test_sdiv_i64 - ; CHECK: liveins: $rdi, $rsi - ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; CHECK: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; CHECK: $rax = COPY [[COPY]] - ; CHECK: CQO implicit-def $rax, implicit-def $rdx, implicit $rax - ; CHECK: IDIV64r [[COPY1]], implicit-def $rax, implicit-def $rdx, implicit-def $eflags, implicit $rax, implicit $rdx - ; CHECK: [[COPY2:%[0-9]+]]:gr64 = COPY $rax - ; CHECK: $rax = COPY [[COPY2]] - ; CHECK: RET 0, implicit $rax - %0:gpr(s64) = COPY $rdi - %1:gpr(s64) = COPY $rsi - %2:gpr(s64) = G_SDIV %0, %1 - $rax = COPY %2(s64) - RET 0, implicit $rax - -... diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-srem.mir b/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-srem.mir deleted file mode 100644 index 0988883145bcdf..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-srem.mir +++ /dev/null @@ -1,270 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'srem.ll' - source_filename = "srem.ll" - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - - define i8 @test_srem_i8(i8 %arg1, i8 %arg2) { - %res = srem i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_srem_i16(i16 %arg1, i16 %arg2) { - %res = srem i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_srem_i32(i32 %arg1, i32 %arg2) { - %res = srem i32 %arg1, %arg2 - ret i32 %res - } - - define i64 @test_srem_i64(i64 %arg1, i64 %arg2) { - %res = srem i64 %arg1, %arg2 - ret i64 %res - } - -... 
---- -name: test_srem_i8 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_srem_i8 - ; CHECK: liveins: $edi, $esi - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr8 = COPY [[COPY]].sub_8bit - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY $esi - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr8 = COPY [[COPY2]].sub_8bit - ; CHECK-NEXT: $ax = MOVSX16rr8 [[COPY1]] - ; CHECK-NEXT: IDIV8r [[COPY3]], implicit-def $al, implicit-def $ah, implicit-def $eflags, implicit $ax - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr16 = COPY $ax - ; CHECK-NEXT: [[SHR16ri:%[0-9]+]]:gr16 = SHR16ri [[COPY4]], 8, implicit-def $eflags - ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr8 = SUBREG_TO_REG 0, [[SHR16ri]], %subreg.sub_8bit - ; CHECK-NEXT: $al = COPY [[SUBREG_TO_REG]] - ; CHECK-NEXT: RET 0, implicit $al - %2:gpr(s32) = COPY $edi - %0:gpr(s8) = G_TRUNC %2(s32) - %3:gpr(s32) = COPY $esi - %1:gpr(s8) = G_TRUNC %3(s32) - %4:gpr(s8) = G_SREM %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... 
---- -name: test_srem_i16 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_srem_i16 - ; CHECK: liveins: $edi, $esi - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr16 = COPY [[COPY]].sub_16bit - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY $esi - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr16 = COPY [[COPY2]].sub_16bit - ; CHECK-NEXT: $ax = COPY [[COPY1]] - ; CHECK-NEXT: CWD implicit-def $ax, implicit-def $dx, implicit $ax - ; CHECK-NEXT: IDIV16r [[COPY3]], implicit-def $ax, implicit-def $dx, implicit-def $eflags, implicit $ax, implicit $dx - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr16 = COPY $dx - ; CHECK-NEXT: $ax = COPY [[COPY4]] - ; CHECK-NEXT: RET 0, implicit $ax - %2:gpr(s32) = COPY $edi - %0:gpr(s16) = G_TRUNC %2(s32) - %3:gpr(s32) = COPY $esi - %1:gpr(s16) = G_TRUNC %3(s32) - %4:gpr(s16) = G_SREM %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... 
---- -name: test_srem_i32 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_srem_i32 - ; CHECK: liveins: $edi, $esi - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $esi - ; CHECK-NEXT: $eax = COPY [[COPY]] - ; CHECK-NEXT: CDQ implicit-def $eax, implicit-def $edx, implicit $eax - ; CHECK-NEXT: IDIV32r [[COPY1]], implicit-def $eax, implicit-def $edx, implicit-def $eflags, implicit $eax, implicit $edx - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY $edx - ; CHECK-NEXT: $eax = COPY [[COPY2]] - ; CHECK-NEXT: RET 0, implicit $eax - %0:gpr(s32) = COPY $edi - %1:gpr(s32) = COPY $esi - %2:gpr(s32) = G_SREM %0, %1 - $eax = COPY %2(s32) - RET 0, implicit $eax - -... 
---- -name: test_srem_i64 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $rdi, $rsi - - ; CHECK-LABEL: name: test_srem_i64 - ; CHECK: liveins: $rdi, $rsi - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; CHECK-NEXT: $rax = COPY [[COPY]] - ; CHECK-NEXT: CQO implicit-def $rax, implicit-def $rdx, implicit $rax - ; CHECK-NEXT: IDIV64r [[COPY1]], implicit-def $rax, implicit-def $rdx, implicit-def $eflags, implicit $rax, implicit $rdx - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64 = COPY $rdx - ; CHECK-NEXT: $rax = COPY [[COPY2]] - ; CHECK-NEXT: RET 0, implicit $rax - %0:gpr(s64) = COPY $rdi - %1:gpr(s64) = COPY $rsi - %2:gpr(s64) = G_SREM %0, %1 - $rax = COPY %2(s64) - RET 0, implicit $rax - -... 
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-udiv.mir b/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-udiv.mir deleted file mode 100644 index 71c03fd6e28fd1..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-udiv.mir +++ /dev/null @@ -1,267 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'udiv.ll' - source_filename = "udiv.ll" - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - - define i8 @test_udiv_i8(i8 %arg1, i8 %arg2) { - %res = udiv i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_udiv_i16(i16 %arg1, i16 %arg2) { - %res = udiv i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_udiv_i32(i32 %arg1, i32 %arg2) { - %res = udiv i32 %arg1, %arg2 - ret i32 %res - } - - define i64 @test_udiv_i64(i64 %arg1, i64 %arg2) { - %res = udiv i64 %arg1, %arg2 - ret i64 %res - } - -... 
---- -name: test_udiv_i8 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_udiv_i8 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:gr8 = COPY [[COPY]].sub_8bit - ; CHECK: [[COPY2:%[0-9]+]]:gr32 = COPY $esi - ; CHECK: [[COPY3:%[0-9]+]]:gr8 = COPY [[COPY2]].sub_8bit - ; CHECK: $ax = MOVZX16rr8 [[COPY1]] - ; CHECK: DIV8r [[COPY3]], implicit-def $al, implicit-def $ah, implicit-def $eflags, implicit $ax - ; CHECK: [[COPY4:%[0-9]+]]:gr8 = COPY $al - ; CHECK: $al = COPY [[COPY4]] - ; CHECK: RET 0, implicit $al - %2:gpr(s32) = COPY $edi - %0:gpr(s8) = G_TRUNC %2(s32) - %3:gpr(s32) = COPY $esi - %1:gpr(s8) = G_TRUNC %3(s32) - %4:gpr(s8) = G_UDIV %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... 
---- -name: test_udiv_i16 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_udiv_i16 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:gr16 = COPY [[COPY]].sub_16bit - ; CHECK: [[COPY2:%[0-9]+]]:gr32 = COPY $esi - ; CHECK: [[COPY3:%[0-9]+]]:gr16 = COPY [[COPY2]].sub_16bit - ; CHECK: $ax = COPY [[COPY1]] - ; CHECK: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags - ; CHECK: $dx = COPY [[MOV32r0_]].sub_16bit - ; CHECK: DIV16r [[COPY3]], implicit-def $ax, implicit-def $dx, implicit-def $eflags, implicit $ax, implicit $dx - ; CHECK: [[COPY4:%[0-9]+]]:gr16 = COPY $ax - ; CHECK: $ax = COPY [[COPY4]] - ; CHECK: RET 0, implicit $ax - %2:gpr(s32) = COPY $edi - %0:gpr(s16) = G_TRUNC %2(s32) - %3:gpr(s32) = COPY $esi - %1:gpr(s16) = G_TRUNC %3(s32) - %4:gpr(s16) = G_UDIV %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... 
---- -name: test_udiv_i32 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_udiv_i32 - ; CHECK: liveins: $edi, $esi - ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:gr32 = COPY $esi - ; CHECK: $eax = COPY [[COPY]] - ; CHECK: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags - ; CHECK: $edx = COPY [[MOV32r0_]] - ; CHECK: DIV32r [[COPY1]], implicit-def $eax, implicit-def $edx, implicit-def $eflags, implicit $eax, implicit $edx - ; CHECK: [[COPY2:%[0-9]+]]:gr32 = COPY $eax - ; CHECK: $eax = COPY [[COPY2]] - ; CHECK: RET 0, implicit $eax - %0:gpr(s32) = COPY $edi - %1:gpr(s32) = COPY $esi - %2:gpr(s32) = G_UDIV %0, %1 - $eax = COPY %2(s32) - RET 0, implicit $eax - -... 
---- -name: test_udiv_i64 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $rdi, $rsi - - ; CHECK-LABEL: name: test_udiv_i64 - ; CHECK: liveins: $rdi, $rsi - ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; CHECK: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; CHECK: $rax = COPY [[COPY]] - ; CHECK: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags - ; CHECK: $rdx = SUBREG_TO_REG 0, [[MOV32r0_]], %subreg.sub_32bit - ; CHECK: DIV64r [[COPY1]], implicit-def $rax, implicit-def $rdx, implicit-def $eflags, implicit $rax, implicit $rdx - ; CHECK: [[COPY2:%[0-9]+]]:gr64 = COPY $rax - ; CHECK: $rax = COPY [[COPY2]] - ; CHECK: RET 0, implicit $rax - %0:gpr(s64) = COPY $rdi - %1:gpr(s64) = COPY $rsi - %2:gpr(s64) = G_UDIV %0, %1 - $rax = COPY %2(s64) - RET 0, implicit $rax - -... 
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-urem.mir b/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-urem.mir deleted file mode 100644 index 657cf499949734..00000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-urem.mir +++ /dev/null @@ -1,273 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s - ---- | - ; ModuleID = 'urem.ll' - source_filename = "urem.ll" - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - - define i8 @test_urem_i8(i8 %arg1, i8 %arg2) { - %res = urem i8 %arg1, %arg2 - ret i8 %res - } - - define i16 @test_urem_i16(i16 %arg1, i16 %arg2) { - %res = urem i16 %arg1, %arg2 - ret i16 %res - } - - define i32 @test_urem_i32(i32 %arg1, i32 %arg2) { - %res = urem i32 %arg1, %arg2 - ret i32 %res - } - - define i64 @test_urem_i64(i64 %arg1, i64 %arg2) { - %res = urem i64 %arg1, %arg2 - ret i64 %res - } - -... 
---- -name: test_urem_i8 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_urem_i8 - ; CHECK: liveins: $edi, $esi - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr8 = COPY [[COPY]].sub_8bit - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY $esi - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr8 = COPY [[COPY2]].sub_8bit - ; CHECK-NEXT: $ax = MOVZX16rr8 [[COPY1]] - ; CHECK-NEXT: DIV8r [[COPY3]], implicit-def $al, implicit-def $ah, implicit-def $eflags, implicit $ax - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr16 = COPY $ax - ; CHECK-NEXT: [[SHR16ri:%[0-9]+]]:gr16 = SHR16ri [[COPY4]], 8, implicit-def $eflags - ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr8 = SUBREG_TO_REG 0, [[SHR16ri]], %subreg.sub_8bit - ; CHECK-NEXT: $al = COPY [[SUBREG_TO_REG]] - ; CHECK-NEXT: RET 0, implicit $al - %2:gpr(s32) = COPY $edi - %0:gpr(s8) = G_TRUNC %2(s32) - %3:gpr(s32) = COPY $esi - %1:gpr(s8) = G_TRUNC %3(s32) - %4:gpr(s8) = G_UREM %0, %1 - $al = COPY %4(s8) - RET 0, implicit $al - -... 
---- -name: test_urem_i16 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_urem_i16 - ; CHECK: liveins: $edi, $esi - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr16 = COPY [[COPY]].sub_16bit - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY $esi - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr16 = COPY [[COPY2]].sub_16bit - ; CHECK-NEXT: $ax = COPY [[COPY1]] - ; CHECK-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags - ; CHECK-NEXT: $dx = COPY [[MOV32r0_]].sub_16bit - ; CHECK-NEXT: DIV16r [[COPY3]], implicit-def $ax, implicit-def $dx, implicit-def $eflags, implicit $ax, implicit $dx - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr16 = COPY $dx - ; CHECK-NEXT: $ax = COPY [[COPY4]] - ; CHECK-NEXT: RET 0, implicit $ax - %2:gpr(s32) = COPY $edi - %0:gpr(s16) = G_TRUNC %2(s32) - %3:gpr(s32) = COPY $esi - %1:gpr(s16) = G_TRUNC %3(s32) - %4:gpr(s16) = G_UREM %0, %1 - $ax = COPY %4(s16) - RET 0, implicit $ax - -... 
---- -name: test_urem_i32 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi, $esi - - ; CHECK-LABEL: name: test_urem_i32 - ; CHECK: liveins: $edi, $esi - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $esi - ; CHECK-NEXT: $eax = COPY [[COPY]] - ; CHECK-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags - ; CHECK-NEXT: $edx = COPY [[MOV32r0_]] - ; CHECK-NEXT: DIV32r [[COPY1]], implicit-def $eax, implicit-def $edx, implicit-def $eflags, implicit $eax, implicit $edx - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY $edx - ; CHECK-NEXT: $eax = COPY [[COPY2]] - ; CHECK-NEXT: RET 0, implicit $eax - %0:gpr(s32) = COPY $edi - %1:gpr(s32) = COPY $esi - %2:gpr(s32) = G_UREM %0, %1 - $eax = COPY %2(s32) - RET 0, implicit $eax - -... 
---- -name: test_urem_i64 -alignment: 16 -exposesReturnsTwice: false -legalized: true -regBankSelected: true -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $rdi, $rsi - - ; CHECK-LABEL: name: test_urem_i64 - ; CHECK: liveins: $rdi, $rsi - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; CHECK-NEXT: $rax = COPY [[COPY]] - ; CHECK-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags - ; CHECK-NEXT: $rdx = SUBREG_TO_REG 0, [[MOV32r0_]], %subreg.sub_32bit - ; CHECK-NEXT: DIV64r [[COPY1]], implicit-def $rax, implicit-def $rdx, implicit-def $eflags, implicit $rax, implicit $rdx - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64 = COPY $rdx - ; CHECK-NEXT: $rax = COPY [[COPY2]] - ; CHECK-NEXT: RET 0, implicit $rax - %0:gpr(s64) = COPY $rdi - %1:gpr(s64) = COPY $rsi - %2:gpr(s64) = G_UREM %0, %1 - $rax = COPY %2(s64) - RET 0, implicit $rax - -... 
diff --git a/llvm/test/CodeGen/X86/combine-smin.ll b/llvm/test/CodeGen/X86/combine-smin.ll index 87ae495f945e0a..b58934256a2092 100644 --- a/llvm/test/CodeGen/X86/combine-smin.ll +++ b/llvm/test/CodeGen/X86/combine-smin.ll @@ -70,9 +70,6 @@ define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) { ; SSE2-LABEL: test_v16i8_reassociation: ; SSE2: # %bb.0: ; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: pxor %xmm2, %xmm2 -; SSE2-NEXT: pcmpgtb %xmm0, %xmm2 -; SSE2-NEXT: pand %xmm2, %xmm0 ; SSE2-NEXT: pcmpgtb %xmm0, %xmm1 ; SSE2-NEXT: pand %xmm1, %xmm0 ; SSE2-NEXT: retq @@ -81,21 +78,18 @@ define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) { ; SSE41: # %bb.0: ; SSE41-NEXT: pxor %xmm1, %xmm1 ; SSE41-NEXT: pminsb %xmm1, %xmm0 -; SSE41-NEXT: pminsb %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; SSE42-LABEL: test_v16i8_reassociation: ; SSE42: # %bb.0: ; SSE42-NEXT: pxor %xmm1, %xmm1 ; SSE42-NEXT: pminsb %xmm1, %xmm0 -; SSE42-NEXT: pminsb %xmm1, %xmm0 ; SSE42-NEXT: retq ; ; AVX-LABEL: test_v16i8_reassociation: ; AVX: # %bb.0: ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq %1 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer) %2 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %1, <16 x i8> zeroinitializer) diff --git a/llvm/test/CodeGen/X86/combine-umax.ll b/llvm/test/CodeGen/X86/combine-umax.ll index 52bb9ee7fcb9f5..25f8ec891a2472 100644 --- a/llvm/test/CodeGen/X86/combine-umax.ll +++ b/llvm/test/CodeGen/X86/combine-umax.ll @@ -45,16 +45,12 @@ define <8 x i16> @test_v8i16_nosignbit(<8 x i16> %a, <8 x i16> %b) { define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) { ; SSE-LABEL: test_v16i8_reassociation: ; SSE: # %bb.0: -; SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; SSE-NEXT: pmaxub %xmm1, %xmm0 -; SSE-NEXT: pmaxub %xmm1, %xmm0 +; SSE-NEXT: pmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: 
test_v16i8_reassociation: ; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq %1 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %a, <16 x i8> ) %2 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %1, <16 x i8> ) diff --git a/llvm/test/CodeGen/X86/combine-umin.ll b/llvm/test/CodeGen/X86/combine-umin.ll index 5b3b7f942805d9..76dbcb50bf8c7c 100644 --- a/llvm/test/CodeGen/X86/combine-umin.ll +++ b/llvm/test/CodeGen/X86/combine-umin.ll @@ -62,16 +62,12 @@ define <8 x i16> @test_v8i16_nosignbit(<8 x i16> %a, <8 x i16> %b) { define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) { ; SSE-LABEL: test_v16i8_reassociation: ; SSE: # %bb.0: -; SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; SSE-NEXT: pminub %xmm1, %xmm0 -; SSE-NEXT: pminub %xmm1, %xmm0 +; SSE-NEXT: pminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test_v16i8_reassociation: ; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq %1 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %a, <16 x i8> ) %2 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %1, <16 x i8> ) diff --git a/llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll b/llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll new file mode 100644 index 00000000000000..527995bc2139ec --- /dev/null +++ b/llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll @@ -0,0 +1,714 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: 
llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE41 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 + +declare void @use.v4.i32(<4 x i32>) + +define <4 x i32> @eq_or_eq_ult_2(<4 x i32> %x) { +; AVX512-LABEL: eq_or_eq_ult_2: +; AVX512: # %bb.0: +; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1 +; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: retq +; +; AVX1-LABEL: eq_or_eq_ult_2: +; AVX1: # %bb.0: +; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: eq_or_eq_ult_2: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6] +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [5,5,5,5] +; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: eq_or_eq_ult_2: +; SSE41: # %bb.0: +; SSE41-NEXT: pmovsxbd {{.*#+}} xmm1 = [6,6,6,6] +; SSE41-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: por %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_eq_ult_2: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [6,6,6,6] +; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: retq + %x_adj = add <4 x i32> %x, + %cmp = icmp ult <4 x i32> %x_adj, + %r = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %r +} + +define <4 x i32> @eq_or_eq_ult_2_only_transform_sse2(<4 x i32> %x) { +; AVX512-LABEL: eq_or_eq_ult_2_only_transform_sse2: +; AVX512: # %bb.0: +; AVX512-NEXT: 
vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1 +; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z} +; AVX512-NEXT: retq +; +; AVX1-LABEL: eq_or_eq_ult_2_only_transform_sse2: +; AVX1: # %bb.0: +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: eq_or_eq_ult_2_only_transform_sse2: +; AVX2: # %bb.0: +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] +; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: eq_or_eq_ult_2_only_transform_sse2: +; SSE41: # %bb.0: +; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE41-NEXT: paddd %xmm1, %xmm0 +; SSE41-NEXT: pmovsxbd {{.*#+}} xmm1 = [1,1,1,1] +; SSE41-NEXT: pminud %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_eq_ult_2_only_transform_sse2: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2,2,2,2] +; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: retq + %x_adj = add <4 x i32> %x, + %cmp = icmp ult <4 x i32> %x_adj, + %r = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %r +} + +define <4 x i32> @eq_or_eq_ult_2_fail_multiuse(<4 x i32> %x) { +; AVX512-LABEL: eq_or_eq_ult_2_fail_multiuse: +; AVX512: # %bb.0: +; AVX512-NEXT: subq $24, %rsp +; AVX512-NEXT: .cfi_def_cfa_offset 32 +; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; AVX512-NEXT: callq use.v4.i32@PLT +; AVX512-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload +; AVX512-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, 
%k1 +; AVX512-NEXT: vmovdqa32 {{.*#+}} xmm0 {%k1} {z} = [4294967295,4294967295,4294967295,4294967295] +; AVX512-NEXT: addq $24, %rsp +; AVX512-NEXT: .cfi_def_cfa_offset 8 +; AVX512-NEXT: retq +; +; AVX1-LABEL: eq_or_eq_ult_2_fail_multiuse: +; AVX1: # %bb.0: +; AVX1-NEXT: subq $24, %rsp +; AVX1-NEXT: .cfi_def_cfa_offset 32 +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; AVX1-NEXT: callq use.v4.i32@PLT +; AVX1-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; AVX1-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: addq $24, %rsp +; AVX1-NEXT: .cfi_def_cfa_offset 8 +; AVX1-NEXT: retq +; +; AVX2-LABEL: eq_or_eq_ult_2_fail_multiuse: +; AVX2: # %bb.0: +; AVX2-NEXT: subq $24, %rsp +; AVX2-NEXT: .cfi_def_cfa_offset 32 +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; AVX2-NEXT: callq use.v4.i32@PLT +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm0 = [1,1,1,1] +; AVX2-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; AVX2-NEXT: vpminud %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: addq $24, %rsp +; AVX2-NEXT: .cfi_def_cfa_offset 8 +; AVX2-NEXT: retq +; +; SSE41-LABEL: eq_or_eq_ult_2_fail_multiuse: +; SSE41: # %bb.0: +; SSE41-NEXT: subq $24, %rsp +; SSE41-NEXT: .cfi_def_cfa_offset 32 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE41-NEXT: paddd %xmm1, %xmm0 +; SSE41-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill +; SSE41-NEXT: callq use.v4.i32@PLT +; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [1,1,1,1] +; SSE41-NEXT: movdqa (%rsp), %xmm1 # 16-byte Reload +; SSE41-NEXT: pminud %xmm1, %xmm0 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE41-NEXT: addq $24, %rsp +; SSE41-NEXT: .cfi_def_cfa_offset 8 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_eq_ult_2_fail_multiuse: +; SSE2: # %bb.0: +; SSE2-NEXT: subq $24, %rsp +; SSE2-NEXT: 
.cfi_def_cfa_offset 32 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE2-NEXT: paddd %xmm0, %xmm1 +; SSE2-NEXT: movdqa %xmm1, (%rsp) # 16-byte Spill +; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: callq use.v4.i32@PLT +; SSE2-NEXT: movdqa (%rsp), %xmm1 # 16-byte Reload +; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483650,2147483650,2147483650,2147483650] +; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE2-NEXT: addq $24, %rsp +; SSE2-NEXT: .cfi_def_cfa_offset 8 +; SSE2-NEXT: retq + %x_adj = add <4 x i32> %x, + call void @use.v4.i32(<4 x i32> %x_adj) + %cmp = icmp ult <4 x i32> %x_adj, + %r = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %r +} + +define <4 x i32> @eq_or_eq_ult_3_fail(<4 x i32> %x) { +; AVX512-LABEL: eq_or_eq_ult_3_fail: +; AVX512: # %bb.0: +; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1 +; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z} +; AVX512-NEXT: retq +; +; AVX1-LABEL: eq_or_eq_ult_3_fail: +; AVX1: # %bb.0: +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: eq_or_eq_ult_3_fail: +; AVX2: # %bb.0: +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2] +; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: eq_or_eq_ult_3_fail: +; SSE41: # %bb.0: +; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE41-NEXT: paddd %xmm1, %xmm0 +; SSE41-NEXT: pmovsxbd {{.*#+}} xmm1 = [2,2,2,2] +; SSE41-NEXT: pminud %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_eq_ult_3_fail: +; SSE2: # %bb.0: +; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE2-NEXT: paddd %xmm0, 
%xmm1 +; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483651,2147483651,2147483651,2147483651] +; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE2-NEXT: retq + %x_adj = add <4 x i32> %x, + %cmp = icmp ult <4 x i32> %x_adj, + %r = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %r +} + +define <4 x i32> @eq_or_eq_ugt_m3(<4 x i32> %x) { +; AVX512-LABEL: eq_or_eq_ugt_m3: +; AVX512: # %bb.0: +; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1 +; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: retq +; +; AVX-LABEL: eq_or_eq_ugt_m3: +; AVX: # %bb.0: +; AVX-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; SSE41-LABEL: eq_or_eq_ugt_m3: +; SSE41: # %bb.0: +; SSE41-NEXT: pmovsxbd {{.*#+}} xmm1 = [9,12,9,9] +; SSE41-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: por %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_eq_ugt_m3: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9,12,9,9] +; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: retq + %x_adj = add <4 x i32> %x, + %cmp = icmp ugt <4 x i32> %x_adj, + %r = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %r +} + +define <4 x i32> @eq_or_eq_ule_1(<4 x i32> %x) { +; AVX512-LABEL: eq_or_eq_ule_1: +; AVX512: # %bb.0: +; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpcmpleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1 +; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: retq +; +; AVX-LABEL: eq_or_eq_ule_1: +; AVX: # %bb.0: +; AVX-NEXT: vpcmpeqd 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; SSE41-LABEL: eq_or_eq_ule_1: +; SSE41: # %bb.0: +; SSE41-NEXT: pmovsxbd {{.*#+}} xmm1 = [0,4294967295,4294967294,4294967293] +; SSE41-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: por %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_eq_ule_1: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,4294967295,4294967294,4294967293] +; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: retq + %x_adj = add <4 x i32> %x, + %cmp = icmp ule <4 x i32> %x_adj, + %r = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %r +} + +define <2 x i64> @eq_or_eq_uge_m2_i64(<2 x i64> %x) { +; AVX512-LABEL: eq_or_eq_uge_m2_i64: +; AVX512: # %bb.0: +; AVX512-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpcmpnltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 +; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: retq +; +; AVX-LABEL: eq_or_eq_uge_m2_i64: +; AVX: # %bb.0: +; AVX-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; SSE41-LABEL: eq_or_eq_uge_m2_i64: +; SSE41: # %bb.0: +; SSE41-NEXT: pmovsxbq {{.*#+}} xmm1 = [18446744073709551613,18446744073709551612] +; SSE41-NEXT: pcmpeqq %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: por %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_eq_uge_m2_i64: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551613,18446744073709551612] +; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2] +; SSE2-NEXT: pand 
%xmm1, %xmm2 +; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] +; SSE2-NEXT: pand %xmm1, %xmm0 +; SSE2-NEXT: por %xmm2, %xmm0 +; SSE2-NEXT: retq + %x_adj = add <2 x i64> %x, + %cmp = icmp uge <2 x i64> %x_adj, + %r = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %r +} + + +define <2 x i64> @eq_or_eq_uge_m2_i64_m1(<2 x i64> %x) { +; AVX512-LABEL: eq_or_eq_uge_m2_i64_m1: +; AVX512: # %bb.0: +; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpcmpnltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 +; AVX512-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} +; AVX512-NEXT: retq +; +; AVX-LABEL: eq_or_eq_uge_m2_i64_m1: +; AVX: # %bb.0: +; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; SSE41-LABEL: eq_or_eq_uge_m2_i64_m1: +; SSE41: # %bb.0: +; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE41-NEXT: pcmpeqq %xmm0, %xmm1 +; SSE41-NEXT: pxor %xmm2, %xmm2 +; SSE41-NEXT: pcmpeqq %xmm2, %xmm0 +; SSE41-NEXT: por %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_eq_uge_m2_i64_m1: +; SSE2: # %bb.0: +; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2] +; SSE2-NEXT: pand %xmm1, %xmm2 +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] +; SSE2-NEXT: pand %xmm1, %xmm0 +; SSE2-NEXT: por %xmm2, %xmm0 +; SSE2-NEXT: retq + %x_adj = add <2 x i64> %x, + %cmp = icmp uge <2 x i64> %x_adj, + %r = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %r +} + +define <4 x i32> @eq_or_eq_uge_2_fail_(<4 x i32> %x) { +; AVX512-LABEL: eq_or_eq_uge_2_fail_: +; AVX512: # %bb.0: +; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpcmpnltud 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1 +; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: retq +; +; AVX1-LABEL: eq_or_eq_uge_2_fail_: +; AVX1: # %bb.0: +; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: eq_or_eq_uge_2_fail_: +; AVX2: # %bb.0: +; AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2] +; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: eq_or_eq_uge_2_fail_: +; SSE41: # %bb.0: +; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmovsxbd {{.*#+}} xmm1 = [2,2,2,2] +; SSE41-NEXT: pmaxud %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_eq_uge_2_fail_: +; SSE2: # %bb.0: +; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483650,2147483650,2147483650,2147483650] +; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: retq + %x_adj = add <4 x i32> %x, + %cmp = icmp uge <4 x i32> %x_adj, + %r = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %r +} + + +define <8 x i32> @eq_or_eq_ult_2_256(<8 x i32> %x) { +; AVX512-LABEL: eq_or_eq_ult_2_256: +; AVX512: # %bb.0: +; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 +; AVX512-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 +; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; AVX512-NEXT: retq +; +; AVX1-LABEL: eq_or_eq_ult_2_256: +; AVX1: # %bb.0: +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [4294967291,4294967291,4294967291,4294967291] +; AVX1-NEXT: 
vpaddd %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] +; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm3 +; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpminud %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: eq_or_eq_ult_2_256: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6] +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm1 +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [5,5,5,5,5,5,5,5] +; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: eq_or_eq_ult_2_256: +; SSE41: # %bb.0: +; SSE41-NEXT: pmovsxbd {{.*#+}} xmm2 = [6,6,6,6] +; SSE41-NEXT: movdqa %xmm0, %xmm3 +; SSE41-NEXT: pcmpeqd %xmm2, %xmm3 +; SSE41-NEXT: pmovsxbd {{.*#+}} xmm4 = [5,5,5,5] +; SSE41-NEXT: pcmpeqd %xmm4, %xmm0 +; SSE41-NEXT: por %xmm3, %xmm0 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm1 +; SSE41-NEXT: por %xmm2, %xmm1 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_eq_ult_2_256: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [6,6,6,6] +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm3 +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [5,5,5,5] +; SSE2-NEXT: pcmpeqd %xmm4, %xmm0 +; SSE2-NEXT: por %xmm3, %xmm0 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm2 +; SSE2-NEXT: pcmpeqd %xmm4, %xmm1 +; SSE2-NEXT: por %xmm2, %xmm1 +; SSE2-NEXT: retq + %x_adj = add <8 x i32> %x, + %cmp = icmp ult <8 x i32> %x_adj, + %r = sext <8 x i1> %cmp to <8 x i32> + ret <8 x i32> %r +} + + +define <8 x i32> @eq_or_eq_ult_2_256_m1(<8 x i32> %x) { +; AVX512-LABEL: eq_or_eq_ult_2_256_m1: +; AVX512: # %bb.0: +; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 +; AVX512-NEXT: vmovdqa32 %ymm1, 
%ymm0 {%k1} {z} +; AVX512-NEXT: retq +; +; AVX1-LABEL: eq_or_eq_ult_2_256_m1: +; AVX1: # %bb.0: +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] +; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm3 +; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpminud %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: eq_or_eq_ult_2_256_m1: +; AVX2: # %bb.0: +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] +; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm1 +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: eq_or_eq_ult_2_256_m1: +; SSE41: # %bb.0: +; SSE41-NEXT: pmovsxbd {{.*#+}} xmm2 = [2,2,2,2] +; SSE41-NEXT: movdqa %xmm0, %xmm3 +; SSE41-NEXT: pcmpeqd %xmm2, %xmm3 +; SSE41-NEXT: pmovsxbd {{.*#+}} xmm4 = [1,1,1,1] +; SSE41-NEXT: pcmpeqd %xmm4, %xmm0 +; SSE41-NEXT: por %xmm3, %xmm0 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm1 +; SSE41-NEXT: por %xmm2, %xmm1 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_eq_ult_2_256_m1: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2,2,2,2] +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm3 +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1,1,1,1] +; SSE2-NEXT: pcmpeqd %xmm4, %xmm0 +; SSE2-NEXT: por %xmm3, %xmm0 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm2 +; SSE2-NEXT: pcmpeqd %xmm4, %xmm1 +; SSE2-NEXT: por %xmm2, %xmm1 +; SSE2-NEXT: retq + %x_adj = add <8 x i32> %x, + %cmp = icmp ult <8 x i32> %x_adj, + %r = sext <8 x i1> %cmp to <8 x i32> + ret <8 x i32> %r +} + + +define <32 x i8> @eq_or_eq_ult_2_256_i8_m1(<32 x i8> %x) { +; AVX512-LABEL: eq_or_eq_ult_2_256_i8_m1: +; AVX512: # %bb.0: +; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; 
AVX512-NEXT: vpaddb %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 +; AVX512-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: retq +; +; AVX1-LABEL: eq_or_eq_ult_2_256_i8_m1: +; AVX1: # %bb.0: +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm3 +; AVX1-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpminub %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: eq_or_eq_ult_2_256_i8_m1: +; AVX2: # %bb.0: +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 +; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: eq_or_eq_ult_2_256_i8_m1: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] +; SSE41-NEXT: movdqa %xmm0, %xmm3 +; SSE41-NEXT: pcmpeqb %xmm2, %xmm3 +; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; SSE41-NEXT: pcmpeqb %xmm4, %xmm0 +; SSE41-NEXT: por %xmm3, %xmm0 +; SSE41-NEXT: pcmpeqb %xmm1, %xmm2 +; SSE41-NEXT: pcmpeqb %xmm4, %xmm1 +; SSE41-NEXT: por %xmm2, %xmm1 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_eq_ult_2_256_i8_m1: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: pcmpeqb %xmm2, %xmm3 +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; SSE2-NEXT: pcmpeqb %xmm4, %xmm0 +; SSE2-NEXT: por %xmm3, %xmm0 +; SSE2-NEXT: pcmpeqb %xmm1, %xmm2 +; SSE2-NEXT: pcmpeqb %xmm4, %xmm1 +; SSE2-NEXT: por %xmm2, %xmm1 +; SSE2-NEXT: retq + %x_adj = add <32 x i8> %x, + %cmp = icmp ult <32 x i8> %x_adj, + %r = 
sext <32 x i1> %cmp to <32 x i8> + ret <32 x i8> %r +} + + +define <16 x i8> @eq_or_eq_ult_2_128_i8_m1(<16 x i8> %x) { +; AVX512-LABEL: eq_or_eq_ult_2_128_i8_m1: +; AVX512: # %bb.0: +; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: eq_or_eq_ult_2_128_i8_m1: +; AVX: # %bb.0: +; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; SSE41-LABEL: eq_or_eq_ult_2_128_i8_m1: +; SSE41: # %bb.0: +; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE41-NEXT: paddb %xmm1, %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; SSE41-NEXT: pminub %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqb %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_eq_ult_2_128_i8_m1: +; SSE2: # %bb.0: +; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE2-NEXT: paddb %xmm1, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; SSE2-NEXT: pminub %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqb %xmm1, %xmm0 +; SSE2-NEXT: retq + %x_adj = add <16 x i8> %x, + %cmp = icmp ult <16 x i8> %x_adj, + %r = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %r +} + + +define <16 x i8> @eq_or_eq_ult_2_128_i8(<16 x i8> %x) { +; AVX512-LABEL: eq_or_eq_ult_2_128_i8: +; AVX512: # %bb.0: +; AVX512-NEXT: vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: eq_or_eq_ult_2_128_i8: +; AVX: # %bb.0: +; AVX-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; SSE41-LABEL: eq_or_eq_ult_2_128_i8: 
+; SSE41: # %bb.0: +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [232,232,232,232,232,232,232,232,232,232,232,232,232,232,232,232] +; SSE41-NEXT: pcmpeqb %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: por %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_eq_ult_2_128_i8: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [232,232,232,232,232,232,232,232,232,232,232,232,232,232,232,232] +; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: retq + %x_adj = add <16 x i8> %x, + %cmp = icmp ult <16 x i8> %x_adj, + %r = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %r +} diff --git a/llvm/test/CodeGen/X86/isel-sdiv.ll b/llvm/test/CodeGen/X86/isel-sdiv.ll new file mode 100644 index 00000000000000..6a6b2da8dc2f8d --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-sdiv.ll @@ -0,0 +1,116 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -global-isel=0 -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -global-isel=0 -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,DAG-X86 +; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,DAG-X86 +; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,GISEL-X86 + +define i8 @test_sdiv_i8(i8 %arg1, i8 %arg2) nounwind { +; X64-LABEL: test_sdiv_i8: +; X64: # %bb.0: +; X64-NEXT: movsbl %dil, %eax +; X64-NEXT: idivb %sil +; X64-NEXT: retq +; +; DAG-X86-LABEL: test_sdiv_i8: +; DAG-X86: # %bb.0: +; DAG-X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax +; DAG-X86-NEXT: idivb {{[0-9]+}}(%esp) +; DAG-X86-NEXT: retl +; +; 
GISEL-X86-LABEL: test_sdiv_i8: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: cbtw +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: idivb %cl +; GISEL-X86-NEXT: retl + %ret = sdiv i8 %arg1, %arg2 + ret i8 %ret +} + +define i16 @test_sdiv_i16(i16 %arg1, i16 %arg2) nounwind { +; X64-LABEL: test_sdiv_i16: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: cwtd +; X64-NEXT: idivw %si +; X64-NEXT: retq +; +; DAG-X86-LABEL: test_sdiv_i16: +; DAG-X86: # %bb.0: +; DAG-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; DAG-X86-NEXT: cwtd +; DAG-X86-NEXT: idivw {{[0-9]+}}(%esp) +; DAG-X86-NEXT: retl +; +; GISEL-X86-LABEL: test_sdiv_i16: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: # kill: def $ax killed $ax killed $eax +; GISEL-X86-NEXT: cwtd +; GISEL-X86-NEXT: idivw %cx +; GISEL-X86-NEXT: retl + %ret = sdiv i16 %arg1, %arg2 + ret i16 %ret +} + +define i32 @test_sdiv_i32(i32 %arg1, i32 %arg2) nounwind { +; X64-LABEL: test_sdiv_i32: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: cltd +; X64-NEXT: idivl %esi +; X64-NEXT: retq +; +; X86-LABEL: test_sdiv_i32: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cltd +; X86-NEXT: idivl {{[0-9]+}}(%esp) +; X86-NEXT: retl + %ret = sdiv i32 %arg1, %arg2 + ret i32 %ret +} + +define i64 @test_sdiv_i64(i64 %arg1, i64 %arg2) nounwind { +; X64-LABEL: test_sdiv_i64: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: cqto +; X64-NEXT: idivq %rsi +; X64-NEXT: retq +; +; DAG-X86-LABEL: test_sdiv_i64: +; DAG-X86: # %bb.0: +; DAG-X86-NEXT: subl $12, %esp +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: calll __divdi3 +; DAG-X86-NEXT: addl $28, %esp +; DAG-X86-NEXT: retl +; +; 
GISEL-X86-LABEL: test_sdiv_i64: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: pushl %esi +; GISEL-X86-NEXT: subl $24, %esp +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; GISEL-X86-NEXT: movl %eax, (%esp) +; GISEL-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: calll __divdi3 +; GISEL-X86-NEXT: addl $24, %esp +; GISEL-X86-NEXT: popl %esi +; GISEL-X86-NEXT: retl + %ret = sdiv i64 %arg1, %arg2 + ret i64 %ret +} diff --git a/llvm/test/CodeGen/X86/isel-srem.ll b/llvm/test/CodeGen/X86/isel-srem.ll new file mode 100644 index 00000000000000..56716e10a9d996 --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-srem.ll @@ -0,0 +1,150 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -global-isel=0 -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64,SDAG-X64 +; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64,FAST-X64 +; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64,GISEL-X64 +; RUN: llc < %s -global-isel=0 -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,DAG-X86,SDAG-X86 +; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,DAG-X86,FAST-X86 +; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,GISEL-X86 + +define i8 @test_srem_i8(i8 %arg1, i8 %arg2) nounwind { +; SDAG-X64-LABEL: test_srem_i8: +; SDAG-X64: # %bb.0: +; SDAG-X64-NEXT: movsbl %dil, %eax +; SDAG-X64-NEXT: idivb %sil +; SDAG-X64-NEXT: movsbl %ah, %eax +; SDAG-X64-NEXT: # kill: def $al killed $al killed $eax +; SDAG-X64-NEXT: retq +; +; FAST-X64-LABEL: test_srem_i8: +; FAST-X64: # %bb.0: 
+; FAST-X64-NEXT: movsbl %dil, %eax +; FAST-X64-NEXT: idivb %sil +; FAST-X64-NEXT: shrw $8, %ax +; FAST-X64-NEXT: # kill: def $al killed $al killed $ax +; FAST-X64-NEXT: retq +; +; GISEL-X64-LABEL: test_srem_i8: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: movsbl %dil, %eax +; GISEL-X64-NEXT: idivb %sil +; GISEL-X64-NEXT: shrw $8, %ax +; GISEL-X64-NEXT: # kill: def $al killed $al killed $ax +; GISEL-X64-NEXT: retq +; +; SDAG-X86-LABEL: test_srem_i8: +; SDAG-X86: # %bb.0: +; SDAG-X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax +; SDAG-X86-NEXT: idivb {{[0-9]+}}(%esp) +; SDAG-X86-NEXT: movsbl %ah, %eax +; SDAG-X86-NEXT: # kill: def $al killed $al killed $eax +; SDAG-X86-NEXT: retl +; +; FAST-X86-LABEL: test_srem_i8: +; FAST-X86: # %bb.0: +; FAST-X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax +; FAST-X86-NEXT: idivb {{[0-9]+}}(%esp) +; FAST-X86-NEXT: movb %ah, %al +; FAST-X86-NEXT: retl +; +; GISEL-X86-LABEL: test_srem_i8: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: cbtw +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: idivb %cl +; GISEL-X86-NEXT: movb %ah, %al +; GISEL-X86-NEXT: retl + %ret = srem i8 %arg1, %arg2 + ret i8 %ret +} + +define i16 @test_srem_i16(i16 %arg1, i16 %arg2) nounwind { +; X64-LABEL: test_srem_i16: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: cwtd +; X64-NEXT: idivw %si +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +; +; DAG-X86-LABEL: test_srem_i16: +; DAG-X86: # %bb.0: +; DAG-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; DAG-X86-NEXT: cwtd +; DAG-X86-NEXT: idivw {{[0-9]+}}(%esp) +; DAG-X86-NEXT: movl %edx, %eax +; DAG-X86-NEXT: retl +; +; GISEL-X86-LABEL: test_srem_i16: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: # kill: def $ax killed $ax killed $eax +; GISEL-X86-NEXT: cwtd +; GISEL-X86-NEXT: idivw %cx +; GISEL-X86-NEXT: movl %edx, %eax +; 
GISEL-X86-NEXT: retl + %ret = srem i16 %arg1, %arg2 + ret i16 %ret +} + +define i32 @test_srem_i32(i32 %arg1, i32 %arg2) nounwind { +; X64-LABEL: test_srem_i32: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: cltd +; X64-NEXT: idivl %esi +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +; +; X86-LABEL: test_srem_i32: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cltd +; X86-NEXT: idivl {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, %eax +; X86-NEXT: retl + %ret = srem i32 %arg1, %arg2 + ret i32 %ret +} + +define i64 @test_srem_i64(i64 %arg1, i64 %arg2) nounwind { +; X64-LABEL: test_srem_i64: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: cqto +; X64-NEXT: idivq %rsi +; X64-NEXT: movq %rdx, %rax +; X64-NEXT: retq +; +; DAG-X86-LABEL: test_srem_i64: +; DAG-X86: # %bb.0: +; DAG-X86-NEXT: subl $12, %esp +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: calll __moddi3 +; DAG-X86-NEXT: addl $28, %esp +; DAG-X86-NEXT: retl +; +; GISEL-X86-LABEL: test_srem_i64: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: pushl %esi +; GISEL-X86-NEXT: subl $24, %esp +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; GISEL-X86-NEXT: movl %eax, (%esp) +; GISEL-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: calll __moddi3 +; GISEL-X86-NEXT: addl $24, %esp +; GISEL-X86-NEXT: popl %esi +; GISEL-X86-NEXT: retl + %ret = srem i64 %arg1, %arg2 + ret i64 %ret +} diff --git a/llvm/test/CodeGen/X86/isel-udiv.ll b/llvm/test/CodeGen/X86/isel-udiv.ll new file mode 100644 index 00000000000000..b56b8b112fe471 --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-udiv.ll @@ -0,0 +1,116 @@ +; NOTE: Assertions have 
been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -global-isel=0 -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -global-isel=0 -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,DAG-X86 +; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,DAG-X86 +; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,GISEL-X86 + +define i8 @test_udiv_i8(i8 %arg1, i8 %arg2) nounwind { +; X64-LABEL: test_udiv_i8: +; X64: # %bb.0: +; X64-NEXT: movzbl %dil, %eax +; X64-NEXT: divb %sil +; X64-NEXT: retq +; +; DAG-X86-LABEL: test_udiv_i8: +; DAG-X86: # %bb.0: +; DAG-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; DAG-X86-NEXT: divb {{[0-9]+}}(%esp) +; DAG-X86-NEXT: retl +; +; GISEL-X86-LABEL: test_udiv_i8: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movzbl %al, %eax +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: divb %cl +; GISEL-X86-NEXT: retl + %ret = udiv i8 %arg1, %arg2 + ret i8 %ret +} + +define i16 @test_udiv_i16(i16 %arg1, i16 %arg2) nounwind { +; X64-LABEL: test_udiv_i16: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: divw %si +; X64-NEXT: retq +; +; DAG-X86-LABEL: test_udiv_i16: +; DAG-X86: # %bb.0: +; DAG-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; DAG-X86-NEXT: xorl %edx, %edx +; DAG-X86-NEXT: divw {{[0-9]+}}(%esp) +; DAG-X86-NEXT: retl +; +; GISEL-X86-LABEL: test_udiv_i16: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: # kill: def $ax killed $ax killed 
$eax +; GISEL-X86-NEXT: xorl %edx, %edx +; GISEL-X86-NEXT: divw %cx +; GISEL-X86-NEXT: retl + %ret = udiv i16 %arg1, %arg2 + ret i16 %ret +} + +define i32 @test_udiv_i32(i32 %arg1, i32 %arg2) nounwind { +; X64-LABEL: test_udiv_i32: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: divl %esi +; X64-NEXT: retq +; +; X86-LABEL: test_udiv_i32: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: divl {{[0-9]+}}(%esp) +; X86-NEXT: retl + %ret = udiv i32 %arg1, %arg2 + ret i32 %ret +} + +define i64 @test_udiv_i64(i64 %arg1, i64 %arg2) nounwind { +; X64-LABEL: test_udiv_i64: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: divq %rsi +; X64-NEXT: retq +; +; DAG-X86-LABEL: test_udiv_i64: +; DAG-X86: # %bb.0: +; DAG-X86-NEXT: subl $12, %esp +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: calll __udivdi3 +; DAG-X86-NEXT: addl $28, %esp +; DAG-X86-NEXT: retl +; +; GISEL-X86-LABEL: test_udiv_i64: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: pushl %esi +; GISEL-X86-NEXT: subl $24, %esp +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; GISEL-X86-NEXT: movl %eax, (%esp) +; GISEL-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: calll __udivdi3 +; GISEL-X86-NEXT: addl $24, %esp +; GISEL-X86-NEXT: popl %esi +; GISEL-X86-NEXT: retl + %ret = udiv i64 %arg1, %arg2 + ret i64 %ret +} diff --git a/llvm/test/CodeGen/X86/isel-urem.ll b/llvm/test/CodeGen/X86/isel-urem.ll new file mode 100644 index 00000000000000..50b9c1250ff875 --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-urem.ll @@ -0,0 +1,150 @@ +; NOTE: 
Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -global-isel=0 -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64,SDAG-X64 +; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64,FAST-X64 +; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64,GISEL-X64 +; RUN: llc < %s -global-isel=0 -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,DAG-X86,SDAG-X86 +; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,DAG-X86,FAST-X86 +; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,GISEL-X86 + +define i8 @test_urem_i8(i8 %arg1, i8 %arg2) nounwind { +; SDAG-X64-LABEL: test_urem_i8: +; SDAG-X64: # %bb.0: +; SDAG-X64-NEXT: movzbl %dil, %eax +; SDAG-X64-NEXT: divb %sil +; SDAG-X64-NEXT: movzbl %ah, %eax +; SDAG-X64-NEXT: # kill: def $al killed $al killed $eax +; SDAG-X64-NEXT: retq +; +; FAST-X64-LABEL: test_urem_i8: +; FAST-X64: # %bb.0: +; FAST-X64-NEXT: movzbl %dil, %eax +; FAST-X64-NEXT: divb %sil +; FAST-X64-NEXT: shrw $8, %ax +; FAST-X64-NEXT: # kill: def $al killed $al killed $ax +; FAST-X64-NEXT: retq +; +; GISEL-X64-LABEL: test_urem_i8: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: movzbl %dil, %eax +; GISEL-X64-NEXT: divb %sil +; GISEL-X64-NEXT: shrw $8, %ax +; GISEL-X64-NEXT: # kill: def $al killed $al killed $ax +; GISEL-X64-NEXT: retq +; +; SDAG-X86-LABEL: test_urem_i8: +; SDAG-X86: # %bb.0: +; SDAG-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; SDAG-X86-NEXT: divb {{[0-9]+}}(%esp) +; SDAG-X86-NEXT: movzbl %ah, %eax +; SDAG-X86-NEXT: # kill: def $al killed $al killed $eax +; SDAG-X86-NEXT: retl +; +; FAST-X86-LABEL: test_urem_i8: +; FAST-X86: # %bb.0: +; FAST-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; FAST-X86-NEXT: divb {{[0-9]+}}(%esp) +; FAST-X86-NEXT: movb %ah, %al +; FAST-X86-NEXT: retl +; +; 
GISEL-X86-LABEL: test_urem_i8: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movzbl %al, %eax +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: divb %cl +; GISEL-X86-NEXT: movb %ah, %al +; GISEL-X86-NEXT: retl + %ret = urem i8 %arg1, %arg2 + ret i8 %ret +} + +define i16 @test_urem_i16(i16 %arg1, i16 %arg2) nounwind { +; X64-LABEL: test_urem_i16: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: divw %si +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +; +; DAG-X86-LABEL: test_urem_i16: +; DAG-X86: # %bb.0: +; DAG-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; DAG-X86-NEXT: xorl %edx, %edx +; DAG-X86-NEXT: divw {{[0-9]+}}(%esp) +; DAG-X86-NEXT: movl %edx, %eax +; DAG-X86-NEXT: retl +; +; GISEL-X86-LABEL: test_urem_i16: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: # kill: def $ax killed $ax killed $eax +; GISEL-X86-NEXT: xorl %edx, %edx +; GISEL-X86-NEXT: divw %cx +; GISEL-X86-NEXT: movl %edx, %eax +; GISEL-X86-NEXT: retl + %ret = urem i16 %arg1, %arg2 + ret i16 %ret +} + +define i32 @test_urem_i32(i32 %arg1, i32 %arg2) nounwind { +; X64-LABEL: test_urem_i32: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: divl %esi +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +; +; X86-LABEL: test_urem_i32: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: divl {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, %eax +; X86-NEXT: retl + %ret = urem i32 %arg1, %arg2 + ret i32 %ret +} + +define i64 @test_urem_i64(i64 %arg1, i64 %arg2) nounwind { +; X64-LABEL: test_urem_i64: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: divq %rsi +; X64-NEXT: movq %rdx, %rax +; X64-NEXT: retq +; +; DAG-X86-LABEL: test_urem_i64: +; DAG-X86: # %bb.0: +; 
DAG-X86-NEXT: subl $12, %esp +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: pushl {{[0-9]+}}(%esp) +; DAG-X86-NEXT: calll __umoddi3 +; DAG-X86-NEXT: addl $28, %esp +; DAG-X86-NEXT: retl +; +; GISEL-X86-LABEL: test_urem_i64: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: pushl %esi +; GISEL-X86-NEXT: subl $24, %esp +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; GISEL-X86-NEXT: movl %eax, (%esp) +; GISEL-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: calll __umoddi3 +; GISEL-X86-NEXT: addl $24, %esp +; GISEL-X86-NEXT: popl %esi +; GISEL-X86-NEXT: retl + %ret = urem i64 %arg1, %arg2 + ret i64 %ret +} diff --git a/llvm/test/DebugInfo/dwarfdump-debug-frame-simple.test b/llvm/test/DebugInfo/dwarfdump-debug-frame-simple.test index 6c049af43efe74..2cd281c8d0af9f 100644 --- a/llvm/test/DebugInfo/dwarfdump-debug-frame-simple.test +++ b/llvm/test/DebugInfo/dwarfdump-debug-frame-simple.test @@ -12,15 +12,15 @@ ; FRAMES-NEXT: DW_CFA_nop: ; FRAMES: 00000014 00000010 00000000 FDE cie=00000000 pc=00000000...00000022 -; FRAMES: DW_CFA_advance_loc: 3 +; FRAMES: DW_CFA_advance_loc: 3 to 0x3 ; FRAMES-NEXT: DW_CFA_def_cfa_offset: +12 ; FRAMES-NEXT: DW_CFA_nop: ; FRAMES: 00000028 00000014 00000000 FDE cie=00000000 pc=00000030...00000080 -; FRAMES: DW_CFA_advance_loc: 1 +; FRAMES: DW_CFA_advance_loc: 1 to 0x31 ; FRAMES-NEXT: DW_CFA_def_cfa_offset: +8 ; FRAMES-NEXT: DW_CFA_offset: {{reg5|EBP}} -8 -; FRAMES-NEXT: DW_CFA_advance_loc: 2 +; FRAMES-NEXT: DW_CFA_advance_loc: 2 to 0x33 ; FRAMES-NEXT: DW_CFA_def_cfa_register: {{reg5|EBP}} ; FRAMES-NOT: CIE diff --git a/llvm/test/DebugInfo/roundtrip-non-instruction-debug-info.ll 
b/llvm/test/DebugInfo/roundtrip-non-instruction-debug-info.ll new file mode 100644 index 00000000000000..b15b76d1690c41 --- /dev/null +++ b/llvm/test/DebugInfo/roundtrip-non-instruction-debug-info.ll @@ -0,0 +1,94 @@ +;; Test that we can write in the old debug info format. +; RUN: opt --passes=verify -S --write-experimental-debuginfo=false < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,OLDDBG --implicit-check-not=llvm.dbg --implicit-check-not=#dbg + +;; Test that we can write in the new debug info format... +; RUN: opt --passes=verify -S --write-experimental-debuginfo=true < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,NEWDBG --implicit-check-not=llvm.dbg --implicit-check-not=#dbg + +;; ...and then read the new format and write the old format. +; RUN: opt --passes=verify -S --write-experimental-debuginfo=true < %s \ +; RUN: | opt --passes=verify -S --write-experimental-debuginfo=false \ +; RUN: | FileCheck %s --check-prefixes=CHECK,OLDDBG --implicit-check-not=llvm.dbg --implicit-check-not=#dbg + +;; Test also that the new flag is independent of the flag that enables use of +;; these non-instruction debug info during LLVM passes. 
+; RUN: opt --passes=verify -S --try-experimental-debuginfo-iterators --write-experimental-debuginfo=false < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,OLDDBG --implicit-check-not=llvm.dbg --implicit-check-not=#dbg +; RUN: opt --passes=verify -S --try-experimental-debuginfo-iterators --write-experimental-debuginfo=true < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,NEWDBG --implicit-check-not=llvm.dbg --implicit-check-not=#dbg + +; CHECK: @f(i32 %[[VAL_A:[0-9a-zA-Z]+]]) +; CHECK-NEXT: entry: +; OLDDBG-NEXT: call void @llvm.dbg.value(metadata i32 %[[VAL_A]], metadata ![[VAR_A:[0-9]+]], metadata !DIExpression()), !dbg ![[LOC_1:[0-9]+]] +; NEWDBG-NEXT: {{^}} #dbg_value(i32 %[[VAL_A]], ![[VAR_A:[0-9]+]], !DIExpression(), ![[LOC_1:[0-9]+]]) +; CHECK-NEXT: {{^}} %[[VAL_B:[0-9a-zA-Z]+]] = alloca +; OLDDBG-NEXT: call void @llvm.dbg.declare(metadata ptr %[[VAL_B]], metadata ![[VAR_B:[0-9]+]], metadata !DIExpression()), !dbg ![[LOC_2:[0-9]+]] +; NEWDBG-NEXT: {{^}} #dbg_declare(ptr %[[VAL_B]], ![[VAR_B:[0-9]+]], !DIExpression(), ![[LOC_2:[0-9]+]]) +; CHECK-NEXT: {{^}} %[[VAL_ADD:[0-9a-zA-Z]+]] = add i32 %[[VAL_A]], 5 +; OLDDBG-NEXT: call void @llvm.dbg.value(metadata !DIArgList(i32 %[[VAL_A]], i32 %[[VAL_ADD]]), metadata ![[VAR_A]], metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus)), !dbg ![[LOC_3:[0-9]+]] +; NEWDBG-NEXT: {{^}} #dbg_value(!DIArgList(i32 %[[VAL_A]], i32 %[[VAL_ADD]]), ![[VAR_A]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus), ![[LOC_3:[0-9]+]]) +; OLDDBG-NEXT: call void @llvm.dbg.label(metadata ![[LABEL_ID:[0-9]+]]), !dbg ![[LOC_3]] +; NEWDBG-NEXT: {{^}} #dbg_label(![[LABEL_ID:[0-9]+]], ![[LOC_3]]) +; CHECK-NEXT: {{^}} store i32 %[[VAL_ADD]]{{.+}}, !DIAssignID ![[ASSIGNID:[0-9]+]] +; OLDDBG-NEXT: call void @llvm.dbg.assign(metadata i32 %[[VAL_ADD]], metadata ![[VAR_B]], metadata !DIExpression(), metadata ![[ASSIGNID]], metadata ptr %[[VAL_B]], metadata !DIExpression()), !dbg ![[LOC_4:[0-9]+]] +; 
NEWDBG-NEXT: {{^}} #dbg_assign(i32 %[[VAL_ADD]], ![[VAR_B]], !DIExpression(), ![[ASSIGNID]], ptr %[[VAL_B]], !DIExpression(), ![[LOC_4:[0-9]+]]) +; CHECK-NEXT: {{^}} ret i32 + +; OLDDBG-DAG: declare void @llvm.dbg.value +; OLDDBG-DAG: declare void @llvm.dbg.declare +; OLDDBG-DAG: declare void @llvm.dbg.assign +; OLDDBG-DAG: declare void @llvm.dbg.label + +; CHECK-DAG: llvm.dbg.cu +; CHECK-DAG: ![[VAR_A]] = !DILocalVariable(name: "a" +; CHECK-DAG: ![[VAR_B]] = !DILocalVariable(name: "b" +; CHECK-DAG: ![[LOC_1]] = !DILocation(line: 3, column: 15 +; CHECK-DAG: ![[LOC_2]] = !DILocation(line: 3, column: 20 +; CHECK-DAG: ![[LOC_3]] = !DILocation(line: 3, column: 25 +; CHECK-DAG: ![[LOC_4]] = !DILocation(line: 3, column: 30 +; CHECK-DAG: ![[LABEL_ID]] = !DILabel( + +define dso_local i32 @f(i32 %a) !dbg !7 { +entry: + call void @llvm.dbg.value(metadata i32 %a, metadata !20, metadata !DIExpression()), !dbg !30 + %b = alloca i32, !dbg !30, !DIAssignID !40 + call void @llvm.dbg.declare(metadata ptr %b, metadata !21, metadata !DIExpression()), !dbg !31 + %add = add i32 %a, 5, !dbg !31 + call void @llvm.dbg.value(metadata !DIArgList(i32 %a, i32 %add), metadata !20, metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus)), !dbg !32 + call void @llvm.dbg.label(metadata !50), !dbg !32 + store i32 %add, ptr %b, !dbg !32, !DIAssignID !40 + call void @llvm.dbg.assign(metadata i32 %add, metadata !21, metadata !DIExpression(), metadata !40, metadata ptr %b, metadata !DIExpression()), !dbg !33 + ret i32 %add, !dbg !33 + +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) +declare void @llvm.dbg.declare(metadata, metadata, metadata) +declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata) +declare void @llvm.dbg.label(metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 18.0.0", isOptimized: true, 
runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "print.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 5} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 18.0.0"} +!7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 3, type: !8, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !13) +!8 = !DISubroutineType(types: !9) +!9 = !{!12, !12} +!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!13 = !{!20, !21} +!20 = !DILocalVariable(name: "a", arg: 1, scope: !7, file: !1, line: 3, type: !12) +!21 = !DILocalVariable(name: "b", scope: !7, file: !1, line: 3, type: !12) +!30 = !DILocation(line: 3, column: 15, scope: !7) +!31 = !DILocation(line: 3, column: 20, scope: !7) +!32 = !DILocation(line: 3, column: 25, scope: !7) +!33 = !DILocation(line: 3, column: 30, scope: !7) +!40 = distinct !DIAssignID() +!50 = !DILabel(scope: !7, name: "label", file: !1, line: 3) \ No newline at end of file diff --git a/llvm/test/ExecutionEngine/JITLink/x86-64/ELF_vtune.s b/llvm/test/ExecutionEngine/JITLink/x86-64/ELF_vtune.s new file mode 100644 index 00000000000000..1c95bde51e1211 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/x86-64/ELF_vtune.s @@ -0,0 +1,52 @@ +# REQUIRES: native && x86_64-linux && intel-jitevents + +# RUN: rm -rf %t && mkdir %t +# RUN: llvm-mc -triple=x86_64-unknown-linux \ +# RUN: -filetype=obj -o %t/ELF_x86-64_vtune.o %s +# RUN: llvm-jitlink -vtune-support %t/ELF_x86-64_vtune.o | \ +# RUN: FileCheck %s + +# CHECK: Method load [0]: {{.*}}, Size = {{[0-9]+}} +# CHECK: Method unload [0] + .file "test.c" + .text + .globl main + .type main, @function +main: +.LFB0: + .cfi_startproc + endbr64 + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset 6, -16 + movq %rsp, %rbp + .cfi_def_cfa_register 6 + movl %edi, -4(%rbp) + movq %rsi, -16(%rbp) + 
movl -4(%rbp), %ebx + addl $1, %ebx + movl $0, %eax + popq %rbp + .cfi_def_cfa 7, 8 + ret + .cfi_endproc +.LFE0: + .size main, .-main + .ident "GCC: (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0" + .section .note.GNU-stack,"",@progbits + .section .note.gnu.property,"a" + .align 8 + .long 1f - 0f + .long 4f - 1f + .long 5 +0: + .string "GNU" +1: + .align 8 + .long 0xc0000002 + .long 3f - 2f +2: + .long 0x3 +3: + .align 8 +4: diff --git a/llvm/test/ExecutionEngine/JITLink/x86-64/lit.local.cfg b/llvm/test/ExecutionEngine/JITLink/x86-64/lit.local.cfg index 42bf50dcc13c35..d5a1ad626b657d 100644 --- a/llvm/test/ExecutionEngine/JITLink/x86-64/lit.local.cfg +++ b/llvm/test/ExecutionEngine/JITLink/x86-64/lit.local.cfg @@ -1,2 +1,5 @@ if not "X86" in config.root.targets: config.unsupported = True + +if config.llvm_use_intel_jitevents: + config.available_features.add("intel-jitevents") diff --git a/llvm/test/Instrumentation/AddressSanitizer/aarch64be.ll b/llvm/test/Instrumentation/AddressSanitizer/aarch64be.ll index eb522a0f3f3173..aeb1b0e8ebe778 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/aarch64be.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/aarch64be.ll @@ -2,9 +2,9 @@ ; RUN: opt < %s -passes=asan -S -mtriple=aarch64_be-linux-gnu | FileCheck --check-prefix=CHECK-AARCH64BE %s ; REQUIRES: aarch64-registered-target -define i32 @read_4_bytes(i32* %a) sanitize_address { +define i32 @read_4_bytes(ptr %a) sanitize_address { entry: - %tmp1 = load i32, i32* %a, align 4 + %tmp1 = load i32, ptr %a, align 4 ret i32 %tmp1 } diff --git a/llvm/test/Instrumentation/AddressSanitizer/program-addrspace.ll b/llvm/test/Instrumentation/AddressSanitizer/program-addrspace.ll index adfe21135e7ada..1d5bfb09ead97c 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/program-addrspace.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/program-addrspace.ll @@ -16,7 +16,7 @@ target datalayout = "P1" define i1 @b(i64 %c) addrspace(1) { %cast = inttoptr i64 %c to ptr addrspace(42) - %cmp = 
icmp ugt ptr addrspace(42) %cast, getelementptr inbounds ([1 x i32], ptr addrspace(42) @a, i64 0, i64 0) + %cmp = icmp ugt ptr addrspace(42) %cast, @a ret i1 %cmp } diff --git a/llvm/test/Instrumentation/InstrProfiling/before-value-profile-lowering.ll b/llvm/test/Instrumentation/InstrProfiling/before-value-profile-lowering.ll index 5dfec433f4ecb3..870e74ccfdac46 100644 --- a/llvm/test/Instrumentation/InstrProfiling/before-value-profile-lowering.ll +++ b/llvm/test/Instrumentation/InstrProfiling/before-value-profile-lowering.ll @@ -7,17 +7,17 @@ target triple = "x86_64-unknown-linux-gnu" -declare void @llvm.instrprof.increment.step(i8*, i64, i32, i32, i64) +declare void @llvm.instrprof.increment.step(ptr, i64, i32, i32, i64) -declare void @llvm.instrprof.value.profile(i8*, i64, i64, i32, i32) +declare void @llvm.instrprof.value.profile(ptr, i64, i64, i32, i32) ; CHECK: @__profd_foo = private global @__profn_foo = private constant [3 x i8] c"foo" -define i32 @foo(i32 ()* ) { - %2 = ptrtoint i32 ()* %0 to i64 - call void @llvm.instrprof.value.profile(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 0, i64 %2, i32 0, i32 0) - call void @llvm.instrprof.increment.step(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 0, i32 1, i32 0, i64 0) +define i32 @foo(ptr ) { + %2 = ptrtoint ptr %0 to i64 + call void @llvm.instrprof.value.profile(ptr @__profn_foo, i64 0, i64 %2, i32 0, i32 0) + call void @llvm.instrprof.increment.step(ptr @__profn_foo, i64 0, i32 1, i32 0, i64 0) %3 = tail call i32 %0() ret i32 %3 } diff --git a/llvm/test/Instrumentation/InstrProfiling/timestamp-coverage.ll b/llvm/test/Instrumentation/InstrProfiling/timestamp-coverage.ll index ab9b664a2cff65..d40cc2ac02c1bc 100644 --- a/llvm/test/Instrumentation/InstrProfiling/timestamp-coverage.ll +++ b/llvm/test/Instrumentation/InstrProfiling/timestamp-coverage.ll @@ -6,11 +6,11 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK: @__profc_foo = 
private global [9 x i8] c"\FF\FF\FF\FF\FF\FF\FF\FF\FF", section "__llvm_prf_cnts", comdat, align 8 define void @_Z3foov() { - call void @llvm.instrprof.timestamp(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12345678, i32 9, i32 0) + call void @llvm.instrprof.timestamp(ptr @__profn_foo, i64 12345678, i32 9, i32 0) ; CHECK: call void @__llvm_profile_set_timestamp(ptr @__profc_foo) - call void @llvm.instrprof.cover(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12345678, i32 9, i32 8) + call void @llvm.instrprof.cover(ptr @__profn_foo, i64 12345678, i32 9, i32 8) ret void } -declare void @llvm.instrprof.timestamp(i8*, i64, i32, i32) -declare void @llvm.instrprof.cover(i8*, i64, i32, i32) +declare void @llvm.instrprof.timestamp(ptr, i64, i32, i32) +declare void @llvm.instrprof.cover(ptr, i64, i32, i32) diff --git a/llvm/test/Instrumentation/InstrProfiling/timestamp.ll b/llvm/test/Instrumentation/InstrProfiling/timestamp.ll index aa2393695d6b85..c08ba4485fc5dd 100644 --- a/llvm/test/Instrumentation/InstrProfiling/timestamp.ll +++ b/llvm/test/Instrumentation/InstrProfiling/timestamp.ll @@ -6,11 +6,11 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK: @__profc_foo = private global [2 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat, align 8 define void @_Z3foov() { - call void @llvm.instrprof.timestamp(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12345678, i32 2, i32 0) + call void @llvm.instrprof.timestamp(ptr @__profn_foo, i64 12345678, i32 2, i32 0) ; CHECK: call void @__llvm_profile_set_timestamp(ptr @__profc_foo) - call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12345678, i32 2, i32 1) + call void @llvm.instrprof.increment(ptr @__profn_foo, i64 12345678, i32 2, i32 1) ret void } -declare void @llvm.instrprof.timestamp(i8*, i64, i32, i32) -declare void @llvm.instrprof.increment(i8*, 
i64, i32, i32) +declare void @llvm.instrprof.timestamp(ptr, i64, i32, i32) +declare void @llvm.instrprof.increment(ptr, i64, i32, i32) diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/basic.mir b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/basic.mir index 94d0ddad25944d..cb06d90ccd7fdf 100644 --- a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/basic.mir +++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/basic.mir @@ -1,4 +1,4 @@ -# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s +# RUN: not --crash llc -mtriple=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s --- name: basic tracksRegLiveness: true diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/cycles.mir b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/cycles.mir index 87cf3e604929bb..d935d8ea4be506 100644 --- a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/cycles.mir +++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/cycles.mir @@ -1,4 +1,4 @@ -# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s +# RUN: not --crash llc -mtriple=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s --- name: cycles body: | diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/mixed2.mir b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/mixed2.mir index c70a48bf21309e..7893837126e799 100644 --- a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/mixed2.mir +++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/mixed2.mir @@ -1,4 +1,4 @@ -# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s +# RUN: not --crash llc -mtriple=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s --- name: mixed2 body: | diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/region-nesting.mir 
b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/region-nesting.mir index 9e869acb3e9381..e9588d25d774d2 100644 --- a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/region-nesting.mir +++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/region-nesting.mir @@ -1,4 +1,4 @@ -# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s +# RUN: not --crash llc -mtriple=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s --- name: region_nesting body: | diff --git a/llvm/test/MachineVerifier/test_g_splat_vector.mir b/llvm/test/MachineVerifier/test_g_splat_vector.mir new file mode 100644 index 00000000000000..0d1d8a3e6dcc64 --- /dev/null +++ b/llvm/test/MachineVerifier/test_g_splat_vector.mir @@ -0,0 +1,27 @@ +# RUN: not --crash llc -o - -mtriple=arm64 -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s +# REQUIRES: aarch64-registered-target +--- +name: g_splat_vector +tracksRegLiveness: true +liveins: +body: | + bb.0: + %0:_(s32) = G_CONSTANT i32 0 + %1:_(<2 x s32>) = G_IMPLICIT_DEF + %2:_() = G_IMPLICIT_DEF + + ; CHECK: Destination type must be a scalable vector + %3:_(s32) = G_SPLAT_VECTOR %0 + + ; CHECK: Destination type must be a scalable vector + %4:_(<2 x s32>) = G_SPLAT_VECTOR %0 + + ; CHECK: Source type must be a scalar + %5:_() = G_SPLAT_VECTOR %1 + + ; CHECK: Source type must be a scalar + %6:_() = G_SPLAT_VECTOR %2 + + ; CHECK: Element type of the destination must be the same type as the source type + %7:_() = G_SPLAT_VECTOR %0 +... 
diff --git a/llvm/test/Object/Inputs/small.ll b/llvm/test/Object/Inputs/small.ll index ef68a8c324a32f..677f20ade4c5bf 100644 --- a/llvm/test/Object/Inputs/small.ll +++ b/llvm/test/Object/Inputs/small.ll @@ -4,15 +4,15 @@ target triple = "i386-pc-windows" define i32 @main() nounwind { entry: - %call = tail call i32 @puts(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0)) nounwind - tail call void bitcast (void (...)* @SomeOtherFunction to void ()*)() nounwind + %call = tail call i32 @puts(ptr @.str) nounwind + tail call void @SomeOtherFunction() nounwind ret i32 0 } -declare i32 @puts(i8* nocapture) nounwind +declare i32 @puts(ptr nocapture) nounwind declare void @SomeOtherFunction(...) @var = global i32 0 -@llvm.used = appending global [1 x i8*] [i8* bitcast (i32* @var to i8*)], section "llvm.metadata" -@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* null, i8* null }] +@llvm.used = appending global [1 x ptr] [ptr @var], section "llvm.metadata" +@llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr null, ptr null }] diff --git a/llvm/test/Object/Inputs/trivial.ll b/llvm/test/Object/Inputs/trivial.ll index 82eabc6389fb84..1a6a76298b23ee 100644 --- a/llvm/test/Object/Inputs/trivial.ll +++ b/llvm/test/Object/Inputs/trivial.ll @@ -5,15 +5,15 @@ define i32 @main() nounwind { entry: - %call = tail call i32 @puts(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0)) nounwind - tail call void bitcast (void (...)* @SomeOtherFunction to void ()*)() nounwind + %call = tail call i32 @puts(ptr @.str) nounwind + tail call void @SomeOtherFunction() nounwind ret i32 0 } -declare i32 @puts(i8* nocapture) nounwind +declare i32 @puts(ptr nocapture) nounwind declare void @SomeOtherFunction(...) 
@var = global i32 0 -@llvm.used = appending global [1 x i8*] [i8* bitcast (i32* @var to i8*)], section "llvm.metadata" -@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* null, i8* null }] +@llvm.used = appending global [1 x ptr] [ptr @var], section "llvm.metadata" +@llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr null, ptr null }] diff --git a/llvm/test/Object/X86/irsymtab-bad-alias.ll b/llvm/test/Object/X86/irsymtab-bad-alias.ll index c54436d5921929..7f204d1dd15730 100644 --- a/llvm/test/Object/X86/irsymtab-bad-alias.ll +++ b/llvm/test/Object/X86/irsymtab-bad-alias.ll @@ -11,5 +11,5 @@ target triple = "x86_64-unknown-linux-gnu" @g1 = global i32 1 @g2 = global i32 2 -@a = alias i32, inttoptr(i32 sub (i32 ptrtoint (i32* @g1 to i32), - i32 ptrtoint (i32* @g2 to i32)) to i32*) +@a = alias i32, inttoptr(i32 sub (i32 ptrtoint (ptr @g1 to i32), + i32 ptrtoint (ptr @g2 to i32)) to ptr) diff --git a/llvm/test/Object/X86/nm-ir.ll b/llvm/test/Object/X86/nm-ir.ll index e57c6d9a11c6e4..0324efb2948d17 100644 --- a/llvm/test/Object/X86/nm-ir.ll +++ b/llvm/test/Object/X86/nm-ir.ll @@ -29,15 +29,15 @@ module asm ".long undef_asm_sym" @g3 = common global i32 0 @g4 = private global i32 42 -@a1 = alias i32, i32* @g1 -@a2 = internal alias i32, i32* @g1 +@a1 = alias i32, ptr @g1 +@a2 = internal alias i32, ptr @g1 -define void ()* @f1() { +define ptr @f1() { call void @f5() - ret void ()* null + ret ptr null } -@ifunc_f1 = ifunc void (), void ()* ()* @f1 +@ifunc_f1 = ifunc void (), ptr @f1 define internal void @f2() { ret void diff --git a/llvm/test/Object/dllimport-globalref.ll b/llvm/test/Object/dllimport-globalref.ll index dd518bc2266cac..0a95be20a9d175 100644 --- a/llvm/test/Object/dllimport-globalref.ll +++ b/llvm/test/Object/dllimport-globalref.ll @@ -11,4 +11,4 @@ target triple = "x86_64-pc-windows-msvc" ; CHECK: U f declare dllimport void @f() -@fp = constant void ()* 
@f +@fp = constant ptr @f diff --git a/llvm/test/Object/dllimport.ll b/llvm/test/Object/dllimport.ll index afdb4562cc9fb8..52f583fa2487e7 100644 --- a/llvm/test/Object/dllimport.ll +++ b/llvm/test/Object/dllimport.ll @@ -12,6 +12,6 @@ declare dllimport void @f() define void @g() { call void @f() - store i32 42, i32* @v + store i32 42, ptr @v ret void } diff --git a/llvm/test/Object/mangle-ir.ll b/llvm/test/Object/mangle-ir.ll index bd7c3d93b7c9a2..76442f070385ec 100644 --- a/llvm/test/Object/mangle-ir.ll +++ b/llvm/test/Object/mangle-ir.ll @@ -7,8 +7,8 @@ target datalayout = "m:o" ; CHECK-NOT: memcpy define void @f() { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* null, i8* null, i64 0, i1 false) + tail call void @llvm.memcpy.p0.p0.i64(ptr null, ptr null, i64 0, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) +declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1) diff --git a/llvm/test/Object/objc-swift-mixed-imageinfo-macho.ll b/llvm/test/Object/objc-swift-mixed-imageinfo-macho.ll index d2518f46cc27a8..c506c9687ec2a4 100644 --- a/llvm/test/Object/objc-swift-mixed-imageinfo-macho.ll +++ b/llvm/test/Object/objc-swift-mixed-imageinfo-macho.ll @@ -5,11 +5,11 @@ target triple = "x86_64-apple-macosx10.15.0" -@llvm.used = appending global [1 x i8*] [i8* bitcast (i16* @__swift_reflection_version to i8*)], section "llvm.metadata", align 8 +@llvm.used = appending global [1 x ptr] [ptr @__swift_reflection_version], section "llvm.metadata", align 8 @__swift_reflection_version = linkonce_odr hidden constant i16 3 -define i32 @main(i32 %0, i8** %1) #0 { - %3 = bitcast i8** %1 to i8* +define i32 @main(i32 %0, ptr %1) #0 { + %3 = bitcast ptr %1 to ptr ret i32 0 } @@ -25,7 +25,7 @@ attributes #0 = { "frame-pointer"="all" "target-cpu"="penryn" "target-features"= !1 = !{!"-lswiftSwiftOnoneSupport"} !2 = !{!"-lswiftCore"} !3 = !{!"-lobjc"} -!4 = !{[1 x i8*]* @llvm.used, null, null, i1 false, 
i1 true} +!4 = !{ptr @llvm.used, null, null, i1 false, i1 true} !5 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 15]} !6 = !{i32 1, !"Objective-C Version", i32 2} !7 = !{i32 1, !"Objective-C Image Info Version", i32 0} diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/udiv-expansion.ll b/llvm/test/Transforms/CorrelatedValuePropagation/udiv-expansion.ll index a2a767084fbff6..a5fc26ebab00f5 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/udiv-expansion.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/udiv-expansion.ll @@ -90,6 +90,74 @@ define i8 @constant.divisor.v7(i8 %x) { ret i8 %div } +define i8 @constant.divisor.v6to8(i8 %x) { +; CHECK-LABEL: @constant.divisor.v6to8( +; CHECK-NEXT: [[CMP_X_LOWER:%.*]] = icmp uge i8 [[X:%.*]], 6 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_LOWER]]) +; CHECK-NEXT: [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X]], 9 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_UPPER]]) +; CHECK-NEXT: [[DIV:%.*]] = udiv i8 [[X]], 3 +; CHECK-NEXT: ret i8 2 +; + %cmp.x.lower = icmp uge i8 %x, 6 + call void @llvm.assume(i1 %cmp.x.lower) + %cmp.x.upper = icmp ult i8 %x, 9 + call void @llvm.assume(i1 %cmp.x.upper) + %div = udiv i8 %x, 3 + ret i8 %div +} + +define i8 @constant.divisor.v9to11(i8 %x) { +; CHECK-LABEL: @constant.divisor.v9to11( +; CHECK-NEXT: [[CMP_X_LOWER:%.*]] = icmp uge i8 [[X:%.*]], 9 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_LOWER]]) +; CHECK-NEXT: [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X]], 12 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_UPPER]]) +; CHECK-NEXT: [[DIV:%.*]] = udiv i8 [[X]], 3 +; CHECK-NEXT: ret i8 3 +; + %cmp.x.lower = icmp uge i8 %x, 9 + call void @llvm.assume(i1 %cmp.x.lower) + %cmp.x.upper = icmp ult i8 %x, 12 + call void @llvm.assume(i1 %cmp.x.upper) + %div = udiv i8 %x, 3 + ret i8 %div +} + +define i8 @constant.divisor.v12to14(i8 %x) { +; CHECK-LABEL: @constant.divisor.v12to14( +; CHECK-NEXT: [[CMP_X_LOWER:%.*]] = icmp uge i8 [[X:%.*]], 12 +; CHECK-NEXT: call void 
@llvm.assume(i1 [[CMP_X_LOWER]]) +; CHECK-NEXT: [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X]], 15 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_UPPER]]) +; CHECK-NEXT: [[DIV:%.*]] = udiv i8 [[X]], 3 +; CHECK-NEXT: ret i8 4 +; + %cmp.x.lower = icmp uge i8 %x, 12 + call void @llvm.assume(i1 %cmp.x.lower) + %cmp.x.upper = icmp ult i8 %x, 15 + call void @llvm.assume(i1 %cmp.x.upper) + %div = udiv i8 %x, 3 + ret i8 %div +} + +define i8 @constant.divisor.v6to11(i8 %x) { +; CHECK-LABEL: @constant.divisor.v6to11( +; CHECK-NEXT: [[CMP_X_LOWER:%.*]] = icmp uge i8 [[X:%.*]], 6 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_LOWER]]) +; CHECK-NEXT: [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X]], 12 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_UPPER]]) +; CHECK-NEXT: [[DIV:%.*]] = udiv i8 [[X]], 3 +; CHECK-NEXT: ret i8 [[DIV]] +; + %cmp.x.lower = icmp uge i8 %x, 6 + call void @llvm.assume(i1 %cmp.x.lower) + %cmp.x.upper = icmp ult i8 %x, 12 + call void @llvm.assume(i1 %cmp.x.upper) + %div = udiv i8 %x, 3 + ret i8 %div +} + ; Both are variable. 
Bounds are known define i8 @variable.v3(i8 %x, i8 %y) { diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/urem-expansion.ll b/llvm/test/Transforms/CorrelatedValuePropagation/urem-expansion.ll index cd0ba2f189dc83..8e276d010fdd1b 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/urem-expansion.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/urem-expansion.ll @@ -100,6 +100,74 @@ define i8 @constant.divisor.v7(i8 %x) { ret i8 %rem } +define i8 @constant.divisor.v6to8(i8 %x) { +; CHECK-LABEL: @constant.divisor.v6to8( +; CHECK-NEXT: [[CMP_X_LOWER:%.*]] = icmp uge i8 [[X:%.*]], 6 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_LOWER]]) +; CHECK-NEXT: [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X]], 9 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_UPPER]]) +; CHECK-NEXT: [[REM:%.*]] = urem i8 [[X]], 3 +; CHECK-NEXT: ret i8 [[REM]] +; + %cmp.x.lower = icmp uge i8 %x, 6 + call void @llvm.assume(i1 %cmp.x.lower) + %cmp.x.upper = icmp ult i8 %x, 9 + call void @llvm.assume(i1 %cmp.x.upper) + %rem = urem i8 %x, 3 + ret i8 %rem +} + +define i8 @constant.divisor.v9to11(i8 %x) { +; CHECK-LABEL: @constant.divisor.v9to11( +; CHECK-NEXT: [[CMP_X_LOWER:%.*]] = icmp uge i8 [[X:%.*]], 9 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_LOWER]]) +; CHECK-NEXT: [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X]], 12 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_UPPER]]) +; CHECK-NEXT: [[REM:%.*]] = urem i8 [[X]], 3 +; CHECK-NEXT: ret i8 [[REM]] +; + %cmp.x.lower = icmp uge i8 %x, 9 + call void @llvm.assume(i1 %cmp.x.lower) + %cmp.x.upper = icmp ult i8 %x, 12 + call void @llvm.assume(i1 %cmp.x.upper) + %rem = urem i8 %x, 3 + ret i8 %rem +} + +define i8 @constant.divisor.v12to14(i8 %x) { +; CHECK-LABEL: @constant.divisor.v12to14( +; CHECK-NEXT: [[CMP_X_LOWER:%.*]] = icmp uge i8 [[X:%.*]], 12 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_LOWER]]) +; CHECK-NEXT: [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X]], 15 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_UPPER]]) +; 
CHECK-NEXT: [[REM:%.*]] = urem i8 [[X]], 3 +; CHECK-NEXT: ret i8 [[REM]] +; + %cmp.x.lower = icmp uge i8 %x, 12 + call void @llvm.assume(i1 %cmp.x.lower) + %cmp.x.upper = icmp ult i8 %x, 15 + call void @llvm.assume(i1 %cmp.x.upper) + %rem = urem i8 %x, 3 + ret i8 %rem +} + +define i8 @constant.divisor.v6to11(i8 %x) { +; CHECK-LABEL: @constant.divisor.v6to11( +; CHECK-NEXT: [[CMP_X_LOWER:%.*]] = icmp uge i8 [[X:%.*]], 6 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_LOWER]]) +; CHECK-NEXT: [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X]], 12 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_UPPER]]) +; CHECK-NEXT: [[REM:%.*]] = urem i8 [[X]], 3 +; CHECK-NEXT: ret i8 [[REM]] +; + %cmp.x.lower = icmp uge i8 %x, 6 + call void @llvm.assume(i1 %cmp.x.lower) + %cmp.x.upper = icmp ult i8 %x, 12 + call void @llvm.assume(i1 %cmp.x.upper) + %rem = urem i8 %x, 3 + ret i8 %rem +} + ; Both are variable. Bounds are known define i8 @variable.v3(i8 %x, i8 %y) { @@ -130,8 +198,9 @@ define i8 @variable.v4(i8 %x, i8 %y) { ; CHECK-NEXT: [[CMP_Y_UPPER:%.*]] = icmp ule i8 [[Y]], 4 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_Y_UPPER]]) ; CHECK-NEXT: [[X_FROZEN:%.*]] = freeze i8 [[X]] -; CHECK-NEXT: [[REM_UREM:%.*]] = sub nuw i8 [[X_FROZEN]], [[Y]] -; CHECK-NEXT: [[REM_CMP:%.*]] = icmp ult i8 [[X_FROZEN]], [[Y]] +; CHECK-NEXT: [[Y_FROZEN:%.*]] = freeze i8 [[Y]] +; CHECK-NEXT: [[REM_UREM:%.*]] = sub nuw i8 [[X_FROZEN]], [[Y_FROZEN]] +; CHECK-NEXT: [[REM_CMP:%.*]] = icmp ult i8 [[X_FROZEN]], [[Y_FROZEN]] ; CHECK-NEXT: [[REM:%.*]] = select i1 [[REM_CMP]], i8 [[X_FROZEN]], i8 [[REM_UREM]] ; CHECK-NEXT: ret i8 [[REM]] ; @@ -149,8 +218,9 @@ define i8 @variable.v4.range(ptr %x.ptr, ptr %y.ptr) { ; CHECK-NEXT: [[X:%.*]] = load i8, ptr [[X_PTR:%.*]], align 1, !range [[RNG0]] ; CHECK-NEXT: [[Y:%.*]] = load i8, ptr [[Y_PTR:%.*]], align 1, !range [[RNG1:![0-9]+]] ; CHECK-NEXT: [[X_FROZEN:%.*]] = freeze i8 [[X]] -; CHECK-NEXT: [[REM_UREM:%.*]] = sub nuw i8 [[X_FROZEN]], [[Y]] -; CHECK-NEXT: [[REM_CMP:%.*]] = 
icmp ult i8 [[X_FROZEN]], [[Y]] +; CHECK-NEXT: [[Y_FROZEN:%.*]] = freeze i8 [[Y]] +; CHECK-NEXT: [[REM_UREM:%.*]] = sub nuw i8 [[X_FROZEN]], [[Y_FROZEN]] +; CHECK-NEXT: [[REM_CMP:%.*]] = icmp ult i8 [[X_FROZEN]], [[Y_FROZEN]] ; CHECK-NEXT: [[REM:%.*]] = select i1 [[REM_CMP]], i8 [[X_FROZEN]], i8 [[REM_UREM]] ; CHECK-NEXT: ret i8 [[REM]] ; @@ -168,8 +238,9 @@ define i8 @variable.v5(i8 %x, i8 %y) { ; CHECK-NEXT: [[CMP_Y_UPPER:%.*]] = icmp ule i8 [[Y]], 4 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_Y_UPPER]]) ; CHECK-NEXT: [[X_FROZEN:%.*]] = freeze i8 [[X]] -; CHECK-NEXT: [[REM_UREM:%.*]] = sub nuw i8 [[X_FROZEN]], [[Y]] -; CHECK-NEXT: [[REM_CMP:%.*]] = icmp ult i8 [[X_FROZEN]], [[Y]] +; CHECK-NEXT: [[Y_FROZEN:%.*]] = freeze i8 [[Y]] +; CHECK-NEXT: [[REM_UREM:%.*]] = sub nuw i8 [[X_FROZEN]], [[Y_FROZEN]] +; CHECK-NEXT: [[REM_CMP:%.*]] = icmp ult i8 [[X_FROZEN]], [[Y_FROZEN]] ; CHECK-NEXT: [[REM:%.*]] = select i1 [[REM_CMP]], i8 [[X_FROZEN]], i8 [[REM_UREM]] ; CHECK-NEXT: ret i8 [[REM]] ; @@ -191,8 +262,9 @@ define i8 @variable.v6(i8 %x, i8 %y) { ; CHECK-NEXT: [[CMP_Y_UPPER:%.*]] = icmp ule i8 [[Y]], 4 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_Y_UPPER]]) ; CHECK-NEXT: [[X_FROZEN:%.*]] = freeze i8 [[X]] -; CHECK-NEXT: [[REM_UREM:%.*]] = sub nuw i8 [[X_FROZEN]], [[Y]] -; CHECK-NEXT: [[REM_CMP:%.*]] = icmp ult i8 [[X_FROZEN]], [[Y]] +; CHECK-NEXT: [[Y_FROZEN:%.*]] = freeze i8 [[Y]] +; CHECK-NEXT: [[REM_UREM:%.*]] = sub nuw i8 [[X_FROZEN]], [[Y_FROZEN]] +; CHECK-NEXT: [[REM_CMP:%.*]] = icmp ult i8 [[X_FROZEN]], [[Y_FROZEN]] ; CHECK-NEXT: [[REM:%.*]] = select i1 [[REM_CMP]], i8 [[X_FROZEN]], i8 [[REM_UREM]] ; CHECK-NEXT: ret i8 [[REM]] ; @@ -226,6 +298,31 @@ define i8 @variable.v7(i8 %x, i8 %y) { ret i8 %rem } +define i8 @variable.v6to8.v3to4(i8 %x, i8 %y) { +; CHECK-LABEL: @variable.v6to8.v3to4( +; CHECK-NEXT: [[CMP_X_LOWER:%.*]] = icmp uge i8 [[X:%.*]], 6 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_LOWER]]) +; CHECK-NEXT: [[CMP_X_UPPER:%.*]] = icmp ult i8 
[[X]], 8 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_UPPER]]) +; CHECK-NEXT: [[CMP_Y_LOWER:%.*]] = icmp uge i8 [[Y:%.*]], 3 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_Y_LOWER]]) +; CHECK-NEXT: [[CMP_Y_UPPER:%.*]] = icmp ule i8 [[Y]], 4 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_Y_UPPER]]) +; CHECK-NEXT: [[REM:%.*]] = urem i8 [[X]], [[Y]] +; CHECK-NEXT: ret i8 [[REM]] +; + %cmp.x.lower = icmp uge i8 %x, 6 + call void @llvm.assume(i1 %cmp.x.lower) + %cmp.x.upper = icmp ult i8 %x, 8 + call void @llvm.assume(i1 %cmp.x.upper) + %cmp.y.lower = icmp uge i8 %y, 3 + call void @llvm.assume(i1 %cmp.y.lower) + %cmp.y.upper = icmp ule i8 %y, 4 + call void @llvm.assume(i1 %cmp.y.upper) + %rem = urem i8 %x, %y + ret i8 %rem +} + ; Constant divisor define i8 @large.divisor.v0(i8 %x) { diff --git a/llvm/test/Transforms/InstCombine/ptrmask.ll b/llvm/test/Transforms/InstCombine/ptrmask.ll index afeb5d5251d0f4..4631b81cd1ce1f 100644 --- a/llvm/test/Transforms/InstCombine/ptrmask.ll +++ b/llvm/test/Transforms/InstCombine/ptrmask.ll @@ -80,12 +80,12 @@ define ptr addrspace(1) @ptrmask_combine_consecutive_preserve_attrs_todo2(ptr ad define ptr @ptrmask_combine_add_nonnull(ptr %p) { ; CHECK-LABEL: define ptr @ptrmask_combine_add_nonnull ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[PM0:%.*]] = call align 64 ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -64) -; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, ptr [[PM0]], i64 33 -; CHECK-NEXT: [[R:%.*]] = call nonnull align 32 ptr @llvm.ptrmask.p0.i64(ptr [[PGEP]], i64 -32) +; CHECK-NEXT: [[PM0:%.*]] = call align 4 ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -60) +; CHECK-NEXT: [[PGEP1:%.*]] = getelementptr i8, ptr [[PM0]], i64 32 +; CHECK-NEXT: [[R:%.*]] = call nonnull align 32 ptr @llvm.ptrmask.p0.i64(ptr [[PGEP1]], i64 -32) ; CHECK-NEXT: ret ptr [[R]] ; - %pm0 = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -64) + %pm0 = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -60) %pgep = getelementptr i8, ptr %pm0, i64 33 %r = call ptr 
@llvm.ptrmask.p0.i64(ptr %pgep, i64 -16) ret ptr %r @@ -287,6 +287,162 @@ define ptr addrspace(1) @ptrmask_maintain_provenance_i32(ptr addrspace(1) %p0) { ret ptr addrspace(1) %r } +define ptr @ptrmask_is_nop0(ptr align 8 %p) { +; CHECK-LABEL: define ptr @ptrmask_is_nop0 +; CHECK-SAME: (ptr align 8 [[P:%.*]]) { +; CHECK-NEXT: ret ptr [[P]] +; + %pm = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -8) + ret ptr %pm +} + +define ptr @ptrmask_is_nop1(ptr align 8 %p) { +; CHECK-LABEL: define ptr @ptrmask_is_nop1 +; CHECK-SAME: (ptr align 8 [[P:%.*]]) { +; CHECK-NEXT: ret ptr [[P]] +; + %pm = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -4) + ret ptr %pm +} + +define ptr @ptrmask_to_modified_gep0(ptr align 8 %p) { +; CHECK-LABEL: define ptr @ptrmask_to_modified_gep0 +; CHECK-SAME: (ptr align 8 [[P:%.*]]) { +; CHECK-NEXT: [[PM:%.*]] = call align 16 ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -16) +; CHECK-NEXT: ret ptr [[PM]] +; + %gep = getelementptr i8, ptr %p, i32 5 + %pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -16) + ret ptr %pm +} + +define ptr @ptrmask_to_modified_gep1(ptr align 8 %p) { +; CHECK-LABEL: define ptr @ptrmask_to_modified_gep1 +; CHECK-SAME: (ptr align 8 [[P:%.*]]) { +; CHECK-NEXT: ret ptr [[P]] +; + %gep = getelementptr i8, ptr %p, i32 6 + %pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -8) + ret ptr %pm +} + +define ptr @ptrmask_to_modified_gep2(ptr align 16 %p) { +; CHECK-LABEL: define ptr @ptrmask_to_modified_gep2 +; CHECK-SAME: (ptr align 16 [[P:%.*]]) { +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P]], i64 12 +; CHECK-NEXT: ret ptr [[GEP1]] +; + %gep = getelementptr i8, ptr %p, i32 15 + %pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -4) + ret ptr %pm +} + +define ptr @ptrmask_to_modified_gep4(ptr align 8 %p) { +; CHECK-LABEL: define ptr @ptrmask_to_modified_gep4 +; CHECK-SAME: (ptr align 8 [[P:%.*]]) { +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P]], i64 24 +; CHECK-NEXT: [[PM:%.*]] = call align 16 ptr @llvm.ptrmask.p0.i64(ptr 
[[GEP1]], i64 -16) +; CHECK-NEXT: ret ptr [[PM]] +; + %gep = getelementptr i8, ptr %p, i32 29 + %pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -16) + ret ptr %pm +} + +define ptr @ptrmask_to_modified_gep5(ptr align 8 %p) { +; CHECK-LABEL: define ptr @ptrmask_to_modified_gep5 +; CHECK-SAME: (ptr align 8 [[P:%.*]]) { +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P]], i64 24 +; CHECK-NEXT: ret ptr [[GEP1]] +; + %gep = getelementptr i8, ptr %p, i32 30 + %pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -8) + ret ptr %pm +} + +define ptr @ptrmask_to_modified_gep6(ptr align 16 %p) { +; CHECK-LABEL: define ptr @ptrmask_to_modified_gep6 +; CHECK-SAME: (ptr align 16 [[P:%.*]]) { +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P]], i64 28 +; CHECK-NEXT: ret ptr [[GEP1]] +; + %gep = getelementptr i8, ptr %p, i32 31 + %pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -4) + ret ptr %pm +} + +define ptr @ptrmask_to_modified_gep_indirect0(ptr align 16 %p) { +; CHECK-LABEL: define ptr @ptrmask_to_modified_gep_indirect0 +; CHECK-SAME: (ptr align 16 [[P:%.*]]) { +; 44 from 4*sizeof(i32) + (31 & -4) +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P]], i64 44 +; CHECK-NEXT: ret ptr [[GEP1]] +; + %gep0 = getelementptr i32, ptr %p, i32 4 + %gep1 = getelementptr i8, ptr %gep0, i32 31 + %pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep1, i64 -4) + ret ptr %pm +} + +define ptr @ptrmask_to_modified_gep_indirect1(ptr %p) { +; CHECK-LABEL: define ptr @ptrmask_to_modified_gep_indirect1 +; CHECK-SAME: (ptr [[P:%.*]]) { + +; CHECK-NEXT: [[R:%.*]] = call align 16 ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -16) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[R]], i64 32 +; CHECK-NEXT: ret ptr [[GEP]] +; + %pm0 = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -16) + %pgep = getelementptr i8, ptr %pm0, i64 33 + %r = call ptr @llvm.ptrmask.p0.i64(ptr %pgep, i64 -16) + ret ptr %r +} + +define ptr @ptrmask_to_modified_gep_zero_argument() { +; CHECK-LABEL: define ptr 
@ptrmask_to_modified_gep_zero_argument() { +; CHECK-NEXT: [[P:%.*]] = call nonnull align 4 ptr @llvm.ptrmask.p0.i64(ptr nonnull inttoptr (i64 31 to ptr), i64 28) +; CHECK-NEXT: ret ptr [[P]] +; + %gep = getelementptr inbounds i8, ptr null, i32 31 + %pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -4) + ret ptr %pm +} + +define ptr @ptrmask_to_preserves_inbounds(ptr align 16 %p) { +; CHECK-LABEL: define ptr @ptrmask_to_preserves_inbounds +; CHECK-SAME: (ptr align 16 [[P:%.*]]) { +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 28 +; CHECK-NEXT: ret ptr [[GEP1]] +; + %gep = getelementptr inbounds i8, ptr %p, i32 31 + %pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -4) + ret ptr %pm +} + +define ptr @ptrmask_of_gep_requires_i8(ptr align 8 %p) { +; CHECK-LABEL: define ptr @ptrmask_of_gep_requires_i8 +; CHECK-SAME: (ptr align 8 [[P:%.*]]) { +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P]], i64 8 +; CHECK-NEXT: [[PM:%.*]] = call align 16 ptr @llvm.ptrmask.p0.i64(ptr [[GEP1]], i64 -16) +; CHECK-NEXT: ret ptr [[PM]] +; + %gep = getelementptr i16, ptr %p, i32 5 + %pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -16) + ret ptr %pm +} + +define <2 x ptr> @ptrmask_of_gep_vector_type_unimplemented(<2 x ptr> align 8 %p) { +; CHECK-LABEL: define <2 x ptr> @ptrmask_of_gep_vector_type_unimplemented +; CHECK-SAME: (<2 x ptr> align 8 [[P:%.*]]) { +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, <2 x ptr> [[P]], i64 17 +; CHECK-NEXT: [[PM:%.*]] = call align 32 <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[GEP]], <2 x i64> ) +; CHECK-NEXT: ret <2 x ptr> [[PM]] +; + %gep = getelementptr i8, <2 x ptr> %p, i32 17 + %pm = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %gep, <2 x i64> ) + ret <2 x ptr> %pm +} + define ptr @ptrmask_is_useless0(i64 %i, i64 %m) { ; CHECK-LABEL: define ptr @ptrmask_is_useless0 ; CHECK-SAME: (i64 [[I:%.*]], i64 [[M:%.*]]) { diff --git a/llvm/test/Transforms/RemoveTraps/remove-traps.ll 
b/llvm/test/Transforms/RemoveTraps/remove-traps.ll new file mode 100644 index 00000000000000..71549e7d9b4122 --- /dev/null +++ b/llvm/test/Transforms/RemoveTraps/remove-traps.ll @@ -0,0 +1,397 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -passes='function(remove-traps)' -S | FileCheck %s --check-prefixes=NOPROFILE +; RUN: opt < %s -passes='function(remove-traps)' -remove-traps-random-rate=1 -S | FileCheck %s --check-prefixes=ALL +; RUN: opt < %s -passes='require,function(remove-traps)' -S | FileCheck %s --check-prefixes=HOT +; RUN: opt < %s -passes='require,function(remove-traps)' -remove-traps-percentile-cutoff-hot=700000 -S | FileCheck %s --check-prefixes=HOT70 + +target triple = "x86_64-pc-linux-gnu" + +declare void @llvm.ubsantrap(i8 immarg) + +define dso_local noundef i32 @simple(ptr noundef readonly %0) { +; NOPROFILE-LABEL: define dso_local noundef i32 @simple( +; NOPROFILE-SAME: ptr noundef readonly [[TMP0:%.*]]) { +; NOPROFILE-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; NOPROFILE-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; NOPROFILE: 3: +; NOPROFILE-NEXT: tail call void @llvm.ubsantrap(i8 22) +; NOPROFILE-NEXT: unreachable +; NOPROFILE: 4: +; NOPROFILE-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; NOPROFILE-NEXT: ret i32 [[TMP5]] +; +; ALL-LABEL: define dso_local noundef i32 @simple( +; ALL-SAME: ptr noundef readonly [[TMP0:%.*]]) { +; ALL-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; ALL-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; ALL: 3: +; ALL-NEXT: unreachable +; ALL: 4: +; ALL-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; ALL-NEXT: ret i32 [[TMP5]] +; +; HOT-LABEL: define dso_local noundef i32 @simple( +; HOT-SAME: ptr noundef readonly [[TMP0:%.*]]) { +; HOT-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; HOT-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; HOT: 3: +; HOT-NEXT: tail call void 
@llvm.ubsantrap(i8 22) +; HOT-NEXT: unreachable +; HOT: 4: +; HOT-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; HOT-NEXT: ret i32 [[TMP5]] +; +; HOT70-LABEL: define dso_local noundef i32 @simple( +; HOT70-SAME: ptr noundef readonly [[TMP0:%.*]]) { +; HOT70-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; HOT70-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; HOT70: 3: +; HOT70-NEXT: tail call void @llvm.ubsantrap(i8 22) +; HOT70-NEXT: unreachable +; HOT70: 4: +; HOT70-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; HOT70-NEXT: ret i32 [[TMP5]] +; + %2 = icmp eq ptr %0, null + br i1 %2, label %3, label %4 + +3: + tail call void @llvm.ubsantrap(i8 22) + unreachable + +4: + %5 = load i32, ptr %0, align 4 + ret i32 %5 +} + + +define dso_local noundef i32 @hot(ptr noundef readonly %0) !prof !36 { +; NOPROFILE-LABEL: define dso_local noundef i32 @hot( +; NOPROFILE-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF16:![0-9]+]] { +; NOPROFILE-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; NOPROFILE-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; NOPROFILE: 3: +; NOPROFILE-NEXT: tail call void @llvm.ubsantrap(i8 22) +; NOPROFILE-NEXT: unreachable +; NOPROFILE: 4: +; NOPROFILE-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; NOPROFILE-NEXT: ret i32 [[TMP5]] +; +; ALL-LABEL: define dso_local noundef i32 @hot( +; ALL-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF16:![0-9]+]] { +; ALL-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; ALL-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; ALL: 3: +; ALL-NEXT: unreachable +; ALL: 4: +; ALL-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; ALL-NEXT: ret i32 [[TMP5]] +; +; HOT-LABEL: define dso_local noundef i32 @hot( +; HOT-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF16:![0-9]+]] { +; HOT-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; HOT-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; HOT: 3: +; HOT-NEXT: 
unreachable +; HOT: 4: +; HOT-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; HOT-NEXT: ret i32 [[TMP5]] +; +; HOT70-LABEL: define dso_local noundef i32 @hot( +; HOT70-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF16:![0-9]+]] { +; HOT70-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; HOT70-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; HOT70: 3: +; HOT70-NEXT: tail call void @llvm.ubsantrap(i8 22) +; HOT70-NEXT: unreachable +; HOT70: 4: +; HOT70-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; HOT70-NEXT: ret i32 [[TMP5]] +; + %2 = icmp eq ptr %0, null + br i1 %2, label %3, label %4 + +3: + tail call void @llvm.ubsantrap(i8 22) + unreachable + +4: + %5 = load i32, ptr %0, align 4 + ret i32 %5 +} + +define dso_local noundef i32 @veryHot(ptr noundef readonly %0) !prof !39 { +; NOPROFILE-LABEL: define dso_local noundef i32 @veryHot( +; NOPROFILE-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF17:![0-9]+]] { +; NOPROFILE-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; NOPROFILE-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; NOPROFILE: 3: +; NOPROFILE-NEXT: tail call void @llvm.ubsantrap(i8 22) +; NOPROFILE-NEXT: unreachable +; NOPROFILE: 4: +; NOPROFILE-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; NOPROFILE-NEXT: ret i32 [[TMP5]] +; +; ALL-LABEL: define dso_local noundef i32 @veryHot( +; ALL-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF17:![0-9]+]] { +; ALL-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; ALL-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; ALL: 3: +; ALL-NEXT: unreachable +; ALL: 4: +; ALL-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; ALL-NEXT: ret i32 [[TMP5]] +; +; HOT-LABEL: define dso_local noundef i32 @veryHot( +; HOT-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF17:![0-9]+]] { +; HOT-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; HOT-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; HOT: 3: +; HOT-NEXT: 
unreachable +; HOT: 4: +; HOT-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; HOT-NEXT: ret i32 [[TMP5]] +; +; HOT70-LABEL: define dso_local noundef i32 @veryHot( +; HOT70-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF17:![0-9]+]] { +; HOT70-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; HOT70-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; HOT70: 3: +; HOT70-NEXT: unreachable +; HOT70: 4: +; HOT70-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; HOT70-NEXT: ret i32 [[TMP5]] +; + %2 = icmp eq ptr %0, null + br i1 %2, label %3, label %4 + +3: + tail call void @llvm.ubsantrap(i8 22) + unreachable + +4: + %5 = load i32, ptr %0, align 4 + ret i32 %5 +} + + +define dso_local noundef i32 @branchColdFnHot(i32 noundef %0, ptr noundef readonly %1) !prof !39 { +; NOPROFILE-LABEL: define dso_local noundef i32 @branchColdFnHot( +; NOPROFILE-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF17]] { +; NOPROFILE-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; NOPROFILE-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF18:![0-9]+]] +; NOPROFILE: 4: +; NOPROFILE-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null +; NOPROFILE-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; NOPROFILE: 6: +; NOPROFILE-NEXT: tail call void @llvm.ubsantrap(i8 22) +; NOPROFILE-NEXT: unreachable +; NOPROFILE: 7: +; NOPROFILE-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +; NOPROFILE-NEXT: br label [[TMP9]] +; NOPROFILE: 9: +; NOPROFILE-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] +; NOPROFILE-NEXT: ret i32 [[TMP10]] +; +; ALL-LABEL: define dso_local noundef i32 @branchColdFnHot( +; ALL-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF17]] { +; ALL-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; ALL-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF18:![0-9]+]] +; ALL: 4: +; ALL-NEXT: [[TMP5:%.*]] = icmp eq ptr 
[[TMP1]], null +; ALL-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; ALL: 6: +; ALL-NEXT: unreachable +; ALL: 7: +; ALL-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +; ALL-NEXT: br label [[TMP9]] +; ALL: 9: +; ALL-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] +; ALL-NEXT: ret i32 [[TMP10]] +; +; HOT-LABEL: define dso_local noundef i32 @branchColdFnHot( +; HOT-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF17]] { +; HOT-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; HOT-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF18:![0-9]+]] +; HOT: 4: +; HOT-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null +; HOT-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; HOT: 6: +; HOT-NEXT: tail call void @llvm.ubsantrap(i8 22) +; HOT-NEXT: unreachable +; HOT: 7: +; HOT-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +; HOT-NEXT: br label [[TMP9]] +; HOT: 9: +; HOT-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] +; HOT-NEXT: ret i32 [[TMP10]] +; +; HOT70-LABEL: define dso_local noundef i32 @branchColdFnHot( +; HOT70-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF17]] { +; HOT70-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; HOT70-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF18:![0-9]+]] +; HOT70: 4: +; HOT70-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null +; HOT70-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; HOT70: 6: +; HOT70-NEXT: tail call void @llvm.ubsantrap(i8 22) +; HOT70-NEXT: unreachable +; HOT70: 7: +; HOT70-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +; HOT70-NEXT: br label [[TMP9]] +; HOT70: 9: +; HOT70-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] +; HOT70-NEXT: ret i32 [[TMP10]] +; + %3 = icmp eq i32 %0, 0 + br i1 %3, label %9, label %4, !prof !38 + +4: + %5 = icmp eq ptr %1, null + br i1 %5, label %6, 
label %7 + +6: + tail call void @llvm.ubsantrap(i8 22) #2 + unreachable + +7: + %8 = load i32, ptr %1, align 4 + br label %9 + +9: + %10 = phi i32 [ %8, %7 ], [ 0, %2 ] + ret i32 %10 +} + +define dso_local noundef i32 @branchHotFnCold(i32 noundef %0, ptr noundef readonly %1) !prof !36 { +; NOPROFILE-LABEL: define dso_local noundef i32 @branchHotFnCold( +; NOPROFILE-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF16]] { +; NOPROFILE-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; NOPROFILE-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF19:![0-9]+]] +; NOPROFILE: 4: +; NOPROFILE-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null +; NOPROFILE-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; NOPROFILE: 6: +; NOPROFILE-NEXT: tail call void @llvm.ubsantrap(i8 22) +; NOPROFILE-NEXT: unreachable +; NOPROFILE: 7: +; NOPROFILE-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +; NOPROFILE-NEXT: br label [[TMP9]] +; NOPROFILE: 9: +; NOPROFILE-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] +; NOPROFILE-NEXT: ret i32 [[TMP10]] +; +; ALL-LABEL: define dso_local noundef i32 @branchHotFnCold( +; ALL-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF16]] { +; ALL-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; ALL-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF19:![0-9]+]] +; ALL: 4: +; ALL-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null +; ALL-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; ALL: 6: +; ALL-NEXT: unreachable +; ALL: 7: +; ALL-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +; ALL-NEXT: br label [[TMP9]] +; ALL: 9: +; ALL-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] +; ALL-NEXT: ret i32 [[TMP10]] +; +; HOT-LABEL: define dso_local noundef i32 @branchHotFnCold( +; HOT-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF16]] { +; HOT-NEXT: 
[[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; HOT-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF19:![0-9]+]] +; HOT: 4: +; HOT-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null +; HOT-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; HOT: 6: +; HOT-NEXT: unreachable +; HOT: 7: +; HOT-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +; HOT-NEXT: br label [[TMP9]] +; HOT: 9: +; HOT-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] +; HOT-NEXT: ret i32 [[TMP10]] +; +; HOT70-LABEL: define dso_local noundef i32 @branchHotFnCold( +; HOT70-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF16]] { +; HOT70-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; HOT70-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF19:![0-9]+]] +; HOT70: 4: +; HOT70-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null +; HOT70-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; HOT70: 6: +; HOT70-NEXT: tail call void @llvm.ubsantrap(i8 22) +; HOT70-NEXT: unreachable +; HOT70: 7: +; HOT70-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +; HOT70-NEXT: br label [[TMP9]] +; HOT70: 9: +; HOT70-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] +; HOT70-NEXT: ret i32 [[TMP10]] +; + %3 = icmp eq i32 %0, 0 + br i1 %3, label %9, label %4, !prof !37 + +4: + %5 = icmp eq ptr %1, null + br i1 %5, label %6, label %7 + +6: + tail call void @llvm.ubsantrap(i8 22) #2 + unreachable + +7: + %8 = load i32, ptr %1, align 4 + br label %9 + +9: + %10 = phi i32 [ %8, %7 ], [ 0, %2 ] + ret i32 %10 +} + +!llvm.module.flags = !{!6} +!6 = !{i32 1, !"ProfileSummary", !7} +!7 = !{!8, !9, !10, !11, !12, !13, !14, !17} +!8 = !{!"ProfileFormat", !"InstrProf"} +!9 = !{!"TotalCount", i64 30000} +!10 = !{!"MaxCount", i64 10000} +!11 = !{!"MaxInternalCount", i64 10000} +!12 = !{!"MaxFunctionCount", i64 10000} +!13 = !{!"NumCounts", i64 3} +!14 = !{!"NumFunctions", i64 5} +!17 = 
!{!"DetailedSummary", !18} +!18 = !{!19, !29, !30, !32, !34} +!19 = !{i32 10000, i64 10000, i32 3} +!29 = !{i32 950000, i64 5000, i32 3} +!30 = !{i32 990000, i64 500, i32 4} +!32 = !{i32 999900, i64 250, i32 4} +!34 = !{i32 999999, i64 1, i32 6} + +!36 = !{!"function_entry_count", i64 1000} +!39 = !{!"function_entry_count", i64 7000} + +!37 = !{!"branch_weights", i32 1, i32 1000} +!38 = !{!"branch_weights", i32 1000, i32 1} + +;. +; NOPROFILE: [[PROF16]] = !{!"function_entry_count", i64 1000} +; NOPROFILE: [[PROF17]] = !{!"function_entry_count", i64 7000} +; NOPROFILE: [[PROF18]] = !{!"branch_weights", i32 1000, i32 1} +; NOPROFILE: [[PROF19]] = !{!"branch_weights", i32 1, i32 1000} +;. +; ALL: [[PROF16]] = !{!"function_entry_count", i64 1000} +; ALL: [[PROF17]] = !{!"function_entry_count", i64 7000} +; ALL: [[PROF18]] = !{!"branch_weights", i32 1000, i32 1} +; ALL: [[PROF19]] = !{!"branch_weights", i32 1, i32 1000} +;. +; HOT: [[PROF16]] = !{!"function_entry_count", i64 1000} +; HOT: [[PROF17]] = !{!"function_entry_count", i64 7000} +; HOT: [[PROF18]] = !{!"branch_weights", i32 1000, i32 1} +; HOT: [[PROF19]] = !{!"branch_weights", i32 1, i32 1000} +;. +; HOT70: [[PROF16]] = !{!"function_entry_count", i64 1000} +; HOT70: [[PROF17]] = !{!"function_entry_count", i64 7000} +; HOT70: [[PROF18]] = !{!"branch_weights", i32 1000, i32 1} +; HOT70: [[PROF19]] = !{!"branch_weights", i32 1, i32 1000} +;. 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll index b9ef17c49b7514..6f5d3d3785e0c8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll @@ -54,3 +54,57 @@ entry: store i32 %conv27, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 3), align 4 ret void } + +define void @test1() { +; CHECK-LABEL: define void @test1( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr i32, ptr null, i64 60 +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> ), i32 4, <4 x i1> , <4 x i32> poison) +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP2]], [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i32> [[TMP3]] to <4 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i32> +; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16 +; CHECK-NEXT: ret void +; +entry: + %arrayidx1 = getelementptr i32, ptr null, i64 1 + %0 = load i32, ptr %arrayidx1, align 4 + %arrayidx2 = getelementptr i32, ptr null, i64 63 + %1 = load i32, ptr %arrayidx2, align 4 + %mul = mul i32 %1, %0 + %conv = sext i32 %mul to i64 + %shr = lshr i64 %conv, 0 + %conv3 = trunc i64 %shr to i32 + store i32 %conv3, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16 + %arrayidx5 = getelementptr i32, ptr null, i64 33 + %2 = load i32, ptr %arrayidx5, align 4 + %arrayidx6 = getelementptr i32, ptr null, i64 62 + %3 = load i32, ptr 
%arrayidx6, align 4 + %mul7 = mul i32 %3, %2 + %conv8 = sext i32 %mul7 to i64 + %shr10 = lshr i64 %conv8, 0 + %conv11 = trunc i64 %shr10 to i32 + store i32 %conv11, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 1), align 4 + %arrayidx13 = getelementptr i32, ptr null, i64 7 + %4 = load i32, ptr %arrayidx13, align 4 + %arrayidx14 = getelementptr i32, ptr null, i64 61 + %5 = load i32, ptr %arrayidx14, align 4 + %mul15 = mul i32 %5, %4 + %conv16 = sext i32 %mul15 to i64 + %shr18 = lshr i64 %conv16, 0 + %conv19 = trunc i64 %shr18 to i32 + store i32 %conv19, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 2), align 8 + %6 = load i32, ptr null, align 4 + %arrayidx22 = getelementptr i32, ptr null, i64 60 + %7 = load i32, ptr %arrayidx22, align 4 + %mul23 = mul i32 %7, %6 + %conv24 = sext i32 %mul23 to i64 + %shr26 = lshr i64 %conv24, 0 + %conv27 = trunc i64 %shr26 to i32 + store i32 %conv27, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 3), align 4 + ret void +} diff --git a/llvm/test/Verifier/RemoveDI/blockbyref.ll b/llvm/test/Verifier/RemoveDI/blockbyref.ll new file mode 100644 index 00000000000000..86321a6ae78e80 --- /dev/null +++ b/llvm/test/Verifier/RemoveDI/blockbyref.ll @@ -0,0 +1,18 @@ +; RUN: llvm-as -disable-output <%s 2>&1| FileCheck %s + +; CHECK: DIBlockByRefStruct on DICompositeType is no longer supported +; CHECK: warning: ignoring invalid debug info + +define void @foo() { +entry: + %s = alloca i32 + #dbg_declare(ptr %s, !2, !DIExpression(), !DILocation(scope: !1)) + ret void +} + + +!llvm.module.flags = !{!0} +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = distinct !DISubprogram() +!2 = !DILocalVariable(scope: !1, type: !3) +!3 = !DICompositeType(tag: DW_TAG_structure_type, flags: DIFlagReservedBit4) diff --git a/llvm/test/Verifier/RemoveDI/dbg-invalid-vector.ll b/llvm/test/Verifier/RemoveDI/dbg-invalid-vector.ll new file mode 100644 index 00000000000000..0832c361c3080e --- /dev/null +++ 
b/llvm/test/Verifier/RemoveDI/dbg-invalid-vector.ll @@ -0,0 +1,35 @@ +; RUN: opt -passes=verify -disable-output <%s 2>&1 | FileCheck %s +; +; This test creates an invalid vector by defining multiple elements for the +; vector's DICompositeType definition. A vector should only have one element +; in its DICompositeType 'elements' array. +; +; CHECK: invalid vector + +@f.foo = private unnamed_addr constant <6 x float> zeroinitializer, align 32 + +define void @f() { + %1 = alloca <6 x float>, align 32 + #dbg_declare(ptr %1, !10, !DIExpression(), !18) + ret void +} + + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "test.c", directory: "/dbg/info") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 3, type: !8, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !9) +!9 = !{null} +!10 = !DILocalVariable(name: "foo", scope: !7, file: !1, line: 4, type: !12) +!12 = !DICompositeType(tag: DW_TAG_array_type, baseType: !13, size: 256, flags: DIFlagVector, elements: !14) +!13 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float) +!14 = !{!15, !19} +!15 = !DISubrange(count: 6) +!18 = !DILocation(line: 4, column: 48, scope: !7) +!19 = !DISubrange(count: 42) diff --git a/llvm/test/Verifier/RemoveDI/di-subroutine-localvar.ll b/llvm/test/Verifier/RemoveDI/di-subroutine-localvar.ll new file mode 100644 index 00000000000000..14e58883989968 --- /dev/null +++ b/llvm/test/Verifier/RemoveDI/di-subroutine-localvar.ll @@ -0,0 +1,41 @@ +; RUN: opt %s -passes=verify 2>&1 | FileCheck %s +; CHECK: invalid type +; CHECK: !20 = !DILocalVariable(name: "f", scope: !21, file: !13, line: 970, 
type: !14) +; CHECK: !14 = !DISubroutineType(types: !15) + + +%timespec.0.1.2.3.0.1.2 = type { i64, i64 } +define internal i64 @init_vdso_clock_gettime(i32, ptr nonnull) unnamed_addr !dbg !142 { + #dbg_value(ptr null, !162, !DIExpression(), !167) + ret i64 -38, !dbg !168 +} +!llvm.module.flags = !{!0} +!llvm.dbg.cu = !{!1} +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "zig 0.3.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !3, globals: !76) +!2 = !DIFile(filename: "test", directory: ".") +!3 = !{!4} +!4 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "Arch", scope: !5, file: !5, line: 44, baseType: !6, size: 8, align: 8, elements: !7) +!5 = !DIFile(filename: "builtin.zig", directory: "/home/andy/.local/share/zig/stage1/builtin/ugMGxVES9OkDAffv3xhJS3KQVy0Wm1xPM3Bc6x4MBuup5aetdi5pVTrGRG2aDAn0") +!6 = !DIBasicType(name: "u7", size: 8, encoding: DW_ATE_unsigned) +!7 = !{!8} +!8 = !DIEnumerator(name: "armv8_5a", value: 0) +!76 = !{!77} +!77 = !DIGlobalVariableExpression(var: !78, expr: !DIExpression()) +!78 = distinct !DIGlobalVariable(name: "arch", linkageName: "arch", scope: !5, file: !5, line: 437, type: !4, isLocal: true, isDefinition: true) +!81 = !DIFile(filename: "index.zig", directory: "/store/dev/zig/build-llvm8-debug/lib/zig/std/os/linux") +!142 = distinct !DISubprogram(name: "init_vdso_clock_gettime", scope: !81, file: !81, line: 968, type: !143, scopeLine: 968, flags: DIFlagStaticMember, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !1, retainedNodes: !153) +!143 = !DISubroutineType(types: !144) +!144 = !{!145} +!145 = !DIBasicType(name: "usize", size: 64, encoding: DW_ATE_unsigned) +!146 = !DIBasicType(name: "i32", size: 32, encoding: DW_ATE_signed) +!153 = !{!154} +!154 = !DILocalVariable(name: "clk", arg: 1, scope: !142, file: !81, line: 968, type: !146) +!162 = !DILocalVariable(name: "f", scope: !163, file: !81, line: 
970, type: !143) +!163 = distinct !DILexicalBlock(scope: !164, file: !81, line: 969, column: 5) +!164 = distinct !DILexicalBlock(scope: !165, file: !81, line: 968, column: 66) +!165 = distinct !DILexicalBlock(scope: !166, file: !81, line: 968, column: 45) +!166 = distinct !DILexicalBlock(scope: !142, file: !81, line: 968, column: 35) +!167 = !DILocation(line: 970, column: 5, scope: !163) +!168 = !DILocation(line: 972, column: 28, scope: !169) +!169 = distinct !DILexicalBlock(scope: !163, file: !81, line: 970, column: 5) diff --git a/llvm/test/Verifier/RemoveDI/diexpression-entry-value-llvm-ir.ll b/llvm/test/Verifier/RemoveDI/diexpression-entry-value-llvm-ir.ll new file mode 100644 index 00000000000000..881ec4a86fb644 --- /dev/null +++ b/llvm/test/Verifier/RemoveDI/diexpression-entry-value-llvm-ir.ll @@ -0,0 +1,34 @@ +; RUN: llvm-as -disable-output <%s 2>&1| FileCheck %s + +; CHECK-NOT: #dbg_value +; CHECK: Entry values are only allowed in MIR unless they target a swiftasync Argument +; CHECK: #dbg_value(i32 %param, !{{.*}}, !DIExpression(DW_OP_LLVM_entry_value, 1) +; CHECK-NOT: #dbg_value +; CHECK-NOT: Entry values are only allowed +; CHECK: warning: ignoring invalid debug info + +define void @foo(i32 %param, ptr swiftasync %ok_param) !dbg !4 { +entry: + #dbg_value(i32 %param, !8, !DIExpression(DW_OP_LLVM_entry_value, 1), !9) + #dbg_value(ptr %ok_param, !8, !DIExpression(DW_OP_LLVM_entry_value, 1), !9) + #dbg_value(ptr poison, !8, !DIExpression(DW_OP_LLVM_entry_value, 1), !9) + #dbg_value(ptr undef, !8, !DIExpression(DW_OP_LLVM_entry_value, 1), !9) + ret void +} + + +attributes #0 = { nounwind readnone speculatable willreturn } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, emissionKind: FullDebug) +!1 = !DIFile(filename: "a.c", directory: "/") +!2 = !{i32 2, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, 
type: !5, unit: !0) +!5 = !DISubroutineType(types: !6) +!6 = !{null, !7} +!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!8 = !DILocalVariable(name: "param", arg: 1, scope: !4, file: !1, type: !7) +!9 = !DILocation(line: 0, scope: !4) diff --git a/llvm/test/Verifier/RemoveDI/fnarg-debuginfo.ll b/llvm/test/Verifier/RemoveDI/fnarg-debuginfo.ll new file mode 100644 index 00000000000000..db1a9a8ba18945 --- /dev/null +++ b/llvm/test/Verifier/RemoveDI/fnarg-debuginfo.ll @@ -0,0 +1,26 @@ +; RUN: llvm-as -disable-output < %s -o /dev/null 2>&1 | FileCheck %s + + +define void @foo() !dbg !2 { +entry: + %a = alloca i32 + ; CHECK: conflicting debug info for argument + #dbg_value(i32 0, !3, !DIExpression(), !6) + #dbg_declare(ptr %a, !4, !DIExpression(), !6) + ret void, !dbg !6 +} + +; CHECK: warning: ignoring invalid debug info + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!7, !8} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", emissionKind: FullDebug) +!1 = !DIFile(filename: "x.c", directory: "/") +!2 = distinct !DISubprogram(name: "foo", scope: !0, isDefinition: true, unit: !0) +!3 = !DILocalVariable(name: "a", arg: 1, scope: !2, file: !1, line: 1, type: !5) +!4 = !DILocalVariable(name: "b", arg: 1, scope: !2, file: !1, line: 1, type: !5) +!5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!6 = !DILocation(line: 1, scope: !2) +!7 = !{i32 2, !"Dwarf Version", i32 4} +!8 = !{i32 1, !"Debug Info Version", i32 3} diff --git a/llvm/test/Verifier/RemoveDI/fnarg-nodebug.ll b/llvm/test/Verifier/RemoveDI/fnarg-nodebug.ll new file mode 100644 index 00000000000000..f5526030278eb9 --- /dev/null +++ b/llvm/test/Verifier/RemoveDI/fnarg-nodebug.ll @@ -0,0 +1,58 @@ +; RUN: llvm-as < %s -o %t +; RUN: llvm-dis < %t -o - | FileCheck %s +; Created at -O1 from: +; int sink(int); +; __attribute__((always_inline)) int f(int i) { return sink(i); } +; __attribute__((always_inline)) int g(int j) { return sink(j); } +; 
__attribute__((nodebug)) int nodebug(int k) { return f(k)+g(k); } +source_filename = "t.c" +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.12.0" + +declare i32 @sink(i32) local_unnamed_addr + +define i32 @nodebug(i32 %k) local_unnamed_addr #2 { +entry: +; This should not set off the FnArg Verifier. The two variables are in differrent scopes. + #dbg_value(i32 %k, !12, !13, !14) + %call.k = tail call i32 @sink(i32 %k) #4, !dbg !15 + #dbg_value(i32 %k, !19, !13, !20) + %call.k3 = tail call i32 @sink(i32 %k) #4, !dbg !21 + %add = add nsw i32 %call.k3, %call.k + ret i32 %add +} + +; Function Attrs: nounwind readnone + +attributes #2 = { nounwind ssp uwtable } +attributes #3 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 5.0.0 (trunk 297153) (llvm/trunk 297155)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "t.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"PIC Level", i32 2} +!6 = !{!"clang version 5.0.0 (trunk 297153) (llvm/trunk 297155)"} +!7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 2, type: !8, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !11) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !10} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !{!12} +; CHECK: !DILocalVariable(name: "i", arg: 1 +!12 = !DILocalVariable(name: "i", arg: 1, scope: !7, file: !1, line: 2, type: !10) +!13 = !DIExpression() +!14 = !DILocation(line: 2, column: 42, scope: !7) +!15 = !DILocation(line: 2, column: 54, scope: !7) +!16 = !DILocation(line: 2, column: 47, scope: !7) +!17 = distinct !DISubprogram(name: "g", scope: !1, file: !1, 
line: 3, type: !8, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !18) +!18 = !{!19} +; CHECK: !DILocalVariable(name: "j", arg: 1 +!19 = !DILocalVariable(name: "j", arg: 1, scope: !17, file: !1, line: 3, type: !10) +!20 = !DILocation(line: 3, column: 42, scope: !17) +!21 = !DILocation(line: 3, column: 54, scope: !17) +!22 = !DILocation(line: 3, column: 47, scope: !17) diff --git a/llvm/test/Verifier/RemoveDI/invalid-disubrange-count-node.ll b/llvm/test/Verifier/RemoveDI/invalid-disubrange-count-node.ll new file mode 100644 index 00000000000000..f36cee5946e473 --- /dev/null +++ b/llvm/test/Verifier/RemoveDI/invalid-disubrange-count-node.ll @@ -0,0 +1,36 @@ +; RUN: llvm-as < %s -disable-output 2>&1 | FileCheck %s + +define void @foo(i32 %n) { +entry: + %0 = zext i32 %n to i64 + %vla = alloca i32, i64 %0, align 16 + #dbg_declare(ptr %vla, !19, !DIExpression(), !18) + ret void +} + + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 5.0.1", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "vla.c", directory: "/path/to") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 5.0.1"} +!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 20, type: !8, isLocal: false, isDefinition: true, scopeLine: 20, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !11) +!8 = !DISubroutineType(types: !9) +!9 = !{null, !10} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !{!16, !19} +!12 = !DIExpression() +!16 = !DILocalVariable(name: "vla_expr", scope: !7, file: !1, line: 21, type: !17) +!17 = !DIBasicType(name: "long unsigned int", size: 64, encoding: DW_ATE_unsigned) +!18 = 
!DILocation(line: 21, column: 7, scope: !7) +!19 = !DILocalVariable(name: "vla", scope: !7, file: !1, line: 21, type: !20) +!20 = !DICompositeType(tag: DW_TAG_array_type, baseType: !10, align: 32, elements: !21) +!21 = !{!22} +; CHECK: Count must be signed constant or DIVariable or DIExpression +!22 = !DISubrange(count: !17) diff --git a/llvm/test/Verifier/RemoveDI/llvm.dbg.declare-address.ll b/llvm/test/Verifier/RemoveDI/llvm.dbg.declare-address.ll new file mode 100644 index 00000000000000..9d400b892ce8c1 --- /dev/null +++ b/llvm/test/Verifier/RemoveDI/llvm.dbg.declare-address.ll @@ -0,0 +1,16 @@ +; RUN: llvm-as -disable-output <%s 2>&1 | FileCheck %s +; CHECK: invalid #dbg record address/value +; CHECK-NEXT: #dbg_declare({{.*}}) +; CHECK-NEXT: !"" +; CHECK: warning: ignoring invalid debug info + +define void @foo(i32 %a) { +entry: + %s = alloca i32 + #dbg_declare(!"", !DILocalVariable(scope: !1), !DIExpression(), !DILocation(scope: !1)) + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = distinct !DISubprogram() diff --git a/llvm/test/Verifier/RemoveDI/llvm.dbg.declare-expression.ll b/llvm/test/Verifier/RemoveDI/llvm.dbg.declare-expression.ll new file mode 100644 index 00000000000000..b52c15cb3f8816 --- /dev/null +++ b/llvm/test/Verifier/RemoveDI/llvm.dbg.declare-expression.ll @@ -0,0 +1,16 @@ +; RUN: llvm-as -disable-output <%s 2>&1 | FileCheck %s +; CHECK: invalid #dbg record expression +; CHECK-NEXT: #dbg_declare({{.*}}) +; CHECK-NEXT: !{} +; CHECK: warning: ignoring invalid debug info + +define void @foo(i32 %a) { +entry: + %s = alloca i32 + #dbg_declare(ptr %s, !DILocalVariable(scope: !1), !{}, !DILocation(scope: !1)) + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = distinct !DISubprogram() diff --git a/llvm/test/Verifier/RemoveDI/llvm.dbg.declare-variable.ll b/llvm/test/Verifier/RemoveDI/llvm.dbg.declare-variable.ll new file mode 100644 index 
00000000000000..db2b0e0a54e2bd --- /dev/null +++ b/llvm/test/Verifier/RemoveDI/llvm.dbg.declare-variable.ll @@ -0,0 +1,17 @@ +; RUN: llvm-as -disable-output <%s 2>&1 | FileCheck %s +; CHECK: invalid #dbg record variable +; CHECK-NEXT: #dbg_declare({{.*}}) +; CHECK-NEXT: !{} +; CHECK: warning: ignoring invalid debug info + +define void @foo(i32 %a) { +entry: + %s = alloca i32 + #dbg_declare(ptr %s, !{}, !DIExpression(), !DILocation(scope: !1)) + ret void +} + + +!llvm.module.flags = !{!0} +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = distinct !DISubprogram() diff --git a/llvm/test/Verifier/RemoveDI/llvm.dbg.intrinsic-dbg-attachment.ll b/llvm/test/Verifier/RemoveDI/llvm.dbg.intrinsic-dbg-attachment.ll new file mode 100644 index 00000000000000..1839821ab14070 --- /dev/null +++ b/llvm/test/Verifier/RemoveDI/llvm.dbg.intrinsic-dbg-attachment.ll @@ -0,0 +1,55 @@ +; RUN: llvm-as -disable-output <%s 2>&1 | FileCheck %s +define void @foo() { +entry: + #dbg_value( + ptr undef, + !DILocalVariable(scope: !1), + !DIExpression(), + !{}) +; CHECK-LABEL: invalid #dbg record DILocation +; CHECK-NEXT: #dbg_value({{.*}}) + + #dbg_declare( + ptr undef, + !DILocalVariable(scope: !1), + !DIExpression(), + !{}) +; CHECK-LABEL: invalid #dbg record DILocation +; CHECK-NEXT: #dbg_declare({{.*}}) + + #dbg_value( + ptr undef, + !DILocalVariable(scope: !1), + !DIExpression(), + !DILocation(scope: !2)) +; CHECK-LABEL: mismatched subprogram between #dbg record variable and DILocation +; CHECK-NEXT: #dbg_value({{[^,]+}}, ![[VAR:[0-9]+]], {{[^,]+}}, ![[LOC:[0-9]+]] +; CHECK-NEXT: label %entry +; CHECK-NEXT: ptr @foo +; CHECK-NEXT: ![[VAR]] = !DILocalVariable({{.*}}scope: ![[VARSP:[0-9]+]] +; CHECK-NEXT: ![[VARSP]] = distinct !DISubprogram( +; CHECK-NEXT: ![[LOC]] = !DILocation({{.*}}scope: ![[LOCSP:[0-9]+]] +; CHECK-NEXT: ![[LOCSP]] = distinct !DISubprogram( + + #dbg_declare( + ptr undef, + !DILocalVariable(scope: !1), + !DIExpression(), + !DILocation(scope: !2)) +; CHECK-LABEL: mismatched 
subprogram between #dbg record variable and DILocation +; CHECK-NEXT: #dbg_declare({{[^,]+}}, ![[VAR:[0-9]+]], {{.*[^,]+}}, ![[LOC:[0-9]+]] +; CHECK-NEXT: label %entry +; CHECK-NEXT: ptr @foo +; CHECK-NEXT: ![[VAR]] = !DILocalVariable({{.*}}scope: ![[VARSP:[0-9]+]] +; CHECK-NEXT: ![[VARSP]] = distinct !DISubprogram( +; CHECK-NEXT: ![[LOC]] = !DILocation({{.*}}scope: ![[LOCSP:[0-9]+]] +; CHECK-NEXT: ![[LOCSP]] = distinct !DISubprogram( + + ret void +} + + +!llvm.module.flags = !{!0} +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = distinct !DISubprogram(name: "foo") +!2 = distinct !DISubprogram(name: "bar") diff --git a/llvm/test/Verifier/RemoveDI/llvm.dbg.value-expression.ll b/llvm/test/Verifier/RemoveDI/llvm.dbg.value-expression.ll new file mode 100644 index 00000000000000..cbd93c1ce6a4d6 --- /dev/null +++ b/llvm/test/Verifier/RemoveDI/llvm.dbg.value-expression.ll @@ -0,0 +1,16 @@ +; RUN: llvm-as -disable-output <%s 2>&1 | FileCheck %s +; CHECK: invalid #dbg record expression +; CHECK-NEXT: #dbg_value({{.*}}) +; CHECK-NEXT: !{} +; CHECK: warning: ignoring invalid debug info + +define void @foo(i32 %a) { +entry: + %s = alloca i32 + #dbg_value(ptr %s, !DILocalVariable(scope: !1), !{}, !DILocation(scope: !1)) + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = distinct !DISubprogram() diff --git a/llvm/test/Verifier/RemoveDI/llvm.dbg.value-value.ll b/llvm/test/Verifier/RemoveDI/llvm.dbg.value-value.ll new file mode 100644 index 00000000000000..b6fcde250526be --- /dev/null +++ b/llvm/test/Verifier/RemoveDI/llvm.dbg.value-value.ll @@ -0,0 +1,17 @@ +; RUN: llvm-as -disable-output <%s 2>&1 | FileCheck %s +; CHECK: invalid #dbg record address/value +; CHECK-NEXT: #dbg_value({{.*}}) +; CHECK-NEXT: !"" +; CHECK: warning: ignoring invalid debug info + +define void @foo(i32 %a) { +entry: + %s = alloca i32 + #dbg_value(!"", !DILocalVariable(scope: !1), !DIExpression(), !DILocation(scope: !1)) + ret void +} + + +!llvm.module.flags = 
!{!0} +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = distinct !DISubprogram() diff --git a/llvm/test/Verifier/RemoveDI/llvm.dbg.value-variable.ll b/llvm/test/Verifier/RemoveDI/llvm.dbg.value-variable.ll new file mode 100644 index 00000000000000..0a5fe79453d721 --- /dev/null +++ b/llvm/test/Verifier/RemoveDI/llvm.dbg.value-variable.ll @@ -0,0 +1,17 @@ +; RUN: llvm-as -disable-output <%s 2>&1 | FileCheck %s +; CHECK: invalid #dbg record variable +; CHECK-NEXT: #dbg_value({{.*}}) +; CHECK-NEXT: !{} +; CHECK: warning: ignoring invalid debug info + +define void @foo(i32 %a) { +entry: + %s = alloca i32 + #dbg_value(ptr %s, !{}, !DIExpression(), !DILocation(scope: !1)) + ret void +} + + +!llvm.module.flags = !{!0} +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = distinct !DISubprogram() diff --git a/llvm/test/Verifier/RemoveDI/set1.ll b/llvm/test/Verifier/RemoveDI/set1.ll new file mode 100644 index 00000000000000..d54ba8876c366c --- /dev/null +++ b/llvm/test/Verifier/RemoveDI/set1.ll @@ -0,0 +1,62 @@ +; RUN: llvm-as -disable-output <%s 2>&1 | FileCheck %s + +define void @Main__Test() #0 !dbg !17 { +entry: + %as = alloca i64, align 8 + %bs = alloca i64, align 8 + br label %second, !dbg !21 + +second: ; preds = %entry + #dbg_declare(ptr %as, !22, !DIExpression(), !25) + #dbg_declare(ptr %bs, !26, !DIExpression(), !25) + store i64 36028797018972298, ptr %as, align 8, !dbg !28 + store i64 85, ptr %bs, align 8, !dbg !29 + ret void, !dbg !21 +} + +; Function Attrs: nofree nosync nounwind readnone speculatable willreturn + +!llvm.ident = !{!0} +!llvm.dbg.cu = !{!1} +!llvm.module.flags = !{!14, !15, !16} + +!0 = !{!"versions- cm3: d5.10.0 llvm: 12.0"} +!1 = distinct !DICompileUnit(language: DW_LANG_Modula3, file: !2, producer: "cm3", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !3) +!2 = !DIFile(filename: "Main.m3", directory: "/home/peter/cm3/settest/src") +!3 = !{!4} +!4 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "Enum", scope: !2, 
file: !2, line: 11, size: 8, align: 8, elements: !5) +!5 = !{!6, !7, !8, !9, !10, !11, !12, !13} +!6 = !DIEnumerator(name: "alpha", value: 0) +!7 = !DIEnumerator(name: "beta", value: 1) +!8 = !DIEnumerator(name: "gamma", value: 2) +!9 = !DIEnumerator(name: "delta", value: 3) +!10 = !DIEnumerator(name: "epsilon", value: 4) +!11 = !DIEnumerator(name: "theta", value: 5) +!12 = !DIEnumerator(name: "psi", value: 6) +!13 = !DIEnumerator(name: "zeta", value: 7) +!14 = !{i64 2, !"Dwarf Version", i64 4} +!15 = !{i64 2, !"Debug Info Version", i64 3} +!16 = !{i64 2, !"wchar_size", i64 2} +!17 = distinct !DISubprogram(name: "Test", linkageName: "Main__Test", scope: !2, file: !2, line: 11, type: !18, scopeLine: 11, spFlags: DISPFlagDefinition, unit: !1, retainedNodes: !20) +!18 = !DISubroutineType(types: !19) +!19 = !{null} +!20 = !{} +!21 = !DILocation(line: 20, scope: !17) +!22 = !DILocalVariable(name: "as", scope: !17, file: !2, line: 11, type: !23) +; CHECK: invalid set base type +!23 = !DIDerivedType(tag: DW_TAG_set_type, name: "SS", scope: !2, file: !2, line: 11, baseType: !24, size: 64, align: 64) +!24 = !DIBasicType(name: "SR", size: 8, encoding: DW_ATE_signed) +!25 = !DILocation(line: 11, scope: !17) +!26 = !DILocalVariable(name: "bs", scope: !17, file: !2, line: 11, type: !27) +!27 = !DIDerivedType(tag: DW_TAG_set_type, name: "ST", scope: !2, file: !2, line: 11, baseType: !23, size: 64, align: 64) +!28 = !DILocation(line: 17, scope: !17) +!29 = !DILocation(line: 18, scope: !17) +!30 = distinct !DISubprogram(name: "Main_M3", linkageName: "Main_M3", scope: !2, file: !2, line: 22, type: !31, scopeLine: 22, spFlags: DISPFlagDefinition, unit: !1, retainedNodes: !20) +!31 = !DISubroutineType(types: !32) +!32 = !{!33, !35} +!33 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "ADDR", baseType: !34, size: 64, align: 64) +!34 = !DICompositeType(tag: DW_TAG_class_type, name: "ADDR__HeapObject", scope: !2, file: !2, line: 22, size: 64, align: 64, elements: !19, identifier: 
"AJWxb1") +!35 = !DIBasicType(name: "INTEGER", size: 64, encoding: DW_ATE_signed) +!36 = !DILocation(line: 23, scope: !30) +!37 = !DILocalVariable(name: "mode", arg: 1, scope: !30, file: !2, line: 22, type: !35) +!38 = !DILocation(line: 22, scope: !30) diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll new file mode 100644 index 00000000000000..8457bf7dc40a2e --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt < %s -S | FileCheck %s + +; The assumption underlying this test is that there are pre-existing check lines +; but something has changed, and we would like to avoid needless changes of +; meta variable names so that diffs end up being easier to read, e.g. avoid +; changing X_I33 into X_I34 or renumbering the various TMP variables. 
+ +define i32 @func({i32, i32} %x, i32 %y) { +; CHECK-LABEL: define i32 @func( +; CHECK-SAME: { i32, i32 } [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[X_I33:%.*]] = extractvalue { i32, i32 } [[X]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X_I33]], [[Y]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], 3 +; CHECK-NEXT: ret i32 [[TMP2]] +; + %x.i34 = extractvalue {i32, i32} %x, 0 + %1 = add i32 %y, 1 + %2 = add i32 %x.i34, %1 + %3 = mul i32 %2, 3 + ret i32 %3 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected new file mode 100644 index 00000000000000..3549a4d76aa762 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt < %s -S | FileCheck %s + +; The assumption underlying this test is that there are pre-existing check lines +; but something has changed, and we would like to avoid needless changes of +; meta variable names so that diffs end up being easier to read, e.g. avoid +; changing X_I33 into X_I34 or renumbering the various TMP variables. 
+ +define i32 @func({i32, i32} %x, i32 %y) { +; CHECK-LABEL: define i32 @func( +; CHECK-SAME: { i32, i32 } [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[X_I33:%.*]] = extractvalue { i32, i32 } [[X]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[Y]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X_I33]], [[TMP3]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], 3 +; CHECK-NEXT: ret i32 [[TMP2]] +; + %x.i34 = extractvalue {i32, i32} %x, 0 + %1 = add i32 %y, 1 + %2 = add i32 %x.i34, %1 + %3 = mul i32 %2, 3 + ret i32 %3 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected.reset b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected.reset new file mode 100644 index 00000000000000..5142e3ed32ba45 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values.ll.expected.reset @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt < %s -S | FileCheck %s + +; The assumption underlying this test is that there are pre-existing check lines +; but something has changed, and we would like to avoid needless changes of +; meta variable names so that diffs end up being easier to read, e.g. avoid +; changing X_I33 into X_I34 or renumbering the various TMP variables. 
+ +define i32 @func({i32, i32} %x, i32 %y) { +; CHECK-LABEL: define i32 @func( +; CHECK-SAME: { i32, i32 } [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[X_I34:%.*]] = extractvalue { i32, i32 } [[X]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[Y]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[X_I34]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 3 +; CHECK-NEXT: ret i32 [[TMP3]] +; + %x.i34 = extractvalue {i32, i32} %x, 0 + %1 = add i32 %y, 1 + %2 = add i32 %x.i34, %1 + %3 = mul i32 %2, 3 + ret i32 %3 +} diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values2.ll b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values2.ll new file mode 100644 index 00000000000000..d05c26241f87c1 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values2.ll @@ -0,0 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt < %s -S | FileCheck %s + +define i32 @func(i32 %x) { +; CHECK-LABEL: define i32 @func( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @foo(i1 [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[X]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @foo(i1 [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 [[TMP2]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[X]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @foo(i1 [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP6]] +; CHECK-NEXT: ret i32 [[TMP10]] +; + %1 = icmp eq i32 %x, 0 + %2 = call i32 @foo(i1 %1) + + %3 = icmp eq i32 %x, 2 + %4 = call i32 @foo(i1 %3) + %5 = icmp ne i32 %4, 0 + %6 = select i1 %5, i32 %4, i32 %2 + + ret i32 %6 +} + +declare i32 @foo(i1) diff --git 
a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values2.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values2.ll.expected new file mode 100644 index 00000000000000..6311a55a1f9de1 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values2.ll.expected @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt < %s -S | FileCheck %s + +define i32 @func(i32 %x) { +; CHECK-LABEL: define i32 @func( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @foo(i1 [[TMP1]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[X]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @foo(i1 [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP6]] +; CHECK-NEXT: ret i32 [[TMP10]] +; + %1 = icmp eq i32 %x, 0 + %2 = call i32 @foo(i1 %1) + + %3 = icmp eq i32 %x, 2 + %4 = call i32 @foo(i1 %3) + %5 = icmp ne i32 %4, 0 + %6 = select i1 %5, i32 %4, i32 %2 + + ret i32 %6 +} + +declare i32 @foo(i1) diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll new file mode 100644 index 00000000000000..a4f4fc67f78d3f --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt < %s -S | FileCheck %s + +define i32 @func(i32 %x) { +; CHECK-LABEL: define i32 @func( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @foo(i1 [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[X]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @foo(i1 [[TMP3]]) +; CHECK-NEXT: 
[[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 [[TMP2]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[X]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @foo(i1 [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP6]] +; CHECK-NEXT: ret i32 [[TMP10]] +; + %1 = icmp eq i32 %x, 0 + %2 = call i32 @foo(i1 %1) + + %3 = icmp eq i32 %x, 2 + %4 = call i32 @foo(i1 %3) + %5 = icmp ne i32 %4, 0 + %6 = select i1 %5, i32 %4, i32 %2 + + %7 = icmp eq i32 %x, 1 + %8 = call i32 @foo(i1 %7) + %9 = icmp ne i32 %8, 0 + %10 = select i1 %9, i32 %8, i32 %6 + + ret i32 %10 +} + +declare i32 @foo(i1) diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll.expected new file mode 100644 index 00000000000000..08d3c22172ee3f --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values3.ll.expected @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt < %s -S | FileCheck %s + +define i32 @func(i32 %x) { +; CHECK-LABEL: define i32 @func( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @foo(i1 [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[X]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @foo(i1 [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 [[TMP2]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[X]], 1 +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @foo(i1 [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP6]] +; CHECK-NEXT: ret i32 [[TMP10]] +; + %1 = icmp eq i32 %x, 0 + %2 = call i32 @foo(i1 
%1) + + %3 = icmp eq i32 %x, 2 + %4 = call i32 @foo(i1 %3) + %5 = icmp ne i32 %4, 0 + %6 = select i1 %5, i32 %4, i32 %2 + + %7 = icmp eq i32 %x, 1 + %8 = call i32 @foo(i1 %7) + %9 = icmp ne i32 %8, 0 + %10 = select i1 %9, i32 %8, i32 %6 + + ret i32 %10 +} + +declare i32 @foo(i1) diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values4.ll b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values4.ll new file mode 100644 index 00000000000000..e3d8452f963101 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values4.ll @@ -0,0 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt < %s -S | FileCheck %s + +; A test that hits the quadratic runtime prevention in the diff algorithm and +; a more complex case of name conflict avoidance. + +define i32 @func(i32 %x) { +; CHECK-LABEL: define i32 @func( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[X]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @foo(i32 [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @foo(i32 [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @foo(i32 [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @foo(i32 [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @foo(i32 [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @foo(i32 [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = call i32 @foo(i32 [[TMP8]]) +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @foo(i32 [[TMP9]]) +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @foo(i32 [[TMP10]]) +; CHECK-NEXT: [[TMP12:%.*]] = call i32 @foo(i32 [[TMP11]]) +; CHECK-NEXT: ret i32 [[TMP12]] +; + %1 = mul i32 %x, 3 + %2 = call i32 @foo(i32 %1) + %3 = call i32 @foo(i32 %2) + %4 = call i32 @foo(i32 %3) + %5 = call i32 @foo(i32 %4) + %6 = call i32 @foo(i32 %5) + %7 = call i32 @foo(i32 %6) + %8 = xor i32 %7, 1 + %9 = call i32 @foo(i32 %8) + %10 = add i32 %9, 1 + %11 = call i32 
@foo(i32 %10) + %12 = call i32 @foo(i32 %11) + %13 = call i32 @foo(i32 %12) + + ret i32 %13 +} + +declare i32 @foo(i1) diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values4.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values4.ll.expected new file mode 100644 index 00000000000000..e3fa51598c48e3 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values4.ll.expected @@ -0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt < %s -S | FileCheck %s + +; A test that hits the quadratic runtime prevention in the diff algorithm and +; a more complex case of name conflict avoidance. + +define i32 @func(i32 %x) { +; CHECK-LABEL: define i32 @func( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[X]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @foo(i32 [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @foo(i32 [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @foo(i32 [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @foo(i32 [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @foo(i32 [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @foo(i32 [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = xor i32 [[TMP7]], 1 +; CHECK-NEXT: [[TMP13:%.*]] = call i32 @foo(i32 [[TMP8]]) +; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP13]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @foo(i32 [[TMP9]]) +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @foo(i32 [[TMP10]]) +; CHECK-NEXT: [[TMP12:%.*]] = call i32 @foo(i32 [[TMP11]]) +; CHECK-NEXT: ret i32 [[TMP12]] +; + %1 = mul i32 %x, 3 + %2 = call i32 @foo(i32 %1) + %3 = call i32 @foo(i32 %2) + %4 = call i32 @foo(i32 %3) + %5 = call i32 @foo(i32 %4) + %6 = call i32 @foo(i32 %5) + %7 = call i32 @foo(i32 %6) + %8 = xor i32 %7, 1 + %9 = call i32 @foo(i32 %8) + %10 = add i32 %9, 1 + %11 = call i32 @foo(i32 %10) + %12 = call i32 @foo(i32 %11) + %13 = call i32 @foo(i32 %12) + + ret i32 
%13 +} + +declare i32 @foo(i1) diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values.test b/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values.test new file mode 100644 index 00000000000000..4dfaf5d25c8a69 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values.test @@ -0,0 +1,5 @@ +# RUN: cp -f %S/Inputs/stable_ir_values.ll %t.ll && %update_test_checks %t.ll +# RUN: diff -u %t.ll %S/Inputs/stable_ir_values.ll.expected +# Now test that we can reset all the names +# RUN: %update_test_checks %t.ll --reset-variable-names +# RUN: diff -u %t.ll %S/Inputs/stable_ir_values.ll.expected.reset diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values2.test b/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values2.test new file mode 100644 index 00000000000000..3cebcd52f00521 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values2.test @@ -0,0 +1,2 @@ +# RUN: cp -f %S/Inputs/stable_ir_values2.ll %t.ll && %update_test_checks %t.ll +# RUN: diff -u %t.ll %S/Inputs/stable_ir_values2.ll.expected diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values3.test b/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values3.test new file mode 100644 index 00000000000000..83bc80128541f3 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values3.test @@ -0,0 +1,2 @@ +# RUN: cp -f %S/Inputs/stable_ir_values3.ll %t.ll && %update_test_checks %t.ll +# RUN: diff -u %t.ll %S/Inputs/stable_ir_values3.ll.expected diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values4.test b/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values4.test new file mode 100644 index 00000000000000..89f252f8078064 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values4.test @@ -0,0 +1,2 @@ +# RUN: cp -f 
%S/Inputs/stable_ir_values4.ll %t.ll && %update_test_checks %t.ll +# RUN: diff -u %t.ll %S/Inputs/stable_ir_values4.ll.expected diff --git a/llvm/test/tools/llvm-readobj/ELF/unwind.test b/llvm/test/tools/llvm-readobj/ELF/unwind.test index 2deb1a587d2438..2e51ec2a61a637 100644 --- a/llvm/test/tools/llvm-readobj/ELF/unwind.test +++ b/llvm/test/tools/llvm-readobj/ELF/unwind.test @@ -96,9 +96,9 @@ # CHECK: Program: # CHECK-NEXT: DW_CFA_def_cfa_offset: +16 -# CHECK-NEXT: DW_CFA_advance_loc: 6 +# CHECK-NEXT: DW_CFA_advance_loc: 6 to 0x4004a6 # CHECK-NEXT: DW_CFA_def_cfa_offset: +24 -# CHECK-NEXT: DW_CFA_advance_loc: 10 +# CHECK-NEXT: DW_CFA_advance_loc: 10 to 0x4004b0 # CHECK-NEXT: DW_CFA_def_cfa_expression: DW_OP_breg7 +8, DW_OP_breg16 +0, DW_OP_lit15, DW_OP_and, DW_OP_lit11, DW_OP_ge, DW_OP_lit3, DW_OP_shl, DW_OP_plus # CHECK-NEXT: DW_CFA_nop: # CHECK-NEXT: DW_CFA_nop: @@ -110,12 +110,12 @@ # CHECK-NEXT: address_range: 0x10 (end : 0x4005c6) # CHECK: Program: -# CHECK-NEXT: DW_CFA_advance_loc: 1 +# CHECK-NEXT: DW_CFA_advance_loc: 1 to 0x4005b7 # CHECK-NEXT: DW_CFA_def_cfa_offset: +16 # CHECK-NEXT: DW_CFA_offset: reg6 -16 -# CHECK-NEXT: DW_CFA_advance_loc: 3 +# CHECK-NEXT: DW_CFA_advance_loc: 3 to 0x4005ba # CHECK-NEXT: DW_CFA_def_cfa_register: reg6 -# CHECK-NEXT: DW_CFA_advance_loc: 11 +# CHECK-NEXT: DW_CFA_advance_loc: 11 to 0x4005c5 # CHECK-NEXT: DW_CFA_def_cfa: reg7 +8 # CHECK-NEXT: DW_CFA_nop: # CHECK-NEXT: DW_CFA_nop: @@ -126,15 +126,15 @@ # CHECK-NEXT: address_range: 0xc7f (end : 0x40124f) # CHECK: Program: -# CHECK-NEXT: DW_CFA_advance_loc: 5 +# CHECK-NEXT: DW_CFA_advance_loc: 5 to 0x4005d5 # CHECK-NEXT: DW_CFA_def_cfa: reg10 +0 -# CHECK-NEXT: DW_CFA_advance_loc: 9 +# CHECK-NEXT: DW_CFA_advance_loc: 9 to 0x4005de # CHECK-NEXT: DW_CFA_expression: reg6 DW_OP_breg6 +0 -# CHECK-NEXT: DW_CFA_advance_loc: 5 +# CHECK-NEXT: DW_CFA_advance_loc: 5 to 0x4005e3 # CHECK-NEXT: DW_CFA_def_cfa_expression: DW_OP_breg6 -8, DW_OP_deref -# CHECK-NEXT: DW_CFA_advance_loc2: 3174 +# 
CHECK-NEXT: DW_CFA_advance_loc2: 3174 to 0x401249 # CHECK-NEXT: DW_CFA_def_cfa: reg10 +0 -# CHECK-NEXT: DW_CFA_advance_loc: 5 +# CHECK-NEXT: DW_CFA_advance_loc: 5 to 0x40124e # CHECK-NEXT: DW_CFA_def_cfa: reg7 +8 # CHECK-NEXT: DW_CFA_nop: # CHECK-NEXT: DW_CFA_nop: @@ -146,21 +146,21 @@ # CHECK-NEXT: address_range: 0x66 (end : 0x4012b6) # CHECK: Program: -# CHECK-NEXT: DW_CFA_advance_loc: 1 +# CHECK-NEXT: DW_CFA_advance_loc: 1 to 0x401251 # CHECK-NEXT: DW_CFA_def_cfa_offset: +16 # CHECK-NEXT: DW_CFA_offset: reg6 -16 -# CHECK-NEXT: DW_CFA_advance_loc: 3 +# CHECK-NEXT: DW_CFA_advance_loc: 3 to 0x401254 # CHECK-NEXT: DW_CFA_def_cfa_register: reg6 -# CHECK-NEXT: DW_CFA_advance_loc: 2 +# CHECK-NEXT: DW_CFA_advance_loc: 2 to 0x401256 # CHECK-NEXT: DW_CFA_offset: reg15 -24 -# CHECK-NEXT: DW_CFA_advance_loc: 5 +# CHECK-NEXT: DW_CFA_advance_loc: 5 to 0x40125b # CHECK-NEXT: DW_CFA_offset: reg14 -32 -# CHECK-NEXT: DW_CFA_advance_loc: 7 +# CHECK-NEXT: DW_CFA_advance_loc: 7 to 0x401262 # CHECK-NEXT: DW_CFA_offset: reg13 -40 # CHECK-NEXT: DW_CFA_offset: reg12 -48 -# CHECK-NEXT: DW_CFA_advance_loc: 8 +# CHECK-NEXT: DW_CFA_advance_loc: 8 to 0x40126a # CHECK-NEXT: DW_CFA_offset: reg3 -56 -# CHECK-NEXT: DW_CFA_advance_loc1: 75 +# CHECK-NEXT: DW_CFA_advance_loc1: 75 to 0x4012b5 # CHECK-NEXT: DW_CFA_def_cfa: reg7 +8 # CHECK-NEXT: DW_CFA_nop: # CHECK-NEXT: DW_CFA_nop: diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp index f0b8310a32efd3..09b2a5900eb0b7 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp @@ -21,6 +21,7 @@ #include "llvm/ExecutionEngine/Orc/Debugging/DebugInfoSupport.h" #include "llvm/ExecutionEngine/Orc/Debugging/DebuggerSupportPlugin.h" #include "llvm/ExecutionEngine/Orc/Debugging/PerfSupportPlugin.h" +#include "llvm/ExecutionEngine/Orc/Debugging/VTuneSupportPlugin.h" #include "llvm/ExecutionEngine/Orc/ELFNixPlatform.h" #include 
"llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h" #include "llvm/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.h" @@ -34,6 +35,7 @@ #include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h" #include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h" #include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h" +#include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderVTune.h" #include "llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -148,6 +150,10 @@ static cl::opt PerfSupport("perf-support", cl::init(false), cl::Hidden, cl::cat(JITLinkCategory)); +static cl::opt VTuneSupport("vtune-support", + cl::desc("Enable vtune profiling support"), + cl::init(false), cl::Hidden, + cl::cat(JITLinkCategory)); static cl::opt NoProcessSymbols("no-process-syms", cl::desc("Do not resolve to llvm-jitlink process symbols"), @@ -264,7 +270,10 @@ static LLVM_ATTRIBUTE_USED void linkComponents() { << (void *)&llvm_orc_registerJITLoaderGDBAllocAction << '\n' << (void *)&llvm_orc_registerJITLoaderPerfStart << '\n' << (void *)&llvm_orc_registerJITLoaderPerfEnd << '\n' - << (void *)&llvm_orc_registerJITLoaderPerfImpl << '\n'; + << (void *)&llvm_orc_registerJITLoaderPerfImpl << '\n' + << (void *)&llvm_orc_registerVTuneImpl << '\n' + << (void *)&llvm_orc_unregisterVTuneImpl << '\n' + << (void *)&llvm_orc_test_registerVTuneImpl << '\n'; } static bool UseTestResultOverride = false; @@ -1004,6 +1013,14 @@ Session::Session(std::unique_ptr EPC, Error &Err) this->ES.getExecutorProcessControl(), *ProcessSymsJD, true, true))); } + if (VTuneSupport && TT.isOSBinFormatELF()) { + ObjLayer.addPlugin(ExitOnErr(DebugInfoPreservationPlugin::Create())); + ObjLayer.addPlugin(ExitOnErr( + VTuneSupportPlugin::Create(this->ES.getExecutorProcessControl(), + *ProcessSymsJD, /*EmitDebugInfo=*/true, + /*TestMode=*/true))); + } + // Set up the platform. 
if (!OrcRuntime.empty()) { assert(ProcessSymsJD && "ProcessSymsJD should have been set"); diff --git a/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h b/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h index 687d97abd0232d..2e89463e68d519 100644 --- a/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h +++ b/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h @@ -196,6 +196,7 @@ void PrinterContext::printEHFrame(const Elf_Shdr *EHFrameShdr) const { reportError(std::move(E), ObjF.getFileName()); for (const dwarf::FrameEntry &Entry : EHFrame) { + std::optional InitialLocation; if (const dwarf::CIE *CIE = dyn_cast(&Entry)) { W.startLine() << format("[0x%" PRIx64 "] CIE length=%" PRIu64 "\n", Address + CIE->getOffset(), CIE->getLength()); @@ -214,8 +215,9 @@ void PrinterContext::printEHFrame(const Elf_Shdr *EHFrameShdr) const { Address + FDE->getLinkedCIE()->getOffset()); W.indent(); + InitialLocation = FDE->getInitialLocation(); W.startLine() << format("initial_location: 0x%" PRIx64 "\n", - FDE->getInitialLocation()); + *InitialLocation); W.startLine() << format( "address_range: 0x%" PRIx64 " (end : 0x%" PRIx64 ")\n", FDE->getAddressRange(), @@ -227,7 +229,8 @@ void PrinterContext::printEHFrame(const Elf_Shdr *EHFrameShdr) const { W.indent(); auto DumpOpts = DIDumpOptions(); DumpOpts.IsEH = true; - Entry.cfis().dump(W.getOStream(), DumpOpts, W.getIndentLevel()); + Entry.cfis().dump(W.getOStream(), DumpOpts, W.getIndentLevel(), + InitialLocation); W.unindent(); W.unindent(); W.getOStream() << "\n"; diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp b/llvm/unittests/Analysis/ValueTrackingTest.cpp index 9e0abe7a16df98..6c6897d83a256e 100644 --- a/llvm/unittests/Analysis/ValueTrackingTest.cpp +++ b/llvm/unittests/Analysis/ValueTrackingTest.cpp @@ -2359,6 +2359,20 @@ TEST_F(ComputeKnownBitsTest, ComputeKnownBitsFreeze) { EXPECT_EQ(Known.One.getZExtValue(), 0u); } +TEST_F(ComputeKnownBitsTest, ComputeKnownBitsReturnedRangeConflict) { + parseAssembly( + "declare i16 @foo(i16 returned)\n" + 
"\n" + "define i16 @test() {\n" + " %A = call i16 @foo(i16 4095), !range !{i16 32, i16 33}\n" + " ret i16 %A\n" + "}\n"); + // The call returns 32 according to range metadata, but 4095 according to the + // returned arg operand. Given the conflicting information we expect that the + // known bits information simply is cleared. + expectKnownBits(/*zero*/ 0u, /*one*/ 0u); +} + TEST_F(ComputeKnownBitsTest, ComputeKnownBitsAddWithRange) { parseAssembly("define void @test(ptr %p) {\n" " %A = load i64, ptr %p, !range !{i64 64, i64 65536}\n" diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp index 73837279701a97..33155d2c9a9642 100644 --- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp @@ -147,9 +147,9 @@ TEST_F(AArch64GISelMITest, LowerRotatesVector) { LLT S32 = LLT::scalar(32); LLT V4S32 = LLT::fixed_vector(4, S32); auto SrcTrunc = B.buildTrunc(S32, Copies[0]); - auto Src = B.buildSplatVector(V4S32, SrcTrunc); + auto Src = B.buildSplatBuildVector(V4S32, SrcTrunc); auto AmtTrunc = B.buildTrunc(S32, Copies[1]); - auto Amt = B.buildSplatVector(V4S32, AmtTrunc); + auto Amt = B.buildSplatBuildVector(V4S32, AmtTrunc); auto ROTR = B.buildInstr(TargetOpcode::G_ROTR, {V4S32}, {Src, Amt}); AInfo Info(MF->getSubtarget()); diff --git a/llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp b/llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp index f52e49df0bcdee..59a86fa5646f36 100644 --- a/llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp @@ -61,7 +61,7 @@ TEST_F(AArch64GISelMITest, MatchIntConstantSplat) { LLT v4s64 = LLT::fixed_vector(4, s64); MachineInstrBuilder FortyTwoSplat = - B.buildSplatVector(v4s64, B.buildConstant(s64, 42)); + B.buildSplatBuildVector(v4s64, B.buildConstant(s64, 42)); int64_t Cst; EXPECT_TRUE(mi_match(FortyTwoSplat.getReg(0), *MRI, 
m_ICstOrSplat(Cst))); EXPECT_EQ(Cst, 42); @@ -625,7 +625,7 @@ TEST_F(AArch64GISelMITest, MatchSpecificConstantSplat) { LLT v4s64 = LLT::fixed_vector(4, s64); MachineInstrBuilder FortyTwoSplat = - B.buildSplatVector(v4s64, B.buildConstant(s64, 42)); + B.buildSplatBuildVector(v4s64, B.buildConstant(s64, 42)); MachineInstrBuilder FortyTwo = B.buildConstant(s64, 42); EXPECT_TRUE(mi_match(FortyTwoSplat.getReg(0), *MRI, m_SpecificICstSplat(42))); @@ -655,7 +655,7 @@ TEST_F(AArch64GISelMITest, MatchSpecificConstantOrSplat) { LLT v4s64 = LLT::fixed_vector(4, s64); MachineInstrBuilder FortyTwoSplat = - B.buildSplatVector(v4s64, B.buildConstant(s64, 42)); + B.buildSplatBuildVector(v4s64, B.buildConstant(s64, 42)); MachineInstrBuilder FortyTwo = B.buildConstant(s64, 42); EXPECT_TRUE( diff --git a/llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp b/llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp index 9d8633353e1f9f..6098d4e6239251 100644 --- a/llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp +++ b/llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp @@ -107,6 +107,10 @@ TEST(AArch64SVESchedPseudoTesta510, IsCorrect) { runSVEPseudoTestForCPU("cortex-a510"); } +TEST(AArch64SVESchedPseudoTestv1, IsCorrect) { + runSVEPseudoTestForCPU("neoverse-v1"); +} + TEST(AArch64SVESchedPseudoTestv2, IsCorrect) { runSVEPseudoTestForCPU("neoverse-v2"); } diff --git a/llvm/unittests/TextAPI/RecordTests.cpp b/llvm/unittests/TextAPI/RecordTests.cpp index 37289eca1bdf6b..89ffbc4275e0d6 100644 --- a/llvm/unittests/TextAPI/RecordTests.cpp +++ b/llvm/unittests/TextAPI/RecordTests.cpp @@ -19,7 +19,7 @@ TEST(TAPIRecord, Simple) { GlobalRecord API{"_sym", RecordLinkage::Rexported, SymbolFlags::Rexported | SymbolFlags::Text | SymbolFlags::ThreadLocalValue, - GlobalRecord::Kind::Function}; + GlobalRecord::Kind::Function, /*Inlined=*/false}; EXPECT_TRUE(API.isExported()); EXPECT_TRUE(API.isText()); EXPECT_TRUE(API.isRexported()); @@ -30,6 +30,7 @@ TEST(TAPIRecord, 
Simple) { EXPECT_FALSE(API.isWeakDefined()); EXPECT_FALSE(API.isWeakReferenced()); EXPECT_FALSE(API.isVariable()); + EXPECT_FALSE(API.isInlined()); } TEST(TAPIRecord, SimpleObjC) { diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py index 53777523ec2a58..ecb19d233a8d1a 100644 --- a/llvm/utils/UpdateTestChecks/common.py +++ b/llvm/utils/UpdateTestChecks/common.py @@ -1,6 +1,8 @@ from __future__ import print_function import argparse +import bisect +import collections import copy import glob import itertools @@ -10,7 +12,7 @@ import sys import shlex -from typing import List +from typing import List, Mapping, Set ##### Common utilities for update_*test_checks.py @@ -420,6 +422,48 @@ def should_add_line_to_output( return True +def collect_original_check_lines(ti: TestInfo, prefix_set: set): + """ + Collect pre-existing check lines into a dictionary `result` which is + returned. + + result[func_name][prefix] is filled with a list of right-hand-sides of check + lines. + """ + result = {} + + current_function = None + for input_line_info in ti.ro_iterlines(): + input_line = input_line_info.line + if current_function is not None: + if input_line == "": + continue + if input_line.lstrip().startswith(";"): + m = CHECK_RE.match(input_line) + if ( + m is not None + and m.group(1) in prefix_set + and m.group(2) not in ["LABEL", "SAME"] + ): + if m.group(1) not in current_function: + current_function[m.group(1)] = [] + current_function[m.group(1)].append(input_line[m.end() :].strip()) + continue + current_function = None + + m = IR_FUNCTION_RE.match(input_line) + if m is not None: + func_name = m.group(1) + if ti.args.function is not None and func_name != ti.args.function: + # When filtering on a specific function, skip all others. 
+ continue + + assert func_name not in result + current_function = result[func_name] = {} + + return result + + # Perform lit-like substitutions def getSubstitutions(sourcepath): sourcedir = os.path.dirname(sourcepath) @@ -491,7 +535,7 @@ def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False): CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)") PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$") CHECK_RE = re.compile( - r"^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:" + r"^\s*(?://|[;#])\s*([^:]+?)(?:-(NEXT|NOT|DAG|LABEL|SAME|EMPTY))?:" ) CHECK_SAME_RE = re.compile(r"^\s*(?://|[;#])\s*([^:]+?)(?:-SAME)?:") @@ -933,7 +977,7 @@ def __init__( self.variable_mapping = {} # Return true if this kind of IR value is "local", basically if it matches '%{{.*}}'. - def is_local_def_ir_value_match(self, match): + def is_local_def_ir_value(self): return self.ir_prefix == "%" # Return true if this kind of IR value is "global", basically if it matches '#{{.*}}'. @@ -947,9 +991,9 @@ def get_ir_prefix_from_ir_value_match(self, match): return re.search(self.ir_prefix, match[0])[0], self.check_prefix # Return the IR regexp we use for this kind or IR value, e.g., [\w.-]+? 
for locals - def get_ir_regex_from_ir_value_re_match(self, match): + def get_ir_regex(self): # for backwards compatibility we check locals with '.*' - if self.is_local_def_ir_value_match(match): + if self.is_local_def_ir_value(): return ".*" return self.ir_regexp @@ -988,9 +1032,9 @@ def get_value_definition(self, var, match): regex = "" # always capture a number in the default format capture_start = "[[#" else: - regex = self.get_ir_regex_from_ir_value_re_match(match) + regex = self.get_ir_regex() capture_start = "[[" - if self.is_local_def_ir_value_match(match): + if self.is_local_def_ir_value(): return capture_start + varname + ":" + prefix + regex + "]]" return prefix + capture_start + varname + ":" + regex + "]]" @@ -999,7 +1043,7 @@ def get_value_use(self, var, match, var_prefix=None): if var_prefix is None: var_prefix = self.check_prefix capture_start = "[[#" if self.is_number else "[[" - if self.is_local_def_ir_value_match(match): + if self.is_local_def_ir_value(): return capture_start + self.get_value_name(var, var_prefix) + "]]" prefix = self.get_ir_prefix_from_ir_value_match(match)[0] return prefix + capture_start + self.get_value_name(var, var_prefix) + "]]" @@ -1187,6 +1231,313 @@ def may_clash_with_default_check_prefix_name(check_prefix, var): ) +def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[tuple]: + """ + Find a large ordered matching between strings in lhs and rhs. + + Think of this as finding the *unchanged* lines in a diff, where the entries + of lhs and rhs are lines of the files being diffed. + + Returns a list of matched (lhs_idx, rhs_idx) pairs. + """ + + if not lhs or not rhs: + return [] + + # Collect matches in reverse order. + matches = [] + + # First, collect a set of candidate matching edges. We limit this to a + # constant multiple of the input size to avoid quadratic runtime. 
+ patterns = collections.defaultdict(lambda: ([], [])) + + for idx in range(len(lhs)): + patterns[lhs[idx]][0].append(idx) + for idx in range(len(rhs)): + patterns[rhs[idx]][1].append(idx) + + multiple_patterns = [] + + candidates = [] + for pattern in patterns.values(): + if not pattern[0] or not pattern[1]: + continue + + if len(pattern[0]) == len(pattern[1]) == 1: + candidates.append((pattern[0][0], pattern[1][0])) + else: + multiple_patterns.append(pattern) + + multiple_patterns.sort(key=lambda pattern: len(pattern[0]) * len(pattern[1])) + + for pattern in multiple_patterns: + if len(candidates) + len(pattern[0]) * len(pattern[1]) > 2 * ( + len(lhs) + len(rhs) + ): + break + for lhs_idx in pattern[0]: + for rhs_idx in pattern[1]: + candidates.append((lhs_idx, rhs_idx)) + + if not candidates: + # The LHS and RHS either share nothing in common, or lines are just too + # identical. In that case, let's give up and not match anything. + return [] + + # Compute a maximal crossing-free matching via an algorithm that is + # inspired by a mixture of dynamic programming and line-sweeping in + # discrete geometry. + # + # I would be surprised if this algorithm didn't exist somewhere in the + # literature, but I found it without consciously recalling any + # references, so you'll have to make do with the explanation below. + # Sorry. + # + # The underlying graph is bipartite: + # - nodes on the LHS represent lines in the original check + # - nodes on the RHS represent lines in the new (updated) check + # + # Nodes are implicitly sorted by the corresponding line number. + # Edges (unique_matches) are sorted by the line number on the LHS. + # + # Here's the geometric intuition for the algorithm. + # + # * Plot the edges as points in the plane, with the original line + # number on the X axis and the updated line number on the Y axis. + # * The goal is to find a longest "chain" of points where each point + # is strictly above and to the right of the previous point. 
+ # * The algorithm proceeds by sweeping a vertical line from left to + # right. + # * The algorithm maintains a table where `table[N]` answers the + # question "What is currently the 'best' way to build a chain of N+1 + # points to the left of the vertical line". Here, 'best' means + # that the last point of the chain is a as low as possible (minimal + # Y coordinate). + # * `table[N]` is `(y, point_idx)` where `point_idx` is the index of + # the last point in the chain and `y` is its Y coordinate + # * A key invariant is that the Y values in the table are + # monotonically increasing + # * Thanks to these properties, the table can be used to answer the + # question "What is the longest chain that can be built to the left + # of the vertical line using only points below a certain Y value", + # using a binary search over the table. + # * The algorithm also builds a backlink structure in which every point + # links back to the previous point on a best (longest) chain ending + # at that point + # + # The core loop of the algorithm sweeps the line and updates the table + # and backlink structure for every point that we cross during the sweep. + # Therefore, the algorithm is trivially O(M log M) in the number of + # points. + candidates.sort(key=lambda candidate: (candidate[0], -candidate[1])) + + backlinks = [] + table_rhs_idx = [] + table_candidate_idx = [] + for _, rhs_idx in candidates: + candidate_idx = len(backlinks) + ti = bisect.bisect_left(table_rhs_idx, rhs_idx) + + # Update the table to record a best chain ending in the current point. + # There always is one, and if any of the previously visited points had + # a higher Y coordinate, then there is always a previously recorded best + # chain that can be improved upon by using the current point. + # + # There is only one case where there is some ambiguity. 
If the + # pre-existing entry table[ti] has the same Y coordinate / rhs_idx as + # the current point (this can only happen if the same line appeared + # multiple times on the LHS), then we could choose to keep the + # previously recorded best chain instead. That would bias the algorithm + # differently but should have no systematic impact on the quality of the + # result. + if ti < len(table_rhs_idx): + table_rhs_idx[ti] = rhs_idx + table_candidate_idx[ti] = candidate_idx + else: + table_rhs_idx.append(rhs_idx) + table_candidate_idx.append(candidate_idx) + if ti > 0: + backlinks.append(table_candidate_idx[ti - 1]) + else: + backlinks.append(None) + + # Commit to names in the matching by walking the backlinks. Recursively + # attempt to fill in more matches in-betweem. + match_idx = table_candidate_idx[-1] + while match_idx is not None: + current = candidates[match_idx] + matches.append(current) + match_idx = backlinks[match_idx] + + matches.reverse() + return matches + + +VARIABLE_TAG = "[[@@]]" +METAVAR_RE = re.compile(r"\[\[([A-Z0-9_]+)(?::[^]]+)?\]\]") +NUMERIC_SUFFIX_RE = re.compile(r"[0-9]*$") + + +class CheckValueInfo: + def __init__( + self, + nameless_value: NamelessValue, + var: str, + prefix: str, + ): + self.nameless_value = nameless_value + self.var = var + self.prefix = prefix + + +# Represent a check line in a way that allows us to compare check lines while +# ignoring some or all of the FileCheck variable names. 
+class CheckLineInfo: + def __init__(self, line, values): + # Line with all FileCheck variable name occurrences replaced by VARIABLE_TAG + self.line: str = line + + # Information on each FileCheck variable name occurrences in the line + self.values: List[CheckValueInfo] = values + + def __repr__(self): + return f"CheckLineInfo(line={self.line}, self.values={self.values})" + + +def remap_metavar_names( + old_line_infos: List[CheckLineInfo], + new_line_infos: List[CheckLineInfo], + committed_names: Set[str], +) -> Mapping[str, str]: + """ + Map all FileCheck variable names that appear in new_line_infos to new + FileCheck variable names in an attempt to reduce the diff from old_line_infos + to new_line_infos. + + This is done by: + * Matching old check lines and new check lines using a diffing algorithm + applied after replacing names with wildcards. + * Committing to variable names such that the matched lines become equal + (without wildcards) if possible + * This is done recursively to handle cases where many lines are equal + after wildcard replacement + """ + # Initialize uncommitted identity mappings + new_mapping = {} + for line in new_line_infos: + for value in line.values: + new_mapping[value.var] = value.var + + # Recursively commit to the identity mapping or find a better one + def recurse(old_begin, old_end, new_begin, new_end): + if old_begin == old_end or new_begin == new_end: + return + + # Find a matching of lines where uncommitted names are replaced + # with a placeholder. 
+ def diffify_line(line, mapper): + values = [] + for value in line.values: + mapped = mapper(value.var) + values.append(mapped if mapped in committed_names else "?") + return line.line.strip() + " @@@ " + " @ ".join(values) + + lhs_lines = [ + diffify_line(line, lambda x: x) + for line in old_line_infos[old_begin:old_end] + ] + rhs_lines = [ + diffify_line(line, lambda x: new_mapping[x]) + for line in new_line_infos[new_begin:new_end] + ] + + candidate_matches = find_diff_matching(lhs_lines, rhs_lines) + + # Apply commits greedily on a match-by-match basis + matches = [(-1, -1)] + committed_anything = False + for lhs_idx, rhs_idx in candidate_matches: + lhs_line = old_line_infos[lhs_idx] + rhs_line = new_line_infos[rhs_idx] + + local_commits = {} + + for lhs_value, rhs_value in zip(lhs_line.values, rhs_line.values): + if new_mapping[rhs_value.var] in committed_names: + # The new value has already been committed. If it was mapped + # to the same name as the original value, we can consider + # committing other values from this line. Otherwise, we + # should ignore this line. + if new_mapping[rhs_value.var] == lhs_value.var: + continue + else: + break + + if rhs_value.var in local_commits: + # Same, but for a possible commit happening on the same line + if local_commits[rhs_value.var] == lhs_value.var: + continue + else: + break + + if lhs_value.var in committed_names: + # We can't map this value because the name we would map it to has already been + # committed for something else. Give up on this line. 
+ break + + local_commits[rhs_value.var] = lhs_value.var + else: + # No reason not to add any commitments for this line + for rhs_var, lhs_var in local_commits.items(): + new_mapping[rhs_var] = lhs_var + committed_names.add(lhs_var) + committed_anything = True + + if ( + lhs_var != rhs_var + and lhs_var in new_mapping + and new_mapping[lhs_var] == lhs_var + ): + new_mapping[lhs_var] = "conflict_" + lhs_var + + matches.append((lhs_idx, rhs_idx)) + + matches.append((old_end, new_end)) + + # Recursively handle sequences between matches + if committed_anything: + for (lhs_prev, rhs_prev), (lhs_next, rhs_next) in zip(matches, matches[1:]): + recurse(lhs_prev + 1, lhs_next, rhs_prev + 1, rhs_next) + + recurse(0, len(old_line_infos), 0, len(new_line_infos)) + + # Commit to remaining names and resolve conflicts + for new_name, mapped_name in new_mapping.items(): + if mapped_name in committed_names: + continue + if not mapped_name.startswith("conflict_"): + assert mapped_name == new_name + committed_names.add(mapped_name) + + for new_name, mapped_name in new_mapping.items(): + if mapped_name in committed_names: + continue + assert mapped_name.startswith("conflict_") + + m = NUMERIC_SUFFIX_RE.search(new_name) + base_name = new_name[: m.start()] + suffix = int(new_name[m.start() :]) if m.start() != m.end() else 1 + while True: + candidate = f"{base_name}{suffix}" + if candidate not in committed_names: + new_mapping[new_name] = candidate + committed_names.add(candidate) + break + suffix += 1 + + return new_mapping + + def generalize_check_lines_common( lines, is_analyze, @@ -1196,11 +1547,12 @@ def generalize_check_lines_common( nameless_value_regex, is_asm, preserve_names, + original_check_lines=None, ): # This gets called for each match that occurs in # a line. We transform variables we haven't seen # into defs, and variables we have seen into uses. 
- def transform_line_vars(match): + def transform_line_vars(match, transform_locals=True): var = get_name_from_ir_value_match(match) nameless_value = get_nameless_value_from_match(match, nameless_values) if may_clash_with_default_check_prefix_name(nameless_value.check_prefix, var): @@ -1209,7 +1561,9 @@ def transform_line_vars(match): " with scripted FileCheck name." % (var,) ) key = (var, nameless_value.check_key) - is_local_def = nameless_value.is_local_def_ir_value_match(match) + is_local_def = nameless_value.is_local_def_ir_value() + if is_local_def and not transform_locals: + return None if is_local_def and key in vars_seen: rv = nameless_value.get_value_use(var, match) elif not is_local_def and key in global_vars_seen: @@ -1228,13 +1582,15 @@ def transform_line_vars(match): # including the commas and spaces. return match.group(1) + rv + match.group(match.lastindex) - lines_with_def = [] + def transform_non_local_line_vars(match): + return transform_line_vars(match, False) + multiple_braces_re = re.compile(r"({{+)|(}}+)") def escape_braces(match_obj): return '{{' + re.escape(match_obj.group(0)) + '}}' - for i, line in enumerate(lines): - if not is_asm and not is_analyze: + if not is_asm and not is_analyze: + for i, line in enumerate(lines): # An IR variable named '%.' matches the FileCheck regex string. line = line.replace("%.", "%dot") for regex in _global_hex_value_regex: @@ -1252,25 +1608,136 @@ def escape_braces(match_obj): # Ignore any comments, since the check lines will too. scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r"", line) lines[i] = scrubbed_line - if not preserve_names: - # It can happen that two matches are back-to-back and for some reason sub - # will not replace both of them. For now we work around this by - # substituting until there is no more match. 
- changed = True - while changed: - (lines[i], changed) = nameless_value_regex.subn( - transform_line_vars, lines[i], count=1 - ) - if is_analyze: + + if not preserve_names: + if is_asm: + for i, _ in enumerate(lines): + # It can happen that two matches are back-to-back and for some reason sub + # will not replace both of them. For now we work around this by + # substituting until there is no more match. + changed = True + while changed: + (lines[i], changed) = nameless_value_regex.subn( + transform_line_vars, lines[i], count=1 + ) + else: + # LLVM IR case. Start by handling global meta variables (global IR variables, + # metadata, attributes) + for i, _ in enumerate(lines): + start = 0 + while True: + m = nameless_value_regex.search(lines[i][start:]) + if m is None: + break + start += m.start() + sub = transform_non_local_line_vars(m) + if sub is not None: + lines[i] = ( + lines[i][:start] + sub + lines[i][start + len(m.group(0)) :] + ) + start += 1 + + # Collect information about new check lines and original check lines (if any) + new_line_infos = [] + for line in lines: + filtered_line = "" + values = [] + while True: + m = nameless_value_regex.search(line) + if m is None: + filtered_line += line + break + + var = get_name_from_ir_value_match(m) + nameless_value = get_nameless_value_from_match(m, nameless_values) + var = nameless_value.get_value_name( + var, nameless_value.check_prefix + ) + + # Replace with a [[@@]] tag, but be sure to keep the spaces and commas. 
+ filtered_line += ( + line[: m.start()] + + m.group(1) + + VARIABLE_TAG + + m.group(m.lastindex) + ) + line = line[m.end() :] + values.append( + CheckValueInfo( + nameless_value=nameless_value, + var=var, + prefix=nameless_value.get_ir_prefix_from_ir_value_match(m)[ + 0 + ], + ) + ) + new_line_infos.append(CheckLineInfo(filtered_line, values)) + + orig_line_infos = [] + for line in original_check_lines or []: + filtered_line = "" + values = [] + while True: + m = METAVAR_RE.search(line) + if m is None: + filtered_line += line + break + + # Replace with a [[@@]] tag, but be sure to keep the spaces and commas. + filtered_line += line[: m.start()] + VARIABLE_TAG + line = line[m.end() :] + values.append( + CheckValueInfo( + nameless_value=None, + var=m.group(1), + prefix=None, + ) + ) + orig_line_infos.append(CheckLineInfo(filtered_line, values)) + + # Compute the variable name mapping + committed_names = set(vars_seen) + + mapping = remap_metavar_names( + orig_line_infos, new_line_infos, committed_names + ) + + for i, line_info in enumerate(new_line_infos): + line_template = line_info.line + line = "" + + for value in line_info.values: + idx = line_template.find(VARIABLE_TAG) + line += line_template[:idx] + line_template = line_template[idx + len(VARIABLE_TAG) :] + + key = (mapping[value.var], nameless_value.check_key) + is_local_def = nameless_value.is_local_def_ir_value() + if is_local_def: + if mapping[value.var] in vars_seen: + line += f"[[{mapping[value.var]}]]" + else: + line += f"[[{mapping[value.var]}:{value.prefix}{value.nameless_value.get_ir_regex()}]]" + vars_seen.add(mapping[value.var]) + else: + raise RuntimeError("not implemented") + + line += line_template + + lines[i] = line + + if is_analyze: + for i, _ in enumerate(lines): # Escape multiple {{ or }} as {{}} denotes a FileCheck regex. scrubbed_line = multiple_braces_re.sub(escape_braces, lines[i]) lines[i] = scrubbed_line + return lines # Replace IR value defs and uses with FileCheck variables. 
def generalize_check_lines( - lines, is_analyze, vars_seen, global_vars_seen, preserve_names + lines, is_analyze, vars_seen, global_vars_seen, preserve_names, original_check_lines ): return generalize_check_lines_common( lines, @@ -1281,6 +1748,7 @@ def generalize_check_lines( IR_VALUE_RE, False, preserve_names, + original_check_lines=original_check_lines, ) @@ -1337,6 +1805,7 @@ def add_checks( global_vars_seen_dict, is_filtered, preserve_names=False, + original_check_lines: Mapping[str, List[str]] = {}, ): # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well. prefix_exclusions = set() @@ -1409,6 +1878,7 @@ def add_checks( vars_seen, global_vars_seen, preserve_names, + original_check_lines=[], )[0] func_name_separator = func_dict[checkprefix][func_name].func_name_separator if "[[" in args_and_sig: @@ -1516,7 +1986,12 @@ def add_checks( # to variable naming fashions. else: func_body = generalize_check_lines( - func_body, False, vars_seen, global_vars_seen, preserve_names + func_body, + False, + vars_seen, + global_vars_seen, + preserve_names, + original_check_lines=original_check_lines.get(checkprefix), ) # This could be selectively enabled with an optional invocation argument. @@ -1578,6 +2053,7 @@ def add_ir_checks( version, global_vars_seen_dict, is_filtered, + original_check_lines={}, ): # Label format is based on IR string. 
if function_sig and version > 1: @@ -1602,6 +2078,7 @@ def add_ir_checks( global_vars_seen_dict, is_filtered, preserve_names, + original_check_lines=original_check_lines, ) @@ -1890,6 +2367,7 @@ def get_autogennote_suffix(parser, args): "llvm_bin", "verbose", "force_update", + "reset_variable_names", ): continue value = getattr(args, action.dest) diff --git a/llvm/utils/gn/secondary/clang/unittests/Interpreter/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/Interpreter/BUILD.gn index 441d57187cd2db..a20066436a3bf1 100644 --- a/llvm/utils/gn/secondary/clang/unittests/Interpreter/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/Interpreter/BUILD.gn @@ -12,6 +12,7 @@ unittest("ClangReplInterpreterTests") { ] sources = [ "CodeCompletionTest.cpp", + "IncrementalCompilerBuilderTest.cpp", "IncrementalProcessingTest.cpp", "InterpreterTest.cpp", ] diff --git a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/Debugging/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/Debugging/BUILD.gn index 1d3fc6cfdfaaa6..5610679ff333ee 100644 --- a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/Debugging/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/Debugging/BUILD.gn @@ -13,6 +13,7 @@ static_library("Debugging") { "DebuggerSupportPlugin.cpp", "LLJITUtilsCBindings.cpp", "PerfSupportPlugin.cpp", + "VTuneSupportPlugin.cpp", ] if (current_os == "linux") { libs = [ "rt" ] diff --git a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/TargetProcess/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/TargetProcess/BUILD.gn index f34855e8e1cc60..d62f5042c94638 100644 --- a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/TargetProcess/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/TargetProcess/BUILD.gn @@ -9,6 +9,7 @@ static_library("TargetProcess") { "ExecutorSharedMemoryMapperService.cpp", "JITLoaderGDB.cpp", "JITLoaderPerf.cpp", + "JITLoaderVTune.cpp", "OrcRTBootstrap.cpp", "RegisterEHFrames.cpp", 
"SimpleExecutorDylibManager.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Instrumentation/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Instrumentation/BUILD.gn index 00e1888da64d26..131308db2aa557 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Instrumentation/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Instrumentation/BUILD.gn @@ -23,12 +23,13 @@ static_library("Instrumentation") { "InstrProfiling.cpp", "Instrumentation.cpp", "KCFI.cpp", - "PGOForceFunctionAttrs.cpp", "MemProfiler.cpp", "MemorySanitizer.cpp", + "PGOForceFunctionAttrs.cpp", "PGOInstrumentation.cpp", "PGOMemOPSizeOpt.cpp", "PoisonChecking.cpp", + "RemoveTrapsPass.cpp", "SanitizerBinaryMetadata.cpp", "SanitizerCoverage.cpp", "ThreadSanitizer.cpp", diff --git a/llvm/utils/update_test_checks.py b/llvm/utils/update_test_checks.py index b5077d79351378..04808ce6bb1c6f 100755 --- a/llvm/utils/update_test_checks.py +++ b/llvm/utils/update_test_checks.py @@ -85,6 +85,12 @@ def main(): choices=["none", "smart", "all"], help="Check global entries (global variables, metadata, attribute sets, ...) for functions", ) + parser.add_argument( + "--reset-variable-names", + action="store_true", + help="Reset all variable names to correspond closely to the variable names in IR. 
" + "This tends to result in larger diffs.", + ) parser.add_argument("tests", nargs="+") initial_args = common.parse_commandline_args(parser) @@ -170,13 +176,19 @@ def main(): ) builder.processed_prefixes(prefixes) + prefix_set = set( + [prefix for prefixes, _, _ in prefix_list for prefix in prefixes] + ) + + if not ti.args.reset_variable_names: + original_check_lines = common.collect_original_check_lines(ti, prefix_set) + else: + original_check_lines = {} + func_dict = builder.finish_and_get_func_dict() is_in_function = False is_in_function_start = False has_checked_pre_function_globals = False - prefix_set = set( - [prefix for prefixes, _, _ in prefix_list for prefix in prefixes] - ) common.debug("Rewriting FileCheck prefixes:", str(prefix_set)) output_lines = [] @@ -230,6 +242,7 @@ def main(): args.version, global_vars_seen_dict, is_filtered=builder.is_filtered(), + original_check_lines=original_check_lines.get(func, {}), ), ) ) @@ -261,6 +274,9 @@ def main(): args.version, global_vars_seen_dict, is_filtered=builder.is_filtered(), + original_check_lines=original_check_lines.get( + func_name, {} + ), ) ) is_in_function_start = False diff --git a/mlir/include/mlir-c/BuiltinAttributes.h b/mlir/include/mlir-c/BuiltinAttributes.h index 5070defddb52d6..9fd1a123e4a445 100644 --- a/mlir/include/mlir-c/BuiltinAttributes.h +++ b/mlir/include/mlir-c/BuiltinAttributes.h @@ -266,6 +266,10 @@ mlirSymbolRefAttrGetNestedReference(MlirAttribute attr, intptr_t pos); /// Returns the typeID of an SymbolRef attribute. MLIR_CAPI_EXPORTED MlirTypeID mlirSymbolRefAttrGetTypeID(void); +/// Creates a DisctinctAttr with the referenced attribute. +MLIR_CAPI_EXPORTED MlirAttribute +mlirDisctinctAttrCreate(MlirAttribute referencedAttr); + //===----------------------------------------------------------------------===// // Flat SymbolRef attribute. 
//===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir-c/Dialect/LLVM.h b/mlir/include/mlir-c/Dialect/LLVM.h index ac216b01f364d4..d823afb659c8db 100644 --- a/mlir/include/mlir-c/Dialect/LLVM.h +++ b/mlir/include/mlir-c/Dialect/LLVM.h @@ -11,6 +11,7 @@ #define MLIR_C_DIALECT_LLVM_H #include "mlir-c/IR.h" +#include "mlir-c/Support.h" #ifdef __cplusplus extern "C" { @@ -98,6 +99,236 @@ MLIR_CAPI_EXPORTED MlirLogicalResult mlirLLVMStructTypeSetBody(MlirType structType, intptr_t nFieldTypes, MlirType const *fieldTypes, bool isPacked); +enum MlirLLVMCConv { + MlirLLVMCConvC = 0, + MlirLLVMCConvFast = 8, + MlirLLVMCConvCold = 9, + MlirLLVMCConvGHC = 10, + MlirLLVMCConvHiPE = 11, + MlirLLVMCConvAnyReg = 13, + MlirLLVMCConvPreserveMost = 14, + MlirLLVMCConvPreserveAll = 15, + MlirLLVMCConvSwift = 16, + MlirLLVMCConvCXX_FAST_TLS = 17, + MlirLLVMCConvTail = 18, + MlirLLVMCConvCFGuard_Check = 19, + MlirLLVMCConvSwiftTail = 20, + MlirLLVMCConvX86_StdCall = 64, + MlirLLVMCConvX86_FastCall = 65, + MlirLLVMCConvARM_APCS = 66, + MlirLLVMCConvARM_AAPCS = 67, + MlirLLVMCConvARM_AAPCS_VFP = 68, + MlirLLVMCConvMSP430_INTR = 69, + MlirLLVMCConvX86_ThisCall = 70, + MlirLLVMCConvPTX_Kernel = 71, + MlirLLVMCConvPTX_Device = 72, + MlirLLVMCConvSPIR_FUNC = 75, + MlirLLVMCConvSPIR_KERNEL = 76, + MlirLLVMCConvIntel_OCL_BI = 77, + MlirLLVMCConvX86_64_SysV = 78, + MlirLLVMCConvWin64 = 79, + MlirLLVMCConvX86_VectorCall = 80, + MlirLLVMCConvDUMMY_HHVM = 81, + MlirLLVMCConvDUMMY_HHVM_C = 82, + MlirLLVMCConvX86_INTR = 83, + MlirLLVMCConvAVR_INTR = 84, + MlirLLVMCConvAVR_BUILTIN = 86, + MlirLLVMCConvAMDGPU_VS = 87, + MlirLLVMCConvAMDGPU_GS = 88, + MlirLLVMCConvAMDGPU_CS = 90, + MlirLLVMCConvAMDGPU_KERNEL = 91, + MlirLLVMCConvX86_RegCall = 92, + MlirLLVMCConvAMDGPU_HS = 93, + MlirLLVMCConvMSP430_BUILTIN = 94, + MlirLLVMCConvAMDGPU_LS = 95, + MlirLLVMCConvAMDGPU_ES = 96, + MlirLLVMCConvAArch64_VectorCall = 97, + 
MlirLLVMCConvAArch64_SVE_VectorCall = 98, + MlirLLVMCConvWASM_EmscriptenInvoke = 99, + MlirLLVMCConvAMDGPU_Gfx = 100, + MlirLLVMCConvM68k_INTR = 101, +}; +typedef enum MlirLLVMCConv MlirLLVMCConv; + +/// Creates a LLVM CConv attribute. +MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMCConvAttrGet(MlirContext ctx, + MlirLLVMCConv cconv); + +enum MlirLLVMComdat { + MlirLLVMComdatAny = 0, + MlirLLVMComdatExactMatch = 1, + MlirLLVMComdatLargest = 2, + MlirLLVMComdatNoDeduplicate = 3, + MlirLLVMComdatSameSize = 4, +}; +typedef enum MlirLLVMComdat MlirLLVMComdat; + +/// Creates a LLVM Comdat attribute. +MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMComdatAttrGet(MlirContext ctx, + MlirLLVMComdat comdat); + +enum MlirLLVMLinkage { + MlirLLVMLinkagePrivate = 0, + MlirLLVMLinkageInternal = 1, + MlirLLVMLinkageAvailableExternally = 2, + MlirLLVMLinkageLinkonce = 3, + MlirLLVMLinkageWeak = 4, + MlirLLVMLinkageCommon = 5, + MlirLLVMLinkageAppending = 6, + MlirLLVMLinkageExternWeak = 7, + MlirLLVMLinkageLinkonceODR = 8, + MlirLLVMLinkageWeakODR = 9, + MlirLLVMLinkageExternal = 10, +}; +typedef enum MlirLLVMLinkage MlirLLVMLinkage; + +/// Creates a LLVM Linkage attribute. +MLIR_CAPI_EXPORTED MlirAttribute +mlirLLVMLinkageAttrGet(MlirContext ctx, MlirLLVMLinkage linkage); + +/// Creates a LLVM DINullType attribute. +MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDINullTypeAttrGet(MlirContext ctx); + +/// Creates a LLVM DIExpressionElem attribute. +MLIR_CAPI_EXPORTED MlirAttribute +mlirLLVMDIExpressionElemAttrGet(MlirContext ctx, unsigned int opcode, + intptr_t nArguments, uint64_t const *arguments); + +/// Creates a LLVM DIExpression attribute. 
+MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDIExpressionAttrGet( + MlirContext ctx, intptr_t nOperations, MlirAttribute const *operations); + +enum MlirLLVMTypeEncoding { + MlirLLVMTypeEncodingAddress = 0x1, + MlirLLVMTypeEncodingBoolean = 0x2, + MlirLLVMTypeEncodingComplexFloat = 0x31, + MlirLLVMTypeEncodingFloatT = 0x4, + MlirLLVMTypeEncodingSigned = 0x5, + MlirLLVMTypeEncodingSignedChar = 0x6, + MlirLLVMTypeEncodingUnsigned = 0x7, + MlirLLVMTypeEncodingUnsignedChar = 0x08, + MlirLLVMTypeEncodingImaginaryFloat = 0x09, + MlirLLVMTypeEncodingPackedDecimal = 0x0a, + MlirLLVMTypeEncodingNumericString = 0x0b, + MlirLLVMTypeEncodingEdited = 0x0c, + MlirLLVMTypeEncodingSignedFixed = 0x0d, + MlirLLVMTypeEncodingUnsignedFixed = 0x0e, + MlirLLVMTypeEncodingDecimalFloat = 0x0f, + MlirLLVMTypeEncodingUTF = 0x10, + MlirLLVMTypeEncodingUCS = 0x11, + MlirLLVMTypeEncodingASCII = 0x12, + MlirLLVMTypeEncodingLoUser = 0x80, + MlirLLVMTypeEncodingHiUser = 0xff, +}; +typedef enum MlirLLVMTypeEncoding MlirLLVMTypeEncoding; + +/// Creates a LLVM DIBasicType attribute. +MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDIBasicTypeAttrGet( + MlirContext ctx, unsigned int tag, MlirAttribute name, uint64_t sizeInBits, + MlirLLVMTypeEncoding encoding); + +/// Creates a LLVM DICompositeType attribute. +MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDICompositeTypeAttrGet( + MlirContext ctx, unsigned int tag, MlirAttribute name, MlirAttribute file, + uint32_t line, MlirAttribute scope, MlirAttribute baseType, int64_t flags, + uint64_t sizeInBits, uint64_t alignInBits, intptr_t nElements, + MlirAttribute const *elements); + +/// Creates a LLVM DIDerivedType attribute. +MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDIDerivedTypeAttrGet( + MlirContext ctx, unsigned int tag, MlirAttribute name, + MlirAttribute baseType, uint64_t sizeInBits, uint32_t alignInBits, + uint64_t offsetInBits); + +/// Gets the base type from a LLVM DIDerivedType attribute. 
+MLIR_CAPI_EXPORTED MlirAttribute +mlirLLVMDIDerivedTypeAttrGetBaseType(MlirAttribute diDerivedType); + +/// Creates a LLVM DIFileAttr attribute. +MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDIFileAttrGet(MlirContext ctx, + MlirAttribute name, + MlirAttribute directory); + +enum MlirLLVMDIEmissionKind { + MlirLLVMDIEmissionKindNone = 0, + MlirLLVMDIEmissionKindFull = 1, + MlirLLVMDIEmissionKindLineTablesOnly = 2, + MlirLLVMDIEmissionKindDebugDirectivesOnly = 3, +}; +typedef enum MlirLLVMDIEmissionKind MlirLLVMDIEmissionKind; + +/// Creates a LLVM DICompileUnit attribute. +MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDICompileUnitAttrGet( + MlirContext ctx, MlirAttribute id, unsigned int sourceLanguage, + MlirAttribute file, MlirAttribute producer, bool isOptimized, + MlirLLVMDIEmissionKind emissionKind); + +/// Creates a LLVM DIFlags attribute. +MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDIFlagsAttrGet(MlirContext ctx, + uint64_t value); + +/// Creates a LLVM DILexicalBlock attribute. +MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDILexicalBlockAttrGet( + MlirContext ctx, MlirAttribute scope, MlirAttribute file, unsigned int line, + unsigned int column); + +/// Creates a LLVM DILexicalBlockFile attribute. +MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDILexicalBlockFileAttrGet( + MlirContext ctx, MlirAttribute scope, MlirAttribute file, + unsigned int discriminator); + +/// Creates a LLVM DILocalVariableAttr attribute. +MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDILocalVariableAttrGet( + MlirContext ctx, MlirAttribute scope, MlirAttribute name, + MlirAttribute diFile, unsigned int line, unsigned int arg, + unsigned int alignInBits, MlirAttribute diType); + +/// Creates a LLVM DISubprogramAttr attribute. 
+MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDISubprogramAttrGet( + MlirContext ctx, MlirAttribute id, MlirAttribute compileUnit, + MlirAttribute scope, MlirAttribute name, MlirAttribute linkageName, + MlirAttribute file, unsigned int line, unsigned int scopeLine, + uint64_t subprogramFlags, MlirAttribute type); + +/// Gets the scope from this DISubprogramAttr. +MLIR_CAPI_EXPORTED MlirAttribute +mlirLLVMDISubprogramAttrGetScope(MlirAttribute diSubprogram); + +/// Gets the line from this DISubprogramAttr. +MLIR_CAPI_EXPORTED unsigned int +mlirLLVMDISubprogramAttrGetLine(MlirAttribute diSubprogram); + +/// Gets the scope line from this DISubprogram. +MLIR_CAPI_EXPORTED unsigned int +mlirLLVMDISubprogramAttrGetScopeLine(MlirAttribute diSubprogram); + +/// Gets the compile unit from this DISubprogram. +MLIR_CAPI_EXPORTED MlirAttribute +mlirLLVMDISubprogramAttrGetCompileUnit(MlirAttribute diSubprogram); + +/// Gets the file from this DISubprogramAttr. +MLIR_CAPI_EXPORTED MlirAttribute +mlirLLVMDISubprogramAttrGetFile(MlirAttribute diSubprogram); + +/// Gets the type from this DISubprogramAttr. +MLIR_CAPI_EXPORTED MlirAttribute +mlirLLVMDISubprogramAttrGetType(MlirAttribute diSubprogram); + +/// Creates a LLVM DISubroutineTypeAttr attribute. +MLIR_CAPI_EXPORTED MlirAttribute +mlirLLVMDISubroutineTypeAttrGet(MlirContext ctx, unsigned int callingConvention, + intptr_t nTypes, MlirAttribute const *types); + +/// Creates a LLVM DIModuleAttr attribute. +MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDIModuleAttrGet( + MlirContext ctx, MlirAttribute file, MlirAttribute scope, + MlirAttribute name, MlirAttribute configMacros, MlirAttribute includePath, + MlirAttribute apinotes, unsigned int line, bool isDecl); + +/// Gets the scope of this DIModuleAttr. 
+MLIR_CAPI_EXPORTED MlirAttribute +mlirLLVMDIModuleAttrGetScope(MlirAttribute diModule); + #ifdef __cplusplus } #endif diff --git a/mlir/include/mlir/Conversion/Passes.h b/mlir/include/mlir/Conversion/Passes.h index d8297c87e27dcd..51e2c3df9e9f94 100644 --- a/mlir/include/mlir/Conversion/Passes.h +++ b/mlir/include/mlir/Conversion/Passes.h @@ -14,6 +14,7 @@ #include "mlir/Conversion/ArithToAMDGPU/ArithToAMDGPU.h" #include "mlir/Conversion/ArithToEmitC/ArithToEmitCPass.h" #include "mlir/Conversion/ArithToArmSME/ArithToArmSME.h" +#include "mlir/Conversion/ArithToEmitC/ArithToEmitCPass.h" #include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h" #include "mlir/Conversion/ArithToSPIRV/ArithToSPIRV.h" #include "mlir/Conversion/ArmNeon2dToIntr/ArmNeon2dToIntr.h" diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h index aa36c1d7ff1d66..a729bc99b987cd 100644 --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h @@ -159,6 +159,8 @@ struct BufferResultsToOutParamsOpts { return true; }; + /// Memcpy function; used to create a copy between two memrefs. + /// If this is empty, memref.copy is used. 
std::optional memCpyFn; /// If true, the pass adds a "bufferize.result" attribute to each output diff --git a/mlir/include/mlir/Dialect/CMakeLists.txt b/mlir/include/mlir/Dialect/CMakeLists.txt index 9788e24e4a1d91..2da79011fa26a3 100644 --- a/mlir/include/mlir/Dialect/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/CMakeLists.txt @@ -40,3 +40,4 @@ add_subdirectory(UB) add_subdirectory(Utils) add_subdirectory(Vector) add_subdirectory(X86Vector) +add_subdirectory(XeGPU) diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/AllInterfaces.h b/mlir/include/mlir/Dialect/Linalg/Transforms/AllInterfaces.h new file mode 100644 index 00000000000000..a69751e072b797 --- /dev/null +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/AllInterfaces.h @@ -0,0 +1,26 @@ +//===- AllInterfaces.h - ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a common entry point for registering all external +// interface implementations to the linalg dialect. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_LINALG_TRANSFORMS_ALLINTERFACES_H +#define MLIR_DIALECT_LINALG_TRANSFORMS_ALLINTERFACES_H + +namespace mlir { +class DialectRegistry; + +namespace linalg { +void registerAllDialectInterfaceImplementations(DialectRegistry ®istry); +} // namespace linalg + +} // namespace mlir + +#endif // MLIR_DIALECT_LINALG_TRANSFORMS_ALLINTERFACES_H diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/MeshShardingInterfaceImpl.h b/mlir/include/mlir/Dialect/Linalg/Transforms/MeshShardingInterfaceImpl.h new file mode 100644 index 00000000000000..c57501ea86b7ed --- /dev/null +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/MeshShardingInterfaceImpl.h @@ -0,0 +1,20 @@ +//===- MeshShardingInterfaceImpl.h ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_LINALG_MESHSHARDINGINTERFACEIMPL_H +#define MLIR_DIALECT_LINALG_MESHSHARDINGINTERFACEIMPL_H + +namespace mlir { +class DialectRegistry; + +namespace linalg { +void registerMeshShardingInterfaceExternalModels(DialectRegistry ®istry); +} // namespace linalg +} // namespace mlir + +#endif // MLIR_DIALECT_LINALG_MESHSHARDINGINTERFACEIMPL_H diff --git a/mlir/include/mlir/Dialect/Mesh/IR/MeshBase.td b/mlir/include/mlir/Dialect/Mesh/IR/MeshBase.td index fc2acc70381ef7..9d9b5892e1a51f 100644 --- a/mlir/include/mlir/Dialect/Mesh/IR/MeshBase.td +++ b/mlir/include/mlir/Dialect/Mesh/IR/MeshBase.td @@ -46,6 +46,12 @@ def Mesh_ReductionKind : I32EnumAttr<"ReductionKind", I32EnumAttrCase<"Sum", 1, "sum">, I32EnumAttrCase<"Max", 2, "max">, I32EnumAttrCase<"Min", 3, "min">, + I32EnumAttrCase<"Product", 4, "product">, + // Arithmetic mean. 
+ I32EnumAttrCase<"Average", 5, "average">, + I32EnumAttrCase<"BitwiseAnd", 6, "bitwise_and">, + I32EnumAttrCase<"BitwiseOr", 7, "bitwise_or">, + I32EnumAttrCase<"BitwiseXor", 8, "bitwise_xor">, I32EnumAttrCase<"Generic", 100, "generic"> ]> { let genSpecializedAttr = 0; diff --git a/mlir/include/mlir/Dialect/Mesh/IR/MeshOps.td b/mlir/include/mlir/Dialect/Mesh/IR/MeshOps.td index b9cd15e2062669..8e1e475463585e 100644 --- a/mlir/include/mlir/Dialect/Mesh/IR/MeshOps.td +++ b/mlir/include/mlir/Dialect/Mesh/IR/MeshOps.td @@ -353,6 +353,10 @@ def Mesh_AllReduceOp : Mesh_CollectiveCommunicationOpBase<"all_reduce", [ attr-dict `:` type($input) `->` type($result) }]; let hasCanonicalizer = 1; + let builders = [ + OpBuilder<(ins "Value":$input, "StringRef":$mesh, + "ArrayRef":$meshAxes, "ReductionKind":$reduction)> + ]; } def Mesh_AllSliceOp : Mesh_CollectiveCommunicationOpBase<"all_slice", [ diff --git a/mlir/include/mlir/Dialect/Mesh/Interfaces/ShardingInterfaceImpl.h b/mlir/include/mlir/Dialect/Mesh/Interfaces/ShardingInterfaceImpl.h index ffc9b6fb18be53..ab4df2ab028d43 100644 --- a/mlir/include/mlir/Dialect/Mesh/Interfaces/ShardingInterfaceImpl.h +++ b/mlir/include/mlir/Dialect/Mesh/Interfaces/ShardingInterfaceImpl.h @@ -22,6 +22,24 @@ class SymbolTableCollection; namespace mesh { +// Retrieve the mesh axes corresponding to each operation loop iterator based +// on the provided shardings for the op's operands and results. +// Assumes that the indexingMaps are projected permutations. +ShardingArray getMeshAxisAssignmentForLoopIterators( + ArrayRef operandShardings, + ArrayRef resultShardings, + ArrayRef loopIteratorTypes, + ArrayRef indexingMaps); + +bool isAtLeastOneReductionIteratorSharded( + ArrayRef loopIteratorTypes, + ArrayRef> meshAxisAssignmentForLoopIterators); + +// Get the set of mesh axes that correspond to reduction loop iterators. 
+SmallVector getReductionMeshAxes( + ArrayRef loopIteratorTypes, + ArrayRef> meshAxisAssignmentForLoopIterators); + // Inserts a clone of the operation that has all ranked tensor // arguments/results sharded. void spmdizeTriviallyShardableOperation( diff --git a/mlir/include/mlir/Dialect/Mesh/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Mesh/Transforms/Transforms.h index aeab28961a4e1e..be82e2af399dc8 100644 --- a/mlir/include/mlir/Dialect/Mesh/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Mesh/Transforms/Transforms.h @@ -13,6 +13,7 @@ #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Value.h" #include "mlir/Support/LLVM.h" +#include "llvm/ADT/ArrayRef.h" namespace mlir { class RewritePatternSet; @@ -37,6 +38,11 @@ TypedValue createCollectiveProcessGroupSize(MeshOp mesh, ArrayRef axes, ImplicitLocOpBuilder &builder); +// Get process linear index along the given mesh axes. +TypedValue createProcessLinearIndex(StringRef mesh, + ArrayRef meshAxes, + ImplicitLocOpBuilder &builder); + } // namespace mesh } // namespace mlir diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h index bb3b9617c24edb..0c8e0b45878206 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h @@ -133,6 +133,10 @@ static constexpr StringLiteral getRoutineInfoAttrName() { return StringLiteral("acc.routine_info"); } +static constexpr StringLiteral getCombinedConstructsAttrName() { + return CombinedConstructsTypeAttr::name; +} + struct RuntimeCounters : public mlir::SideEffects::Resource::Base { mlir::StringRef getName() final { return "AccRuntimeCounters"; } diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td index 6da7a742bbed8c..b5ad46361fa698 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -218,6 +218,24 @@ def GangArgTypeArrayAttr : let 
constBuilderCall = ?; } +// Combined constructs enumerations +def OpenACC_KernelsLoop : I32EnumAttrCase<"KernelsLoop", 1, "kernels_loop">; +def OpenACC_ParallelLoop : I32EnumAttrCase<"ParallelLoop", 2, "parallel_loop">; +def OpenACC_SerialLoop : I32EnumAttrCase<"SerialLoop", 3, "serial_loop">; + +def OpenACC_CombinedConstructsType : I32EnumAttr<"CombinedConstructsType", + "Differentiate between combined constructs", + [OpenACC_KernelsLoop, OpenACC_ParallelLoop, OpenACC_SerialLoop]> { + let genSpecializedAttr = 0; + let cppNamespace = "::mlir::acc"; +} + +def OpenACC_CombinedConstructsAttr : EnumAttr { + let assemblyFormat = [{ ```<` $value `>` }]; +} + // Define a resource for the OpenACC runtime counters. def OpenACC_RuntimeCounters : Resource<"::mlir::acc::RuntimeCounters">; @@ -933,7 +951,8 @@ def OpenACC_ParallelOp : OpenACC_Op<"parallel", Variadic:$gangFirstPrivateOperands, OptionalAttr:$firstprivatizations, Variadic:$dataClauseOperands, - OptionalAttr:$defaultAttr); + OptionalAttr:$defaultAttr, + UnitAttr:$combined); let regions = (region AnyRegion:$region); @@ -993,6 +1012,7 @@ def OpenACC_ParallelOp : OpenACC_Op<"parallel", }]; let assemblyFormat = [{ + ( `combined` `(` `loop` `)` $combined^)? oilist( `dataOperands` `(` $dataClauseOperands `:` type($dataClauseOperands) `)` | `async` `(` custom($asyncOperands, @@ -1068,7 +1088,8 @@ def OpenACC_SerialOp : OpenACC_Op<"serial", Variadic:$gangFirstPrivateOperands, OptionalAttr:$firstprivatizations, Variadic:$dataClauseOperands, - OptionalAttr:$defaultAttr); + OptionalAttr:$defaultAttr, + UnitAttr:$combined); let regions = (region AnyRegion:$region); @@ -1109,6 +1130,7 @@ def OpenACC_SerialOp : OpenACC_Op<"serial", }]; let assemblyFormat = [{ + ( `combined` `(` `loop` `)` $combined^)? 
oilist( `dataOperands` `(` $dataClauseOperands `:` type($dataClauseOperands) `)` | `async` `(` custom($asyncOperands, @@ -1182,7 +1204,8 @@ def OpenACC_KernelsOp : OpenACC_Op<"kernels", Optional:$selfCond, UnitAttr:$selfAttr, Variadic:$dataClauseOperands, - OptionalAttr:$defaultAttr); + OptionalAttr:$defaultAttr, + UnitAttr:$combined); let regions = (region AnyRegion:$region); @@ -1242,6 +1265,7 @@ def OpenACC_KernelsOp : OpenACC_Op<"kernels", }]; let assemblyFormat = [{ + ( `combined` `(` `loop` `)` $combined^)? oilist( `dataOperands` `(` $dataClauseOperands `:` type($dataClauseOperands) `)` | `async` `(` custom($asyncOperands, @@ -1573,7 +1597,8 @@ def OpenACC_LoopOp : OpenACC_Op<"loop", Variadic:$privateOperands, OptionalAttr:$privatizations, Variadic:$reductionOperands, - OptionalAttr:$reductionRecipes + OptionalAttr:$reductionRecipes, + OptionalAttr:$combined ); let results = (outs Variadic:$results); @@ -1665,6 +1690,7 @@ def OpenACC_LoopOp : OpenACC_Op<"loop", let hasCustomAssemblyFormat = 1; let assemblyFormat = [{ + custom($combined) oilist( `gang` `` custom($gangOperands, type($gangOperands), $gangOperandsArgType, $gangOperandsDeviceType, diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h index 5563cb907e9353..33f613a46bad84 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h @@ -360,6 +360,10 @@ struct LevelType { std::string toMLIRString() const { std::string lvlStr = toFormatString(getLvlFmt()); std::string propStr = ""; + if (isa()) { + lvlStr += + "[" + std::to_string(getN()) + ", " + std::to_string(getM()) + "]"; + } if (isa()) propStr += toPropString(LevelPropNonDefault::Nonunique); diff --git a/mlir/include/mlir/Dialect/XeGPU/CMakeLists.txt b/mlir/include/mlir/Dialect/XeGPU/CMakeLists.txt new file mode 100644 index 00000000000000..f33061b2d87cff --- /dev/null +++ b/mlir/include/mlir/Dialect/XeGPU/CMakeLists.txt @@ -0,0 
+1 @@ +add_subdirectory(IR) diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/CMakeLists.txt b/mlir/include/mlir/Dialect/XeGPU/IR/CMakeLists.txt new file mode 100644 index 00000000000000..f1740e9ed929a6 --- /dev/null +++ b/mlir/include/mlir/Dialect/XeGPU/IR/CMakeLists.txt @@ -0,0 +1,14 @@ +add_mlir_dialect(XeGPU xegpu) +add_mlir_doc(XeGPU XeGPU Dialects/ -gen-dialect-doc -dialect=xegpu) + +set(LLVM_TARGET_DEFINITIONS XeGPU.td) +mlir_tablegen(XeGPUAttrs.h.inc -gen-attrdef-decls) +mlir_tablegen(XeGPUAttrs.cpp.inc -gen-attrdef-defs) +add_public_tablegen_target(MLIRXeGPUAttrsIncGen) +add_dependencies(mlir-headers MLIRXeGPUAttrsIncGen) + +set(LLVM_TARGET_DEFINITIONS XeGPU.td) +mlir_tablegen(XeGPUEnums.h.inc -gen-enum-decls) +mlir_tablegen(XeGPUEnums.cpp.inc -gen-enum-defs) +add_public_tablegen_target(MLIRXeGPUEnumsIncGen) +add_dependencies(mlir-headers MLIRXeGPUEnumsIncGen) diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h new file mode 100644 index 00000000000000..7aaa4ecc7ee77a --- /dev/null +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h @@ -0,0 +1,29 @@ +//===- XeGPU.h - MLIR dialect for XeGPU -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_XEGPU_IR_XEGPU_H +#define MLIR_DIALECT_XEGPU_IR_XEGPU_H + +#include + +namespace mlir { +namespace xegpu { +// placeholder +} // namespace xegpu +} // namespace mlir + +#include +#include +#define GET_ATTRDEF_CLASSES +#include +#define GET_TYPEDEF_CLASSES +#include +#define GET_OP_CLASSES +#include + +#endif // MLIR_DIALECT_XEGPU_IR_XEGPU_H diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.td new file mode 100644 index 00000000000000..232e962870716c --- /dev/null +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.td @@ -0,0 +1,14 @@ +//===- XeGPU.td - XeGPU dialect definition ------------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_XEGPU_IR_XEGPU_TD +#define MLIR_DIALECT_XEGPU_IR_XEGPU_TD + +include "mlir/Dialect/XeGPU/IR/XeGPUOps.td" + +#endif // MLIR_DIALECT_XEGPU_IR_XEGPU_TD diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td new file mode 100644 index 00000000000000..bb325c272e3324 --- /dev/null +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td @@ -0,0 +1,20 @@ +//===- XeGPUAttrs.td - XeGPU dialect attributes definition --*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD +#define MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD + +include "mlir/Dialect/XeGPU/IR/XeGPUDialect.td" + +class XeGPUAttr traits = [], + string baseCppClass = "::mlir::Attribute"> + : AttrDef { + let mnemonic = attrMnemonic; +} + +#endif // MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td new file mode 100644 index 00000000000000..3851275ad30a0a --- /dev/null +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td @@ -0,0 +1,30 @@ +//===- XeGPUDialect.td - XeGPU dialect definition -----------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_XEGPU_IR_XEGPUDIALECT_TD +#define MLIR_DIALECT_XEGPU_IR_XEGPUDIALECT_TD + +include "mlir/IR/OpBase.td" + +def XeGPU_Dialect : Dialect { + let name = "xegpu"; + let cppNamespace = "::mlir::xegpu"; + let summary = "The XeGPU dialect that models Intel GPU's ISA"; + let description = [{ + The XeGPU dialect models Intel Xe ISA semantics but works at vector and + TensorDesc data type. It provides 1:1 mappings to match Xe instructions + like DPAS and 2D block load. The matrix size being processed at this level + exactly matches the hardware instructions or the intrinsic supported by + the lower-level GPU compiler. 
+ }]; + + // let useDefaultTypePrinterParser = true; + // let useDefaultAttributePrinterParser = true; +} + +#endif // MLIR_DIALECT_XEGPU_IR_XEGPUDIALECT_TD diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td new file mode 100644 index 00000000000000..5825ef9195b03f --- /dev/null +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td @@ -0,0 +1,26 @@ +//===- XeGPUOps.td - XeGPU dialect operations definition ----*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD +#define MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD + +include "mlir/Dialect/XeGPU/IR/XeGPUAttrs.td" +include "mlir/Dialect/XeGPU/IR/XeGPUDialect.td" +include "mlir/Dialect/XeGPU/IR/XeGPUTypes.td" + + +// Base class for dialect operations. This operation inherits from the base +// `Op` class in OpBase.td, and provides: +// * The parent dialect of the operation. +// * The mnemonic for the operation, or the name without the dialect prefix. +// * A list of traits for the operation. +class XeGPU_Op traits = []>: + Op; + + +#endif // MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td new file mode 100644 index 00000000000000..1d75bb4e2906fe --- /dev/null +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td @@ -0,0 +1,33 @@ +//===- XeGPUTypes.td - XeGPU dialect types definition -------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_XEGPU_IR_XEGPUTYPES_TD +#define MLIR_DIALECT_XEGPU_IR_XEGPUTYPES_TD + +include "mlir/IR/BuiltinTypes.td" +include "mlir/Dialect/XeGPU/IR/XeGPUAttrs.td" +include "mlir/Dialect/XeGPU/IR/XeGPUDialect.td" + +def XeGPU_IntType: AnyTypeOf<[I1, I8, I16, I32, I64, SI1, SI8, SI16, SI32, SI64, UI1, UI8, UI16, UI32, UI64]>; +def XeGPU_FloatType: AnyTypeOf<[F16, F32, F64, BF16, TF32]>; +def XeGPU_ScalarType: AnyTypeOf<[XeGPU_IntType, XeGPU_FloatType]>; +def XeGPU_BaseAddrType: AnyTypeOf<[MemRefRankOf<[XeGPU_ScalarType], [1, 2]>, UI64, UI32, I64, I32]>; +def XeGPU_DpasOpType: VectorOfRankAndType<[2, 3], [XeGPU_ScalarType]>; +def XeGPU_OffsetType: VectorOfRankAndType<[1], [Index]>; +def XeGPU_MaskType: AnyTypeOf<[VectorOfRankAndType<[1,2], [I1]>, I1]>; +def XeGPU_ValueType: AnyTypeOf<[VectorOfRankAndType<[1,2,3,4], [XeGPU_ScalarType]>, XeGPU_ScalarType]>; +def XeGPU_Vector2DType: VectorOfRankAndType<[2], [XeGPU_ScalarType]>; + +// common base class for types in XeGPU dialect +class XeGPUTypeDef traits = [], + string baseCppClass = "::mlir::Type"> + : TypeDef { + let mnemonic = typeMnemonic; +} + +#endif // MLIR_DIALECT_XEGPU_IR_XEGPUTYPES_TD diff --git a/mlir/include/mlir/IR/Dialect.h b/mlir/include/mlir/IR/Dialect.h index 50f6f6de5c2897..6c8a170a03c72d 100644 --- a/mlir/include/mlir/IR/Dialect.h +++ b/mlir/include/mlir/IR/Dialect.h @@ -216,6 +216,14 @@ class Dialect { {TypeID::get(), InterfaceT::getInterfaceID()}); } + // Declare the same interface for multiple types. + // Example: + // declarePromisedInterfaces() + template + void declarePromisedInterfaces() { + (declarePromisedInterface(), ...); + } + /// Checks if the given interface, which is attempting to be used, is a /// promised interface of this dialect that has yet to be implemented. If so, /// emits a fatal error. 
`interfaceName` is an optional string that contains a diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h index e508d51205f347..21775e11e07149 100644 --- a/mlir/include/mlir/InitAllDialects.h +++ b/mlir/include/mlir/InitAllDialects.h @@ -43,10 +43,7 @@ #include "mlir/Dialect/LLVMIR/NVVMDialect.h" #include "mlir/Dialect/LLVMIR/ROCDLDialect.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" -#include "mlir/Dialect/Linalg/IR/ValueBoundsOpInterfaceImpl.h" -#include "mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h" -#include "mlir/Dialect/Linalg/Transforms/SubsetInsertionOpInterfaceImpl.h" -#include "mlir/Dialect/Linalg/Transforms/TilingInterfaceImpl.h" +#include "mlir/Dialect/Linalg/Transforms/AllInterfaces.h" #include "mlir/Dialect/MLProgram/IR/MLProgram.h" #include "mlir/Dialect/MLProgram/Transforms/BufferizableOpInterfaceImpl.h" #include "mlir/Dialect/MPI/IR/MPI.h" @@ -89,6 +86,7 @@ #include "mlir/Dialect/Vector/Transforms/BufferizableOpInterfaceImpl.h" #include "mlir/Dialect/Vector/Transforms/SubsetOpInterfaceImpl.h" #include "mlir/Dialect/X86Vector/X86VectorDialect.h" +#include "mlir/Dialect/XeGPU/IR/XeGPU.h" #include "mlir/IR/Dialect.h" #include "mlir/Interfaces/CastInterfaces.h" #include "mlir/Target/LLVM/NVVM/Target.h" @@ -141,7 +139,8 @@ inline void registerAllDialects(DialectRegistry ®istry) { transform::TransformDialect, ub::UBDialect, vector::VectorDialect, - x86vector::X86VectorDialect>(); + x86vector::X86VectorDialect, + xegpu::XeGPUDialect>(); // clang-format on // Register all external models. 
@@ -155,10 +154,7 @@ inline void registerAllDialects(DialectRegistry ®istry) { cf::registerBufferizableOpInterfaceExternalModels(registry); cf::registerBufferDeallocationOpInterfaceExternalModels(registry); gpu::registerBufferDeallocationOpInterfaceExternalModels(registry); - linalg::registerBufferizableOpInterfaceExternalModels(registry); - linalg::registerSubsetOpInterfaceExternalModels(registry); - linalg::registerTilingInterfaceExternalModels(registry); - linalg::registerValueBoundsOpInterfaceExternalModels(registry); + linalg::registerAllDialectInterfaceImplementations(registry); memref::registerAllocationOpInterfaceExternalModels(registry); memref::registerRuntimeVerifiableOpInterfaceExternalModels(registry); memref::registerValueBoundsOpInterfaceExternalModels(registry); diff --git a/mlir/include/mlir/Transforms/DialectConversion.h b/mlir/include/mlir/Transforms/DialectConversion.h index 01fde101ef3cb6..83198c9b0db545 100644 --- a/mlir/include/mlir/Transforms/DialectConversion.h +++ b/mlir/include/mlir/Transforms/DialectConversion.h @@ -1085,6 +1085,39 @@ struct ConversionConfig { /// IR during an analysis conversion and only pre-existing operations are /// added to the set. DenseSet *legalizableOps = nullptr; + + /// An optional listener that is notified about all IR modifications in case + /// dialect conversion succeeds. If the dialect conversion fails and no IR + /// modifications are visible (i.e., they were all rolled back), no + /// notifications are sent. + /// + /// Note: Notifications are sent in a delayed fashion, when the dialect + /// conversion is guaranteed to succeed. At that point, some IR modifications + /// may already have been materialized. Consequently, operations/blocks that + /// are passed to listener callbacks should not be accessed. (Ops/blocks are + /// guaranteed to be valid pointers and accessing op names is allowed. But + /// there are no guarantees about the state of ops/blocks at the time that a + /// callback is triggered.) 
+ /// + /// Example: Consider a dialect conversion a new op ("test.foo") is created + /// and inserted, and later moved to another block. (Moving ops also triggers + /// "notifyOperationInserted".) + /// + /// (1) notifyOperationInserted: "test.foo" (into block "b1") + /// (2) notifyOperationInserted: "test.foo" (moved to another block "b2") + /// + /// When querying "op->getBlock()" during the first "notifyOperationInserted", + /// "b2" would be returned because "moving an op" is a kind of rewrite that is + /// immediately performed by the dialect conversion (and rolled back upon + /// failure). + // + // Note: When receiving a "notifyBlockInserted"/"notifyOperationInserted" + // callback, the previous region/block is provided to the callback, but not + // the iterator pointing to the exact location within the region/block. That + // is because these notifications are sent with a delay (after the IR has + // already been modified) and iterators into past IR state cannot be + // represented at the moment. 
+ RewriterBase::Listener *listener = nullptr; }; //===----------------------------------------------------------------------===// diff --git a/mlir/lib/CAPI/Dialect/LLVM.cpp b/mlir/lib/CAPI/Dialect/LLVM.cpp index 642018a814ca12..2d938ce5f4834c 100644 --- a/mlir/lib/CAPI/Dialect/LLVM.cpp +++ b/mlir/lib/CAPI/Dialect/LLVM.cpp @@ -7,9 +7,16 @@ //===----------------------------------------------------------------------===// #include "mlir-c/Dialect/LLVM.h" +#include "mlir-c/IR.h" +#include "mlir-c/Support.h" #include "mlir/CAPI/Registration.h" +#include "mlir/CAPI/Wrap.h" +#include "mlir/Dialect/LLVMIR/LLVMAttrs.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/LLVMIR/LLVMTypes.h" +#include "llvm-c/Core.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SmallVectorExtras.h" using namespace mlir; using namespace mlir::LLVM; @@ -110,3 +117,203 @@ MlirLogicalResult mlirLLVMStructTypeSetBody(MlirType structType, cast(unwrap(structType)) .setBody(unwrapList(nFieldTypes, fieldTypes, fields), isPacked)); } + +MlirAttribute mlirLLVMDIExpressionElemAttrGet(MlirContext ctx, + unsigned int opcode, + intptr_t nArguments, + uint64_t const *arguments) { + auto list = ArrayRef(arguments, nArguments); + return wrap(DIExpressionElemAttr::get(unwrap(ctx), opcode, list)); +} + +MlirAttribute mlirLLVMDIExpressionAttrGet(MlirContext ctx, intptr_t nOperations, + MlirAttribute const *operations) { + SmallVector attrStorage; + attrStorage.reserve(nOperations); + + return wrap(DIExpressionAttr::get( + unwrap(ctx), + llvm::map_to_vector( + unwrapList(nOperations, operations, attrStorage), + [](Attribute a) { return a.cast(); }))); +} + +MlirAttribute mlirLLVMDINullTypeAttrGet(MlirContext ctx) { + return wrap(DINullTypeAttr::get(unwrap(ctx))); +} + +MlirAttribute mlirLLVMDIBasicTypeAttrGet(MlirContext ctx, unsigned int tag, + MlirAttribute name, + uint64_t sizeInBits, + MlirLLVMTypeEncoding encoding) { + + return wrap(DIBasicTypeAttr::get( + unwrap(ctx), tag, 
cast(unwrap(name)), sizeInBits, encoding)); +} + +MlirAttribute mlirLLVMDICompositeTypeAttrGet( + MlirContext ctx, unsigned int tag, MlirAttribute name, MlirAttribute file, + uint32_t line, MlirAttribute scope, MlirAttribute baseType, int64_t flags, + uint64_t sizeInBits, uint64_t alignInBits, intptr_t nElements, + MlirAttribute const *elements) { + SmallVector elementsStorage; + elementsStorage.reserve(nElements); + + return wrap(DICompositeTypeAttr::get( + unwrap(ctx), tag, cast(unwrap(name)), + cast(unwrap(file)), line, cast(unwrap(scope)), + cast(unwrap(baseType)), DIFlags(flags), sizeInBits, + alignInBits, + llvm::map_to_vector(unwrapList(nElements, elements, elementsStorage), + [](Attribute a) { return a.cast(); }))); +} + +MlirAttribute mlirLLVMDIDerivedTypeAttrGet(MlirContext ctx, unsigned int tag, + MlirAttribute name, + MlirAttribute baseType, + uint64_t sizeInBits, + uint32_t alignInBits, + uint64_t offsetInBits) { + return wrap(DIDerivedTypeAttr::get(unwrap(ctx), tag, + cast(unwrap(name)), + cast(unwrap(baseType)), + sizeInBits, alignInBits, offsetInBits)); +} + +MlirAttribute +mlirLLVMDIDerivedTypeAttrGetBaseType(MlirAttribute diDerivedType) { + return wrap(cast(unwrap(diDerivedType)).getBaseType()); +} + +MlirAttribute mlirLLVMCConvAttrGet(MlirContext ctx, MlirLLVMCConv cconv) { + return wrap(CConvAttr::get(unwrap(ctx), CConv(cconv))); +} + +MlirAttribute mlirLLVMComdatAttrGet(MlirContext ctx, MlirLLVMComdat comdat) { + return wrap(ComdatAttr::get(unwrap(ctx), comdat::Comdat(comdat))); +} + +MlirAttribute mlirLLVMLinkageAttrGet(MlirContext ctx, MlirLLVMLinkage linkage) { + return wrap(LinkageAttr::get(unwrap(ctx), linkage::Linkage(linkage))); +} + +MlirAttribute mlirLLVMDIFileAttrGet(MlirContext ctx, MlirAttribute name, + MlirAttribute directory) { + return wrap(DIFileAttr::get(unwrap(ctx), cast(unwrap(name)), + cast(unwrap(directory)))); +} + +MlirAttribute +mlirLLVMDICompileUnitAttrGet(MlirContext ctx, MlirAttribute id, + unsigned int 
sourceLanguage, MlirAttribute file, + MlirAttribute producer, bool isOptimized, + MlirLLVMDIEmissionKind emissionKind) { + return wrap(DICompileUnitAttr::get( + unwrap(ctx), cast(unwrap(id)), sourceLanguage, + cast(unwrap(file)), cast(unwrap(producer)), + isOptimized, DIEmissionKind(emissionKind))); +} + +MlirAttribute mlirLLVMDIFlagsAttrGet(MlirContext ctx, uint64_t value) { + return wrap(DIFlagsAttr::get(unwrap(ctx), DIFlags(value))); +} + +MlirAttribute mlirLLVMDILexicalBlockAttrGet(MlirContext ctx, + MlirAttribute scope, + MlirAttribute file, + unsigned int line, + unsigned int column) { + return wrap( + DILexicalBlockAttr::get(unwrap(ctx), cast(unwrap(scope)), + cast(unwrap(file)), line, column)); +} + +MlirAttribute mlirLLVMDILexicalBlockFileAttrGet(MlirContext ctx, + MlirAttribute scope, + MlirAttribute file, + unsigned int discriminator) { + return wrap(DILexicalBlockFileAttr::get( + unwrap(ctx), cast(unwrap(scope)), + cast(unwrap(file)), discriminator)); +} + +MlirAttribute +mlirLLVMDILocalVariableAttrGet(MlirContext ctx, MlirAttribute scope, + MlirAttribute name, MlirAttribute diFile, + unsigned int line, unsigned int arg, + unsigned int alignInBits, MlirAttribute diType) { + return wrap(DILocalVariableAttr::get( + unwrap(ctx), cast(unwrap(scope)), + cast(unwrap(name)), cast(unwrap(diFile)), line, + arg, alignInBits, cast(unwrap(diType)))); +} + +MlirAttribute mlirLLVMDISubroutineTypeAttrGet(MlirContext ctx, + unsigned int callingConvention, + intptr_t nTypes, + MlirAttribute const *types) { + SmallVector attrStorage; + attrStorage.reserve(nTypes); + + return wrap(DISubroutineTypeAttr::get( + unwrap(ctx), callingConvention, + llvm::map_to_vector(unwrapList(nTypes, types, attrStorage), + [](Attribute a) { return a.cast(); }))); +} + +MlirAttribute mlirLLVMDISubprogramAttrGet( + MlirContext ctx, MlirAttribute id, MlirAttribute compileUnit, + MlirAttribute scope, MlirAttribute name, MlirAttribute linkageName, + MlirAttribute file, unsigned int line, unsigned 
int scopeLine, + uint64_t subprogramFlags, MlirAttribute type) { + return wrap(DISubprogramAttr::get( + unwrap(ctx), cast(unwrap(id)), + cast(unwrap(compileUnit)), + cast(unwrap(scope)), cast(unwrap(name)), + cast(unwrap(linkageName)), cast(unwrap(file)), + line, scopeLine, DISubprogramFlags(subprogramFlags), + cast(unwrap(type)))); +} + +MlirAttribute mlirLLVMDISubprogramAttrGetScope(MlirAttribute diSubprogram) { + return wrap(cast(unwrap(diSubprogram)).getScope()); +} + +unsigned int mlirLLVMDISubprogramAttrGetLine(MlirAttribute diSubprogram) { + return cast(unwrap(diSubprogram)).getLine(); +} + +unsigned int mlirLLVMDISubprogramAttrGetScopeLine(MlirAttribute diSubprogram) { + return cast(unwrap(diSubprogram)).getScopeLine(); +} + +MlirAttribute +mlirLLVMDISubprogramAttrGetCompileUnit(MlirAttribute diSubprogram) { + return wrap(cast(unwrap(diSubprogram)).getCompileUnit()); +} + +MlirAttribute mlirLLVMDISubprogramAttrGetFile(MlirAttribute diSubprogram) { + return wrap(cast(unwrap(diSubprogram)).getFile()); +} + +MlirAttribute mlirLLVMDISubprogramAttrGetType(MlirAttribute diSubprogram) { + return wrap(cast(unwrap(diSubprogram)).getType()); +} + +MlirAttribute mlirLLVMDIModuleAttrGet(MlirContext ctx, MlirAttribute file, + MlirAttribute scope, MlirAttribute name, + MlirAttribute configMacros, + MlirAttribute includePath, + MlirAttribute apinotes, unsigned int line, + bool isDecl) { + return wrap(DIModuleAttr::get( + unwrap(ctx), cast(unwrap(file)), + cast(unwrap(scope)), cast(unwrap(name)), + cast(unwrap(configMacros)), + cast(unwrap(includePath)), cast(unwrap(apinotes)), + line, isDecl)); +} + +MlirAttribute mlirLLVMDIModuleAttrGetScope(MlirAttribute diModule) { + return wrap(cast(unwrap(diModule)).getScope()); +} diff --git a/mlir/lib/CAPI/IR/BuiltinAttributes.cpp b/mlir/lib/CAPI/IR/BuiltinAttributes.cpp index cb6ce16fa317af..b035c0aac884d2 100644 --- a/mlir/lib/CAPI/IR/BuiltinAttributes.cpp +++ b/mlir/lib/CAPI/IR/BuiltinAttributes.cpp @@ -289,6 +289,10 @@ 
MlirTypeID mlirSymbolRefAttrGetTypeID(void) { return wrap(SymbolRefAttr::getTypeID()); } +MlirAttribute mlirDisctinctAttrCreate(MlirAttribute referencedAttr) { + return wrap(mlir::DistinctAttr::create(unwrap(referencedAttr))); +} + //===----------------------------------------------------------------------===// // Flat SymbolRef attribute. //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Conversion/CMakeLists.txt b/mlir/lib/Conversion/CMakeLists.txt index 9e82152cf5a56f..41ab7046b91ce3 100644 --- a/mlir/lib/Conversion/CMakeLists.txt +++ b/mlir/lib/Conversion/CMakeLists.txt @@ -2,8 +2,8 @@ add_subdirectory(AffineToStandard) add_subdirectory(AMDGPUToROCDL) add_subdirectory(ArithCommon) add_subdirectory(ArithToAMDGPU) -add_subdirectory(ArithToEmitC) add_subdirectory(ArithToArmSME) +add_subdirectory(ArithToEmitC) add_subdirectory(ArithToLLVM) add_subdirectory(ArithToSPIRV) add_subdirectory(ArmNeon2dToIntr) diff --git a/mlir/lib/Dialect/ArmSME/Transforms/VectorLegalization.cpp b/mlir/lib/Dialect/ArmSME/Transforms/VectorLegalization.cpp index 11f8bc04b21844..31500c62c0d600 100644 --- a/mlir/lib/Dialect/ArmSME/Transforms/VectorLegalization.cpp +++ b/mlir/lib/Dialect/ArmSME/Transforms/VectorLegalization.cpp @@ -46,6 +46,8 @@ static constexpr StringLiteral kMatchFailureUnsupportedMaskOp( "op mask is unsupported for legalization/decomposition"); static constexpr StringLiteral kMatchFailureNonPermutationMap("op affine map is not a permutation"); +static constexpr StringLiteral kMatchFailureNotIllegalToLegal( + "expected transpose from illegal type to legal type"); /// An SMESubTile represents a single SME-sized sub-tile from decomposing a /// larger vector type. The (`row`, `col`) are the position of the tile in the @@ -416,6 +418,17 @@ struct FoldExtractFromVectorOfSMELikeCreateMasks } }; +/// A vector type where no fixed dimension comes after a scalable dimension. 
+bool isLegalVectorType(VectorType vType) { + bool seenFixedDim = false; + for (bool scalableFlag : llvm::reverse(vType.getScalableDims())) { + seenFixedDim |= !scalableFlag; + if (seenFixedDim && scalableFlag) + return false; + } + return true; +} + /// Lifts an illegal vector.transpose and vector.transfer_read to a /// memref.subview + memref.transpose, followed by a legal read. /// @@ -448,16 +461,6 @@ struct LiftIllegalVectorTransposeToMemory : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; - static bool isIllegalVectorType(VectorType vType) { - bool seenFixedDim = false; - for (bool scalableFlag : llvm::reverse(vType.getScalableDims())) { - seenFixedDim |= !scalableFlag; - if (seenFixedDim && scalableFlag) - return true; - } - return false; - } - static Value getExtensionSource(Operation *op) { if (isa_and_present(op)) return op->getOperand(0); @@ -468,9 +471,9 @@ struct LiftIllegalVectorTransposeToMemory PatternRewriter &rewriter) const override { auto sourceType = transposeOp.getSourceVectorType(); auto resultType = transposeOp.getResultVectorType(); - if (!isIllegalVectorType(sourceType) || isIllegalVectorType(resultType)) - return rewriter.notifyMatchFailure( - transposeOp, "expected transpose from illegal type to legal type"); + if (isLegalVectorType(sourceType) || !isLegalVectorType(resultType)) + return rewriter.notifyMatchFailure(transposeOp, + kMatchFailureNotIllegalToLegal); // Look through extend for transfer_read. Value maybeRead = transposeOp.getVector(); @@ -556,6 +559,59 @@ struct LiftIllegalVectorTransposeToMemory } }; +/// A rewrite to turn unit dim transpose-like vector.shape_casts into +/// vector.transposes. The shape_cast has to be from an illegal vector type to a +/// legal one (as defined by isLegalVectorType). +/// +/// The reasoning for this is if we've got to this pass and we still have +/// shape_casts of illegal types, then they likely will not cancel out. 
Turning +/// them into transposes gives LiftIllegalVectorTransposeToMemory a chance to +/// eliminate them. +/// +/// Example: +/// +/// BEFORE: +/// ```mlir +/// %0 = vector.shape_cast %a : vector<[4]x1xf32> to vector<1x[4]xf32> +/// ``` +/// +/// AFTER: +/// ```mlir +/// %0 = vector.transpose %0, [1, 0] : vector<[4]x1xf32> to vector<1x[4]xf32> +/// ``` +struct ConvertIllegalShapeCastOpsToTransposes + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::ShapeCastOp shapeCastOp, + PatternRewriter &rewriter) const override { + auto sourceType = shapeCastOp.getSourceVectorType(); + auto resultType = shapeCastOp.getResultVectorType(); + if (isLegalVectorType(sourceType) || !isLegalVectorType(resultType)) + return rewriter.notifyMatchFailure(shapeCastOp, + kMatchFailureNotIllegalToLegal); + + // Note: If we know that `sourceType` is an illegal vector type (and 2D) + // then dim 0 is scalable and dim 1 is fixed. + if (sourceType.getRank() != 2 || sourceType.getDimSize(1) != 1) + return rewriter.notifyMatchFailure( + shapeCastOp, "expected source to be a 2D scalable vector with a " + "trailing unit dim"); + + auto loc = shapeCastOp.getLoc(); + auto transpose = rewriter.create( + loc, shapeCastOp.getSource(), ArrayRef{1, 0}); + + if (resultType.getRank() == 1) + rewriter.replaceOpWithNewOp(shapeCastOp, resultType, + transpose); + else + rewriter.replaceOp(shapeCastOp, transpose); + + return success(); + } +}; + struct VectorLegalizationPass : public arm_sme::impl::VectorLegalizationBase { void runOnOperation() override { @@ -576,7 +632,8 @@ struct VectorLegalizationPass }); patterns.add(context); + LiftIllegalVectorTransposeToMemory, + ConvertIllegalShapeCastOpsToTransposes>(context); // Note: High benefit to ensure masked outer products are lowered first. 
patterns.add( converter, context, 1024); diff --git a/mlir/lib/Dialect/CMakeLists.txt b/mlir/lib/Dialect/CMakeLists.txt index c72107939cf42b..b1ba5a3bc8817d 100644 --- a/mlir/lib/Dialect/CMakeLists.txt +++ b/mlir/lib/Dialect/CMakeLists.txt @@ -40,6 +40,7 @@ add_subdirectory(UB) add_subdirectory(Utils) add_subdirectory(Vector) add_subdirectory(X86Vector) +add_subdirectory(XeGPU) set(LLVM_OPTIONAL_SOURCES Traits.cpp diff --git a/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt b/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt index f0ac1899bb02ab..c187563b8f0c4e 100644 --- a/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt @@ -25,6 +25,7 @@ add_mlir_dialect_library(MLIRLinalgDialect MLIRInferTypeOpInterface MLIRIR MLIRParser + MLIRShardingInterface MLIRSideEffectInterfaces MLIRSparseTensorDialect MLIRSCFDialect diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp index 5069d43e7db95f..027058d4de6328 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp @@ -16,6 +16,7 @@ #include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/Math/IR/Math.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/Mesh/Interfaces/ShardingInterface.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Dialect.h" @@ -118,6 +119,12 @@ void mlir::linalg::LinalgDialect::initialize() { >(namedStructuredOpRegionBuilders); addInterfaces(); + + declarePromisedInterface(); + declarePromisedInterfaces(); } LogicalResult LinalgDialect::verifyOperationAttribute(Operation *op, diff --git a/mlir/lib/Dialect/Linalg/Transforms/AllInterfaces.cpp b/mlir/lib/Dialect/Linalg/Transforms/AllInterfaces.cpp new file mode 100644 index 00000000000000..281d9f2204486b --- /dev/null +++ b/mlir/lib/Dialect/Linalg/Transforms/AllInterfaces.cpp @@ -0,0 +1,24 @@ +//===- AllInterfaces.cpp - 
------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Linalg/Transforms/AllInterfaces.h" + +#include "mlir/Dialect/Linalg/IR/ValueBoundsOpInterfaceImpl.h" +#include "mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h" +#include "mlir/Dialect/Linalg/Transforms/MeshShardingInterfaceImpl.h" +#include "mlir/Dialect/Linalg/Transforms/SubsetInsertionOpInterfaceImpl.h" +#include "mlir/Dialect/Linalg/Transforms/TilingInterfaceImpl.h" + +void mlir::linalg::registerAllDialectInterfaceImplementations( + DialectRegistry ®istry) { + registerBufferizableOpInterfaceExternalModels(registry); + registerMeshShardingInterfaceExternalModels(registry); + registerSubsetOpInterfaceExternalModels(registry); + registerTilingInterfaceExternalModels(registry); + registerValueBoundsOpInterfaceExternalModels(registry); +} diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt index 4f47e3b8718454..513c54de5d7bfc 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt @@ -1,4 +1,5 @@ add_mlir_dialect_library(MLIRLinalgTransforms + AllInterfaces.cpp BubbleUpExtractSlice.cpp BufferizableOpInterfaceImpl.cpp Bufferize.cpp @@ -21,6 +22,7 @@ add_mlir_dialect_library(MLIRLinalgTransforms InlineScalarOperands.cpp Interchange.cpp Loops.cpp + MeshShardingInterfaceImpl.cpp NamedOpConversions.cpp Padding.cpp Promotion.cpp @@ -61,12 +63,15 @@ add_mlir_dialect_library(MLIRLinalgTransforms MLIRIR MLIRMemRefDialect MLIRMemRefTransforms + MLIRMeshDialect + MLIRMeshTransforms MLIRLinalgDialect MLIRLinalgUtils MLIRSCFDialect MLIRSCFTransforms MLIRSCFUtils MLIRPass + 
MLIRShardingInterface MLIRSubsetOpInterface MLIRSparseTensorDialect MLIRTensorDialect diff --git a/mlir/lib/Dialect/Linalg/Transforms/MeshShardingInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/Transforms/MeshShardingInterfaceImpl.cpp new file mode 100644 index 00000000000000..146e880765668b --- /dev/null +++ b/mlir/lib/Dialect/Linalg/Transforms/MeshShardingInterfaceImpl.cpp @@ -0,0 +1,353 @@ +//===- MeshShardingInterfaceImpl.cpp --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Linalg/Transforms/MeshShardingInterfaceImpl.h" + +#include "mlir/Analysis/SliceAnalysis.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/Linalg/IR/LinalgInterfaces.h" +#include "mlir/Dialect/Mesh/IR/MeshOps.h" +#include "mlir/Dialect/Mesh/Interfaces/ShardingInterface.h" +#include "mlir/Dialect/Mesh/Interfaces/ShardingInterfaceImpl.h" +#include "mlir/Dialect/Mesh/Transforms/Transforms.h" +#include "mlir/Dialect/SCF/IR/SCF.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Utils/StructuredOpsUtils.h" +#include "mlir/IR/AffineExpr.h" +#include "mlir/IR/DialectRegistry.h" +#include "mlir/IR/IRMapping.h" +#include "mlir/IR/ImplicitLocOpBuilder.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/OpDefinition.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/SymbolTable.h" +#include "mlir/IR/Value.h" +#include "mlir/Interfaces/TilingInterface.h" +#include "mlir/Support/LogicalResult.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/TypeSwitch.h" +#include +#include +#include + 
+namespace mlir::linalg { + +using MeshAxis = mesh::MeshAxis; +using ReductionKind = mesh::ReductionKind; +using MeshShardingAttr = mesh::MeshShardingAttr; +using ShardingArray = mesh::ShardingArray; +using MeshOp = mesh::MeshOp; + +// Returns the corresponding mesh reduction kind for the given arith op. +static ReductionKind getReductionKind(Operation *op) { + return llvm::TypeSwitch(op) + // Floating-point operations. + .Case([](arith::AddFOp op) { return ReductionKind::Sum; }) + .Case([](arith::MulFOp op) { return ReductionKind::Product; }) + // TODO: handle maxnumf and minnumf. + .Case([](arith::MaximumFOp op) { return ReductionKind::Max; }) + .Case([](arith::MinimumFOp op) { return ReductionKind::Min; }) + // Integer operations. + .Case([](arith::AddIOp op) { return ReductionKind::Sum; }) + .Case([](arith::OrIOp op) { return ReductionKind::BitwiseOr; }) + .Case([](arith::XOrIOp op) { return ReductionKind::BitwiseXor; }) + .Case([](arith::AndIOp op) { return ReductionKind::Sum; }) + // TODO: handle signless, signed and unsigned types properly. + // It is assumed that the element type of the collective operands and + // result drive the meaning of the reduction kind, whether it is signed + // or unsigned. + // The reduction op inside the linalg op may have different result type + // from the element type of the linalg op's result. + // Also signed and unsigned Arith dialect ops may accept signed, unsigned + // or signless operands. + // Maybe expand the reduction kinds. 
+ .Case([](arith::MaxUIOp op) { return ReductionKind::Max; }) + .Case([](arith::MinUIOp op) { return ReductionKind::Min; }) + .Case([](arith::MaxSIOp op) { return ReductionKind::Max; }) + .Case([](arith::MinSIOp op) { return ReductionKind::Min; }) + .Case([](arith::MulIOp op) { return ReductionKind::Product; }) + .Default([](Operation *op) { return ReductionKind::Generic; }); +} + +static std::optional getCombinerOp(LinalgOp op) { + SmallVector combinerOps; + Value reducedValue = matchReduction(op.getRegionOutputArgs(), 0, combinerOps); + if (!reducedValue || combinerOps.size() != 1) { + return std::nullopt; + } + + return combinerOps[0]; +} + +static ReductionKind getReductionKindOfLinalgOp(LinalgOp op) { + std::optional reductionOp = getCombinerOp(op); + if (!reductionOp) { + return ReductionKind::Generic; + } + [[maybe_unused]] Type resultElementType = + llvm::cast(op->getResult(0).getType()).getElementType(); + // TODO: handle case when result type of the reduction op does not match the + // element type of the result tensor. + // Would it makes sense at all? + assert(resultElementType == reductionOp.value()->getResult(0).getType()); + return getReductionKind(reductionOp.value()); +} + +static MeshOp getMesh(Operation *op, + ArrayRef operandShardings, + ArrayRef resultShardings, + SymbolTableCollection &symbolTable) { + for (MeshShardingAttr sharding : operandShardings) { + if (sharding) { + return mesh::getMesh(op, sharding.getMesh(), symbolTable); + } + } + + for (MeshShardingAttr sharding : resultShardings) { + if (sharding) { + return mesh::getMesh(op, sharding.getMesh(), symbolTable); + } + } + + assert(false); + return nullptr; +} + +// Choose the operand based on the current process index along the reduction +// mesh axes. +// We need to use the initial value only once to avoid including it in the +// reduction multiple times. +// In each process group only the leading process with linear index 0 would use +// the original operand. 
+// The other processes would use the reduction operation neutral tensor. +static Value createDestinationPassingStyleInitOperand( + LinalgOp op, Value spmdizedOperand, ArrayRef reductionMeshAxes, + MeshOp meshOp, ImplicitLocOpBuilder &builder) { + Value processLinearIndexInReductionGroup = mesh::createProcessLinearIndex( + meshOp.getSymName(), reductionMeshAxes, builder); + Value zero = builder.create(0); + Value isLeadProcess = builder.create( + builder.getI1Type(), arith::CmpIPredicate::eq, + processLinearIndexInReductionGroup, zero); + scf::IfOp ifOp = builder.create(spmdizedOperand.getType(), + isLeadProcess, true, true); + // Then block. + { + OpBuilder::InsertionGuard insertionGuard(builder); + builder.setInsertionPointToEnd(&ifOp.getThenRegion().front()); + builder.create(spmdizedOperand); + } + + // Else block. + { + OpBuilder::InsertionGuard insertionGuard(builder); + builder.setInsertionPointToEnd(&ifOp.getElseRegion().front()); + SmallVector shape = + tensor::getMixedSizes(builder, builder.getLoc(), spmdizedOperand); + PartialReductionOpInterface partialReductionIface = + llvm::cast(op.getOperation()); + FailureOr reductionNeutralTensorOp = + partialReductionIface.generateInitialTensorForPartialReduction( + builder, builder.getLoc(), shape, {}); + assert(succeeded(reductionNeutralTensorOp)); + builder.create( + reductionNeutralTensorOp.value()->getResult(0)); + } + return ifOp.getResult(0); +} + +// Create the DPS init operands for the spmdized Linalg op. +// Return all the new spmdized operands. +static SmallVector createDestinationPassingStyleInitOperands( + LinalgOp op, MeshOp meshOp, ArrayRef spmdizedOperands, + ArrayRef reductionMeshAxes, IRMapping &spmdizationMap, + ImplicitLocOpBuilder &builder) { + // TODO: add support for multiple destination passing style initial value + // operands. + // PartialReductionOpInterface::generateInitialTensorForPartialReduction + // needs to also support multiple DPS initial operands. 
+ SmallVector newOperands = llvm::to_vector(spmdizedOperands); + auto operandIdx = op.getDpsInitOperand(0)->getOperandNumber(); + Value spmdizedInitOperand = + spmdizationMap.lookup(op->getOperands()[operandIdx]); + newOperands[operandIdx] = createDestinationPassingStyleInitOperand( + op, spmdizedInitOperand, reductionMeshAxes, meshOp, builder); + return newOperands; +} + +static void createAllReduceForResultWithoutPartialSharding( + Value unshardedLinalgOpResult, ArrayRef opReductionMeshAxes, + MeshShardingAttr resultSharding, ReductionKind reductionKind, + IRMapping &spmdizationMap, ImplicitLocOpBuilder &builder) { + SmallVector allReduceMeshAxes; + llvm::copy_if(opReductionMeshAxes, std::back_inserter(allReduceMeshAxes), + [&resultSharding](MeshAxis axis) { + return !llvm::is_contained(resultSharding.getPartialAxes(), + axis); + }); + if (allReduceMeshAxes.empty()) { + return; + } + + Value spmdizedLinalgOpResult = spmdizationMap.lookup(unshardedLinalgOpResult); + Value reducedValue = builder.create( + spmdizedLinalgOpResult, resultSharding.getMesh().getValue(), + allReduceMeshAxes, reductionKind); + spmdizationMap.map(unshardedLinalgOpResult, reducedValue); +} + +static void createAllReduceForResultsWithoutPartialShardings( + LinalgOp unshardedOp, ArrayRef opReductionMeshAxes, + ArrayRef resultShardings, IRMapping &spmdizationMap, + ImplicitLocOpBuilder &builder) { + ReductionKind reductionKind = getReductionKindOfLinalgOp(unshardedOp); + for (auto [unshardedLinalgOpResult, resultSharding] : + llvm::zip_equal(unshardedOp->getResults(), resultShardings)) { + createAllReduceForResultWithoutPartialSharding( + unshardedLinalgOpResult, opReductionMeshAxes, resultSharding, + reductionKind, spmdizationMap, builder); + } +} + +static void spmdizeLinalgOpWithShardedReduction( + LinalgOp op, ArrayRef spmdizedOperands, + ArrayRef operandShardings, + ArrayRef resultShardings, + ArrayRef loopIteratorTypes, + ArrayRef> meshAxisAssignmentForLoopIterators, + IRMapping 
&spmdizationMap, SymbolTableCollection &symbolTable, + ImplicitLocOpBuilder &builder) { + MeshOp mesh = getMesh(op, operandShardings, resultShardings, symbolTable); + SmallVector reductionMeshAxes = mesh::getReductionMeshAxes( + loopIteratorTypes, meshAxisAssignmentForLoopIterators); + SmallVector spmdizedLinalgOpOperands = + createDestinationPassingStyleInitOperands(op, mesh, spmdizedOperands, + reductionMeshAxes, + spmdizationMap, builder); + // We must not change the operand mappings of the original spmdizationMap as + // they are the mappings for the whole spmdization blob and may be used by + // others. + IRMapping internalSpmdizationMap; + for (auto [unshardedOperand, spmdizedOperand] : + llvm::zip_equal(op->getOperands(), spmdizedLinalgOpOperands)) { + internalSpmdizationMap.map(unshardedOperand, spmdizedOperand); + } + spmdizeTriviallyShardableOperation( + *op, spmdizedLinalgOpOperands, operandShardings, resultShardings, + internalSpmdizationMap, symbolTable, builder); + for (Value result : op->getResults()) { + spmdizationMap.map(result, internalSpmdizationMap.lookup(result)); + } + + // Handle partial shardings. + createAllReduceForResultsWithoutPartialShardings( + op, reductionMeshAxes, resultShardings, spmdizationMap, builder); +} + +namespace { + +// ShardingInterface for ops that implement LinalgStructuredInterface. +// The supported ops are only those where the indexing maps are projected +// permutations. +template +struct StructuredOpShardingInterface + : public mesh::ShardingInterface::ExternalModel< + StructuredOpShardingInterface, Op> { + SmallVector getLoopIteratorTypes(Operation *op) const { + return llvm::cast(op).getIteratorTypesArray(); + } + + SmallVector getIndexingMaps(Operation *op) const { + LinalgOp linalgOp = llvm::cast(op); + SmallVector res = linalgOp.getIndexingMapsArray(); + + // Results must have the same indexing as destination passing style initial + // operands. 
+ for (int64_t i = 0; i < linalgOp.getNumDpsInits(); ++i) { + res.push_back(res[linalgOp.getDpsInitOperand(i)->getOperandNumber()]); + } + + return res; + } + + LogicalResult spmdize(Operation *op, ArrayRef spmdizedOperands, + ArrayRef operandShardings, + ArrayRef resultShardings, + IRMapping &spmdizationMap, + SymbolTableCollection &symbolTable, + OpBuilder &builder) const { + LinalgOp linalgOp = llvm::cast(op); + + SmallVector indexingMaps = linalgOp.getIndexingMapsArray(); + bool allIndexingMapsAreProjectedPermutation = + llvm::all_of(indexingMaps, [](AffineMap map) { + return map.isProjectedPermutation(); + }); + if (!allIndexingMapsAreProjectedPermutation) { + // TODO: handle non-projected permutations. + return op->emitOpError() + << "supports indexing maps that are only projected permutation."; + } + + SmallVector loopIteratorTypes = + linalgOp.getIteratorTypesArray(); + ShardingArray meshAxisAssignmentForLoopIterators = + getMeshAxisAssignmentForLoopIterators(operandShardings, resultShardings, + loopIteratorTypes, indexingMaps); + if (mesh::isAtLeastOneReductionIteratorSharded( + loopIteratorTypes, meshAxisAssignmentForLoopIterators)) { + ImplicitLocOpBuilder implicitLocBuilder(op->getLoc(), builder); + spmdizeLinalgOpWithShardedReduction( + linalgOp, spmdizedOperands, operandShardings, resultShardings, + loopIteratorTypes, meshAxisAssignmentForLoopIterators, spmdizationMap, + symbolTable, implicitLocBuilder); + } else { + spmdizeTriviallyShardableOperation(*op, spmdizedOperands, + operandShardings, resultShardings, + spmdizationMap, symbolTable, builder); + } + + return success(); + } +}; + +} // namespace + +template +static void registerOne(MLIRContext *ctx) { + OpType::template attachInterface>(*ctx); +} + +/// Variadic helper function. 
+template +static void registerAll(MLIRContext *ctx) { + (registerOne(ctx), ...); +} + +void registerMeshShardingInterfaceExternalModels(DialectRegistry ®istry) { + registry.addExtension(+[](MLIRContext *ctx, LinalgDialect *dialect) { + DialectRegistry registry; + registry.insert(); + ctx->appendDialectRegistry(registry); + for (StringRef name : registry.getDialectNames()) + ctx->getOrLoadDialect(name); + + registerOne(ctx); + registerAll< +#define GET_OP_LIST +#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc" + >(ctx); + }); +} + +} // namespace mlir::linalg diff --git a/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp index 8b3119f02e8fda..bd870d4f982e5d 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp @@ -275,14 +275,6 @@ struct LinalgOpPartialReductionInterface ArrayRef oldShape = linalgOp.getShape(linalgOp.getDpsInitOperand(0)); - // Extend tile size vector to the rank of the output tensor. - SmallVector tileSizeVector = - getValueOrCreateConstantIndexOp(b, loc, sizes); - if (tileSizeVector.size() < oldShape.size()) { - auto zero = b.create(loc, 0); - tileSizeVector.append(oldShape.size() - tileSizeVector.size(), zero); - } - // Calculate the new shape, we insert the new dimensions based on the index // of the reduction dimensions. 
SmallVector newOutputShape; diff --git a/mlir/lib/Dialect/Mesh/IR/MeshOps.cpp b/mlir/lib/Dialect/Mesh/IR/MeshOps.cpp index 50163880e85f96..03f11ad1f94965 100644 --- a/mlir/lib/Dialect/Mesh/IR/MeshOps.cpp +++ b/mlir/lib/Dialect/Mesh/IR/MeshOps.cpp @@ -647,6 +647,13 @@ void AllReduceOp::getCanonicalizationPatterns(RewritePatternSet &patterns, patterns.add>(context); } +void AllReduceOp::build(OpBuilder &odsBuilder, OperationState &odsState, + Value input, StringRef mesh, + ArrayRef meshAxes, ReductionKind reduction) { + build(odsBuilder, odsState, input.getType(), mesh, meshAxes, input, + reduction); +} + void AllReduceOp::getAsmResultNames( function_ref setNameFn) { setNameFn(getResult(), "all_reduce"); diff --git a/mlir/lib/Dialect/Mesh/Interfaces/ShardingInterface.cpp b/mlir/lib/Dialect/Mesh/Interfaces/ShardingInterface.cpp index fe3d7c44413fef..9acee5aa8d8604 100644 --- a/mlir/lib/Dialect/Mesh/Interfaces/ShardingInterface.cpp +++ b/mlir/lib/Dialect/Mesh/Interfaces/ShardingInterface.cpp @@ -539,8 +539,9 @@ static bool areValuesCompatibleWithFullReplicationShardings( if (std::size(values) != std::size(shardings)) { return false; } - return llvm::all_of(llvm::zip(std::forward(values), - std::forward(shardings)), + return llvm::all_of(llvm::zip_equal( + std::forward(values), + std::forward(shardings)), [](auto valueAndSharding) { return isValueCompatibleWithFullReplicationSharding( std::get<0>(valueAndSharding), @@ -563,6 +564,88 @@ void mesh::spmdizeFullyReplicatedOperation( builder.clone(op, spmdizationMap); } +static void updateMeshAxisAssignmentForLoopIterators( + ArrayRef meshAxesAssignmentForTensorAxis, AffineExpr indexingExpr, + SmallVector>> + &meshAxesAssignmentForLoopIterators) { + AffineDimExpr affineDimExpr = cast(indexingExpr); + unsigned loopIteratorIdx = affineDimExpr.getPosition(); + if (meshAxesAssignmentForLoopIterators[loopIteratorIdx]) { + assert(llvm::equal(meshAxesAssignmentForTensorAxis, + *meshAxesAssignmentForLoopIterators[loopIteratorIdx])); 
+ } else { + meshAxesAssignmentForLoopIterators[loopIteratorIdx] = + llvm::to_vector(meshAxesAssignmentForTensorAxis); + } +} + +ShardingArray mesh::getMeshAxisAssignmentForLoopIterators( + ArrayRef operandShardings, + ArrayRef resultShardings, + ArrayRef loopIteratorTypes, + ArrayRef indexingMaps) { + SmallVector>> + meshAxisAssignmentForLoopIterators(loopIteratorTypes.size()); + SmallVector operatorAndResultShardings; + operatorAndResultShardings.reserve(operandShardings.size() + + resultShardings.size()); + llvm::append_range(operatorAndResultShardings, operandShardings); + for (auto [sharding, affineMap] : + llvm::zip_equal(operatorAndResultShardings, indexingMaps)) { + if (!sharding) { + continue; + } + for (auto [meshAxesAssignmentForTensorAxis, indexingExpr] : + llvm::zip(sharding.getSplitAxes(), affineMap.getResults())) { + updateMeshAxisAssignmentForLoopIterators( + meshAxesAssignmentForTensorAxis.asArrayRef(), indexingExpr, + meshAxisAssignmentForLoopIterators); + } + // Missing trailing split axes means replication on those tensor dimensions. 
+ for (unsigned i = sharding.getSplitAxes().size(); + i < affineMap.getNumResults(); ++i) { + updateMeshAxisAssignmentForLoopIterators( + {}, affineMap.getResults()[i], meshAxisAssignmentForLoopIterators); + } + } + + ShardingArray res; + llvm::transform(meshAxisAssignmentForLoopIterators, std::back_inserter(res), + [](std::optional> &axes) { + if (!axes) { + return SmallVector(); + }; + return std::move(*axes); + }); + return res; +} + +bool mesh::isAtLeastOneReductionIteratorSharded( + ArrayRef loopIteratorTypes, + ArrayRef> meshAxisAssignmentForLoopIterators) { + for (auto [loopIteratorType, meshAxisAssignment] : + llvm::zip_equal(loopIteratorTypes, meshAxisAssignmentForLoopIterators)) { + if (loopIteratorType == utils::IteratorType::reduction && + !meshAxisAssignment.empty()) { + return true; + } + } + return false; +} + +SmallVector mesh::getReductionMeshAxes( + ArrayRef loopIteratorTypes, + ArrayRef> meshAxisAssignmentForLoopIterators) { + SmallVector meshAxes; + for (auto [loopIteratorType, meshAxisAssignment] : + llvm::zip_equal(loopIteratorTypes, meshAxisAssignmentForLoopIterators)) { + if (loopIteratorType == utils::IteratorType::reduction) { + llvm::append_range(meshAxes, meshAxisAssignment); + } + } + return meshAxes; +} + void mesh::spmdizeTriviallyShardableOperation( Operation &op, ArrayRef spmdizedOperands, ArrayRef operandShardings, @@ -572,7 +655,7 @@ void mesh::spmdizeTriviallyShardableOperation( Operation *newOp = builder.clone(op, spmdizationMap); // Set the result types to the sharded counterparts. 
for (auto [oldResult, newResult, sharding] : - llvm::zip(op.getResults(), newOp->getResults(), resultShardings)) { + llvm::zip_equal(op.getResults(), newOp->getResults(), resultShardings)) { newResult.setType(shardType(newResult.getType(), getMesh(&op, sharding.getMesh(), symbolTable), sharding)); diff --git a/mlir/lib/Dialect/Mesh/Transforms/Transforms.cpp b/mlir/lib/Dialect/Mesh/Transforms/Transforms.cpp index d59b9119dea541..cb13ee404751ca 100644 --- a/mlir/lib/Dialect/Mesh/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Mesh/Transforms/Transforms.cpp @@ -208,4 +208,17 @@ createCollectiveProcessGroupSize(MeshOp mesh, ArrayRef axes, .cast>(); } +TypedValue createProcessLinearIndex(StringRef mesh, + ArrayRef meshAxes, + ImplicitLocOpBuilder &builder) { + ResultRange processInGroupMultiIndex = + builder.create(mesh, meshAxes).getResults(); + Operation::result_range processGroupShape = + builder.create(mesh, meshAxes).getResult(); + OpFoldResult processInGroupLinearIndex = affine::linearizeIndex( + llvm::to_vector_of(processInGroupMultiIndex), + llvm::to_vector_of(processGroupShape), builder); + return cast>(processInGroupLinearIndex.get()); +} + } // namespace mlir::mesh diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index 18187e7d4f66cd..c09a3403f9a3e3 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -1283,6 +1283,50 @@ static void printDeviceTypeOperandsWithKeywordOnly( p << ")"; } +static ParseResult +parseCombinedConstructsLoop(mlir::OpAsmParser &parser, + mlir::acc::CombinedConstructsTypeAttr &attr) { + if (succeeded(parser.parseOptionalKeyword("combined"))) { + if (parser.parseLParen()) + return failure(); + if (succeeded(parser.parseOptionalKeyword("kernels"))) { + attr = mlir::acc::CombinedConstructsTypeAttr::get( + parser.getContext(), mlir::acc::CombinedConstructsType::KernelsLoop); + } else if (succeeded(parser.parseOptionalKeyword("parallel"))) { + attr 
= mlir::acc::CombinedConstructsTypeAttr::get( + parser.getContext(), mlir::acc::CombinedConstructsType::ParallelLoop); + } else if (succeeded(parser.parseOptionalKeyword("serial"))) { + attr = mlir::acc::CombinedConstructsTypeAttr::get( + parser.getContext(), mlir::acc::CombinedConstructsType::SerialLoop); + } else { + parser.emitError(parser.getCurrentLocation(), + "expected compute construct name"); + return failure(); + } + if (parser.parseRParen()) + return failure(); + } + return success(); +} + +static void +printCombinedConstructsLoop(mlir::OpAsmPrinter &p, mlir::Operation *op, + mlir::acc::CombinedConstructsTypeAttr attr) { + if (attr) { + switch (attr.getValue()) { + case mlir::acc::CombinedConstructsType::KernelsLoop: + p << "combined(kernels)"; + break; + case mlir::acc::CombinedConstructsType::ParallelLoop: + p << "combined(parallel)"; + break; + case mlir::acc::CombinedConstructsType::SerialLoop: + p << "combined(serial)"; + break; + }; + } +} + //===----------------------------------------------------------------------===// // SerialOp //===----------------------------------------------------------------------===// @@ -1851,6 +1895,13 @@ LogicalResult acc::LoopOp::verify() { "reductions", false))) return failure(); + if (getCombined().has_value() && + (getCombined().value() != acc::CombinedConstructsType::ParallelLoop && + getCombined().value() != acc::CombinedConstructsType::KernelsLoop && + getCombined().value() != acc::CombinedConstructsType::SerialLoop)) { + return emitError("unexpected combined constructs attribute"); + } + // Check non-empty body(). 
if (getRegion().empty()) return emitError("expected non-empty body."); diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index 6ba8b46370b038..c19907a945d3bb 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -646,28 +646,16 @@ void SparseTensorEncodingAttr::printDimensions( } } -std::string getNOutOfMString(LevelType lt) { - if (isNOutOfMLT(lt)) { - unsigned n = getN(lt); - unsigned m = getM(lt); - auto output = "[" + std::to_string(n) + ", " + std::to_string(m) + "]"; - return output; - } - return ""; -} - void SparseTensorEncodingAttr::printLevels(AffineMap &map, AsmPrinter &printer, ArrayRef lvlTypes) const { for (unsigned i = 0, n = map.getNumResults() - 1; i < n; i++) { map.getResult(i).print(printer.getStream()); - printer << " : " << toMLIRString(lvlTypes[i]) - << getNOutOfMString(lvlTypes[i]) << ", "; + printer << " : " << toMLIRString(lvlTypes[i]) << ", "; } if (map.getNumResults() >= 1) { auto lastIndex = map.getNumResults() - 1; map.getResult(lastIndex).print(printer.getStream()); - printer << " : " << toMLIRString(lvlTypes[lastIndex]) - << getNOutOfMString(lvlTypes[lastIndex]); + printer << " : " << toMLIRString(lvlTypes[lastIndex]); } } diff --git a/mlir/lib/Dialect/XeGPU/CMakeLists.txt b/mlir/lib/Dialect/XeGPU/CMakeLists.txt new file mode 100644 index 00000000000000..f33061b2d87cff --- /dev/null +++ b/mlir/lib/Dialect/XeGPU/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(IR) diff --git a/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt b/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt new file mode 100644 index 00000000000000..2e99f39ed86d2e --- /dev/null +++ b/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt @@ -0,0 +1,15 @@ +add_mlir_dialect_library(MLIRXeGPUDialect + XeGPUDialect.cpp + XeGPUOps.cpp + + ADDITIONAL_HEADER_DIRS + ${PROJECT_SOURCE_DIR}/include/mlir/Dialect/XeGPU + + DEPENDS + MLIRXeGPUIncGen + 
MLIRXeGPUAttrsIncGen + MLIRXeGPUEnumsIncGen + + LINK_LIBS PUBLIC + MLIRIR +) diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp new file mode 100644 index 00000000000000..4f839ee773476b --- /dev/null +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp @@ -0,0 +1,39 @@ +//===- XeGPUDialect.cpp - MLIR XeGPU dialect implementation -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +namespace mlir { +namespace xegpu { + +void XeGPUDialect::initialize() { + addTypes< +#define GET_TYPEDEF_LIST +#include + >(); + addOperations< +#define GET_OP_LIST +#include + >(); + addAttributes< +#define GET_ATTRDEF_LIST +#include + >(); +} + +// this file is for position occupation, +// we will add functions in following PRs. + +} // namespace xegpu +} // namespace mlir + +#include +#define GET_ATTRDEF_CLASSES +#include +#define GET_TYPEDEF_CLASSES +#include diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp new file mode 100644 index 00000000000000..0e89ac4df6ef28 --- /dev/null +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp @@ -0,0 +1,23 @@ +//===- XeGPUOps.cpp - MLIR XeGPU ops implementation -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#define DEBUG_TYPE "xegpu" + +namespace mlir { +namespace xegpu { +// this file is for position occupation, +// we will add functions in following PRs. 
+ +} // namespace xegpu +} // namespace mlir + +#include +#define GET_OP_CLASSES +#include diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index d7dc902a9a5ebd..c1a261eab8487d 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -204,14 +204,22 @@ class IRRewrite { /// Roll back the rewrite. Operations may be erased during rollback. virtual void rollback() = 0; - /// Commit the rewrite. Operations/blocks may be unlinked during the commit - /// phase, but they must not be erased yet. This is because internal dialect - /// conversion state (such as `mapping`) may still be using them. Operations/ - /// blocks must be erased during cleanup. - virtual void commit() {} + /// Commit the rewrite. At this point, it is certain that the dialect + /// conversion will succeed. All IR modifications, except for operation/block + /// erasure, must be performed through the given rewriter. + /// + /// Instead of erasing operations/blocks, they should merely be unlinked + /// commit phase and finally be erased during the cleanup phase. This is + /// because internal dialect conversion state (such as `mapping`) may still + /// be using them. + /// + /// Any IR modification that was already performed before the commit phase + /// (e.g., insertion of an op) must be communicated to the listener that may + /// be attached to the given rewriter. + virtual void commit(RewriterBase &rewriter) {} /// Cleanup operations/blocks. Cleanup is called after commit. - virtual void cleanup() {} + virtual void cleanup(RewriterBase &rewriter) {} Kind getKind() const { return kind; } @@ -221,12 +229,6 @@ class IRRewrite { IRRewrite(Kind kind, ConversionPatternRewriterImpl &rewriterImpl) : kind(kind), rewriterImpl(rewriterImpl) {} - /// Erase the given op (unless it was already erased). - void eraseOp(Operation *op); - - /// Erase the given block (unless it was already erased). 
- void eraseBlock(Block *block); - const ConversionConfig &getConfig() const; const Kind kind; @@ -265,6 +267,12 @@ class CreateBlockRewrite : public BlockRewrite { return rewrite->getKind() == Kind::CreateBlock; } + void commit(RewriterBase &rewriter) override { + // The block was already created and inserted. Just inform the listener. + if (auto *listener = rewriter.getListener()) + listener->notifyBlockInserted(block, /*previous=*/{}, /*previousIt=*/{}); + } + void rollback() override { // Unlink all of the operations within this block, they will be deleted // separately. @@ -311,10 +319,19 @@ class EraseBlockRewrite : public BlockRewrite { block = nullptr; } - void cleanup() override { + void commit(RewriterBase &rewriter) override { // Erase the block. assert(block && "expected block"); assert(block->empty() && "expected empty block"); + + // Notify the listener that the block is about to be erased. + if (auto *listener = + dyn_cast_or_null(rewriter.getListener())) + listener->notifyBlockErased(block); + } + + void cleanup(RewriterBase &rewriter) override { + // Erase the block. block->dropAllDefinedValueUses(); delete block; block = nullptr; @@ -341,6 +358,13 @@ class InlineBlockRewrite : public BlockRewrite { firstInlinedInst(sourceBlock->empty() ? nullptr : &sourceBlock->front()), lastInlinedInst(sourceBlock->empty() ? nullptr : &sourceBlock->back()) { + // If a listener is attached to the dialect conversion, ops must be moved + // one-by-one. When they are moved in bulk, notifications cannot be sent + // because the ops that used to be in the source block at the time of the + // inlining (before the "commit" phase) are unknown at the time when + // notifications are sent (which is during the "commit" phase). 
+ assert(!getConfig().listener && + "InlineBlockRewrite not supported if listener is attached"); } static bool classof(const IRRewrite *rewrite) { @@ -382,6 +406,16 @@ class MoveBlockRewrite : public BlockRewrite { return rewrite->getKind() == Kind::MoveBlock; } + void commit(RewriterBase &rewriter) override { + // The block was already moved. Just inform the listener. + if (auto *listener = rewriter.getListener()) { + // Note: `previousIt` cannot be passed because this is a delayed + // notification and iterators into past IR state cannot be represented. + listener->notifyBlockInserted(block, /*previous=*/region, + /*previousIt=*/{}); + } + } + void rollback() override { // Move the block back to its original position. Region::iterator before = @@ -437,7 +471,7 @@ class BlockTypeConversionRewrite : public BlockRewrite { LogicalResult materializeLiveConversions(function_ref findLiveUser); - void commit() override; + void commit(RewriterBase &rewriter) override; void rollback() override; @@ -466,7 +500,7 @@ class ReplaceBlockArgRewrite : public BlockRewrite { return rewrite->getKind() == Kind::ReplaceBlockArg; } - void commit() override; + void commit(RewriterBase &rewriter) override; void rollback() override; @@ -506,6 +540,17 @@ class MoveOperationRewrite : public OperationRewrite { return rewrite->getKind() == Kind::MoveOperation; } + void commit(RewriterBase &rewriter) override { + // The operation was already moved. Just inform the listener. + if (auto *listener = rewriter.getListener()) { + // Note: `previousIt` cannot be passed because this is a delayed + // notification and iterators into past IR state cannot be represented. + listener->notifyOperationInserted( + op, /*previous=*/OpBuilder::InsertPoint(/*insertBlock=*/block, + /*insertPt=*/{})); + } + } + void rollback() override { // Move the operation back to its original position. 
Block::iterator before = @@ -549,7 +594,12 @@ class ModifyOperationRewrite : public OperationRewrite { "rewrite was neither committed nor rolled back"); } - void commit() override { + void commit(RewriterBase &rewriter) override { + // Notify the listener that the operation was modified in-place. + if (auto *listener = + dyn_cast_or_null(rewriter.getListener())) + listener->notifyOperationModified(op); + if (propertiesStorage) { OpaqueProperties propCopy(propertiesStorage); // Note: The operation may have been erased in the mean time, so @@ -600,11 +650,11 @@ class ReplaceOperationRewrite : public OperationRewrite { return rewrite->getKind() == Kind::ReplaceOperation; } - void commit() override; + void commit(RewriterBase &rewriter) override; void rollback() override; - void cleanup() override; + void cleanup(RewriterBase &rewriter) override; const TypeConverter *getConverter() const { return converter; } @@ -629,6 +679,12 @@ class CreateOperationRewrite : public OperationRewrite { return rewrite->getKind() == Kind::CreateOperation; } + void commit(RewriterBase &rewriter) override { + // The operation was already created and inserted. Just inform the listener. + if (auto *listener = rewriter.getListener()) + listener->notifyOperationInserted(op, /*previous=*/{}); + } + void rollback() override; }; @@ -666,7 +722,7 @@ class UnresolvedMaterializationRewrite : public OperationRewrite { void rollback() override; - void cleanup() override; + void cleanup(RewriterBase &rewriter) override; /// Return the type converter of this materialization (which may be null). 
const TypeConverter *getConverter() const { @@ -735,7 +791,7 @@ namespace detail { struct ConversionPatternRewriterImpl : public RewriterBase::Listener { explicit ConversionPatternRewriterImpl(MLIRContext *ctx, const ConversionConfig &config) - : eraseRewriter(ctx), config(config) {} + : context(ctx), config(config) {} //===--------------------------------------------------------------------===// // State Management @@ -900,6 +956,7 @@ struct ConversionPatternRewriterImpl : public RewriterBase::Listener { } void notifyOperationErased(Operation *op) override { erased.insert(op); } + void notifyBlockErased(Block *block) override { erased.insert(block); } /// Pointers to all erased operations and blocks. @@ -910,8 +967,8 @@ struct ConversionPatternRewriterImpl : public RewriterBase::Listener { // State //===--------------------------------------------------------------------===// - /// This rewriter must be used for erasing ops/blocks. - SingleEraseRewriter eraseRewriter; + /// MLIR context. + MLIRContext *context; // Mapping between replaced values that differ in type. This happens when // replacing a value with one of a different type. @@ -955,19 +1012,19 @@ struct ConversionPatternRewriterImpl : public RewriterBase::Listener { } // namespace detail } // namespace mlir -void IRRewrite::eraseOp(Operation *op) { - rewriterImpl.eraseRewriter.eraseOp(op); -} - -void IRRewrite::eraseBlock(Block *block) { - rewriterImpl.eraseRewriter.eraseBlock(block); -} - const ConversionConfig &IRRewrite::getConfig() const { return rewriterImpl.config; } -void BlockTypeConversionRewrite::commit() { +void BlockTypeConversionRewrite::commit(RewriterBase &rewriter) { + // Inform the listener about all IR modifications that have already taken + // place: References to the original block have been replaced with the new + // block. 
+ if (auto *listener = dyn_cast_or_null( + rewriter.getListener())) + for (Operation *op : block->getUsers()) + listener->notifyOperationModified(op); + // Process the remapping for each of the original arguments. for (auto [origArg, info] : llvm::zip_equal(origBlock->getArguments(), argInfo)) { @@ -975,7 +1032,7 @@ void BlockTypeConversionRewrite::commit() { if (!info) { if (Value newArg = rewriterImpl.mapping.lookupOrNull(origArg, origArg.getType())) - origArg.replaceAllUsesWith(newArg); + rewriter.replaceAllUsesWith(origArg, newArg); continue; } @@ -985,8 +1042,8 @@ void BlockTypeConversionRewrite::commit() { // If the argument is still used, replace it with the generated cast. if (!origArg.use_empty()) { - origArg.replaceAllUsesWith( - rewriterImpl.mapping.lookupOrDefault(castValue, origArg.getType())); + rewriter.replaceAllUsesWith(origArg, rewriterImpl.mapping.lookupOrDefault( + castValue, origArg.getType())); } } } @@ -1042,13 +1099,13 @@ LogicalResult BlockTypeConversionRewrite::materializeLiveConversions( return success(); } -void ReplaceBlockArgRewrite::commit() { +void ReplaceBlockArgRewrite::commit(RewriterBase &rewriter) { Value repl = rewriterImpl.mapping.lookupOrNull(arg, arg.getType()); if (!repl) return; if (isa(repl)) { - arg.replaceAllUsesWith(repl); + rewriter.replaceAllUsesWith(arg, repl); return; } @@ -1057,7 +1114,7 @@ void ReplaceBlockArgRewrite::commit() { // replacement value. 
Operation *replOp = cast(repl).getOwner(); Block *replBlock = replOp->getBlock(); - arg.replaceUsesWithIf(repl, [&](OpOperand &operand) { + rewriter.replaceUsesWithIf(arg, repl, [&](OpOperand &operand) { Operation *user = operand.getOwner(); return user->getBlock() != replBlock || replOp->isBeforeInBlock(user); }); @@ -1065,14 +1122,40 @@ void ReplaceBlockArgRewrite::commit() { void ReplaceBlockArgRewrite::rollback() { rewriterImpl.mapping.erase(arg); } -void ReplaceOperationRewrite::commit() { - for (OpResult result : op->getResults()) - if (Value newValue = - rewriterImpl.mapping.lookupOrNull(result, result.getType())) - result.replaceAllUsesWith(newValue); +void ReplaceOperationRewrite::commit(RewriterBase &rewriter) { + auto *listener = dyn_cast_or_null( + rewriter.getListener()); + + // Compute replacement values. + SmallVector replacements = + llvm::map_to_vector(op->getResults(), [&](OpResult result) { + return rewriterImpl.mapping.lookupOrNull(result, result.getType()); + }); + + // Notify the listener that the operation is about to be replaced. + if (listener) + listener->notifyOperationReplaced(op, replacements); + + // Replace all uses with the new values. + for (auto [result, newValue] : + llvm::zip_equal(op->getResults(), replacements)) + if (newValue) + rewriter.replaceAllUsesWith(result, newValue); + + // The original op will be erased, so remove it from the set of unlegalized + // ops. if (getConfig().unlegalizedOps) getConfig().unlegalizedOps->erase(op); + + // Notify the listener that the operation (and its nested operations) was + // erased. + if (listener) { + op->walk( + [&](Operation *op) { listener->notifyOperationErased(op); }); + } + // Do not erase the operation yet. It may still be referenced in `mapping`. + // Just unlink it for now and erase it during cleanup. 
op->getBlock()->getOperations().remove(op); } @@ -1081,7 +1164,9 @@ void ReplaceOperationRewrite::rollback() { rewriterImpl.mapping.erase(result); } -void ReplaceOperationRewrite::cleanup() { eraseOp(op); } +void ReplaceOperationRewrite::cleanup(RewriterBase &rewriter) { + rewriter.eraseOp(op); +} void CreateOperationRewrite::rollback() { for (Region ®ion : op->getRegions()) { @@ -1100,14 +1185,20 @@ void UnresolvedMaterializationRewrite::rollback() { op->erase(); } -void UnresolvedMaterializationRewrite::cleanup() { eraseOp(op); } +void UnresolvedMaterializationRewrite::cleanup(RewriterBase &rewriter) { + rewriter.eraseOp(op); +} void ConversionPatternRewriterImpl::applyRewrites() { // Commit all rewrites. + IRRewriter rewriter(context, config.listener); for (auto &rewrite : rewrites) - rewrite->commit(); + rewrite->commit(rewriter); + + // Clean up all rewrites. + SingleEraseRewriter eraseRewriter(context); for (auto &rewrite : rewrites) - rewrite->cleanup(); + rewrite->cleanup(eraseRewriter); } //===----------------------------------------------------------------------===// @@ -1281,7 +1372,7 @@ Block *ConversionPatternRewriterImpl::applySignatureConversion( ConversionPatternRewriter &rewriter, Block *block, const TypeConverter *converter, TypeConverter::SignatureConversion &signatureConversion) { - MLIRContext *ctx = rewriter.getContext(); + OpBuilder::InsertionGuard g(rewriter); // If no arguments are being changed or added, there is nothing to do. unsigned origArgCount = block->getNumArguments(); @@ -1289,14 +1380,9 @@ Block *ConversionPatternRewriterImpl::applySignatureConversion( if (llvm::equal(block->getArgumentTypes(), convertedTypes)) return block; - // Split the block at the beginning to get a new block to use for the updated - // signature. - Block *newBlock = rewriter.splitBlock(block, block->begin()); - block->replaceAllUsesWith(newBlock); - - // Map all new arguments to the location of the argument they originate from. 
+ // Compute the locations of all block arguments in the new block. SmallVector newLocs(convertedTypes.size(), - Builder(ctx).getUnknownLoc()); + rewriter.getUnknownLoc()); for (unsigned i = 0; i < origArgCount; ++i) { auto inputMap = signatureConversion.getInputMapping(i); if (!inputMap || inputMap->replacementValue) @@ -1306,9 +1392,29 @@ Block *ConversionPatternRewriterImpl::applySignatureConversion( newLocs[inputMap->inputNo + j] = origLoc; } - SmallVector newArgRange( - newBlock->addArguments(convertedTypes, newLocs)); - ArrayRef newArgs(newArgRange); + // Insert a new block with the converted block argument types and move all ops + // from the old block to the new block. + Block *newBlock = + rewriter.createBlock(block->getParent(), std::next(block->getIterator()), + convertedTypes, newLocs); + + // If a listener is attached to the dialect conversion, ops cannot be moved + // to the destination block in bulk ("fast path"). This is because at the time + // the notifications are sent, it is unknown which ops were moved. Instead, + // ops should be moved one-by-one ("slow path"), so that a separate + // `MoveOperationRewrite` is enqueued for each moved op. Moving ops in bulk is + // a bit more efficient, so we try to do that when possible. + bool fastPath = !config.listener; + if (fastPath) { + appendRewrite(newBlock, block, newBlock->end()); + newBlock->getOperations().splice(newBlock->end(), block->getOperations()); + } else { + while (!block->empty()) + rewriter.moveOpBefore(&block->front(), newBlock, newBlock->end()); + } + + // Replace all uses of the old block with the new block. + block->replaceAllUsesWith(newBlock); // Remap each of the original arguments as determined by the signature // conversion. @@ -1333,7 +1439,8 @@ Block *ConversionPatternRewriterImpl::applySignatureConversion( } // Otherwise, this is a 1->1+ mapping. 
- auto replArgs = newArgs.slice(inputMap->inputNo, inputMap->size); + auto replArgs = + newBlock->getArguments().slice(inputMap->inputNo, inputMap->size); Value newArg; // If this is a 1->1 mapping and the types of new and replacement arguments @@ -1642,10 +1749,31 @@ void ConversionPatternRewriter::inlineBlockBefore(Block *source, Block *dest, "expected 'source' to have no predecessors"); #endif // NDEBUG - impl->notifyBlockBeingInlined(dest, source, before); + // If a listener is attached to the dialect conversion, ops cannot be moved + // to the destination block in bulk ("fast path"). This is because at the time + // the notifications are sent, it is unknown which ops were moved. Instead, + // ops should be moved one-by-one ("slow path"), so that a separate + // `MoveOperationRewrite` is enqueued for each moved op. Moving ops in bulk is + // a bit more efficient, so we try to do that when possible. + bool fastPath = !impl->config.listener; + + if (fastPath) + impl->notifyBlockBeingInlined(dest, source, before); + + // Replace all uses of block arguments. for (auto it : llvm::zip(source->getArguments(), argValues)) replaceUsesOfBlockArgument(std::get<0>(it), std::get<1>(it)); - dest->getOperations().splice(before, source->getOperations()); + + if (fastPath) { + // Move all ops at once. + dest->getOperations().splice(before, source->getOperations()); + } else { + // Move op by op. + while (!source->empty()) + moveOpBefore(&source->front(), dest, before); + } + + // Erase the source block. 
eraseBlock(source); } diff --git a/mlir/python/mlir/dialects/arith.py b/mlir/python/mlir/dialects/arith.py index 61c6917393f1f9..92da5df9bce665 100644 --- a/mlir/python/mlir/dialects/arith.py +++ b/mlir/python/mlir/dialects/arith.py @@ -5,6 +5,8 @@ from ._arith_ops_gen import * from ._arith_ops_gen import _Dialect from ._arith_enum_gen import * +from array import array as _array +from typing import overload try: from ..ir import * @@ -43,13 +45,37 @@ def _is_float_type(type: Type): class ConstantOp(ConstantOp): """Specialization for the constant op class.""" + @overload + def __init__(self, value: Attribute, *, loc=None, ip=None): + ... + + @overload def __init__( - self, result: Type, value: Union[int, float, Attribute], *, loc=None, ip=None + self, result: Type, value: Union[int, float, _array], *, loc=None, ip=None ): + ... + + def __init__(self, result, value, *, loc=None, ip=None): + if value is None: + assert isinstance(result, Attribute) + super().__init__(result, loc=loc, ip=ip) + return + if isinstance(value, int): super().__init__(IntegerAttr.get(result, value), loc=loc, ip=ip) elif isinstance(value, float): super().__init__(FloatAttr.get(result, value), loc=loc, ip=ip) + elif isinstance(value, _array): + if 8 * value.itemsize != result.element_type.width: + raise ValueError( + f"Mismatching array element ({8 * value.itemsize}) and type ({result.element_type.width}) width." 
+ ) + if value.typecode in ["i", "l", "q"]: + super().__init__(DenseIntElementsAttr.get(value, type=result)) + elif value.typecode in ["f", "d"]: + super().__init__(DenseFPElementsAttr.get(value, type=result)) + else: + raise ValueError(f'Unsupported typecode: "{value.typecode}".') else: super().__init__(value, loc=loc, ip=ip) @@ -79,6 +105,6 @@ def literal_value(self) -> Union[int, float]: def constant( - result: Type, value: Union[int, float, Attribute], *, loc=None, ip=None + result: Type, value: Union[int, float, Attribute, _array], *, loc=None, ip=None ) -> Value: return _get_op_result_or_op_results(ConstantOp(result, value, loc=loc, ip=ip)) diff --git a/mlir/test/CAPI/ir.c b/mlir/test/CAPI/ir.c index 1dd265ffe6b5e5..9167d50ffeed41 100644 --- a/mlir/test/CAPI/ir.c +++ b/mlir/test/CAPI/ir.c @@ -1494,6 +1494,10 @@ int printAffineMap(MlirContext ctx) { // CHECK: (d0, d1, d2) -> (d0) // CHECK: (d0, d1, d2) -> (d2) + // CHECK: distinct[0]<"foo"> + mlirAttributeDump(mlirDisctinctAttrCreate( + mlirStringAttrGet(ctx, mlirStringRefCreateFromCString("foo")))); + return 0; } diff --git a/mlir/test/CAPI/llvm.c b/mlir/test/CAPI/llvm.c index 1817988dd67dd6..2fd98b29f487c8 100644 --- a/mlir/test/CAPI/llvm.c +++ b/mlir/test/CAPI/llvm.c @@ -10,9 +10,12 @@ // RUN: mlir-capi-llvm-test 2>&1 | FileCheck %s #include "mlir-c/Dialect/LLVM.h" +#include "mlir-c/BuiltinAttributes.h" #include "mlir-c/BuiltinTypes.h" #include "mlir-c/IR.h" #include "mlir-c/Support.h" +#include "llvm-c/Core.h" +#include "llvm-c/DebugInfo.h" #include #include @@ -77,7 +80,7 @@ static void testTypeCreation(MlirContext ctx) { // CHECK-LABEL: testStructTypeCreation static int testStructTypeCreation(MlirContext ctx) { - fprintf(stderr, "testStructTypeCreation"); + fprintf(stderr, "testStructTypeCreation\n"); // CHECK: !llvm.struct<()> mlirTypeDump(mlirLLVMStructTypeLiteralGet(ctx, /*nFieldTypes=*/0, @@ -225,12 +228,120 @@ static int testStructTypeCreation(MlirContext ctx) { return 0; } +// CHECK-LABEL: 
testLLVMAttributes +static void testLLVMAttributes(MlirContext ctx) { + fprintf(stderr, "testLLVMAttributes\n"); + + // CHECK: #llvm.linkage + mlirAttributeDump(mlirLLVMLinkageAttrGet(ctx, MlirLLVMLinkageInternal)); + // CHECK: #llvm.cconv + mlirAttributeDump(mlirLLVMCConvAttrGet(ctx, MlirLLVMCConvC)); + // CHECK: #llvm + mlirAttributeDump(mlirLLVMComdatAttrGet(ctx, MlirLLVMComdatAny)); +} + +// CHECK-LABEL: testDebugInfoAttributes +static void testDebugInfoAttributes(MlirContext ctx) { + fprintf(stderr, "testDebugInfoAttributes\n"); + + MlirAttribute foo = + mlirStringAttrGet(ctx, mlirStringRefCreateFromCString("foo")); + MlirAttribute bar = + mlirStringAttrGet(ctx, mlirStringRefCreateFromCString("bar")); + MlirAttribute id = mlirDisctinctAttrCreate(foo); + + // CHECK: #llvm.di_null_type + mlirAttributeDump(mlirLLVMDINullTypeAttrGet(ctx)); + + // CHECK: #llvm.di_basic_type + MlirAttribute di_type = + mlirLLVMDIBasicTypeAttrGet(ctx, 0, foo, 64, MlirLLVMTypeEncodingSigned); + mlirAttributeDump(di_type); + + MlirAttribute file = mlirLLVMDIFileAttrGet(ctx, foo, bar); + + // CHECK: #llvm.di_file<"foo" in "bar"> + mlirAttributeDump(file); + + MlirAttribute compile_unit = + mlirLLVMDICompileUnitAttrGet(ctx, id, LLVMDWARFSourceLanguageC99, file, + foo, false, MlirLLVMDIEmissionKindFull); + + // CHECK: #llvm.di_compile_unit<{{.*}}> + mlirAttributeDump(compile_unit); + + MlirAttribute di_module = mlirLLVMDIModuleAttrGet( + ctx, file, compile_unit, foo, + mlirStringAttrGet(ctx, mlirStringRefCreateFromCString("")), bar, foo, 1, + 0); + // CHECK: #llvm.di_module<{{.*}}> + mlirAttributeDump(di_module); + + // CHECK: #llvm.di_compile_unit<{{.*}}> + mlirAttributeDump(mlirLLVMDIModuleAttrGetScope(di_module)); + + // CHECK: 1 : i32 + mlirAttributeDump(mlirLLVMDIFlagsAttrGet(ctx, 0x1)); + + // CHECK: #llvm.di_lexical_block<{{.*}}> + mlirAttributeDump( + mlirLLVMDILexicalBlockAttrGet(ctx, compile_unit, file, 1, 2)); + + // CHECK: #llvm.di_lexical_block_file<{{.*}}> + 
mlirAttributeDump( + mlirLLVMDILexicalBlockFileAttrGet(ctx, compile_unit, file, 3)); + + // CHECK: #llvm.di_local_variable<{{.*}}> + mlirAttributeDump(mlirLLVMDILocalVariableAttrGet(ctx, compile_unit, foo, file, + 1, 0, 8, di_type)); + // CHECK: #llvm.di_derived_type<{{.*}}> + mlirAttributeDump( + mlirLLVMDIDerivedTypeAttrGet(ctx, 0, bar, di_type, 64, 8, 0)); + + // CHECK: #llvm.di_composite_type<{{.*}}> + mlirAttributeDump(mlirLLVMDICompositeTypeAttrGet( + ctx, 0, foo, file, 1, compile_unit, di_type, 0, 64, 8, 1, &di_type)); + + MlirAttribute subroutine_type = + mlirLLVMDISubroutineTypeAttrGet(ctx, 0x0, 1, &di_type); + + // CHECK: #llvm.di_subroutine_type<{{.*}}> + mlirAttributeDump(subroutine_type); + + MlirAttribute di_subprogram = + mlirLLVMDISubprogramAttrGet(ctx, id, compile_unit, compile_unit, foo, bar, + file, 1, 2, 0, subroutine_type); + // CHECK: #llvm.di_subprogram<{{.*}}> + mlirAttributeDump(di_subprogram); + + // CHECK: #llvm.di_compile_unit<{{.*}}> + mlirAttributeDump(mlirLLVMDISubprogramAttrGetScope(di_subprogram)); + + // CHECK: #llvm.di_file<{{.*}}> + mlirAttributeDump(mlirLLVMDISubprogramAttrGetFile(di_subprogram)); + + // CHECK: #llvm.di_subroutine_type<{{.*}}> + mlirAttributeDump(mlirLLVMDISubprogramAttrGetType(di_subprogram)); + + MlirAttribute expression_elem = + mlirLLVMDIExpressionElemAttrGet(ctx, 1, 1, &(uint64_t){1}); + + // CHECK: #llvm + mlirAttributeDump(expression_elem); + + // CHECK: #llvm.di_expression<[(1)]> + mlirAttributeDump(mlirLLVMDIExpressionAttrGet(ctx, 1, &expression_elem)); +} + int main(void) { MlirContext ctx = mlirContextCreate(); mlirDialectHandleRegisterDialect(mlirGetDialectHandle__llvm__(), ctx); mlirContextGetOrLoadDialect(ctx, mlirStringRefCreateFromCString("llvm")); testTypeCreation(ctx); int result = testStructTypeCreation(ctx); + testLLVMAttributes(ctx); + testDebugInfoAttributes(ctx); mlirContextDestroy(ctx); if (result) fprintf(stderr, "FAILED: code %d", result); diff --git 
a/mlir/test/Dialect/ArmSME/vector-legalization.mlir b/mlir/test/Dialect/ArmSME/vector-legalization.mlir index bf0b58ff4cf073..f8be697548c197 100644 --- a/mlir/test/Dialect/ArmSME/vector-legalization.mlir +++ b/mlir/test/Dialect/ArmSME/vector-legalization.mlir @@ -388,3 +388,48 @@ func.func @illegal_transpose_no_defining_source_op(%vec: vector<[4]x1xf32>) -> v %0 = vector.transpose %vec, [1, 0] : vector<[4]x1xf32> to vector<1x[4]xf32> return %0 : vector<1x[4]xf32> } + +// ----- + +// CHECK-LABEL: @illegal_shape_cast_to_transpose_2d( +// CHECK-SAME: %[[VEC:.*]]: vector<[4]x1xf32>) +func.func @illegal_shape_cast_to_transpose_2d(%vec: vector<[4]x1xf32>) -> vector<1x[4]xf32> { + // CHECK: vector.transpose %[[VEC]], [1, 0] : vector<[4]x1xf32> to vector<1x[4]xf32> + %0 = vector.shape_cast %vec : vector<[4]x1xf32> to vector<1x[4]xf32> + return %0 : vector<1x[4]xf32> +} + +// ----- + +// CHECK-LABEL: @illegal_shape_cast_to_transpose_1d( +// CHECK-SAME: %[[VEC:.*]]: vector<[4]x1xf32>) +func.func @illegal_shape_cast_to_transpose_1d(%vec: vector<[4]x1xf32>) -> vector<[4]xf32> { + // CHECK: %[[TRANSPOSE:.*]] = vector.transpose %[[VEC]], [1, 0] : vector<[4]x1xf32> to vector<1x[4]xf32> + // CHECK: vector.shape_cast %[[TRANSPOSE]] : vector<1x[4]xf32> to vector<[4]xf32> + %0 = vector.shape_cast %vec : vector<[4]x1xf32> to vector<[4]xf32> + return %0 : vector<[4]xf32> +} + +// ----- + +// CHECK-LABEL: @lift_illegal_2d_shape_cast_to_memory +func.func @lift_illegal_2d_shape_cast_to_memory(%a: index, %b: index, %memref: memref) -> vector<1x[4]xf32> { + // CHECK: vector.transfer_read {{.*}} : memref, vector<1x[4]xf32> + // CHECK-NOT: vector.shape_cast + %pad = arith.constant 0.0 : f32 + %illegalRead = vector.transfer_read %memref[%a, %b], %pad {in_bounds = [false, true]}: memref, vector<[4]x1xf32> + %cast = vector.shape_cast %illegalRead : vector<[4]x1xf32> to vector<1x[4]xf32> + return %cast : vector<1x[4]xf32> +} + +// ----- + +// CHECK-LABEL: @lift_illegal_1d_shape_cast_to_memory 
+func.func @lift_illegal_1d_shape_cast_to_memory(%a: index, %b: index, %memref: memref) -> vector<[4]xf32> { + // CHECK: vector.transfer_read {{.*}} : memref, vector<1x[4]xf32> + // CHECK-NOT: vector.shape_cast {{.*}} : vector<[4]x1xf32> to vector<[4]xf32> + %pad = arith.constant 0.0 : f32 + %illegalRead = vector.transfer_read %memref[%a, %b], %pad {in_bounds = [false, true]}: memref, vector<[4]x1xf32> + %cast = vector.shape_cast %illegalRead : vector<[4]x1xf32> to vector<[4]xf32> + return %cast : vector<[4]xf32> +} diff --git a/mlir/test/Dialect/Linalg/mesh-spmdization.mlir b/mlir/test/Dialect/Linalg/mesh-spmdization.mlir new file mode 100644 index 00000000000000..6d21def8de2753 --- /dev/null +++ b/mlir/test/Dialect/Linalg/mesh-spmdization.mlir @@ -0,0 +1,165 @@ +// RUN: mlir-opt \ +// RUN: --mesh-spmdization \ +// RUN: --test-constant-fold \ +// RUN: --split-input-file \ +// RUN: %s | FileCheck %s + +// CHECK: #[[$MAP_IDENTITY_1D:.*]] = affine_map<(d0) -> (d0)> +#map_identity_1d = affine_map<(d0) -> (d0)> + +mesh.mesh @mesh_1d(shape = 2) + +// CHECK-LABEL: func @elementwise_static_1d_mesh_static_1d_tensor +func.func @elementwise_static_1d_mesh_static_1d_tensor( + // CHECK-SAME: %[[IN1:[A-Za-z0-9_]+]]: tensor<1xi8>, + %in1: tensor<2xi8>, + // CHECK-SAME: %[[IN2:[A-Za-z0-9_]+]]: tensor<1xi8>, + %in2: tensor<2xi8>, + // CHECK-SAME: %[[DPS_OUT:[A-Za-z0-9_]+]]: tensor<1xi8> + %dps_out: tensor<2xi8> +// CHECK-SAME: -> tensor<1xi8> { +) -> tensor<2xi8> { + %in1_shared1 = mesh.shard %in1 to <@mesh_1d, [[0]]> : tensor<2xi8> + %in1_shared2 = mesh.shard %in1_shared1 to <@mesh_1d, [[0]]> annotate_for_users: tensor<2xi8> + %in2_shared1 = mesh.shard %in2 to <@mesh_1d, [[0]]> : tensor<2xi8> + %in2_shared2 = mesh.shard %in2_shared1 to <@mesh_1d, [[0]]> annotate_for_users: tensor<2xi8> + %dps_out_shared1 = mesh.shard %dps_out to <@mesh_1d, [[0]]> : tensor<2xi8> + %dps_out_shared2 = mesh.shard %dps_out_shared1 to <@mesh_1d, [[0]]> annotate_for_users: tensor<2xi8> + // CHECK: 
%[[RES:.*]] = linalg.generic { + // CHECK-SAME: indexing_maps = [#[[$MAP_IDENTITY_1D]], #[[$MAP_IDENTITY_1D]], #[[$MAP_IDENTITY_1D]]], + // CHECK-SAME: iterator_types = ["parallel"]} + // CHECK-SAME: ins(%[[IN1]], %[[IN2]] : tensor<1xi8>, tensor<1xi8>) + // CHECK-SAME: outs(%[[DPS_OUT]] : tensor<1xi8>) { + %res = linalg.generic { + indexing_maps = [#map_identity_1d, #map_identity_1d, #map_identity_1d], + iterator_types = ["parallel"] + } ins(%in1_shared2, %in2_shared2 : tensor<2xi8>, tensor<2xi8>) + outs(%dps_out_shared2 : tensor<2xi8>) { + ^bb0(%in1_scalar: i8, %in2_scalar: i8, %out: i8): + %res_scalar = arith.muli %in1_scalar, %in2_scalar : i8 + linalg.yield %res_scalar : i8 + } -> tensor<2xi8> + %res_shared1 = mesh.shard %res to <@mesh_1d, [[0]]> : tensor<2xi8> + %res_shared2 = mesh.shard %res_shared1 to <@mesh_1d, [[0]]> annotate_for_users: tensor<2xi8> + // CHECK: return %[[RES]] : tensor<1xi8> + return %res_shared2 : tensor<2xi8> +} + +// ----- + +mesh.mesh @mesh_1d(shape = 4) + +// CHECK-LABEL: func @matmul_1d_mesh_static_tensors_parallel_iterator_sharding +func.func @matmul_1d_mesh_static_tensors_parallel_iterator_sharding( + // CHECK-SAME: %[[IN1:[A-Za-z0-9_]+]]: tensor<1x3xi8>, + %in1: tensor<4x3xi8>, +// CHECK-SAME: %[[IN2:[A-Za-z0-9_]+]]: tensor<3x8xi8>, + %in2: tensor<3x8xi8>, +// CHECK-SAME: %[[DPS_OUT:[A-Za-z0-9_]+]]: tensor<1x8xi8> + %dps_out: tensor<4x8xi8> +// CHECK-SAME: -> tensor<1x8xi8> { +) -> tensor<4x8xi8> { + %in1_shared1 = mesh.shard %in1 to <@mesh_1d, [[0]]> : tensor<4x3xi8> + %in1_shared2 = mesh.shard %in1_shared1 to <@mesh_1d, [[0]]> annotate_for_users: tensor<4x3xi8> + %in2_shared1 = mesh.shard %in2 to <@mesh_1d, [[]]> : tensor<3x8xi8> + %in2_shared2 = mesh.shard %in2_shared1 to <@mesh_1d, [[]]> annotate_for_users: tensor<3x8xi8> + %dps_out_shared1 = mesh.shard %dps_out to <@mesh_1d, [[0]]> : tensor<4x8xi8> + %dps_out_shared2 = mesh.shard %dps_out_shared1 to <@mesh_1d, [[0]]> annotate_for_users: tensor<4x8xi8> + // CHECK: %[[RES:.*]] = 
linalg.matmul + // CHECK-SAME: ins(%[[IN1]], %[[IN2]] : tensor<1x3xi8>, tensor<3x8xi8>) + // CHECK-SAME: outs(%[[DPS_OUT]] : tensor<1x8xi8>) + // CHECK-SAME: -> tensor<1x8xi8> + %res = linalg.matmul ins(%in1_shared2, %in2_shared2 : tensor<4x3xi8>, tensor<3x8xi8>) + outs(%dps_out_shared2 : tensor<4x8xi8>) -> tensor<4x8xi8> + %res_shared1 = mesh.shard %res to <@mesh_1d, [[0]]> : tensor<4x8xi8> + %res_shared2 = mesh.shard %res_shared1 to <@mesh_1d, [[0]]> annotate_for_users: tensor<4x8xi8> + // CHECK: return %[[RES]] : tensor<1x8xi8> + return %res_shared2 : tensor<4x8xi8> +} + +// ----- + +mesh.mesh @mesh_1d(shape = 3) + +// CHECK-LABEL: func @matmul_1d_mesh_static_tensors_reduction_iterator_sharding +func.func @matmul_1d_mesh_static_tensors_reduction_iterator_sharding( + // CHECK-SAME: %[[IN1:[A-Za-z0-9_]+]]: tensor<4x2xi8>, + %in1: tensor<4x6xi8>, +// CHECK-SAME: %[[IN2:[A-Za-z0-9_]+]]: tensor<2x8xi8>, + %in2: tensor<6x8xi8>, +// CHECK-SAME: %[[DPS_OUT:[A-Za-z0-9_]+]]: tensor<4x8xi8> + %dps_out: tensor<4x8xi8> +// CHECK-SAME: -> tensor<4x8xi8> { +) -> tensor<4x8xi8> { + %in1_shared1 = mesh.shard %in1 to <@mesh_1d, [[], [0]]> : tensor<4x6xi8> + %in1_shared2 = mesh.shard %in1_shared1 to <@mesh_1d, [[], [0]]> annotate_for_users: tensor<4x6xi8> + %in2_shared1 = mesh.shard %in2 to <@mesh_1d, [[0]]> : tensor<6x8xi8> + %in2_shared2 = mesh.shard %in2_shared1 to <@mesh_1d, [[0]]> annotate_for_users: tensor<6x8xi8> + %dps_out_shared1 = mesh.shard %dps_out to <@mesh_1d, [[]]> : tensor<4x8xi8> + %dps_out_shared2 = mesh.shard %dps_out_shared1 to <@mesh_1d, [[]]> annotate_for_users: tensor<4x8xi8> + // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index + // CHECK-DAG: %[[C0_I8:.*]] = arith.constant 0 : i8 + // CHECK-DAG: %[[PROCESS_IDX:.*]] = mesh.process_multi_index on @mesh_1d axes = [0] : index + // CHECK-DAG: %[[MESH_SIZE:.*]] = mesh.mesh_shape @mesh_1d axes = [0] : index + // CHECK: %[[DPS_INIT_OPERAND_CONDITION:.*]] = arith.cmpi eq, %[[PROCESS_IDX]], %[[C0]] : index + // 
CHECK: %[[DPS_INIT_OPERAND:.*]] = scf.if %[[DPS_INIT_OPERAND_CONDITION]] -> (tensor<4x8xi8>) { + // CHECK: scf.yield %[[DPS_OUT]] : tensor<4x8xi8> + // CHECK: } else { + // CHECK-DAG: %[[EMPTY_TENSOR:.*]] = tensor.empty() : tensor<4x8xi8> + // CHECK: %[[NEUTRAL_ELEMENT_FILLED_TENSOR:.*]] = linalg.fill ins(%[[C0_I8]] : i8) + // CHECK-SAME: outs(%[[EMPTY_TENSOR]] : tensor<4x8xi8>) -> tensor<4x8xi8> + // CHECK: scf.yield %[[NEUTRAL_ELEMENT_FILLED_TENSOR]] : tensor<4x8xi8> + // CHECK: } + // CHECK: %[[SHARDED_MATMUL:.*]] = linalg.matmul ins(%[[IN1]], %[[IN2]] : tensor<4x2xi8>, tensor<2x8xi8>) + // CHECK-SAME: outs(%[[DPS_INIT_OPERAND]] : tensor<4x8xi8>) -> tensor<4x8xi8> + // CHECK: %[[ALL_REDUCED:.*]] = mesh.all_reduce %[[SHARDED_MATMUL]] on @mesh_1d mesh_axes = [0] : tensor<4x8xi8> -> tensor<4x8xi8> + %res = linalg.matmul ins(%in1_shared2, %in2_shared2 : tensor<4x6xi8>, tensor<6x8xi8>) + outs(%dps_out_shared2 : tensor<4x8xi8>) -> tensor<4x8xi8> + %res_shared1 = mesh.shard %res to <@mesh_1d, [[]]> : tensor<4x8xi8> + %res_shared2 = mesh.shard %res_shared1 to <@mesh_1d, [[]]> annotate_for_users: tensor<4x8xi8> + // CHECK: return %[[ALL_REDUCED]] : tensor<4x8xi8> + return %res_shared2 : tensor<4x8xi8> +} + +// ----- + +mesh.mesh @mesh_1d(shape = 3) + +// CHECK-LABEL: func @matmul_1d_mesh_static_tensors_reduction_iterator_sharding_with_partial_result +func.func @matmul_1d_mesh_static_tensors_reduction_iterator_sharding_with_partial_result( + // CHECK-SAME: %[[IN1:[A-Za-z0-9_]+]]: tensor<4x2xi8>, + %in1: tensor<4x6xi8>, +// CHECK-SAME: %[[IN2:[A-Za-z0-9_]+]]: tensor<2x8xi8>, + %in2: tensor<6x8xi8>, +// CHECK-SAME: %[[DPS_OUT:[A-Za-z0-9_]+]]: tensor<4x8xi8> + %dps_out: tensor<4x8xi8> +// CHECK-SAME: -> tensor<4x8xi8> { +) -> tensor<4x8xi8> { + %in1_shared1 = mesh.shard %in1 to <@mesh_1d, [[], [0]]> : tensor<4x6xi8> + %in1_shared2 = mesh.shard %in1_shared1 to <@mesh_1d, [[], [0]]> annotate_for_users: tensor<4x6xi8> + %in2_shared1 = mesh.shard %in2 to <@mesh_1d, [[0]]> : 
tensor<6x8xi8> + %in2_shared2 = mesh.shard %in2_shared1 to <@mesh_1d, [[0]]> annotate_for_users: tensor<6x8xi8> + %dps_out_shared1 = mesh.shard %dps_out to <@mesh_1d, [[]]> : tensor<4x8xi8> + %dps_out_shared2 = mesh.shard %dps_out_shared1 to <@mesh_1d, [[]]> annotate_for_users: tensor<4x8xi8> + // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index + // CHECK-DAG: %[[C0_I8:.*]] = arith.constant 0 : i8 + // CHECK-DAG: %[[PROCESS_IDX:.*]] = mesh.process_multi_index on @mesh_1d axes = [0] : index + // CHECK-DAG: %[[MESH_SIZE:.*]] = mesh.mesh_shape @mesh_1d axes = [0] : index + // CHECK: %[[DPS_INIT_OPERAND_CONDITION:.*]] = arith.cmpi eq, %[[PROCESS_IDX]], %[[C0]] : index + // CHECK: %[[DPS_INIT_OPERAND:.*]] = scf.if %[[DPS_INIT_OPERAND_CONDITION]] -> (tensor<4x8xi8>) { + // CHECK: scf.yield %[[DPS_OUT]] : tensor<4x8xi8> + // CHECK: } else { + // CHECK-DAG: %[[EMPTY_TENSOR:.*]] = tensor.empty() : tensor<4x8xi8> + // CHECK: %[[NEUTRAL_ELEMENT_FILLED_TENSOR:.*]] = linalg.fill ins(%[[C0_I8]] : i8) + // CHECK-SAME: outs(%[[EMPTY_TENSOR]] : tensor<4x8xi8>) -> tensor<4x8xi8> + // CHECK: scf.yield %[[NEUTRAL_ELEMENT_FILLED_TENSOR]] : tensor<4x8xi8> + // CHECK: } + // CHECK: %[[SHARDED_MATMUL:.*]] = linalg.matmul ins(%[[IN1]], %[[IN2]] : tensor<4x2xi8>, tensor<2x8xi8>) + // CHECK-SAME: outs(%[[DPS_INIT_OPERAND]] : tensor<4x8xi8>) -> tensor<4x8xi8> + %res = linalg.matmul ins(%in1_shared2, %in2_shared2 : tensor<4x6xi8>, tensor<6x8xi8>) + outs(%dps_out_shared2 : tensor<4x8xi8>) -> tensor<4x8xi8> + %res_shared1 = mesh.shard %res to <@mesh_1d, [[]], partial = sum[0]> : tensor<4x8xi8> + %res_shared2 = mesh.shard %res_shared1 to <@mesh_1d, [[]], partial = sum[0]> annotate_for_users: tensor<4x8xi8> + // CHECK: return %[[SHARDED_MATMUL]] : tensor<4x8xi8> + return %res_shared2 : tensor<4x8xi8> +} diff --git a/mlir/test/Dialect/OpenACC/invalid.mlir b/mlir/test/Dialect/OpenACC/invalid.mlir index 70747b7e2acf4b..ec5430420524ce 100644 --- a/mlir/test/Dialect/OpenACC/invalid.mlir +++ 
b/mlir/test/Dialect/OpenACC/invalid.mlir @@ -738,3 +738,43 @@ func.func @acc_atomic_capture(%x: memref, %y: memref, %v: memref, acc.terminator } } + +// ----- + +func.func @acc_combined() { + // expected-error @below {{expected 'loop'}} + acc.parallel combined() { + } + + return +} + +// ----- + +func.func @acc_combined() { + // expected-error @below {{expected compute construct name}} + acc.loop combined(loop) { + } + + return +} + +// ----- + +func.func @acc_combined() { + // expected-error @below {{expected 'loop'}} + acc.parallel combined(parallel loop) { + } + + return +} + +// ----- + +func.func @acc_combined() { + // expected-error @below {{expected ')'}} + acc.loop combined(parallel loop) { + } + + return +} diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir index 1739b3de3e65fd..2ef2178cb2b63a 100644 --- a/mlir/test/Dialect/OpenACC/ops.mlir +++ b/mlir/test/Dialect/OpenACC/ops.mlir @@ -1846,9 +1846,49 @@ func.func @acc_atomic_capture(%v: memref, %x: memref, %expr: i32) { // ----- -%c2 = arith.constant 2 : i32 -%c1 = arith.constant 1 : i32 -acc.parallel num_gangs({%c2 : i32} [#acc.device_type], {%c1 : i32, %c1 : i32, %c1 : i32} [#acc.device_type]) { +// CHECK-LABEL: func.func @acc_num_gangs +func.func @acc_num_gangs() { + %c2 = arith.constant 2 : i32 + %c1 = arith.constant 1 : i32 + acc.parallel num_gangs({%c2 : i32} [#acc.device_type], {%c1 : i32, %c1 : i32, %c1 : i32} [#acc.device_type]) { + } + + return } // CHECK: acc.parallel num_gangs({%c2{{.*}} : i32} [#acc.device_type], {%c1{{.*}} : i32, %c1{{.*}} : i32, %c1{{.*}} : i32} [#acc.device_type]) + +// ----- + +// CHECK-LABEL: func.func @acc_combined +func.func @acc_combined() { + acc.parallel combined(loop) { + acc.loop combined(parallel) { + acc.yield + } + acc.terminator + } + + acc.kernels combined(loop) { + acc.loop combined(kernels) { + acc.yield + } + acc.terminator + } + + acc.serial combined(loop) { + acc.loop combined(serial) { + acc.yield + } + acc.terminator + 
} + + return +} + +// CHECK: acc.parallel combined(loop) +// CHECK: acc.loop combined(parallel) +// CHECK: acc.kernels combined(loop) +// CHECK: acc.loop combined(kernels) +// CHECK: acc.serial combined(loop) +// CHECK: acc.loop combined(serial) diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir index bdad713709afa2..b9d1148301dd16 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -88,7 +88,7 @@ module { // // Main driver that reads matrix from file and calls the sparse kernel. 
// - func.func @entry() { + func.func @main() { %i0 = arith.constant 0 : i32 %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir index 30e620b9d610ba..d615cb66c3d08d 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -84,7 +84,7 @@ module { // // Main driver that reads matrix from file and calls the sparse kernel. 
// - func.func @entry() { + func.func @main() { %f0 = arith.constant 0.0 : f64 %cst0 = arith.constant 0 : index %cst1 = arith.constant 1 : index diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_mult_elt.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_mult_elt.mlir index f7bcd1122d46c1..c30c6b9b5cc2f5 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_mult_elt.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_mult_elt.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -60,7 +60,7 @@ module { } // Driver method to call and verify kernel. - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %f0 = arith.constant 0.0 : f32 @@ -84,11 +84,18 @@ module { // // Verify results. Only two entries stored in result! // - // CHECK: ( 14, 20, 0, 0 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 2 + // CHECK-NEXT: dim = ( 32, 16 ) + // CHECK-NEXT: lvl = ( 32, 16 ) + // CHECK-NEXT: pos[0] : ( 0, 2 + // CHECK-NEXT: crd[0] : ( 2, 31 + // CHECK-NEXT: pos[1] : ( 0, 1, 2 + // CHECK-NEXT: crd[1] : ( 2, 0 + // CHECK-NEXT: values : ( 14, 20 + // CHECK-NEXT: ---- // - %val = sparse_tensor.values %0 : tensor<32x16xf32, #DCSR> to memref - %vv = vector.transfer_read %val[%c0], %f0: memref, vector<4xf32> - vector.print %vv : vector<4xf32> + sparse_tensor.print %0 : tensor<32x16xf32, #DCSR> // Release the resources. 
bufferization.dealloc_tensor %sta : tensor<32x16xf32, #DCSR> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_reduction.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_reduction.mlir index 17def3f52c003e..74f0e7698bc14b 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_reduction.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_reduction.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -70,10 +70,7 @@ module { } // Driver method to call and verify tensor kernel. - func.func @entry() { - %c0 = arith.constant 0 : index - %i0 = arith.constant 0 : i32 - + func.func @main() { // Setup very sparse 3-d tensors. %t1 = arith.constant sparse< [ [1,1,3], [2,0,0], [2,2,1], [2,2,2], [2,2,3] ], [ 1, 2, 3, 4, 5 ] @@ -94,23 +91,23 @@ module { // // Verify results. Only two entries stored in result. Correct structure. 
// - // CHECK: ( 7, 69, 0, 0 ) - // CHECK-NEXT: ( ( 0, 0, 0 ), ( 0, 7, 0 ), ( 0, 0, 69 ) ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 2 + // CHECK-NEXT: dim = ( 3, 3 ) + // CHECK-NEXT: lvl = ( 3, 3 ) + // CHECK-NEXT: pos[0] : ( 0, 2 + // CHECK-NEXT: crd[0] : ( 1, 2 + // CHECK-NEXT: pos[1] : ( 0, 1, 2 + // CHECK-NEXT: crd[1] : ( 1, 2 + // CHECK-NEXT: values : ( 7, 69 + // CHECK-NEXT: ---- // - %val = sparse_tensor.values %0 - : tensor to memref - %vv = vector.transfer_read %val[%c0], %i0: memref, vector<4xi32> - vector.print %vv : vector<4xi32> - %dm = sparse_tensor.convert %0 - : tensor to tensor - %vm = vector.transfer_read %dm[%c0, %c0], %i0: tensor, vector<3x3xi32> - vector.print %vm : vector<3x3xi32> + sparse_tensor.print %0 : tensor // Release the resources. bufferization.dealloc_tensor %st1 : tensor bufferization.dealloc_tensor %st2 : tensor bufferization.dealloc_tensor %0 : tensor - bufferization.dealloc_tensor %dm : tensor return } diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir index e2d8c4fd4628d9..88513c80219a85 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -70,7 +70,7 @@ module { // // Main driver that reads matrix from file and calls the sparse kernel. 
// - func.func @entry() { + func.func @main() { %d0 = arith.constant 0.0 : f64 %c0 = arith.constant 0 : index @@ -83,11 +83,18 @@ module { // Print the result for verification. // - // CHECK: ( 1, 1.96, 4, 6.25, 9, 16.81, 16, 27.04, 25 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 5, 5 ) + // CHECK-NEXT: lvl = ( 5, 5 ) + // CHECK-NEXT: pos[0] : ( 0, 5 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3, 4 + // CHECK-NEXT: pos[1] : ( 0, 2, 4, 5, 7, 9 + // CHECK-NEXT: crd[1] : ( 0, 3, 1, 4, 2, 0, 3, 1, 4 + // CHECK-NEXT: values : ( 1, 1.96, 4, 6.25, 9, 16.81, 16, 27.04, 25 + // CHECK-NEXT: ---- // - %m = sparse_tensor.values %0 : tensor to memref - %v = vector.transfer_read %m[%c0], %d0: memref, vector<9xf64> - vector.print %v : vector<9xf64> + sparse_tensor.print %0 : tensor // Release the resources. bufferization.dealloc_tensor %x : tensor diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack.mlir index b792d00681ddb4..7cde6b93d3250c 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -54,7 +54,7 @@ module { // // Main driver. 
// - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %f0 = arith.constant 0.0 : f64 %i0 = arith.constant 0 : i32 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir index 8a65e2449c1574..aa1bd04fde87dc 100755 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -48,7 +48,7 @@ module { // // Main driver. // - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %f0 = arith.constant 0.0 : f32 @@ -107,26 +107,39 @@ module { // // Verify. 
// - // CHECK: ( ( ( 1, 2 ), ( 3, 4 ), ( 0, 0 ) ), ( ( 0, 0 ), ( 0, 0 ), ( 0, 0 ) ), ( ( 0, 0 ), ( 5, 0 ), ( 6, 7 ) ), ( ( 0, 0 ), ( 8, 0 ), ( 0, 0 ) ) ) - // CHECK: ( ( ( 1, 2 ), ( 0, 3 ), ( 4, 0 ) ), ( ( 5, 6 ), ( 0, 0 ), ( 0, 7 ) ), ( ( 8, 9 ), ( 10, 11 ), ( 12, 13 ) ), ( ( 14, 0 ), ( 0, 15 ), ( 0, 16 ) ) ) - // CHECK: ( ( ( 1, 2 ), ( 0, 3 ), ( 4, 0 ) ), ( ( 5, 6 ), ( 0, 0 ), ( 0, 7 ) ), ( ( 8, 9 ), ( 10, 11 ), ( 12, 13 ) ), ( ( 14, 0 ), ( 0, 15 ), ( 0, 16 ) ) ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 8 + // CHECK-NEXT: dim = ( 4, 3, 2 ) + // CHECK-NEXT: lvl = ( 4, 3, 2 ) + // CHECK-NEXT: pos[0] : ( 0, 3 + // CHECK-NEXT: crd[0] : ( 0, 2, 3 + // CHECK-NEXT: pos[1] : ( 0, 2, 4, 5 + // CHECK-NEXT: crd[1] : ( 0, 1, 1, 2, 1 + // CHECK-NEXT: pos[2] : ( 0, 2, 4, 5, 7, 8 + // CHECK-NEXT: crd[2] : ( 0, 1, 0, 1, 0, 0, 1, 0 + // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7, 8 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 16 + // CHECK-NEXT: dim = ( 4, 3, 2 ) + // CHECK-NEXT: lvl = ( 4, 3, 2 ) + // CHECK-NEXT: pos[2] : ( 0, 2, 3, 4, 6, 6, 7, 9, 11, 13, 14, 15, 16 + // CHECK-NEXT: crd[2] : ( 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1 + // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 22 + // CHECK-NEXT: dim = ( 4, 3, 2 ) + // CHECK-NEXT: lvl = ( 4, 3, 2 ) + // CHECK-NEXT: pos[1] : ( 0, 3, 5, 8, 11 + // CHECK-NEXT: crd[1] : ( 0, 1, 2, 0, 2, 0, 1, 2, 0, 1, 2 + // CHECK-NEXT: values : ( 1, 2, 0, 3, 4, 0, 5, 6, 0, 7, 8, 9, 10, 11, 12, 13, 14, 0, 0, 15, 0, 16 + // CHECK-NEXT: ---- // + sparse_tensor.print %s0 : tensor<4x3x2xf32, #CCC> + sparse_tensor.print %s1 : tensor<4x3x2xf32, #BatchedCSR> + sparse_tensor.print %s2 : tensor<4x3x2xf32, #CSRDense> - %d0 = sparse_tensor.convert %s0 : tensor<4x3x2xf32, #CCC> to tensor<4x3x2xf32> - %v0 = vector.transfer_read %d0[%c0, %c0, %c0], %f0 : tensor<4x3x2xf32>, 
vector<4x3x2xf32> - vector.print %v0 : vector<4x3x2xf32> - - %d1 = sparse_tensor.convert %s1 : tensor<4x3x2xf32, #BatchedCSR> to tensor<4x3x2xf32> - %v1 = vector.transfer_read %d1[%c0, %c0, %c0], %f0 : tensor<4x3x2xf32>, vector<4x3x2xf32> - vector.print %v1 : vector<4x3x2xf32> - - %d2 = sparse_tensor.convert %s2 : tensor<4x3x2xf32, #CSRDense> to tensor<4x3x2xf32> - %v2 = vector.transfer_read %d1[%c0, %c0, %c0], %f0 : tensor<4x3x2xf32>, vector<4x3x2xf32> - vector.print %v2 : vector<4x3x2xf32> - - bufferization.dealloc_tensor %d0 : tensor<4x3x2xf32> - bufferization.dealloc_tensor %d1 : tensor<4x3x2xf32> - bufferization.dealloc_tensor %d2 : tensor<4x3x2xf32> // FIXME: doing this explicitly crashes runtime // bufferization.dealloc_tensor %s0 : tensor<4x3x2xf32, #CCC> // bufferization.dealloc_tensor %s1 : tensor<4x3x2xf32, #BatchedCSR> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pooling_nhwc.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pooling_nhwc.mlir index 3ce089d7a7cf6b..39699fbdb14e59 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pooling_nhwc.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pooling_nhwc.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -47,7 +47,7 @@ func.func @pooling_nhwc_sum(%input: tensor<1x4x4x1xf32>, %filter: tensor<2x2xf32 } -func.func @entry() { +func.func @main() { %c0 = arith.constant 0 : index %zero = arith.constant 0.00000e+00 : f32 @@ -76,17 +76,26 @@ func.func @entry() { // // Sparse 
pooling should have the same output. // - - // CHECK-NEXT: ( ( ( ( 6 ), ( 6 ), ( 6 ) ), ( ( 6 ), ( 6 ), ( 6 ) ), ( ( 6 ), ( 6 ), ( 6 ) ) ) ) - %s1 = sparse_tensor.convert %CCCC_ret : tensor<1x3x3x1xf32, #CCCC> to tensor<1x3x3x1xf32> - %v1 = vector.transfer_read %s1[%c0, %c0, %c0, %c0], %zero - : tensor<1x3x3x1xf32>, vector<1x3x3x1xf32> - vector.print %v1 : vector<1x3x3x1xf32> + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 1, 3, 3, 1 ) + // CHECK-NEXT: lvl = ( 1, 3, 3, 1 ) + // CHECK-NEXT: pos[0] : ( 0, 1 + // CHECK-NEXT: crd[0] : ( 0 + // CHECK-NEXT: pos[1] : ( 0, 3 + // CHECK-NEXT: crd[1] : ( 0, 1, 2 + // CHECK-NEXT: pos[2] : ( 0, 3, 6, 9 + // CHECK-NEXT: crd[2] : ( 0, 1, 2, 0, 1, 2, 0, 1, 2 + // CHECK-NEXT: pos[3] : ( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 + // CHECK-NEXT: crd[3] : ( 0, 0, 0, 0, 0, 0, 0, 0, 0 + // CHECK-NEXT: values : ( 6, 6, 6, 6, 6, 6, 6, 6, 6 + // CHECK-NEXT: ---- + // + sparse_tensor.print %CCCC_ret : tensor<1x3x3x1xf32, #CCCC> // Releases resources. 
bufferization.dealloc_tensor %in_CCCC : tensor<1x4x4x1xf32, #CCCC> bufferization.dealloc_tensor %CCCC_ret : tensor<1x3x3x1xf32, #CCCC> bufferization.dealloc_tensor %dense_ret : tensor<1x3x3x1xf32> - bufferization.dealloc_tensor %s1 : tensor<1x3x3x1xf32> return } diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir index b322d965f2dc7e..873322929232a7 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -49,7 +49,7 @@ module { return %0: tensor<5x6xi32> } - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %i0 = arith.constant 0 : i32 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom.mlir index 17219cde035d34..a927a5dfb94bc2 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: 
%{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -114,39 +114,8 @@ module { return %0 : tensor } - // Dumps a sparse vector of type f64. - func.func @dump_vec(%arg0: tensor) { - // Dump the values array to verify only sparse contents are stored. - %c0 = arith.constant 0 : index - %d0 = arith.constant 0.0 : f64 - %0 = sparse_tensor.values %arg0 : tensor to memref - %1 = vector.transfer_read %0[%c0], %d0: memref, vector<8xf64> - vector.print %1 : vector<8xf64> - // Dump the dense vector to verify structure is correct. - %dv = sparse_tensor.convert %arg0 : tensor to tensor - %2 = vector.transfer_read %dv[%c0], %d0: tensor, vector<16xf64> - vector.print %2 : vector<16xf64> - bufferization.dealloc_tensor %dv : tensor - return - } - - // Dump a sparse matrix. - func.func @dump_mat(%arg0: tensor) { - // Dump the values array to verify only sparse contents are stored. - %c0 = arith.constant 0 : index - %d0 = arith.constant 0.0 : f64 - %0 = sparse_tensor.values %arg0 : tensor to memref - %1 = vector.transfer_read %0[%c0], %d0: memref, vector<16xf64> - vector.print %1 : vector<16xf64> - %dm = sparse_tensor.convert %arg0 : tensor to tensor - %2 = vector.transfer_read %dm[%c0, %c0], %d0: tensor, vector<5x5xf64> - vector.print %2 : vector<5x5xf64> - bufferization.dealloc_tensor %dm : tensor - return - } - // Driver method to call and verify vector kernels. - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index // Setup sparse matrices. @@ -171,19 +140,43 @@ module { // // Verify the results. 
// - // CHECK: ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( ( 1, 2, 0, 0, 0 ), ( 3, 0, 0, 0, 0 ), ( 0, 0, 4, 5, 6 ), ( 7, 0, 8, 9, 0 ), ( 0, 0, 0, 0, 0 ) ) - // CHECK-NEXT: ( 6, 5, 4, 3, 2, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( ( 6, 0, 0, 0, 0 ), ( 0, 0, 0, 5, 0 ), ( 4, 0, 0, 3, 0 ), ( 0, 2, 0, 0, 0 ), ( 0, 11, 0, 0, 0 ) ) - // CHECK-NEXT: ( 7, 7, 9, 8, 7, 7, 12, 11, 11, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( ( 7, 0, 0, 7, 0 ), ( 9, 0, 0, 0, 0 ), ( 8, 7, 0, 7, 0 ), ( 12, 11, 0, 11, 0 ), ( 0, 0, 0, 0, 0 ) ) - // CHECK-NEXT: ( 7, 7, 9, 8, 7, 7, 12, 11, 11, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( ( 7, 0, 0, 7, 0 ), ( 9, 0, 0, 0, 0 ), ( 8, 7, 0, 7, 0 ), ( 12, 11, 0, 11, 0 ), ( 0, 0, 0, 0, 0 ) ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 4, 5 ) + // CHECK-NEXT: lvl = ( 4, 5 ) + // CHECK-NEXT: pos[1] : ( 0, 2, 3, 6, 9 + // CHECK-NEXT: crd[1] : ( 0, 1, 0, 2, 3, 4, 0, 2, 3 + // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7, 8, 9 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 6 + // CHECK-NEXT: dim = ( 5, 4 ) + // CHECK-NEXT: lvl = ( 5, 4 ) + // CHECK-NEXT: pos[1] : ( 0, 1, 2, 4, 5, 6 + // CHECK-NEXT: crd[1] : ( 0, 3, 0, 3, 1, 1 + // CHECK-NEXT: values : ( 6, 5, 4, 3, 2, 11 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 4, 4 ) + // CHECK-NEXT: lvl = ( 4, 4 ) + // CHECK-NEXT: pos[1] : ( 0, 2, 3, 6, 9 + // CHECK-NEXT: crd[1] : ( 0, 3, 0, 0, 1, 3, 0, 1, 3 + // CHECK-NEXT: values : ( 7, 7, 9, 8, 7, 7, 12, 11, 11 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 4, 4 ) + // CHECK-NEXT: lvl = ( 4, 4 ) + // CHECK-NEXT: pos[1] : ( 0, 2, 3, 6, 9 + // CHECK-NEXT: crd[1] : ( 0, 3, 0, 0, 1, 3, 0, 1, 3 + // CHECK-NEXT: values : ( 7, 7, 9, 8, 7, 7, 12, 11, 11 + // CHECK-NEXT: ---- // - call @dump_mat(%sm1) : (tensor) -> () - call @dump_mat(%sm2r) : 
(tensor) -> () - call @dump_mat(%5) : (tensor) -> () - call @dump_mat(%6) : (tensor) -> () + sparse_tensor.print %sm1 : tensor + sparse_tensor.print %sm2r : tensor + sparse_tensor.print %5 : tensor + sparse_tensor.print %6 : tensor // Release the resources. bufferization.dealloc_tensor %sm1 : tensor diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_prod.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_prod.mlir index 6e2c572cf21ba0..18bf6a71c53058 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_prod.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_prod.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -89,39 +89,9 @@ module { return %0 : tensor } - // Dumps a sparse vector of type f64. - func.func @dump_vec(%arg0: tensor) { - // Dump the values array to verify only sparse contents are stored. - %c0 = arith.constant 0 : index - %d0 = arith.constant 0.0 : f64 - %0 = sparse_tensor.values %arg0 : tensor to memref - %1 = vector.transfer_read %0[%c0], %d0: memref, vector<8xf64> - vector.print %1 : vector<8xf64> - // Dump the dense vector to verify structure is correct. - %dv = sparse_tensor.convert %arg0 : tensor to tensor - %2 = vector.transfer_read %dv[%c0], %d0: tensor, vector<16xf64> - vector.print %2 : vector<16xf64> - bufferization.dealloc_tensor %dv : tensor - return - } - - // Dump a sparse matrix. 
- func.func @dump_mat(%arg0: tensor) { - // Dump the values array to verify only sparse contents are stored. - %c0 = arith.constant 0 : index - %d0 = arith.constant 0.0 : f64 - %0 = sparse_tensor.values %arg0 : tensor to memref - %1 = vector.transfer_read %0[%c0], %d0: memref, vector<16xf64> - vector.print %1 : vector<16xf64> - %dm = sparse_tensor.convert %arg0 : tensor to tensor - %2 = vector.transfer_read %dm[%c0, %c0], %d0: tensor, vector<5x5xf64> - vector.print %2 : vector<5x5xf64> - bufferization.dealloc_tensor %dm : tensor - return - } // Driver method to call and verify vector kernels. - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index // Setup sparse matrices. @@ -144,15 +114,43 @@ module { // // Verify the results. // - // CHECK: ( 2, 3, 120, 504, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 2, 3, 120, 504, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 6, 5, 12, 2, 11, 0, 0, 0 ) - // CHECK-NEXT: ( 6, 5, 12, 2, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 4, 5 ) + // CHECK-NEXT: lvl = ( 4, 5 ) + // CHECK-NEXT: pos[1] : ( 0, 2, 3, 6, 9 + // CHECK-NEXT: crd[1] : ( 0, 1, 0, 2, 3, 4, 0, 2, 3 + // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7, 8, 9 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 6 + // CHECK-NEXT: dim = ( 5, 4 ) + // CHECK-NEXT: lvl = ( 5, 4 ) + // CHECK-NEXT: pos[1] : ( 0, 1, 2, 4, 5, 6 + // CHECK-NEXT: crd[1] : ( 0, 3, 0, 3, 1, 1 + // CHECK-NEXT: values : ( 6, 5, 4, 3, 2, 11 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 4 + // CHECK-NEXT: dim = ( 4 ) + // CHECK-NEXT: lvl = ( 4 ) + // CHECK-NEXT: pos[0] : ( 0, 4 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3 + // CHECK-NEXT: values : ( 2, 3, 120, 504 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 5 + // CHECK-NEXT: dim = ( 5 ) + // CHECK-NEXT: lvl = ( 5 ) + // CHECK-NEXT: pos[0] : ( 0, 5 + // CHECK-NEXT: 
crd[0] : ( 0, 1, 2, 3, 4 + // CHECK-NEXT: values : ( 6, 5, 12, 2, 11 + // CHECK-NEXT: ---- // - call @dump_mat(%sm1) : (tensor) -> () - call @dump_mat(%sm2r) : (tensor) -> () - call @dump_vec(%1) : (tensor) -> () - call @dump_vec(%2) : (tensor) -> () + sparse_tensor.print %sm1 : tensor + sparse_tensor.print %sm2r : tensor + sparse_tensor.print %1 : tensor + sparse_tensor.print %2 : tensor // Release the resources. bufferization.dealloc_tensor %sm1 : tensor diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_sum.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_sum.mlir index 80c35676e804b1..8588ebd98cc918 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_sum.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_sum.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -145,7 +145,7 @@ module { return } - func.func @entry() { + func.func @main() { %ri = arith.constant dense<0> : tensor // Sparse vector of length 8 with 2 stored elements (and thus 6 implicit zeros). 
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir index 4ad23d1c031238..96ec8bad8b5515 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -106,7 +106,7 @@ module { return } - func.func @entry() { + func.func @main() { %ri = arith.constant dense< 7 > : tensor %rf = arith.constant dense< 2.0 > : tensor diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_min.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_min.mlir index 198920024221b0..16c1d7df7e89dd 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_min.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_min.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -82,7 +82,7 @@ module { return } - func.func @entry() { + func.func 
@main() { %ri = arith.constant dense<999> : tensor // Vectors with a few zeros. diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir index 7ec30787fea464..4797fbb8f5319c 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -162,7 +162,7 @@ module { return } - func.func @entry() { + func.func @main() { // Note: Constants bufferize to read-only buffers. 
%ri = arith.constant dense< 7 > : tensor %rf = arith.constant dense< 2.0 > : tensor diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reshape.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reshape.mlir index b551f9545dc436..4c26ebe6e401ba 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reshape.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reshape.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -63,7 +63,7 @@ module { } - func.func @entry() { + func.func @main() { %m = arith.constant dense <[ [ 1.1, 0.0, 1.3, 0.0 ], [ 2.1, 0.0, 2.3, 0.0 ], [ 3.1, 0.0, 3.3, 0.0 ]]> : tensor<3x4xf64> @@ -76,20 +76,41 @@ module { %c0 = arith.constant 0 : index %df = arith.constant -1.0 : f64 - // CHECK: ( 1.1, 1.3, 2.1, 2.3, 3.1, 3.3 - %b0 = sparse_tensor.values %reshaped0: tensor<2x6xf64, #SparseMatrix> to memref - %v0 = vector.transfer_read %b0[%c0], %df: memref, vector<12xf64> - vector.print %v0 : vector<12xf64> - - // CHECK: ( 1.1, 1.3, 2.1, 2.3, 3.1, 3.3 - %b1 = sparse_tensor.values %reshaped1: tensor<12xf64, #SparseVector> to memref - %v1 = vector.transfer_read %b1[%c0], %df: memref, vector<12xf64> - vector.print %v1 : vector<12xf64> - - // CHECK: ( 1.1, 1.3, 2.1, 2.3, 3.1, 3.3 - %b2 = sparse_tensor.values %reshaped2: tensor<2x3x2xf64, #Sparse3dTensor> to memref - %v2 = vector.transfer_read %b2[%c0], %df: memref, vector<12xf64> - vector.print %v2: vector<12xf64> + // + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 6 + // 
CHECK-NEXT: dim = ( 2, 6 ) + // CHECK-NEXT: lvl = ( 2, 6 ) + // CHECK-NEXT: pos[0] : ( 0, 2 + // CHECK-NEXT: crd[0] : ( 0, 1 + // CHECK-NEXT: pos[1] : ( 0, 3, 6 + // CHECK-NEXT: crd[1] : ( 0, 2, 4, 0, 2, 4 + // CHECK-NEXT: values : ( 1.1, 1.3, 2.1, 2.3, 3.1, 3.3 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 6 + // CHECK-NEXT: dim = ( 12 ) + // CHECK-NEXT: lvl = ( 12 ) + // CHECK-NEXT: pos[0] : ( 0, 6 + // CHECK-NEXT: crd[0] : ( 0, 2, 4, 6, 8, 10 + // CHECK-NEXT: values : ( 1.1, 1.3, 2.1, 2.3, 3.1, 3.3 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 6 + // CHECK-NEXT: dim = ( 2, 3, 2 ) + // CHECK-NEXT: lvl = ( 2, 3, 2 ) + // CHECK-NEXT: pos[0] : ( 0, 2 + // CHECK-NEXT: crd[0] : ( 0, 1 + // CHECK-NEXT: pos[1] : ( 0, 3, 6 + // CHECK-NEXT: crd[1] : ( 0, 1, 2, 0, 1, 2 + // CHECK-NEXT: pos[2] : ( 0, 1, 2, 3, 4, 5, 6 + // CHECK-NEXT: crd[2] : ( 0, 0, 0, 0, 0, 0 + // CHECK-NEXT: values : ( 1.1, 1.3, 2.1, 2.3, 3.1, 3.3 + // CHECK-NEXT: ---- + // + sparse_tensor.print %reshaped0: tensor<2x6xf64, #SparseMatrix> + sparse_tensor.print %reshaped1: tensor<12xf64, #SparseVector> + sparse_tensor.print %reshaped2: tensor<2x3x2xf64, #Sparse3dTensor> bufferization.dealloc_tensor %sm : tensor<3x4xf64, #SparseMatrix> bufferization.dealloc_tensor %reshaped0 : tensor<2x6xf64, #SparseMatrix> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_push_back.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_push_back.mlir index c2e83fc61c4b53..1536249e60f286 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_push_back.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_push_back.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: 
%{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -28,7 +28,7 @@ // RUN: %if mlir_arm_sve_tests %{ %{compile_sve} | %{run_sve} | FileCheck %s %} module { - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %c10 = arith.constant 10 : index diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_sort_coo.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_sort_coo.mlir index e7dd0ad32a2430..0682bc6f314fd1 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_sort_coo.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_sort_coo.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -64,7 +64,7 @@ module { } // The main driver. 
- func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : i32 %c1 = arith.constant 1 : i32 %c2 = arith.constant 2 : i32 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir index 3330d2249707fc..085b36a368704d 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -81,7 +81,7 @@ module { // // Main driver that reads matrix from file and calls the sparse kernel. 
// - func.func @entry() { + func.func @main() { %d0 = arith.constant 0.0 : f32 %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir index afaf36c4072c09..20a8c5f812de9b 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -169,7 +169,7 @@ module { // // Main driver. 
// - func.func @entry() { + func.func @main() { %d0 = arith.constant 0.0 : f64 %c0 = arith.constant 0 : index @@ -207,22 +207,36 @@ module { // CHECK-SAME: ( 0, 0, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 0 ), // CHECK-SAME: ( 0, 0, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 192 ) ) // - // CHECK-NEXT: ( 96, 192, 0, 0 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 2 + // CHECK-NEXT: dim = ( 8, 8 ) + // CHECK-NEXT: lvl = ( 8, 8 ) + // CHECK-NEXT: pos[0] : ( 0, 2 + // CHECK-NEXT: crd[0] : ( 0, 7 + // CHECK-NEXT: pos[1] : ( 0, 1, 2 + // CHECK-NEXT: crd[1] : ( 0, 7 + // CHECK-NEXT: values : ( 96, 192 + // CHECK-NEXT: ---- // - // CHECK-NEXT: ( 96, 192, 0, 0 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 2 + // CHECK-NEXT: dim = ( 8, 8 ) + // CHECK-NEXT: lvl = ( 8, 8 ) + // CHECK-NEXT: pos[0] : ( 0, 2 + // CHECK-NEXT: crd[0] : ( 0, 7 + // CHECK-NEXT: pos[1] : ( 0, 1, 2 + // CHECK-NEXT: crd[1] : ( 0, 7 + // CHECK-NEXT: values : ( 96, 192 + // CHECK-NEXT: ---- // - %m2 = sparse_tensor.values %2 : tensor<8x8xf64, #SM> to memref - %m3 = sparse_tensor.values %3 : tensor<8x8xf64, #SM> to memref %v0 = vector.transfer_read %0[%c0, %c0], %d0 : tensor<8x8xf64>, vector<8x8xf64> %v1 = vector.transfer_read %1[%c0, %c0], %d0 : tensor<8x8xf64>, vector<8x8xf64> - %v2 = vector.transfer_read %m2[%c0], %d0 : memref, vector<4xf64> - %v3 = vector.transfer_read %m3[%c0], %d0 : memref, vector<4xf64> vector.print %v0 : vector<8x8xf64> vector.print %v1 : vector<8x8xf64> - vector.print %v2 : vector<4xf64> - vector.print %v3 : vector<4xf64> + sparse_tensor.print %2 : tensor<8x8xf64, #SM> + sparse_tensor.print %3 : tensor<8x8xf64, #SM> // Release the resources. 
bufferization.dealloc_tensor %s : tensor<8x8xf64, #SM> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir index 6ec13fd623b5cd..4e9090ae201d02 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -65,7 +65,7 @@ module { // and then calls the sparse scaling kernel with the sparse tensor // as input argument. // - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %f0 = arith.constant 0.0 : f32 @@ -88,11 +88,16 @@ module { // Print the resulting compacted values for verification. // - // CHECK: ( 2, 2, 2, 4, 6, 8, 2, 10, 2, 2, 12, 2, 14, 2, 2, 16 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 16 + // CHECK-NEXT: dim = ( 8, 8 ) + // CHECK-NEXT: lvl = ( 8, 8 ) + // CHECK-NEXT: pos[1] : ( 0, 3, 4, 5, 6, 8, 11, 14, 16 + // CHECK-NEXT: crd[1] : ( 0, 2, 7, 1, 2, 3, 1, 4, 1, 2, 5, 2, 6, 7, 2, 7 + // CHECK-NEXT: values : ( 2, 2, 2, 4, 6, 8, 2, 10, 2, 2, 12, 2, 14, 2, 2, 16 + // CHECK-NEXT: ---- // - %m = sparse_tensor.values %2 : tensor<8x8xf32, #CSR> to memref - %v = vector.transfer_read %m[%c0], %f0: memref, vector<16xf32> - vector.print %v : vector<16xf32> + sparse_tensor.print %2 : tensor<8x8xf32, #CSR> // Release the resources. 
bufferization.dealloc_tensor %1 : tensor<8x8xf32, #CSR> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scf_nested.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scf_nested.mlir index 439144fedeeb89..dd8396dc23b036 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scf_nested.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scf_nested.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -68,17 +68,7 @@ module @func_sparse.2 { return %1 : tensor<2x3x4xf64, #SparseMatrix> } - func.func @dump(%arg0: tensor<2x3x4xf64, #SparseMatrix>) { - %d0 = arith.constant 0.0 : f64 - %c0 = arith.constant 0 : index - %dm = sparse_tensor.convert %arg0 : tensor<2x3x4xf64, #SparseMatrix> to tensor<2x3x4xf64> - %0 = vector.transfer_read %dm[%c0, %c0, %c0], %d0: tensor<2x3x4xf64>, vector<2x3x4xf64> - vector.print %0 : vector<2x3x4xf64> - bufferization.dealloc_tensor %dm : tensor<2x3x4xf64> - return - } - - func.func public @entry() { + func.func public @main() { %src = arith.constant dense<[ [ [ 1.0, 2.0, 3.0, 4.0 ], [ 5.0, 6.0, 7.0, 8.0 ], @@ -96,10 +86,34 @@ module @func_sparse.2 { %sm_t = call @condition(%t, %sm) : (i1, tensor<2x3x4xf64, #SparseMatrix>) -> tensor<2x3x4xf64, #SparseMatrix> %sm_f = call @condition(%f, %sm) : (i1, tensor<2x3x4xf64, #SparseMatrix>) -> tensor<2x3x4xf64, #SparseMatrix> - // CHECK: ( ( ( 0, 1, 2, 3 ), ( 4, 5, 6, 7 ), ( 8, 9, 10, 11 ) ), ( ( 12, 13, 14, 15 ), ( 16, 17, 18, 19 ), ( 20, 21, 22, 23 ) ) ) - // CHECK-NEXT: ( ( ( 2, 
3, 4, 5 ), ( 6, 7, 8, 9 ), ( 10, 11, 12, 13 ) ), ( ( 14, 15, 16, 17 ), ( 18, 19, 20, 21 ), ( 22, 23, 24, 25 ) ) ) - call @dump(%sm_t) : (tensor<2x3x4xf64, #SparseMatrix>) -> () - call @dump(%sm_f) : (tensor<2x3x4xf64, #SparseMatrix>) -> () + // + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 24 + // CHECK-NEXT: dim = ( 2, 3, 4 ) + // CHECK-NEXT: lvl = ( 2, 3, 4 ) + // CHECK-NEXT: pos[0] : ( 0, 2 + // CHECK-NEXT: crd[0] : ( 0, 1 + // CHECK-NEXT: pos[1] : ( 0, 3, 6 + // CHECK-NEXT: crd[1] : ( 0, 1, 2, 0, 1, 2 + // CHECK-NEXT: pos[2] : ( 0, 4, 8, 12, 16, 20, 24 + // CHECK-NEXT: crd[2] : ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 + // CHECK-NEXT: values : ( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 24 + // CHECK-NEXT: dim = ( 2, 3, 4 ) + // CHECK-NEXT: lvl = ( 2, 3, 4 ) + // CHECK-NEXT: pos[0] : ( 0, 2 + // CHECK-NEXT: crd[0] : ( 0, 1 + // CHECK-NEXT: pos[1] : ( 0, 3, 6 + // CHECK-NEXT: crd[1] : ( 0, 1, 2, 0, 1, 2 + // CHECK-NEXT: pos[2] : ( 0, 4, 8, 12, 16, 20, 24 + // CHECK-NEXT: crd[2] : ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 + // CHECK-NEXT: values : ( 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 + // CHECK-NEXT: ---- + // + sparse_tensor.print %sm_t : tensor<2x3x4xf64, #SparseMatrix> + sparse_tensor.print %sm_f : tensor<2x3x4xf64, #SparseMatrix> bufferization.dealloc_tensor %sm : tensor<2x3x4xf64, #SparseMatrix> bufferization.dealloc_tensor %sm_t : tensor<2x3x4xf64, #SparseMatrix> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_select.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_select.mlir index 533afb6644aeda..68bc17175e3b4b 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_select.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_select.mlir @@ -10,7 +10,7 @@ 
// DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -97,39 +97,8 @@ module { return %0 : tensor } - // Dumps a sparse vector of type f64. - func.func @dump_vec(%arg0: tensor) { - // Dump the values array to verify only sparse contents are stored. - %c0 = arith.constant 0 : index - %d0 = arith.constant 0.0 : f64 - %0 = sparse_tensor.values %arg0 : tensor to memref - %1 = vector.transfer_read %0[%c0], %d0: memref, vector<8xf64> - vector.print %1 : vector<8xf64> - // Dump the dense vector to verify structure is correct. - %dv = sparse_tensor.convert %arg0 : tensor to tensor - %2 = vector.transfer_read %dv[%c0], %d0: tensor, vector<16xf64> - vector.print %2 : vector<16xf64> - bufferization.dealloc_tensor %dv : tensor - return - } - - // Dump a sparse matrix. - func.func @dump_mat(%arg0: tensor) { - // Dump the values array to verify only sparse contents are stored. - %c0 = arith.constant 0 : index - %d0 = arith.constant 0.0 : f64 - %0 = sparse_tensor.values %arg0 : tensor to memref - %1 = vector.transfer_read %0[%c0], %d0: memref, vector<16xf64> - vector.print %1 : vector<16xf64> - %dm = sparse_tensor.convert %arg0 : tensor to tensor - %2 = vector.transfer_read %dm[%c0, %c0], %d0: tensor, vector<5x5xf64> - vector.print %2 : vector<5x5xf64> - bufferization.dealloc_tensor %dm : tensor - return - } - // Driver method to call and verify vector kernels. - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index // Setup sparse matrices. @@ -151,19 +120,43 @@ module { // // Verify the results. 
// - // CHECK: ( 1, 2, -4, 0, 5, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 1, 0, 2, 0, -4, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( ( 0, 0, 0, 1, 0 ), ( 0, 0, 0, 0, 2 ), ( 0, 3, 0, 4, 0 ), ( 0, 0, 0, 5, 6 ), ( 0, 0, 7, 0, 0 ) ) - // CHECK-NEXT: ( 1, 2, 5, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 1, 0, 2, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 1, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( ( 0, 0, 0, 1, 0 ), ( 0, 0, 0, 0, 2 ), ( 0, 0, 0, 4, 0 ), ( 0, 0, 0, 0, 6 ), ( 0, 0, 0, 0, 0 ) ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 5 + // CHECK-NEXT: dim = ( 10 ) + // CHECK-NEXT: lvl = ( 10 ) + // CHECK-NEXT: pos[0] : ( 0, 5 + // CHECK-NEXT: crd[0] : ( 1, 3, 5, 7, 9 + // CHECK-NEXT: values : ( 1, 2, -4, 0, 5 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 7 + // CHECK-NEXT: dim = ( 5, 5 ) + // CHECK-NEXT: lvl = ( 5, 5 ) + // CHECK-NEXT: pos[1] : ( 0, 1, 2, 4, 6, 7 + // CHECK-NEXT: crd[1] : ( 3, 4, 1, 3, 3, 4, 2 + // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 3 + // CHECK-NEXT: dim = ( 10 ) + // CHECK-NEXT: lvl = ( 10 ) + // CHECK-NEXT: pos[0] : ( 0, 3 + // CHECK-NEXT: crd[0] : ( 1, 3, 9 + // CHECK-NEXT: values : ( 1, 2, 5 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 4 + // CHECK-NEXT: dim = ( 5, 5 ) + // CHECK-NEXT: lvl = ( 5, 5 ) + // CHECK-NEXT: pos[1] : ( 0, 1, 2, 3, 4, 4 + // CHECK-NEXT: crd[1] : ( 3, 4, 3, 4 + // CHECK-NEXT: values : ( 1, 2, 4, 6 + // CHECK-NEXT: ---- // - call @dump_vec(%sv1) : (tensor) -> () - call @dump_mat(%sm1) : (tensor) -> () - call @dump_vec(%1) : (tensor) -> () - call @dump_mat(%2) : (tensor) -> () + sparse_tensor.print %sv1 : tensor + sparse_tensor.print %sm1 : tensor + sparse_tensor.print %1 : tensor + sparse_tensor.print %2 : tensor // Release the resources. 
bufferization.dealloc_tensor %sv1 : tensor diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_semiring_select.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_semiring_select.mlir index 6244be0ba7ab64..f4435c81117b2d 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_semiring_select.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_semiring_select.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -60,7 +60,7 @@ module { } // Driver method to call and verify vector kernels. 
- func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %f0 = arith.constant 0.0 : f64 @@ -86,20 +86,24 @@ module { tensor<5x5xf64, #DCSR>) -> tensor<5x5xf64, #DCSR> - // CHECK: ( ( 0.1, 1.1, 0, 0, 0 ), - // CHECK-SAME: ( 0, 1.1, 2.2, 0, 0 ), - // CHECK-SAME: ( 0, 0, 2.1, 3.3, 0 ), - // CHECK-SAME: ( 0, 0, 0, 3.1, 4.4 ), - // CHECK-SAME: ( 0, 0, 0, 0, 4.1 ) ) - %r = sparse_tensor.convert %1 : tensor<5x5xf64, #DCSR> to tensor<5x5xf64> - %v2 = vector.transfer_read %r[%c0, %c0], %f0 : tensor<5x5xf64>, vector<5x5xf64> - vector.print %v2 : vector<5x5xf64> + // + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 5, 5 ) + // CHECK-NEXT: lvl = ( 5, 5 ) + // CHECK-NEXT: pos[0] : ( 0, 5 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3, 4 + // CHECK-NEXT: pos[1] : ( 0, 2, 4, 6, 8, 9 + // CHECK-NEXT: crd[1] : ( 0, 1, 1, 2, 2, 3, 3, 4, 4 + // CHECK-NEXT: values : ( 0.1, 1.1, 1.1, 2.2, 2.1, 3.3, 3.1, 4.4, 4.1 + // CHECK-NEXT: ---- + // + sparse_tensor.print %1 : tensor<5x5xf64, #DCSR> // Release the resources. 
bufferization.dealloc_tensor %sl: tensor<5x5xf64, #DCSR> bufferization.dealloc_tensor %sr: tensor<5x5xf64, #DCSR> bufferization.dealloc_tensor %1: tensor<5x5xf64, #DCSR> - bufferization.dealloc_tensor %r : tensor<5x5xf64> return } diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sign.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sign.mlir index 08e75dfa2c02ca..c09374918b7d6a 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sign.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sign.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -79,7 +79,7 @@ module { } // Driver method to call and verify sign kernel. - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %du = arith.constant 0.0 : f64 @@ -110,11 +110,16 @@ module { // // Verify the results. // - // CHECK: ( -1, 1, -1, 1, 1, -1, nan, -nan, 1, -1, -0, 0, 0 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 12 + // CHECK-NEXT: dim = ( 32 ) + // CHECK-NEXT: lvl = ( 32 ) + // CHECK-NEXT: pos[0] : ( 0, 12 + // CHECK-NEXT: crd[0] : ( 0, 3, 5, 11, 13, 17, 18, 20, 21, 28, 29, 31 + // CHECK-NEXT: values : ( -1, 1, -1, 1, 1, -1, nan, -nan, 1, -1, -0, 0 + // CHECK-NEXT: ---- // - %1 = sparse_tensor.values %0 : tensor to memref - %2 = vector.transfer_read %1[%c0], %du: memref, vector<13xf64> - vector.print %2 : vector<13xf64> + sparse_tensor.print %0 : tensor // Release the resources. 
bufferization.dealloc_tensor %sv1 : tensor diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir index e0111f692601f0..7b3f9a2ce0e012 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -35,19 +35,19 @@ !Filename = !llvm.ptr #SortedCOO = #sparse_tensor.encoding<{ - map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton) + map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton(soa)) }> #SortedCOOPermuted = #sparse_tensor.encoding<{ - map = (d0, d1) -> (d1 : compressed(nonunique), d0 : singleton), + map = (d0, d1) -> (d1 : compressed(nonunique), d0 : singleton(soa)), }> #SortedCOO3D = #sparse_tensor.encoding<{ - map = (d0, d1, d2) -> (d0 : compressed(nonunique), d1 : singleton(nonunique), d2 : singleton) + map = (d0, d1, d2) -> (d0 : compressed(nonunique), d1 : singleton(nonunique, soa), d2 : singleton(soa)) }> #SortedCOO3DPermuted = #sparse_tensor.encoding<{ - map = (d0, d1, d2) -> (d2 : compressed(nonunique), d0 : singleton(nonunique), d1 : singleton) + map = (d0, d1, d2) -> (d2 : compressed(nonunique), d0 : singleton(nonunique, soa), d1 : singleton(soa)) }> @@ -82,29 +82,7 @@ module { return %0 : tensor } - func.func @dumpi(%arg0: memref) { - %c0 = arith.constant 0 : index - %v = vector.transfer_read %arg0[%c0], %c0: memref, 
vector<20xindex> - vector.print %v : vector<20xindex> - return - } - - func.func @dumpsi(%arg0: memref>) { - %c0 = arith.constant 0 : index - %v = vector.transfer_read %arg0[%c0], %c0: memref>, vector<20xindex> - vector.print %v : vector<20xindex> - return - } - - func.func @dumpf(%arg0: memref) { - %c0 = arith.constant 0 : index - %nan = arith.constant 0x0 : f64 - %v = vector.transfer_read %arg0[%c0], %nan: memref, vector<20xf64> - vector.print %v : vector<20xf64> - return - } - - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index @@ -125,130 +103,88 @@ module { %4 = sparse_tensor.convert %m : tensor<5x4xf64> to tensor // - // CHECK: ( 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 0, 0, 0, 1, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 126, 127, 254, 1, 253, 2, 0, 1, 3, 98, 126, 127, 128, 249, 253, 255, 0, 0, 0 ) - // CHECK-NEXT: ( -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17, 0, 0, 0 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 17 + // CHECK-NEXT: dim = ( 4, 256 ) + // CHECK-NEXT: lvl = ( 4, 256 ) + // CHECK-NEXT: pos[0] : ( 0, 17 + // CHECK-NEXT: crd[0] : ( 0, 0, 0, 0, 1, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 + // CHECK-NEXT: crd[1] : ( 0, 126, 127, 254, 1, 253, 2, 0, 1, 3, 98, 126, 127, 128, 249, 253, 255 + // CHECK-NEXT: values : ( -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17 + // CHECK-NEXT: ---- // - %p0 = sparse_tensor.positions %0 { level = 0 : index } - : tensor to memref - %i00 = sparse_tensor.coordinates %0 { level = 0 : index } - : tensor to memref> - %i01 = sparse_tensor.coordinates %0 { level = 1 : index } - : tensor to memref> - %v0 = sparse_tensor.values %0 - : tensor to memref - call @dumpi(%p0) : (memref) -> () - call @dumpsi(%i00) : (memref>) -> () - call @dumpsi(%i01) : (memref>) -> () - call @dumpf(%v0) : (memref) -> () + sparse_tensor.print %0 : tensor // - // CHECK-NEXT: 
( 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 0, 1, 1, 2, 3, 98, 126, 126, 127, 127, 128, 249, 253, 253, 254, 255, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 3, 1, 3, 2, 3, 3, 0, 3, 0, 3, 3, 3, 1, 3, 0, 3, 0, 0, 0 ) - // CHECK-NEXT: ( -1, 8, -5, -9, -7, 10, -11, 2, 12, -3, -13, 14, -15, 6, 16, 4, -17, 0, 0, 0 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 17 + // CHECK-NEXT: dim = ( 4, 256 ) + // CHECK-NEXT: lvl = ( 256, 4 ) + // CHECK-NEXT: pos[0] : ( 0, 17 + // CHECK-NEXT: crd[0] : ( 0, 0, 1, 1, 2, 3, 98, 126, 126, 127, 127, 128, 249, 253, 253, 254, 255 + // CHECK-NEXT: crd[1] : ( 0, 3, 1, 3, 2, 3, 3, 0, 3, 0, 3, 3, 3, 1, 3, 0, 3 + // CHECK-NEXT: values : ( -1, 8, -5, -9, -7, 10, -11, 2, 12, -3, -13, 14, -15, 6, 16, 4, -17 + // CHECK-NEXT: ---- // - %p1 = sparse_tensor.positions %1 { level = 0 : index } - : tensor to memref - %i10 = sparse_tensor.coordinates %1 { level = 0 : index } - : tensor to memref> - %i11 = sparse_tensor.coordinates %1 { level = 1 : index } - : tensor to memref> - %v1 = sparse_tensor.values %1 - : tensor to memref - call @dumpi(%p1) : (memref) -> () - call @dumpsi(%i10) : (memref>) -> () - call @dumpsi(%i11) : (memref>) -> () - call @dumpf(%v1) : (memref) -> () + sparse_tensor.print %1 : tensor // - // CHECK-NEXT: ( 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 0, 1, 1, 2, 2, 2, 2, 0, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 0, 1, 1, 2, 2, 2, 2, 0, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 0 ) - // CHECK-NEXT: ( 3, 63, 11, 100, 66, 61, 13, 43, 77, 10, 46, 61, 53, 3, 75, 22, 18, 0, 0, 0 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 17 + // CHECK-NEXT: dim = ( 2, 3, 4 ) + // CHECK-NEXT: lvl = ( 2, 3, 4 ) + // CHECK-NEXT: pos[0] : ( 0, 17 + // CHECK-NEXT: crd[0] : ( 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1 + // CHECK-NEXT: crd[1] : ( 0, 0, 1, 1, 2, 2, 2, 2, 
0, 0, 0, 1, 1, 1, 1, 2, 2 + // CHECK-NEXT: crd[2] : ( 2, 3, 1, 2, 0, 1, 2, 3, 0, 2, 3, 0, 1, 2, 3, 1, 2 + // CHECK-NEXT: values : ( 3, 63, 11, 100, 66, 61, 13, 43, 77, 10, 46, 61, 53, 3, 75, 22, 18 + // CHECK-NEXT: ---- // - %p2 = sparse_tensor.positions %2 { level = 0 : index } - : tensor to memref - %i20 = sparse_tensor.coordinates %2 { level = 0 : index } - : tensor to memref> - %i21 = sparse_tensor.coordinates %2 { level = 1 : index } - : tensor to memref> - %i22 = sparse_tensor.coordinates %2 { level = 2 : index } - : tensor to memref> - %v2 = sparse_tensor.values %2 - : tensor to memref - call @dumpi(%p2) : (memref) -> () - call @dumpsi(%i20) : (memref>) -> () - call @dumpsi(%i21) : (memref>) -> () - call @dumpsi(%i21) : (memref>) -> () - call @dumpf(%v2) : (memref) -> () + sparse_tensor.print %2 : tensor // - // CHECK-NEXT: ( 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0 ) - // CHECK-NEXT: ( 66, 77, 61, 11, 61, 53, 22, 3, 100, 13, 10, 3, 18, 63, 43, 46, 75, 0, 0, 0 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 17 + // CHECK-NEXT: dim = ( 2, 3, 4 ) + // CHECK-NEXT: lvl = ( 4, 2, 3 ) + // CHECK-NEXT: pos[0] : ( 0, 17 + // CHECK-NEXT: crd[0] : ( 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 + // CHECK-NEXT: crd[1] : ( 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1 + // CHECK-NEXT: crd[2] : ( 2, 0, 1, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 0, 2, 0, 1 + // CHECK-NEXT: values : ( 66, 77, 61, 11, 61, 53, 22, 3, 100, 13, 10, 3, 18, 63, 43, 46, 75 + // CHECK-NEXT: ---- // - %p3 = sparse_tensor.positions %3 { level = 0 : index } - : tensor to memref - %i30 = sparse_tensor.coordinates %3 { level = 0 : index } - : tensor to memref> - %i31 = sparse_tensor.coordinates %3 { level = 1 : index } - : tensor to 
memref> - %i32 = sparse_tensor.coordinates %3 { level = 2 : index } - : tensor to memref> - %v3 = sparse_tensor.values %3 - : tensor to memref - call @dumpi(%p3) : (memref) -> () - call @dumpsi(%i30) : (memref>) -> () - call @dumpsi(%i31) : (memref>) -> () - call @dumpsi(%i31) : (memref>) -> () - call @dumpf(%v3) : (memref) -> () + sparse_tensor.print %3 : tensor // - // CHECK-NEXT: ( 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 1, 2, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 3, 0, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 6, 5, 4, 3, 2, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 6 + // CHECK-NEXT: dim = ( 5, 4 ) + // CHECK-NEXT: lvl = ( 5, 4 ) + // CHECK-NEXT: pos[0] : ( 0, 6 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 2, 3, 4 + // CHECK-NEXT: crd[1] : ( 0, 3, 0, 3, 1, 1 + // CHECK-NEXT: values : ( 6, 5, 4, 3, 2, 11 + // CHECK-NEXT: ---- // - %p4 = sparse_tensor.positions %4 { level = 0 : index } - : tensor to memref - %i40 = sparse_tensor.coordinates %4 { level = 0 : index } - : tensor to memref> - %i41 = sparse_tensor.coordinates %4 { level = 1 : index } - : tensor to memref> - %v4 = sparse_tensor.values %4 - : tensor to memref - call @dumpi(%p4) : (memref) -> () - call @dumpsi(%i40) : (memref>) -> () - call @dumpsi(%i41) : (memref>) -> () - call @dumpf(%v4) : (memref) -> () + sparse_tensor.print %4 : tensor // And last but not least, an actual operation applied to COO. // Note that this performs the operation "in place". 
%5 = call @sparse_scale(%4) : (tensor) -> tensor // - // CHECK-NEXT: ( 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 1, 2, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 3, 0, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 12, 10, 8, 6, 4, 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 6 + // CHECK-NEXT: dim = ( 5, 4 ) + // CHECK-NEXT: lvl = ( 5, 4 ) + // CHECK-NEXT: pos[0] : ( 0, 6 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 2, 3, 4 + // CHECK-NEXT: crd[1] : ( 0, 3, 0, 3, 1, 1 + // CHECK-NEXT: values : ( 12, 10, 8, 6, 4, 22 + // CHECK-NEXT: ---- // - %p5 = sparse_tensor.positions %5 { level = 0 : index } - : tensor to memref - %i50 = sparse_tensor.coordinates %5 { level = 0 : index } - : tensor to memref> - %i51 = sparse_tensor.coordinates %5 { level = 1 : index } - : tensor to memref> - %v5 = sparse_tensor.values %5 - : tensor to memref - call @dumpi(%p5) : (memref) -> () - call @dumpsi(%i50) : (memref>) -> () - call @dumpsi(%i51) : (memref>) -> () - call @dumpf(%v5) : (memref) -> () + sparse_tensor.print %5 : tensor // Release the resources. 
bufferization.dealloc_tensor %0 : tensor diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir index 573b1a2aac2598..ca8bcd7744c8f4 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -76,7 +76,7 @@ module { // // Main driver that reads matrix from file and calls the sparse kernel. // - func.func @entry() { + func.func @main() { %i0 = arith.constant 0.0 : f64 %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_storage.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_storage.mlir index 8ca95f2139e49a..2ee189de7906ca 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_storage.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_storage.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ 
-72,7 +72,7 @@ module { // are typically not concerned with such details, but the test ensures // everything is working "under the hood". // - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %d0 = arith.constant 0.0 : f64 @@ -107,166 +107,103 @@ module { // // Inspect storage scheme of Dense. // - // CHECK: ( 1, 0, 2, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, - // CHECK-SAME: 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, - // CHECK-SAME: 0, 0, 0, 0, 6, 0, 0, 0, 0, 7, 8, 0, 0, 0, 0, 9, - // CHECK-SAME: 0, 0, 10, 0, 0, 0, 11, 12, 0, 13, 14, 0, 0, 0, 15, 16, - // CHECK-SAME: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 0 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 80 + // CHECK-NEXT: dim = ( 10, 8 ) + // CHECK-NEXT: lvl = ( 10, 8 ) + // CHECK-NEXT: values : ( 1, 0, 2, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 7, 8, 0, 0, 0, 0, 9, 0, 0, 10, 0, 0, 0, 11, 12, 0, 13, 14, 0, 0, 0, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 0 + // CHECK-NEXT: ---- // - %5 = sparse_tensor.values %0 : tensor<10x8xf64, #Dense> to memref - %6 = vector.transfer_read %5[%c0], %d0: memref, vector<80xf64> - vector.print %6 : vector<80xf64> + sparse_tensor.print %0 : tensor<10x8xf64, #Dense> // // Inspect storage scheme of CSR. 
// - // positions(1) - // indices(1) - // values // - // CHECK: ( 0, 3, 3, 4, 5, 6, 9, 12, 16, 16, 17 ) - // CHECK: ( 0, 2, 7, 2, 3, 4, 1, 2, 7, 2, 6, 7, 1, 2, 6, 7, 6 ) - // CHECK: ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 17 + // CHECK-NEXT: dim = ( 10, 8 ) + // CHECK-NEXT: lvl = ( 10, 8 ) + // CHECK-NEXT: pos[1] : ( 0, 3, 3, 4, 5, 6, 9, 12, 16, 16, 17 + // CHECK-NEXT: crd[1] : ( 0, 2, 7, 2, 3, 4, 1, 2, 7, 2, 6, 7, 1, 2, 6, 7, 6 + // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 + // CHECK-NEXT: ---- // - %7 = sparse_tensor.positions %1 { level = 1 : index } : tensor<10x8xf64, #CSR> to memref - %8 = vector.transfer_read %7[%c0], %c0: memref, vector<11xindex> - vector.print %8 : vector<11xindex> - %9 = sparse_tensor.coordinates %1 { level = 1 : index } : tensor<10x8xf64, #CSR> to memref - %10 = vector.transfer_read %9[%c0], %c0: memref, vector<17xindex> - vector.print %10 : vector<17xindex> - %11 = sparse_tensor.values %1 : tensor<10x8xf64, #CSR> to memref - %12 = vector.transfer_read %11[%c0], %d0: memref, vector<17xf64> - vector.print %12 : vector<17xf64> + sparse_tensor.print %1 : tensor<10x8xf64, #CSR> // // Inspect storage scheme of DCSR. 
// - // positions(0) - // indices(0) - // positions(1) - // indices(1) - // values + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 17 + // CHECK-NEXT: dim = ( 10, 8 ) + // CHECK-NEXT: lvl = ( 10, 8 ) + // CHECK-NEXT: pos[0] : ( 0, 8 + // CHECK-NEXT: crd[0] : ( 0, 2, 3, 4, 5, 6, 7, 9 + // CHECK-NEXT: pos[1] : ( 0, 3, 4, 5, 6, 9, 12, 16, 17 + // CHECK-NEXT: crd[1] : ( 0, 2, 7, 2, 3, 4, 1, 2, 7, 2, 6, 7, 1, 2, 6, 7, 6 + // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 + // CHECK-NEXT: ---- // - // CHECK: ( 0, 8 ) - // CHECK: ( 0, 2, 3, 4, 5, 6, 7, 9 ) - // CHECK: ( 0, 3, 4, 5, 6, 9, 12, 16, 17 ) - // CHECK: ( 0, 2, 7, 2, 3, 4, 1, 2, 7, 2, 6, 7, 1, 2, 6, 7, 6 ) - // CHECK: ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 ) - // - %13 = sparse_tensor.positions %2 { level = 0 : index } : tensor<10x8xf64, #DCSR> to memref - %14 = vector.transfer_read %13[%c0], %c0: memref, vector<2xindex> - vector.print %14 : vector<2xindex> - %15 = sparse_tensor.coordinates %2 { level = 0 : index } : tensor<10x8xf64, #DCSR> to memref - %16 = vector.transfer_read %15[%c0], %c0: memref, vector<8xindex> - vector.print %16 : vector<8xindex> - %17 = sparse_tensor.positions %2 { level = 1 : index } : tensor<10x8xf64, #DCSR> to memref - %18 = vector.transfer_read %17[%c0], %c0: memref, vector<9xindex> - vector.print %18 : vector<9xindex> - %19 = sparse_tensor.coordinates %2 { level = 1 : index } : tensor<10x8xf64, #DCSR> to memref - %20 = vector.transfer_read %19[%c0], %c0: memref, vector<17xindex> - vector.print %20 : vector<17xindex> - %21 = sparse_tensor.values %2 : tensor<10x8xf64, #DCSR> to memref - %22 = vector.transfer_read %21[%c0], %d0: memref, vector<17xf64> - vector.print %22 : vector<17xf64> + sparse_tensor.print %2 : tensor<10x8xf64, #DCSR> // // Inspect storage scheme of CSC. 
// - // positions(1) - // indices(1) - // values - // - // CHECK: ( 0, 1, 3, 8, 9, 10, 10, 13, 17 ) - // CHECK: ( 0, 5, 7, 0, 2, 5, 6, 7, 3, 4, 6, 7, 9, 0, 5, 6, 7 ) - // CHECK: ( 1, 7, 13, 2, 4, 8, 10, 14, 5, 6, 11, 15, 17, 3, 9, 12, 16 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 17 + // CHECK-NEXT: dim = ( 10, 8 ) + // CHECK-NEXT: lvl = ( 8, 10 ) + // CHECK-NEXT: pos[1] : ( 0, 1, 3, 8, 9, 10, 10, 13, 17 + // CHECK-NEXT: crd[1] : ( 0, 5, 7, 0, 2, 5, 6, 7, 3, 4, 6, 7, 9, 0, 5, 6, 7 + // CHECK-NEXT: values : ( 1, 7, 13, 2, 4, 8, 10, 14, 5, 6, 11, 15, 17, 3, 9, 12, 16 + // CHECK-NEXT: ---- // - %23 = sparse_tensor.positions %3 { level = 1 : index } : tensor<10x8xf64, #CSC> to memref - %24 = vector.transfer_read %23[%c0], %c0: memref, vector<9xindex> - vector.print %24 : vector<9xindex> - %25 = sparse_tensor.coordinates %3 { level = 1 : index } : tensor<10x8xf64, #CSC> to memref - %26 = vector.transfer_read %25[%c0], %c0: memref, vector<17xindex> - vector.print %26 : vector<17xindex> - %27 = sparse_tensor.values %3 : tensor<10x8xf64, #CSC> to memref - %28 = vector.transfer_read %27[%c0], %d0: memref, vector<17xf64> - vector.print %28 : vector<17xf64> + sparse_tensor.print %3 : tensor<10x8xf64, #CSC> // // Inspect storage scheme of DCSC. 
// - // positions(0) - // indices(0) - // positions(1) - // indices(1) - // values - // - // CHECK: ( 0, 7 ) - // CHECK: ( 0, 1, 2, 3, 4, 6, 7 ) - // CHECK: ( 0, 1, 3, 8, 9, 10, 13, 17 ) - // CHECK: ( 0, 5, 7, 0, 2, 5, 6, 7, 3, 4, 6, 7, 9, 0, 5, 6, 7 ) - // CHECK: ( 1, 7, 13, 2, 4, 8, 10, 14, 5, 6, 11, 15, 17, 3, 9, 12, 16 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 17 + // CHECK-NEXT: dim = ( 10, 8 ) + // CHECK-NEXT: lvl = ( 8, 10 ) + // CHECK-NEXT: pos[0] : ( 0, 7 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3, 4, 6, 7 + // CHECK-NEXT: pos[1] : ( 0, 1, 3, 8, 9, 10, 13, 17 + // CHECK-NEXT: crd[1] : ( 0, 5, 7, 0, 2, 5, 6, 7, 3, 4, 6, 7, 9, 0, 5, 6, 7 + // CHECK-NEXT: values : ( 1, 7, 13, 2, 4, 8, 10, 14, 5, 6, 11, 15, 17, 3, 9, 12, 16 + // CHECK-NEXT: ---- // - %29 = sparse_tensor.positions %4 { level = 0 : index } : tensor<10x8xf64, #DCSC> to memref - %30 = vector.transfer_read %29[%c0], %c0: memref, vector<2xindex> - vector.print %30 : vector<2xindex> - %31 = sparse_tensor.coordinates %4 { level = 0 : index } : tensor<10x8xf64, #DCSC> to memref - %32 = vector.transfer_read %31[%c0], %c0: memref, vector<7xindex> - vector.print %32 : vector<7xindex> - %33 = sparse_tensor.positions %4 { level = 1 : index } : tensor<10x8xf64, #DCSC> to memref - %34 = vector.transfer_read %33[%c0], %c0: memref, vector<8xindex> - vector.print %34 : vector<8xindex> - %35 = sparse_tensor.coordinates %4 { level = 1 : index } : tensor<10x8xf64, #DCSC> to memref - %36 = vector.transfer_read %35[%c0], %c0: memref, vector<17xindex> - vector.print %36 : vector<17xindex> - %37 = sparse_tensor.values %4 : tensor<10x8xf64, #DCSC> to memref - %38 = vector.transfer_read %37[%c0], %d0: memref, vector<17xf64> - vector.print %38 : vector<17xf64> + sparse_tensor.print %4 : tensor<10x8xf64, #DCSC> // // Inspect storage scheme of BlockRow. 
// - // positions(0) - // indices(0) - // values + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 64 + // CHECK-NEXT: dim = ( 10, 8 ) + // CHECK-NEXT: lvl = ( 10, 8 ) + // CHECK-NEXT: pos[0] : ( 0, 8 + // CHECK-NEXT: crd[0] : ( 0, 2, 3, 4, 5, 6, 7, 9 + // CHECK-NEXT: values : ( 1, 0, 2, 0, 0, 0, 0, 3, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 7, 8, 0, 0, 0, 0, 9, 0, 0, 10, 0, 0, 0, 11, 12, 0, 13, 14, 0, 0, 0, 15, 16, 0, 0, 0, 0, 0, 0, 17, 0 + // CHECK-NEXT: ---- // - // CHECK: ( 0, 8 ) - // CHECK: ( 0, 2, 3, 4, 5, 6, 7, 9 ) - // CHECK: ( 1, 0, 2, 0, 0, 0, 0, 3, 0, 0, 4, 0, 0, 0, 0, 0, - // CHECK-SAME: 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, - // CHECK-SAME: 0, 7, 8, 0, 0, 0, 0, 9, 0, 0, 10, 0, 0, 0, 11, 12, - // CHECK-SAME: 0, 13, 14, 0, 0, 0, 15, 16, 0, 0, 0, 0, 0, 0, 17, 0 ) - // - %39 = sparse_tensor.positions %x { level = 0 : index } : tensor<10x8xf64, #BlockRow> to memref - %40 = vector.transfer_read %39[%c0], %c0: memref, vector<2xindex> - vector.print %40 : vector<2xindex> - %41 = sparse_tensor.coordinates %x { level = 0 : index } : tensor<10x8xf64, #BlockRow> to memref - %42 = vector.transfer_read %41[%c0], %c0: memref, vector<8xindex> - vector.print %42 : vector<8xindex> - %43 = sparse_tensor.values %x : tensor<10x8xf64, #BlockRow> to memref - %44 = vector.transfer_read %43[%c0], %d0: memref, vector<64xf64> - vector.print %44 : vector<64xf64> + sparse_tensor.print %x : tensor<10x8xf64, #BlockRow> // // Inspect storage scheme of BlockCol. 
// - // positions(0) - // indices(0) - // values - // - // CHECK: ( 0, 7 ) - // CHECK: ( 0, 1, 2, 3, 4, 6, 7 ) - // CHECK: ( 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 13, 0, 0, 2, 0, 4, 0, - // CHECK-SAME: 0, 8, 10, 14, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, - // CHECK-SAME: 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 15, 0, 17, 3, 0, 0, 0, 0, 9, 12, 16, 0, 0 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 70 + // CHECK-NEXT: dim = ( 10, 8 ) + // CHECK-NEXT: lvl = ( 8, 10 ) + // CHECK-NEXT: pos[0] : ( 0, 7 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3, 4, 6, 7 + // CHECK-NEXT: values : ( 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 13, 0, 0, 2, 0, 4, 0, 0, 8, 10, 14, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 15, 0, 17, 3, 0, 0, 0, 0, 9, 12, 16, 0, 0 + // CHECK-NEXT: ---- // - %45 = sparse_tensor.positions %y { level = 0 : index } : tensor<10x8xf64, #BlockCol> to memref - %46 = vector.transfer_read %45[%c0], %c0: memref, vector<2xindex> - vector.print %46 : vector<2xindex> - %47 = sparse_tensor.coordinates %y { level = 0 : index } : tensor<10x8xf64, #BlockCol> to memref - %48 = vector.transfer_read %47[%c0], %c0: memref, vector<7xindex> - vector.print %48 : vector<7xindex> - %49 = sparse_tensor.values %y : tensor<10x8xf64, #BlockCol> to memref - %50 = vector.transfer_read %49[%c0], %d0: memref, vector<70xf64> - vector.print %50 : vector<70xf64> + sparse_tensor.print %y : tensor<10x8xf64, #BlockCol> // Release the resources. 
bufferization.dealloc_tensor %0 : tensor<10x8xf64, #Dense> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_strided_conv_2d_nhwc_hwcf.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_strided_conv_2d_nhwc_hwcf.mlir index 5184083f665d56..2b2b8536fe39ed 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_strided_conv_2d_nhwc_hwcf.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_strided_conv_2d_nhwc_hwcf.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -78,7 +78,7 @@ func.func @conv_2d_nhwc_hwcf_dual_CDCC(%arg0: tensor, %arg1: } -func.func @entry() { +func.func @main() { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %c3 = arith.constant 3 : index diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir index e6cbff231024ed..d1c58bfb6d59ef 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd 
--march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -75,7 +75,7 @@ module { // // Main driver that reads matrix from file and calls the sparse kernel. // - func.func @entry() { + func.func @main() { %d0 = arith.constant 0.0 : f64 %c0 = arith.constant 0 : index diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_bf16.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_bf16.mlir index ee00a19a412306..16a8b50ab08e5c 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_bf16.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_bf16.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -67,7 +67,7 @@ module { // // Main driver that reads matrix from file and calls the sparse kernel. // - func.func @entry() { + func.func @main() { // Setup input sparse matrix from compressed constant. 
%d = arith.constant dense <[ [ 1.1, 1.2, 0.0, 1.4 ], diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_c32.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_c32.mlir index 5fdf636ef1230a..f95c163a57c164 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_c32.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_c32.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -75,7 +75,7 @@ module { // // Main driver that reads matrix from file and calls the sparse kernel. 
// - func.func @entry() { + func.func @main() { //%d0 = arith.constant 0.0 : complex %d0 = complex.constant [0.0 : f64, 0.0 : f64] : complex %c0 = arith.constant 0 : index diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_f16.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_f16.mlir index 6a34695229495d..30be587c8f6119 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_f16.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_f16.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -66,7 +66,7 @@ module { // // Main driver that reads matrix from file and calls the sparse kernel. // - func.func @entry() { + func.func @main() { // Setup input sparse matrix from compressed constant. 
%d = arith.constant dense <[ [ 1.1, 1.2, 0.0, 1.4 ], diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tanh.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tanh.mlir index 336044d5660057..29bc744c992032 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tanh.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tanh.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -56,28 +56,8 @@ module { return %0 : tensor } - // Dumps a sparse vector of type f64. - func.func @dump_vec_f64(%arg0: tensor) { - // Dump the values array to verify only sparse contents are stored. - %c0 = arith.constant 0 : index - %d0 = arith.constant -1.0 : f64 - %n = sparse_tensor.number_of_entries %arg0: tensor - vector.print %n : index - %0 = sparse_tensor.values %arg0 - : tensor to memref - %1 = vector.transfer_read %0[%c0], %d0: memref, vector<9xf64> - vector.print %1 : vector<9xf64> - // Dump the dense vector to verify structure is correct. - %dv = sparse_tensor.convert %arg0 - : tensor to tensor - %3 = vector.transfer_read %dv[%c0], %d0: tensor, vector<32xf64> - vector.print %3 : vector<32xf64> - bufferization.dealloc_tensor %dv : tensor - return - } - // Driver method to call and verify vector kernels. - func.func @entry() { + func.func @main() { // Setup sparse vector. %v1 = arith.constant sparse< [ [0], [3], [11], [17], [20], [21], [28], [29], [31] ], @@ -93,11 +73,16 @@ module { // // Verify the results (within some precision). 
// - // CHECK: 9 - // CHECK-NEXT: {{( -0.761[0-9]*, 0.761[0-9]*, 0.96[0-9]*, 0.99[0-9]*, 0.99[0-9]*, 0.99[0-9]*, 0.99[0-9]*, 0.99[0-9]*, 1 )}} - // CHECK-NEXT: {{( -0.761[0-9]*, 0, 0, 0.761[0-9]*, 0, 0, 0, 0, 0, 0, 0, 0.96[0-9]*, 0, 0, 0, 0, 0, 0.99[0-9]*, 0, 0, 0.99[0-9]*, 0.99[0-9]*, 0, 0, 0, 0, 0, 0, 0.99[0-9]*, 0.99[0-9]*, 0, 1 )}} + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 32 ) + // CHECK-NEXT: lvl = ( 32 ) + // CHECK-NEXT: pos[0] : ( 0, 9 + // CHECK-NEXT: crd[0] : ( 0, 3, 11, 17, 20, 21, 28, 29, 31 + // CHECK-NEXT: values : ({{ -0.761[0-9]*, 0.761[0-9]*, 0.96[0-9]*, 0.99[0-9]*, 0.99[0-9]*, 0.99[0-9]*, 0.99[0-9]*, 0.99[0-9]*, 1}} + // CHECK-NEXT: ---- // - call @dump_vec_f64(%0) : (tensor) -> () + sparse_tensor.print %0 : tensor // Release the resources. bufferization.dealloc_tensor %sv1 : tensor diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_mul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_mul.mlir index d53b03025f5588..67155201c58442 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_mul.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_mul.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -67,7 +67,7 @@ module { } // Driver method to call and verify tensor multiplication kernel. 
- func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %default_val = arith.constant -1.0 : f64 @@ -103,30 +103,28 @@ module { %0 = call @tensor_mul(%sta, %stb) : (tensor, tensor) -> tensor - // Verify results // - // CHECK: 4 - // CHECK-NEXT: ( 2.4, 3.5, 2, 8 ) - // CHECK-NEXT: ( ( ( 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0 ), ( 2.4, 0, 3.5, 0, 0 ) ), - // CHECK-SAME: ( ( 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0 ) ), - // CHECK-SAME: ( ( 2, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0 ), ( 0, 0, 8, 0, 0 ) ) ) + // Verify results. // - %n = sparse_tensor.number_of_entries %0 : tensor - vector.print %n : index - %m1 = sparse_tensor.values %0 : tensor to memref - %v1 = vector.transfer_read %m1[%c0], %default_val: memref, vector<4xf64> - vector.print %v1 : vector<4xf64> - - // Print %0 in dense form. - %dt = sparse_tensor.convert %0 : tensor to tensor - %v2 = vector.transfer_read %dt[%c0, %c0, %c0], %default_val: tensor, vector<3x3x5xf64> - vector.print %v2 : vector<3x3x5xf64> + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 4 + // CHECK-NEXT: dim = ( 3, 3, 5 ) + // CHECK-NEXT: lvl = ( 3, 3, 5 ) + // CHECK-NEXT: pos[0] : ( 0, 2 + // CHECK-NEXT: crd[0] : ( 0, 2 + // CHECK-NEXT: pos[1] : ( 0, 1, 3 + // CHECK-NEXT: crd[1] : ( 2, 0, 2 + // CHECK-NEXT: pos[2] : ( 0, 2, 3, 4 + // CHECK-NEXT: crd[2] : ( 0, 2, 0, 2 + // CHECK-NEXT: values : ( 2.4, 3.5, 2, 8 + // CHECK-NEXT: ---- + // + sparse_tensor.print %0 : tensor // Release the resources. 
bufferization.dealloc_tensor %sta : tensor bufferization.dealloc_tensor %stb : tensor bufferization.dealloc_tensor %0 : tensor - bufferization.dealloc_tensor %dt : tensor return } diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_ops.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_ops.mlir index 6ef6b393019a8e..356808ebee3f7c 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_ops.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_ops.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -67,7 +67,7 @@ module { } // Driver method to call and verify tensor kernel. - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %d1 = arith.constant -1.0 : f64 @@ -90,22 +90,34 @@ module { // Call sparse vector kernels. %0 = call @tensor_scale(%st) : (tensor) -> tensor + // // Sanity check on stored values. 
// - // CHECK: 5 - // CHECK-NEXT: ( 1, 2, 3, 4, 5 ) - // CHECK-NEXT: 24 - // CHECK-NEXT: ( 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 6, 8, 0, 0, 0, 0, 10 ) - %m1 = sparse_tensor.values %st : tensor to memref - %m2 = sparse_tensor.values %0 : tensor to memref - %n1 = sparse_tensor.number_of_entries %st : tensor - %n2 = sparse_tensor.number_of_entries %0 : tensor - %v1 = vector.transfer_read %m1[%c0], %d1: memref, vector<5xf64> - %v2 = vector.transfer_read %m2[%c0], %d1: memref, vector<24xf64> - vector.print %n1 : index - vector.print %v1 : vector<5xf64> - vector.print %n2 : index - vector.print %v2 : vector<24xf64> + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 5 + // CHECK-NEXT: dim = ( 3, 4, 8 ) + // CHECK-NEXT: lvl = ( 3, 4, 8 ) + // CHECK-NEXT: pos[0] : ( 0, 2 + // CHECK-NEXT: crd[0] : ( 0, 2 + // CHECK-NEXT: pos[1] : ( 0, 2, 3 + // CHECK-NEXT: crd[1] : ( 0, 3, 2 + // CHECK-NEXT: pos[2] : ( 0, 1, 2, 5 + // CHECK-NEXT: crd[2] : ( 0, 7, 1, 2, 7 + // CHECK-NEXT: values : ( 1, 2, 3, 4, 5 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 24 + // CHECK-NEXT: dim = ( 3, 4, 8 ) + // CHECK-NEXT: lvl = ( 3, 4, 8 ) + // CHECK-NEXT: pos[0] : ( 0, 2 + // CHECK-NEXT: crd[0] : ( 0, 2 + // CHECK-NEXT: pos[1] : ( 0, 2, 3 + // CHECK-NEXT: crd[1] : ( 0, 3, 2 + // CHECK-NEXT: values : ( 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 6, 8, 0, 0, 0, 0, 10 + // CHECK-NEXT: ---- + // + sparse_tensor.print %st : tensor + sparse_tensor.print %0 : tensor // Release the resources. 
bufferization.dealloc_tensor %st : tensor diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose.mlir index 185f6161493e04..549c2082fcb3ac 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -92,7 +92,7 @@ module { // // Main driver. // - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %c4 = arith.constant 4 : index @@ -115,26 +115,29 @@ module { // // Verify result. 
// - // CHECK: ( 1.1, 0, 3.1 ) - // CHECK-NEXT: ( 1.2, 0, 0 ) - // CHECK-NEXT: ( 0, 0, 3.3 ) - // CHECK-NEXT: ( 1.4, 0, 3.4 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 6 + // CHECK-NEXT: dim = ( 4, 3 ) + // CHECK-NEXT: lvl = ( 4, 3 ) + // CHECK-NEXT: pos[0] : ( 0, 4 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3 + // CHECK-NEXT: pos[1] : ( 0, 2, 3, 4, 6 + // CHECK-NEXT: crd[1] : ( 0, 2, 0, 2, 0, 2 + // CHECK-NEXT: values : ( 1.1, 3.1, 1.2, 3.3, 1.4, 3.4 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 6 + // CHECK-NEXT: dim = ( 4, 3 ) + // CHECK-NEXT: lvl = ( 4, 3 ) + // CHECK-NEXT: pos[0] : ( 0, 4 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3 + // CHECK-NEXT: pos[1] : ( 0, 2, 3, 4, 6 + // CHECK-NEXT: crd[1] : ( 0, 2, 0, 2, 0, 2 + // CHECK-NEXT: values : ( 1.1, 3.1, 1.2, 3.3, 1.4, 3.4 + // CHECK-NEXT: ---- // - // CHECK-NEXT: ( 1.1, 0, 3.1 ) - // CHECK-NEXT: ( 1.2, 0, 0 ) - // CHECK-NEXT: ( 0, 0, 3.3 ) - // CHECK-NEXT: ( 1.4, 0, 3.4 ) - // - %x = sparse_tensor.convert %0 : tensor<4x3xf64, #DCSR> to tensor<4x3xf64> - scf.for %i = %c0 to %c4 step %c1 { - %v1 = vector.transfer_read %x[%i, %c0], %du: tensor<4x3xf64>, vector<3xf64> - vector.print %v1 : vector<3xf64> - } - %y = sparse_tensor.convert %1 : tensor<4x3xf64, #DCSR> to tensor<4x3xf64> - scf.for %i = %c0 to %c4 step %c1 { - %v2 = vector.transfer_read %y[%i, %c0], %du: tensor<4x3xf64>, vector<3xf64> - vector.print %v2 : vector<3xf64> - } + sparse_tensor.print %0 : tensor<4x3xf64, #DCSR> + sparse_tensor.print %1 : tensor<4x3xf64, #DCSR> // Release resources. 
bufferization.dealloc_tensor %a : tensor<3x4xf64, #DCSR> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose_coo.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose_coo.mlir index dba897334830ad..cc6f6a068746d0 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose_coo.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose_coo.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -31,7 +31,7 @@ // RUN: %if mlir_arm_sve_tests %{ %{compile_sve} | %{run_sve} | FileCheck %s %} #SortedCOO = #sparse_tensor.encoding<{ - map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton) + map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton(soa)) }> module { @@ -52,7 +52,7 @@ module { return %1 : tensor<5x10xf32, #SortedCOO> } - func.func @entry() { + func.func @main() { %f0 = arith.constant 0.0 : f32 %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index @@ -79,17 +79,27 @@ module { // // Verify original and transposed sorted COO. 
// - // CHECK: ( 10, 20, 30, 40, 50, 11, 21, 31, 41, 51, 12, 22, 32, 42, 52, 13, 23, 33, 43, 53, 14, 24, 34, 44, 54, 15, 25, 35, 45, 55, 16, 26, 36, 46, 56, 17, 27, 37, 47, 57, 18, 28, 38, 48, 58, 19, 29, 39, 49, 59 ) - // CHECK-NEXT: ( 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 50 + // CHECK-NEXT: dim = ( 10, 5 ) + // CHECK-NEXT: lvl = ( 10, 5 ) + // CHECK-NEXT: pos[0] : ( 0, 50 + // CHECK-NEXT: crd[0] : ( 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9 + // CHECK-NEXT: crd[1] : ( 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4 + // CHECK-NEXT: values : ( 10, 20, 30, 40, 50, 11, 21, 31, 41, 51, 12, 22, 32, 42, 52, 13, 23, 33, 43, 53, 14, 24, 34, 44, 54, 15, 25, 35, 45, 55, 16, 26, 36, 46, 56, 17, 27, 37, 47, 57, 18, 28, 38, 48, 58, 19, 29, 39, 49, 59 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 50 + // CHECK-NEXT: dim = ( 5, 10 ) + // CHECK-NEXT: lvl = ( 5, 10 ) + // CHECK-NEXT: pos[0] : ( 0, 50 + // CHECK-NEXT: crd[0] : ( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 + // CHECK-NEXT: crd[1] : ( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 + // CHECK-NEXT: values : ( 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59 + // CHECK-NEXT: ---- // - %va = sparse_tensor.values 
%SA - : tensor<10x5xf32, #SortedCOO> to memref - %vat = sparse_tensor.values %SAT - : tensor<5x10xf32, #SortedCOO> to memref - %v1 = vector.transfer_read %va[%c0], %f0 : memref, vector<50xf32> - %v2 = vector.transfer_read %vat[%c0], %f0 : memref, vector<50xf32> - vector.print %v1 : vector<50xf32> - vector.print %v2 : vector<50xf32> + sparse_tensor.print %SA : tensor<10x5xf32, #SortedCOO> + sparse_tensor.print %SAT : tensor<5x10xf32, #SortedCOO> // Release resources. bufferization.dealloc_tensor %SA : tensor<10x5xf32, #SortedCOO> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_unary.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_unary.mlir index e03f99253b7845..3da1e35818cfa5 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_unary.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_unary.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -204,54 +204,8 @@ module { return %0 : tensor } - // Dumps a sparse vector of type f64. - func.func @dump_vec_f64(%arg0: tensor) { - // Dump the values array to verify only sparse contents are stored. - %c0 = arith.constant 0 : index - %d0 = arith.constant 0.0 : f64 - %0 = sparse_tensor.values %arg0 : tensor to memref - %1 = vector.transfer_read %0[%c0], %d0: memref, vector<32xf64> - vector.print %1 : vector<32xf64> - // Dump the dense vector to verify structure is correct. 
- %dv = sparse_tensor.convert %arg0 : tensor to tensor - %3 = vector.transfer_read %dv[%c0], %d0: tensor, vector<32xf64> - vector.print %3 : vector<32xf64> - bufferization.dealloc_tensor %dv : tensor - return - } - - // Dumps a sparse vector of type i32. - func.func @dump_vec_i32(%arg0: tensor) { - // Dump the values array to verify only sparse contents are stored. - %c0 = arith.constant 0 : index - %d0 = arith.constant 0 : i32 - %0 = sparse_tensor.values %arg0 : tensor to memref - %1 = vector.transfer_read %0[%c0], %d0: memref, vector<24xi32> - vector.print %1 : vector<24xi32> - // Dump the dense vector to verify structure is correct. - %dv = sparse_tensor.convert %arg0 : tensor to tensor - %3 = vector.transfer_read %dv[%c0], %d0: tensor, vector<32xi32> - vector.print %3 : vector<32xi32> - bufferization.dealloc_tensor %dv : tensor - return - } - - // Dump a sparse matrix. - func.func @dump_mat(%arg0: tensor) { - %c0 = arith.constant 0 : index - %d0 = arith.constant 0.0 : f64 - %0 = sparse_tensor.values %arg0 : tensor to memref - %1 = vector.transfer_read %0[%c0], %d0: memref, vector<16xf64> - vector.print %1 : vector<16xf64> - %dm = sparse_tensor.convert %arg0 : tensor to tensor - %3 = vector.transfer_read %dm[%c0, %c0], %d0: tensor, vector<4x8xf64> - vector.print %3 : vector<4x8xf64> - bufferization.dealloc_tensor %dm : tensor - return - } - // Driver method to call and verify vector kernels. - func.func @entry() { + func.func @main() { %cmu = arith.constant -99 : i32 %c0 = arith.constant 0 : index @@ -289,26 +243,66 @@ module { // // Verify the results. 
// - // CHECK: ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 4, 0, 0, 5, 6, 0, 0, 0, 0, 0, 0, 7, 8, 0, 9 ) - // CHECK-NEXT: ( 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 ) - // CHECK-NEXT: ( 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0 ) - // CHECK-NEXT: ( -1, 1, 1, -2, 1, 1, 1, 1, 1, 1, 1, -3, 1, 1, 1, 1, 1, -4, 1, 1, -5, -6, 1, 1, 1, 1, 1, 1, -7, -8, 1, -9 ) - // CHECK-NEXT: ( -1, 1, 1, -2, 1, 1, 1, 1, 1, 1, 1, -3, 1, 1, 1, 1, 1, -4, 1, 1, -5, -6, 1, 1, 1, 1, 1, 1, -7, -8, 1, -9 ) - // CHECK-NEXT: ( 0, 6, 33, 68, 100, 126, 196, 232, 279, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0, 0, 0, 68, 0, 0, 100, 126, 0, 0, 0, 0, 0, 0, 196, 232, 0, 279 ) - // CHECK-NEXT: ( 3, 3, 3, 4, 5, 6, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( ( 3, 3, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 3 ), ( 0, 0, 4, 0, 5, 0, 0, 6 ), ( 7, 0, 7, 7, 0, 0, 0, 0 ) ) - // CHECK-NEXT: ( 99, 99, 99, 99, 5, 6, 99, 99, 99, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( ( 99, 99, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 99 ), ( 0, 0, 99, 0, 5, 0, 0, 6 ), ( 99, 0, 99, 99, 0, 0, 0, 0 ) ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 32 ) + // CHECK-NEXT: lvl = ( 32 ) + // CHECK-NEXT: pos[0] : ( 0, 9 + // CHECK-NEXT: crd[0] : ( 0, 3, 11, 17, 20, 21, 28, 29, 31 + // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7, 8, 9 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 23 + // CHECK-NEXT: dim = ( 32 ) + // CHECK-NEXT: lvl = ( 32 ) + // CHECK-NEXT: pos[0] : ( 0, 23 + // CHECK-NEXT: crd[0] : ( 1, 2, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 18, 19, 22, 23, 24, 25, 26, 27, 30 + // CHECK-NEXT: values : ( 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 32 + // CHECK-NEXT: dim = ( 32 ) + // CHECK-NEXT: lvl = ( 32 ) + // CHECK-NEXT: pos[0] : ( 0, 32 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 + // CHECK-NEXT: values : ( -1, 1, 1, -2, 1, 1, 1, 1, 1, 1, 1, -3, 1, 1, 1, 1, 1, -4, 1, 1, -5, -6, 1, 1, 1, 1, 1, 1, -7, -8, 1, -9 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 32 ) + // CHECK-NEXT: lvl = ( 32 ) + // CHECK-NEXT: pos[0] : ( 0, 9 + // CHECK-NEXT: crd[0] : ( 0, 3, 11, 17, 20, 21, 28, 29, 31 + // CHECK-NEXT: values : ( 0, 6, 33, 68, 100, 126, 196, 232, 279 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 4, 8 ) + // CHECK-NEXT: lvl = ( 4, 8 ) + // CHECK-NEXT: pos[0] : ( 0, 4 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3 + // CHECK-NEXT: pos[1] : ( 0, 2, 3, 6, 9 + // CHECK-NEXT: crd[1] : ( 0, 1, 7, 2, 4, 7, 0, 2, 3 + // CHECK-NEXT: values : ( 3, 3, 3, 4, 5, 6, 7, 7, 7 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 4, 8 ) + // CHECK-NEXT: lvl = ( 4, 8 ) + // CHECK-NEXT: pos[0] : ( 0, 4 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3 + // CHECK-NEXT: pos[1] : ( 0, 2, 3, 6, 9 + // CHECK-NEXT: crd[1] : ( 0, 1, 7, 2, 4, 7, 0, 2, 3 + // CHECK-NEXT: values : ( 99, 99, 99, 99, 5, 6, 99, 99, 99 + // CHECK-NEXT: ---- // CHECK-NEXT: ( 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0 ) // - call @dump_vec_f64(%sv1) : (tensor) -> () - call @dump_vec_i32(%0) : (tensor) -> () - call @dump_vec_f64(%1) : (tensor) -> () - call @dump_vec_f64(%2) : (tensor) -> () - call @dump_mat(%3) : (tensor) -> () - call @dump_mat(%4) : (tensor) -> () + sparse_tensor.print %sv1 : tensor + sparse_tensor.print %0 : tensor + sparse_tensor.print %1 : 
tensor + sparse_tensor.print %2 : tensor + sparse_tensor.print %3 : tensor + sparse_tensor.print %4 : tensor %v = vector.transfer_read %5[%c0], %cmu: tensor, vector<32xi32> vector.print %v : vector<32xi32> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_vector_ops.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_vector_ops.mlir index d9ca2dca85342a..55332333164130 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_vector_ops.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_vector_ops.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -162,24 +162,8 @@ module { return %0 : tensor } - // Dumps a sparse vector. - func.func @dump(%arg0: tensor) { - // Dump the values array to verify only sparse contents are stored. - %c0 = arith.constant 0 : index - %d0 = arith.constant 0.0 : f64 - %0 = sparse_tensor.values %arg0 : tensor to memref - %1 = vector.transfer_read %0[%c0], %d0: memref, vector<16xf64> - vector.print %1 : vector<16xf64> - // Dump the dense vector to verify structure is correct. - %dv = sparse_tensor.convert %arg0 : tensor to tensor - %2 = vector.transfer_read %dv[%c0], %d0: tensor, vector<32xf64> - vector.print %2 : vector<32xf64> - bufferization.dealloc_tensor %dv : tensor - return - } - // Driver method to call and verify vector kernels. - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %d1 = arith.constant 1.1 : f64 @@ -221,31 +205,69 @@ module { // // Verify the results. 
// - // CHECK: ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 4, 0, 0, 5, 6, 0, 0, 0, 0, 0, 0, 7, 8, 0, 9 ) - // CHECK-NEXT: ( 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 11, 0, 12, 13, 0, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0, 15, 0, 16, 0, 0, 17, 0, 0, 0, 0, 0, 0, 18, 19, 0, 20 ) - // CHECK-NEXT: ( 2, 4, 6, 8, 10, 12, 14, 16, 18, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 2, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 8, 0, 0, 10, 12, 0, 0, 0, 0, 0, 0, 14, 16, 0, 18 ) - // CHECK-NEXT: ( 2, 4, 6, 8, 10, 12, 14, 16, 18, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 2, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 8, 0, 0, 10, 12, 0, 0, 0, 0, 0, 0, 14, 16, 0, 18 ) - // CHECK-NEXT: ( 2, 11, 16, 13, 14, 6, 15, 8, 16, 10, 29, 32, 35, 38, 0, 0 ) - // CHECK-NEXT: ( 2, 11, 0, 16, 13, 0, 0, 0, 0, 0, 14, 6, 0, 0, 0, 0, 15, 8, 16, 0, 10, 29, 0, 0, 0, 0, 0, 0, 32, 35, 0, 38 ) - // CHECK-NEXT: ( 48, 204, 252, 304, 360, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) - // CHECK-NEXT: ( 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 204, 0, 0, 0, 0, 0, 0, 252, 304, 0, 360 ) - // CHECK-NEXT: ( 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 204, 0, 0, 0, 0, 0, 0, 252, 304, 0, 360 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 32 ) + // CHECK-NEXT: lvl = ( 32 ) + // CHECK-NEXT: pos[0] : ( 0, 9 + // CHECK-NEXT: crd[0] : ( 0, 3, 11, 17, 20, 21, 28, 29, 31 + // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7, 8, 9 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 10 + // CHECK-NEXT: dim = ( 32 ) + // CHECK-NEXT: lvl = ( 32 ) + // CHECK-NEXT: pos[0] : ( 0, 10 + // CHECK-NEXT: crd[0] : ( 1, 3, 4, 10, 16, 18, 21, 28, 29, 31 + // CHECK-NEXT: values : ( 11, 12, 13, 14, 15, 16, 17, 18, 19, 20 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 32 ) + 
// CHECK-NEXT: lvl = ( 32 ) + // CHECK-NEXT: pos[0] : ( 0, 9 + // CHECK-NEXT: crd[0] : ( 0, 3, 11, 17, 20, 21, 28, 29, 31 + // CHECK-NEXT: values : ( 2, 4, 6, 8, 10, 12, 14, 16, 18 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: dim = ( 32 ) + // CHECK-NEXT: lvl = ( 32 ) + // CHECK-NEXT: pos[0] : ( 0, 9 + // CHECK-NEXT: crd[0] : ( 0, 3, 11, 17, 20, 21, 28, 29, 31 + // CHECK-NEXT: values : ( 2, 4, 6, 8, 10, 12, 14, 16, 18 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 14 + // CHECK-NEXT: dim = ( 32 ) + // CHECK-NEXT: lvl = ( 32 ) + // CHECK-NEXT: pos[0] : ( 0, 14 + // CHECK-NEXT: crd[0] : ( 0, 1, 3, 4, 10, 11, 16, 17, 18, 20, 21, 28, 29, 31 + // CHECK-NEXT: values : ( 2, 11, 16, 13, 14, 6, 15, 8, 16, 10, 29, 32, 35, 38 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 5 + // CHECK-NEXT: dim = ( 32 ) + // CHECK-NEXT: lvl = ( 32 ) + // CHECK-NEXT: pos[0] : ( 0, 5 + // CHECK-NEXT: crd[0] : ( 3, 21, 28, 29, 31 + // CHECK-NEXT: values : ( 48, 204, 252, 304, 360 + // CHECK-NEXT: ---- + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 32 + // CHECK-NEXT: dim = ( 32 ) + // CHECK-NEXT: lvl = ( 32 ) + // CHECK-NEXT: values : ( 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 204, 0, 0, 0, 0, 0, 0, 252, 304, 0, 360 + // CHECK-NEXT: ---- // CHECK-NEXT: 1169.1 // - - call @dump(%sv1) : (tensor) -> () - call @dump(%sv2) : (tensor) -> () - call @dump(%0) : (tensor) -> () - call @dump(%1) : (tensor) -> () - call @dump(%2) : (tensor) -> () - call @dump(%3) : (tensor) -> () - %m4 = sparse_tensor.values %4 : tensor to memref - %v4 = vector.load %m4[%c0]: memref, vector<32xf64> - vector.print %v4 : vector<32xf64> + sparse_tensor.print %sv1 : tensor + sparse_tensor.print %sv2 : tensor + sparse_tensor.print %0 : tensor + sparse_tensor.print %1 : tensor + sparse_tensor.print %2 : tensor + sparse_tensor.print %3 : tensor + sparse_tensor.print %4 : 
tensor %v5 = tensor.extract %5[] : tensor vector.print %v5 : f64 diff --git a/mlir/test/Transforms/test-legalizer.mlir b/mlir/test/Transforms/test-legalizer.mlir index ccdc9fe78ea0d3..d552f0346644b3 100644 --- a/mlir/test/Transforms/test-legalizer.mlir +++ b/mlir/test/Transforms/test-legalizer.mlir @@ -1,5 +1,10 @@ // RUN: mlir-opt -allow-unregistered-dialect -split-input-file -test-legalize-patterns -verify-diagnostics %s | FileCheck %s +// CHECK: notifyOperationInserted: test.legal_op_a, was unlinked +// CHECK-NEXT: notifyOperationReplaced: test.illegal_op_a +// CHECK-NEXT: notifyOperationModified: func.return +// CHECK-NEXT: notifyOperationErased: test.illegal_op_a + // CHECK-LABEL: verifyDirectPattern func.func @verifyDirectPattern() -> i32 { // CHECK-NEXT: "test.legal_op_a"() <{status = "Success"} @@ -8,6 +13,16 @@ func.func @verifyDirectPattern() -> i32 { return %result : i32 } +// ----- + +// CHECK: notifyOperationInserted: test.illegal_op_e, was unlinked +// CHECK-NEXT: notifyOperationReplaced: test.illegal_op_c +// CHECK-NEXT: notifyOperationModified: func.return +// CHECK-NEXT: notifyOperationErased: test.illegal_op_c +// CHECK-NEXT: notifyOperationInserted: test.legal_op_a, was unlinked +// CHECK-NEXT: notifyOperationReplaced: test.illegal_op_e +// CHECK-NEXT: notifyOperationErased: test.illegal_op_e + // CHECK-LABEL: verifyLargerBenefit func.func @verifyLargerBenefit() -> i32 { // CHECK-NEXT: "test.legal_op_a"() <{status = "Success"} @@ -16,16 +31,24 @@ func.func @verifyLargerBenefit() -> i32 { return %result : i32 } +// ----- + +// CHECK: notifyOperationModified: func.func +// Note: No block insertion because this function is external and no block +// signature conversion is performed. 
+ // CHECK-LABEL: func private @remap_input_1_to_0() func.func private @remap_input_1_to_0(i16) +// ----- + // CHECK-LABEL: func @remap_input_1_to_1(%arg0: f64) func.func @remap_input_1_to_1(%arg0: i64) { // CHECK-NEXT: "test.valid"{{.*}} : (f64) "test.invalid"(%arg0) : (i64) -> () } -// CHECK-LABEL: func @remap_call_1_to_1(%arg0: f64) +// CHECK: func @remap_call_1_to_1(%arg0: f64) func.func @remap_call_1_to_1(%arg0: i64) { // CHECK-NEXT: call @remap_input_1_to_1(%arg0) : (f64) -> () call @remap_input_1_to_1(%arg0) : (i64) -> () @@ -33,12 +56,36 @@ func.func @remap_call_1_to_1(%arg0: i64) { return } +// ----- + +// Block signature conversion: new block is inserted. +// CHECK: notifyBlockInserted into func.func: was unlinked + +// Contents of the old block are moved to the new block. +// CHECK-NEXT: notifyOperationInserted: test.return, was linked, exact position unknown + +// The new block arguments are used in "test.return". +// CHECK-NEXT: notifyOperationModified: test.return + +// The old block is erased. +// CHECK-NEXT: notifyBlockErased + +// The function op gets a new type attribute. +// CHECK-NEXT: notifyOperationModified: func.func + +// "test.return" is replaced. 
+// CHECK-NEXT: notifyOperationInserted: test.return, was unlinked +// CHECK-NEXT: notifyOperationReplaced: test.return +// CHECK-NEXT: notifyOperationErased: test.return + // CHECK-LABEL: func @remap_input_1_to_N({{.*}}f16, {{.*}}f16) func.func @remap_input_1_to_N(%arg0: f32) -> f32 { // CHECK-NEXT: "test.return"{{.*}} : (f16, f16) -> () "test.return"(%arg0) : (f32) -> () } +// ----- + // CHECK-LABEL: func @remap_input_1_to_N_remaining_use(%arg0: f16, %arg1: f16) func.func @remap_input_1_to_N_remaining_use(%arg0: f32) { // CHECK-NEXT: [[CAST:%.*]] = "test.cast"(%arg0, %arg1) : (f16, f16) -> f32 @@ -54,6 +101,8 @@ func.func @remap_materialize_1_to_1(%arg0: i42) { "test.return"(%arg0) : (i42) -> () } +// ----- + // CHECK-LABEL: func @remap_input_to_self func.func @remap_input_to_self(%arg0: index) { // CHECK-NOT: test.cast @@ -68,6 +117,8 @@ func.func @remap_multi(%arg0: i64, %unused: i16, %arg1: i64) -> (i64, i64) { "test.invalid"(%arg0, %arg1) : (i64, i64) -> () } +// ----- + // CHECK-LABEL: func @no_remap_nested func.func @no_remap_nested() { // CHECK-NEXT: "foo.region" @@ -82,6 +133,8 @@ func.func @no_remap_nested() { return } +// ----- + // CHECK-LABEL: func @remap_moved_region_args func.func @remap_moved_region_args() { // CHECK-NEXT: return @@ -96,6 +149,8 @@ func.func @remap_moved_region_args() { return } +// ----- + // CHECK-LABEL: func @remap_cloned_region_args func.func @remap_cloned_region_args() { // CHECK-NEXT: return @@ -122,6 +177,8 @@ func.func @remap_drop_region() { return } +// ----- + // CHECK-LABEL: func @dropped_input_in_use func.func @dropped_input_in_use(%arg: i16, %arg2: i64) { // CHECK-NEXT: "test.cast"{{.*}} : () -> i16 @@ -130,6 +187,8 @@ func.func @dropped_input_in_use(%arg: i16, %arg2: i64) { "work"(%arg) : (i16) -> () } +// ----- + // CHECK-LABEL: func @up_to_date_replacement func.func @up_to_date_replacement(%arg: i8) -> i8 { // CHECK-NEXT: return @@ -139,6 +198,8 @@ func.func @up_to_date_replacement(%arg: i8) -> i8 { return %repl_2 : 
i8 } +// ----- + // CHECK-LABEL: func @remove_foldable_op // CHECK-SAME: (%[[ARG_0:[a-z0-9]*]]: i32) func.func @remove_foldable_op(%arg0 : i32) -> (i32) { @@ -150,6 +211,8 @@ func.func @remove_foldable_op(%arg0 : i32) -> (i32) { return %0 : i32 } +// ----- + // CHECK-LABEL: @create_block func.func @create_block() { // Check that we created a block with arguments. @@ -161,6 +224,12 @@ func.func @create_block() { return } +// ----- + +// CHECK: notifyOperationModified: test.recursive_rewrite +// CHECK-NEXT: notifyOperationModified: test.recursive_rewrite +// CHECK-NEXT: notifyOperationModified: test.recursive_rewrite + // CHECK-LABEL: @bounded_recursion func.func @bounded_recursion() { // CHECK: test.recursive_rewrite 0 diff --git a/mlir/test/lib/Dialect/Test/TestPatterns.cpp b/mlir/test/lib/Dialect/Test/TestPatterns.cpp index 27eae2ffd694b5..2da184bc3d85ba 100644 --- a/mlir/test/lib/Dialect/Test/TestPatterns.cpp +++ b/mlir/test/lib/Dialect/Test/TestPatterns.cpp @@ -327,8 +327,12 @@ struct TestPatternDriver struct DumpNotifications : public RewriterBase::Listener { void notifyBlockInserted(Block *block, Region *previous, Region::iterator previousIt) override { - llvm::outs() << "notifyBlockInserted into " - << block->getParentOp()->getName() << ": "; + llvm::outs() << "notifyBlockInserted"; + if (block->getParentOp()) { + llvm::outs() << " into " << block->getParentOp()->getName() << ": "; + } else { + llvm::outs() << " into unknown op: "; + } if (previous == nullptr) { llvm::outs() << "was unlinked\n"; } else { @@ -341,7 +345,9 @@ struct DumpNotifications : public RewriterBase::Listener { if (!previous.isSet()) { llvm::outs() << ", was unlinked\n"; } else { - if (previous.getPoint() == previous.getBlock()->end()) { + if (!previous.getPoint().getNodePtr()) { + llvm::outs() << ", was linked, exact position unknown\n"; + } else if (previous.getPoint() == previous.getBlock()->end()) { llvm::outs() << ", was last in block\n"; } else { llvm::outs() << ", previous = " << 
previous.getPoint()->getName() @@ -349,9 +355,18 @@ struct DumpNotifications : public RewriterBase::Listener { } } } + void notifyBlockErased(Block *block) override { + llvm::outs() << "notifyBlockErased\n"; + } void notifyOperationErased(Operation *op) override { llvm::outs() << "notifyOperationErased: " << op->getName() << "\n"; } + void notifyOperationModified(Operation *op) override { + llvm::outs() << "notifyOperationModified: " << op->getName() << "\n"; + } + void notifyOperationReplaced(Operation *op, ValueRange values) override { + llvm::outs() << "notifyOperationReplaced: " << op->getName() << "\n"; + } }; struct TestStrictPatternDriver @@ -1153,6 +1168,8 @@ struct TestLegalizePatternDriver if (mode == ConversionMode::Partial) { DenseSet unlegalizedOps; ConversionConfig config; + DumpNotifications dumpNotifications; + config.listener = &dumpNotifications; config.unlegalizedOps = &unlegalizedOps; if (failed(applyPartialConversion(getOperation(), target, std::move(patterns), config))) { @@ -1171,8 +1188,11 @@ struct TestLegalizePatternDriver return (bool)op->getAttrOfType("test.dynamically_legal"); }); + ConversionConfig config; + DumpNotifications dumpNotifications; + config.listener = &dumpNotifications; if (failed(applyFullConversion(getOperation(), target, - std::move(patterns)))) { + std::move(patterns), config))) { getOperation()->emitRemark() << "applyFullConversion failed"; } return; diff --git a/mlir/test/python/dialects/arith_dialect.py b/mlir/test/python/dialects/arith_dialect.py index 8bb80eed2b8105..c9af5e7b46db84 100644 --- a/mlir/test/python/dialects/arith_dialect.py +++ b/mlir/test/python/dialects/arith_dialect.py @@ -4,6 +4,7 @@ from mlir.ir import * import mlir.dialects.arith as arith import mlir.dialects.func as func +from array import array def run(f): @@ -92,3 +93,42 @@ def __str__(self): b = a * a # CHECK: ArithValue(%2 = arith.mulf %cst_1, %cst_1 : f64) print(b) + + +# CHECK-LABEL: TEST: testArrayConstantConstruction +@run +def 
testArrayConstantConstruction(): + with Context(), Location.unknown(): + module = Module.create() + with InsertionPoint(module.body): + i32_array = array("i", [1, 2, 3, 4]) + i32 = IntegerType.get_signless(32) + vec_i32 = VectorType.get([2, 2], i32) + arith.constant(vec_i32, i32_array) + arith.ConstantOp(vec_i32, DenseIntElementsAttr.get(i32_array, type=vec_i32)) + + # "q" is the equivalent of `long long` in C and requires at least + # 64 bit width integers on both Linux and Windows. + i64_array = array("q", [5, 6, 7, 8]) + i64 = IntegerType.get_signless(64) + vec_i64 = VectorType.get([1, 4], i64) + arith.constant(vec_i64, i64_array) + arith.ConstantOp(vec_i64, DenseIntElementsAttr.get(i64_array, type=vec_i64)) + + f32_array = array("f", [1.0, 2.0, 3.0, 4.0]) + f32 = F32Type.get() + vec_f32 = VectorType.get([4, 1], f32) + arith.constant(vec_f32, f32_array) + arith.ConstantOp(vec_f32, DenseFPElementsAttr.get(f32_array, type=vec_f32)) + + f64_array = array("d", [1.0, 2.0, 3.0, 4.0]) + f64 = F64Type.get() + vec_f64 = VectorType.get([2, 1, 2], f64) + arith.constant(vec_f64, f64_array) + arith.ConstantOp(vec_f64, DenseFPElementsAttr.get(f64_array, type=vec_f64)) + + # CHECK-COUNT-2: arith.constant dense<[{{\[}}1, 2], [3, 4]]> : vector<2x2xi32> + # CHECK-COUNT-2: arith.constant dense<[{{\[}}5, 6, 7, 8]]> : vector<1x4xi64> + # CHECK-COUNT-2: arith.constant dense<[{{\[}}1.000000e+00], [2.000000e+00], [3.000000e+00], [4.000000e+00]]> : vector<4x1xf32> + # CHECK-COUNT-2: arith.constant dense<[{{\[}}[1.000000e+00, 2.000000e+00]], [{{\[}}3.000000e+00, 4.000000e+00]]]> : vector<2x1x2xf64> + print(module) diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp index 81634ae1edc490..fce7454bf2800d 100644 --- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp @@ -715,16 +715,12 @@ struct AMDGPUQueueTy { std::lock_guard Lock(Mutex); assert(Queue && 
"Interacted with a non-initialized queue!"); - // Avoid defining the input dependency if already satisfied. - if (InputSignal && !InputSignal->load()) - InputSignal = nullptr; - // Add a barrier packet before the kernel packet in case there is a pending // preceding operation. The barrier packet will delay the processing of // subsequent queue's packets until the barrier input signal are satisfied. // No need output signal needed because the dependency is already guaranteed // by the queue barrier itself. - if (InputSignal) + if (InputSignal && InputSignal->load()) if (auto Err = pushBarrierImpl(nullptr, InputSignal)) return Err; @@ -1254,12 +1250,8 @@ struct AMDGPUStreamTy { // Consume stream slot and compute dependencies. auto [Curr, InputSignal] = consume(OutputSignal); - // Avoid defining the input dependency if already satisfied. - if (InputSignal && !InputSignal->load()) - InputSignal = nullptr; - // Issue the async memory copy. - if (InputSignal) { + if (InputSignal && InputSignal->load()) { hsa_signal_t InputSignalRaw = InputSignal->get(); return utils::asyncMemCopy(UseMultipleSdmaEngines, Dst, Agent, Src, Agent, CopySize, 1, &InputSignalRaw, @@ -1293,17 +1285,13 @@ struct AMDGPUStreamTy { // Consume stream slot and compute dependencies. auto [Curr, InputSignal] = consume(OutputSignals[0]); - // Avoid defining the input dependency if already satisfied. - if (InputSignal && !InputSignal->load()) - InputSignal = nullptr; - // Setup the post action for releasing the intermediate buffer. if (auto Err = Slots[Curr].schedReleaseBuffer(Inter, MemoryManager)) return Err; // Issue the first step: device to host transfer. Avoid defining the input // dependency if already satisfied. 
- if (InputSignal) { + if (InputSignal && InputSignal->load()) { hsa_signal_t InputSignalRaw = InputSignal->get(); if (auto Err = utils::asyncMemCopy( UseMultipleSdmaEngines, Inter, Agent, Src, Agent, CopySize, 1, @@ -1361,12 +1349,8 @@ struct AMDGPUStreamTy { // Consume stream slot and compute dependencies. auto [Curr, InputSignal] = consume(OutputSignal); - // Avoid defining the input dependency if already satisfied. - if (InputSignal && !InputSignal->load()) - InputSignal = nullptr; - // Issue the first step: host to host transfer. - if (InputSignal) { + if (InputSignal && InputSignal->load()) { // The std::memcpy is done asynchronously using an async handler. We store // the function's information in the action but it is not actually a // post action. @@ -1429,10 +1413,6 @@ struct AMDGPUStreamTy { // Consume stream slot and compute dependencies. auto [Curr, InputSignal] = consume(OutputSignal); - // Avoid defining the input dependency if already satisfied. - if (InputSignal && !InputSignal->load()) - InputSignal = nullptr; - // The agents need to have access to the corresponding memory // This is presently only true if the pointers were originally // allocated by this runtime or the caller made the appropriate diff --git a/openmp/libomptarget/test/offloading/fortran/declare-target-array-in-target-region.f90 b/openmp/libomptarget/test/offloading/fortran/declare-target-array-in-target-region.f90 deleted file mode 100644 index c09146198768b0..00000000000000 --- a/openmp/libomptarget/test/offloading/fortran/declare-target-array-in-target-region.f90 +++ /dev/null @@ -1,33 +0,0 @@ -! Offloading test with a target region mapping a declare target -! Fortran array writing some values to it and checking the host -! correctly receives the updates made on the device. -! REQUIRES: flang -! UNSUPPORTED: nvptx64-nvidia-cuda-LTO -! UNSUPPORTED: aarch64-unknown-linux-gnu -! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO -! UNSUPPORTED: x86_64-pc-linux-gnu -! 
UNSUPPORTED: x86_64-pc-linux-gnu-LTO - -! RUN: %libomptarget-compile-fortran-run-and-check-generic -module test_0 - implicit none - INTEGER :: sp(10) = (/0,0,0,0,0,0,0,0,0,0/) - !$omp declare target link(sp) -end module test_0 - -program main - use test_0 - integer :: i = 1 - integer :: j = 11 -!$omp target map(tofrom:sp, i, j) - do while (i <= j) - sp(i) = i; - i = i + 1 - end do -!$omp end target - -PRINT *, sp(:) - -end program - -! CHECK: 1 2 3 4 5 6 7 8 9 10 diff --git a/openmp/libomptarget/test/offloading/fortran/declare-target-vars-in-target-region.f90 b/openmp/libomptarget/test/offloading/fortran/declare-target-vars-in-target-region.f90 new file mode 100644 index 00000000000000..f524deac3bcce9 --- /dev/null +++ b/openmp/libomptarget/test/offloading/fortran/declare-target-vars-in-target-region.f90 @@ -0,0 +1,81 @@ +! Offloading test with a target region mapping a declare target +! Fortran array writing some values to it and checking the host +! correctly receives the updates made on the device. +! REQUIRES: flang +! UNSUPPORTED: nvptx64-nvidia-cuda-LTO +! UNSUPPORTED: aarch64-unknown-linux-gnu +! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO +! UNSUPPORTED: x86_64-pc-linux-gnu +! UNSUPPORTED: x86_64-pc-linux-gnu-LTO + +! RUN: %libomptarget-compile-fortran-run-and-check-generic +module test_0 + implicit none + INTEGER :: arr1(10) = (/0,0,0,0,0,0,0,0,0,0/) + INTEGER :: arr2(10) = (/0,0,0,0,0,0,0,0,0,0/) + !$omp declare target link(arr1) enter(arr2) + INTEGER :: scalar = 1 + !$omp declare target link(scalar) +end module test_0 + +subroutine test_with_array_link_and_tofrom() + use test_0 + integer :: i = 1 + integer :: j = 11 + !$omp target map(tofrom:arr1, i, j) + do while (i <= j) + arr1(i) = i; + i = i + 1 + end do + !$omp end target + + ! 
CHECK: 1 2 3 4 5 6 7 8 9 10 + PRINT *, arr1(:) +end subroutine test_with_array_link_and_tofrom + +subroutine test_with_array_link_only() + use test_0 + integer :: i = 1 + integer :: j = 11 + !$omp target map(i, j) + do while (i <= j) + arr1(i) = i + 1; + i = i + 1 + end do + !$omp end target + + ! CHECK: 2 3 4 5 6 7 8 9 10 11 + PRINT *, arr1(:) +end subroutine test_with_array_link_only + +subroutine test_with_array_enter_only() + use test_0 + integer :: i = 1 + integer :: j = 11 + !$omp target map(i, j) + do while (i <= j) + arr2(i) = i + 1; + i = i + 1 + end do + !$omp end target + + ! CHECK: 0 0 0 0 0 0 0 0 0 0 + PRINT *, arr2(:) +end subroutine test_with_array_enter_only + +subroutine test_with_scalar_link_only() + use test_0 + !$omp target + scalar = 10 + !$omp end target + + ! CHECK: 10 + PRINT *, scalar +end subroutine test_with_scalar_link_only + +program main + call test_with_array_link_and_tofrom() + call test_with_array_link_only() + call test_with_array_enter_only() + call test_with_scalar_link_only() +end program diff --git a/openmp/runtime/src/kmp_collapse.cpp b/openmp/runtime/src/kmp_collapse.cpp index 2c410ca9b6030e..569d2c1508319d 100644 --- a/openmp/runtime/src/kmp_collapse.cpp +++ b/openmp/runtime/src/kmp_collapse.cpp @@ -1272,6 +1272,304 @@ void kmp_calc_original_ivs_for_end( } } +/************************************************************************** + * Identify nested loop structure - loops come in the canonical form + * Lower triangle matrix: i = 0; i <= N; i++ {0,0}:{N,0} + * j = 0; j <= 0/-1+1*i; j++ {0,0}:{0/-1,1} + * Upper Triangle matrix + * i = 0; i <= N; i++ {0,0}:{N,0} + * j = 0+1*i; j <= N; j++ {0,1}:{N,0} + * ************************************************************************/ +nested_loop_type_t +kmp_identify_nested_loop_structure(/*in*/ bounds_info_t *original_bounds_nest, + /*in*/ kmp_index_t n) { + // only 2-level nested loops are supported + if (n != 2) { + return nested_loop_type_unkown; + } + // loops must be 
canonical + KMP_ASSERT( + (original_bounds_nest[0].comparison == comparison_t::comp_less_or_eq) && + (original_bounds_nest[1].comparison == comparison_t::comp_less_or_eq)); + // check outer loop bounds: for triangular need to be {0,0}:{N,0} + kmp_uint64 outer_lb0_u64 = kmp_fix_iv(original_bounds_nest[0].loop_iv_type, + original_bounds_nest[0].lb0_u64); + kmp_uint64 outer_ub0_u64 = kmp_fix_iv(original_bounds_nest[0].loop_iv_type, + original_bounds_nest[0].ub0_u64); + kmp_uint64 outer_lb1_u64 = kmp_fix_iv(original_bounds_nest[0].loop_iv_type, + original_bounds_nest[0].lb1_u64); + kmp_uint64 outer_ub1_u64 = kmp_fix_iv(original_bounds_nest[0].loop_iv_type, + original_bounds_nest[0].ub1_u64); + if (outer_lb0_u64 != 0 || outer_lb1_u64 != 0 || outer_ub1_u64 != 0) { + return nested_loop_type_unkown; + } + // check inner bounds to determine triangle type + kmp_uint64 inner_lb0_u64 = kmp_fix_iv(original_bounds_nest[1].loop_iv_type, + original_bounds_nest[1].lb0_u64); + kmp_uint64 inner_ub0_u64 = kmp_fix_iv(original_bounds_nest[1].loop_iv_type, + original_bounds_nest[1].ub0_u64); + kmp_uint64 inner_lb1_u64 = kmp_fix_iv(original_bounds_nest[1].loop_iv_type, + original_bounds_nest[1].lb1_u64); + kmp_uint64 inner_ub1_u64 = kmp_fix_iv(original_bounds_nest[1].loop_iv_type, + original_bounds_nest[1].ub1_u64); + // lower triangle loop inner bounds need to be {0,0}:{0/-1,1} + if (inner_lb0_u64 == 0 && inner_lb1_u64 == 0 && + (inner_ub0_u64 == 0 || inner_ub0_u64 == -1) && inner_ub1_u64 == 1) { + return nested_loop_type_lower_triangular_matrix; + } + // upper triangle loop inner bounds need to be {0,1}:{N,0} + if (inner_lb0_u64 == 0 && inner_lb1_u64 == 1 && + inner_ub0_u64 == outer_ub0_u64 && inner_ub1_u64 == 0) { + return nested_loop_type_upper_triangular_matrix; + } + return nested_loop_type_unkown; +} + +/************************************************************************** + * SQRT Approximation: https://math.mit.edu/~stevenj/18.335/newton-sqrt.pdf + * Start point is x so the 
result is always > sqrt(x) + * The method has uniform convergence, PRECISION is set to 0.1 + * ************************************************************************/ +#define level_of_precision 0.1 +double sqrt_newton_approx(/*in*/ kmp_uint64 x) { + double sqrt_old = 0.; + double sqrt_new = (double)x; + do { + sqrt_old = sqrt_new; + sqrt_new = (sqrt_old + x / sqrt_old) / 2; + } while ((sqrt_old - sqrt_new) > level_of_precision); + return sqrt_new; +} + +/************************************************************************** + * Handle lower triangle matrix in the canonical form + * i = 0; i <= N; i++ {0,0}:{N,0} + * j = 0; j <= 0/-1 + 1*i; j++ {0,0}:{0/-1,1} + * ************************************************************************/ +void kmp_handle_lower_triangle_matrix( + /*in*/ kmp_uint32 nth, + /*in*/ kmp_uint32 tid, + /*in */ kmp_index_t n, + /*in/out*/ bounds_info_t *original_bounds_nest, + /*out*/ bounds_info_t *chunk_bounds_nest) { + + // transfer loop types from the original loop to the chunks + for (kmp_index_t i = 0; i < n; ++i) { + chunk_bounds_nest[i] = original_bounds_nest[i]; + } + // cleanup iv variables + kmp_uint64 outer_ub0 = kmp_fix_iv(original_bounds_nest[0].loop_iv_type, + original_bounds_nest[0].ub0_u64); + kmp_uint64 outer_lb0 = kmp_fix_iv(original_bounds_nest[0].loop_iv_type, + original_bounds_nest[0].lb0_u64); + kmp_uint64 inner_ub0 = kmp_fix_iv(original_bounds_nest[1].loop_iv_type, + original_bounds_nest[1].ub0_u64); + // calculate the chunk's lower and upper bounds + // the total number of iterations in the loop is the sum of the arithmetic + // progression from the outer lower to outer upper bound (inclusive since the + // loop is canonical) note that less_than inner loops (inner_ub0 = -1) + // effectively make the progression 1-based making N = (outer_ub0 - inner_lb0 + // + 1) -> N - 1 + kmp_uint64 outer_iters = (outer_ub0 - outer_lb0 + 1) + inner_ub0; + kmp_uint64 iter_total = outer_iters * (outer_iters + 1) / 2; + // the 
current thread's number of iterations: + // each thread gets an equal number of iterations: total number of iterations + // divided by the number of threads plus, if there's a remainder, + // the first threads with the number up to the remainder get an additional + // iteration each to cover it + kmp_uint64 iter_current = + iter_total / nth + ((tid < (iter_total % nth)) ? 1 : 0); + // cumulative number of iterations executed by all the previous threads: + // threads with the tid below the remainder will have (iter_total/nth+1) + // elements, and so will all threads before them so the cumulative number of + // iterations executed by all the previous will be the current thread's number + // of iterations multiplied by the number of previous threads which is equal + // to the current thread's tid; threads with the number equal or above the + // remainder will have (iter_total/nth) elements so the cumulative number of + // iterations previously executed is its number of iterations multiplied by the + // number of previous threads which is again equal to the current thread's tid + // PLUS all the remainder iterations that will have been executed by the + // previous threads + kmp_uint64 iter_before_current = + tid * iter_current + ((tid < iter_total % nth) ? 0 : (iter_total % nth)); + // cumulative number of iterations executed with the current thread is + // the cumulative number executed before it plus its own + kmp_uint64 iter_with_current = iter_before_current + iter_current; + // calculate the outer loop lower bound (lbo) which is the max outer iv value + // that gives the number of iterations that is equal or just below the total + // number of iterations executed by the previous threads, for less_than + // (1-based) inner loops (inner_ub0 == -1) it will be i.e. + // lbo*(lbo-1)/2<=iter_before_current => lbo^2-lbo-2*iter_before_current<=0 + // for less_than_equal (0-based) inner loops (inner_ub == 0) it will be: + // i.e. 
lbo*(lbo+1)/2<=iter_before_current => + // lbo^2+lbo-2*iter_before_current<=0 both cases can be handled similarly + // using a parameter to control the equation sign + kmp_int64 inner_adjustment = 1 + 2 * inner_ub0; + kmp_uint64 lower_bound_outer = + (kmp_uint64)(sqrt_newton_approx(inner_adjustment * inner_adjustment + + 8 * iter_before_current) + + inner_adjustment) / + 2 - + inner_adjustment; + // calculate the inner loop lower bound which is the remaining number of + // iterations required to hit the total number of iterations executed by the + // previous threads giving the starting point of this thread + kmp_uint64 lower_bound_inner = + iter_before_current - + ((lower_bound_outer + inner_adjustment) * lower_bound_outer) / 2; + // calculate the outer loop upper bound using the same approach as for the + // inner bound except using the total number of iterations executed with the + // current thread + kmp_uint64 upper_bound_outer = + (kmp_uint64)(sqrt_newton_approx(inner_adjustment * inner_adjustment + + 8 * iter_with_current) + + inner_adjustment) / + 2 - + inner_adjustment; + // calculate the inner loop upper bound which is the remaining number of + // iterations required to hit the total number of iterations executed after + // the current thread giving the starting point of the next thread + kmp_uint64 upper_bound_inner = + iter_with_current - + ((upper_bound_outer + inner_adjustment) * upper_bound_outer) / 2; + // adjust the upper bounds down by 1 element to point at the last iteration of + // the current thread the first iteration of the next thread + if (upper_bound_inner == 0) { + // {n,0} => {n-1,n-1} + upper_bound_outer -= 1; + upper_bound_inner = upper_bound_outer; + } else { + // {n,m} => {n,m-1} (m!=0) + upper_bound_inner -= 1; + } + + // assign the values, zeroing out lb1 and ub1 values since the iteration space + // is now one-dimensional + chunk_bounds_nest[0].lb0_u64 = lower_bound_outer; + chunk_bounds_nest[1].lb0_u64 = lower_bound_inner; + 
chunk_bounds_nest[0].ub0_u64 = upper_bound_outer; + chunk_bounds_nest[1].ub0_u64 = upper_bound_inner; + chunk_bounds_nest[0].lb1_u64 = 0; + chunk_bounds_nest[0].ub1_u64 = 0; + chunk_bounds_nest[1].lb1_u64 = 0; + chunk_bounds_nest[1].ub1_u64 = 0; + +#if 0 + printf("tid/nth = %d/%d : From [%llu, %llu] To [%llu, %llu] : Chunks %llu/%llu\n", + tid, nth, chunk_bounds_nest[0].lb0_u64, chunk_bounds_nest[1].lb0_u64, + chunk_bounds_nest[0].ub0_u64, chunk_bounds_nest[1].ub0_u64, iter_current, iter_total); +#endif +} + +/************************************************************************** + * Handle upper triangle matrix in the canonical form + * i = 0; i <= N; i++ {0,0}:{N,0} + * j = 0+1*i; j <= N; j++ {0,1}:{N,0} + * ************************************************************************/ +void kmp_handle_upper_triangle_matrix( + /*in*/ kmp_uint32 nth, + /*in*/ kmp_uint32 tid, + /*in */ kmp_index_t n, + /*in/out*/ bounds_info_t *original_bounds_nest, + /*out*/ bounds_info_t *chunk_bounds_nest) { + + // transfer loop types from the original loop to the chunks + for (kmp_index_t i = 0; i < n; ++i) { + chunk_bounds_nest[i] = original_bounds_nest[i]; + } + // cleanup iv variables + kmp_uint64 outer_ub0 = kmp_fix_iv(original_bounds_nest[0].loop_iv_type, + original_bounds_nest[0].ub0_u64); + kmp_uint64 outer_lb0 = kmp_fix_iv(original_bounds_nest[0].loop_iv_type, + original_bounds_nest[0].lb0_u64); + kmp_uint64 inner_ub0 = kmp_fix_iv(original_bounds_nest[1].loop_iv_type, + original_bounds_nest[1].ub0_u64); + // calculate the chunk's lower and upper bounds + // the total number of iterations in the loop is the sum of the arithmetic + // progression from the outer lower to outer upper bound (inclusive since the + // loop is canonical) note that less_than inner loops (inner_ub0 = -1) + // effectively make the progression 1-based making N = (outer_ub0 - inner_lb0 + // + 1) -> N - 1 + kmp_uint64 outer_iters = (outer_ub0 - outer_lb0 + 1); + kmp_uint64 iter_total = outer_iters * 
(outer_iters + 1) / 2; + // the current thread's number of iterations: + // each thread gets an equal number of iterations: total number of iterations + // divided by the number of threads plus, if there's a remainder, + // the first threads with the number up to the remainder get an additional + // iteration each to cover it + kmp_uint64 iter_current = + iter_total / nth + ((tid < (iter_total % nth)) ? 1 : 0); + // cumulative number of iterations executed by all the previous threads: + // threads with the tid below the remainder will have (iter_total/nth+1) + // elements, and so will all threads before them so the cumulative number of + // iterations executed by all the previous will be the current thread's number + // of iterations multiplied by the number of previous threads which is equal + // to the current thread's tid; threads with the number equal or above the + // remainder will have (iter_total/nth) elements so the cumulative number of + // iterations previously executed is its number of iterations multiplied by the + // number of previous threads which is again equal to the current thread's tid + // PLUS all the remainder iterations that will have been executed by the + // previous threads + kmp_uint64 iter_before_current = + tid * iter_current + ((tid < iter_total % nth) ? 0 : (iter_total % nth)); + // cumulative number of iterations executed with the current thread is + // the cumulative number executed before it plus its own + kmp_uint64 iter_with_current = iter_before_current + iter_current; + // calculate the outer loop lower bound (lbo) which is the max outer iv value + // that gives the number of iterations that is equal or just below the total + // number of iterations executed by the previous threads, for less_than + // (1-based) inner loops (inner_ub0 == -1) it will be i.e. + // lbo*(lbo-1)/2<=iter_before_current => lbo^2-lbo-2*iter_before_current<=0 + // for less_than_equal (0-based) inner loops (inner_ub == 0) it will be: + // i.e. 
lbo*(lbo+1)/2<=iter_before_current => + // lbo^2+lbo-2*iter_before_current<=0 both cases can be handled similarly + // using a parameter to control the equation sign + kmp_uint64 lower_bound_outer = + (kmp_uint64)(sqrt_newton_approx(1 + 8 * iter_before_current) + 1) / 2 - 1; + ; + // calculate the inner loop lower bound which is the remaining number of + // iterations required to hit the total number of iterations executed by the + // previous threads giving the starting point of this thread + kmp_uint64 lower_bound_inner = + iter_before_current - ((lower_bound_outer + 1) * lower_bound_outer) / 2; + // calculate the outer loop upper bound using the same approach as for the + // inner bound except using the total number of iterations executed with the + // current thread + kmp_uint64 upper_bound_outer = + (kmp_uint64)(sqrt_newton_approx(1 + 8 * iter_with_current) + 1) / 2 - 1; + // calculate the inner loop upper bound which is the remaining number of + // iterations required to hit the total number of iterations executed after + // the current thread giving the starting point of the next thread + kmp_uint64 upper_bound_inner = + iter_with_current - ((upper_bound_outer + 1) * upper_bound_outer) / 2; + // adjust the upper bounds down by 1 element to point at the last iteration of + // the current thread the first iteration of the next thread + if (upper_bound_inner == 0) { + // {n,0} => {n-1,n-1} + upper_bound_outer -= 1; + upper_bound_inner = upper_bound_outer; + } else { + // {n,m} => {n,m-1} (m!=0) + upper_bound_inner -= 1; + } + + // assign the values, zeroing out lb1 and ub1 values since the iteration space + // is now one-dimensional + chunk_bounds_nest[0].lb0_u64 = (outer_iters - 1) - upper_bound_outer; + chunk_bounds_nest[1].lb0_u64 = (outer_iters - 1) - upper_bound_inner; + chunk_bounds_nest[0].ub0_u64 = (outer_iters - 1) - lower_bound_outer; + chunk_bounds_nest[1].ub0_u64 = (outer_iters - 1) - lower_bound_inner; + chunk_bounds_nest[0].lb1_u64 = 0; + 
chunk_bounds_nest[0].ub1_u64 = 0; + chunk_bounds_nest[1].lb1_u64 = 0; + chunk_bounds_nest[1].ub1_u64 = 0; + +#if 0 + printf("tid/nth = %d/%d : From [%llu, %llu] To [%llu, %llu] : Chunks %llu/%llu\n", + tid, nth, chunk_bounds_nest[0].lb0_u64, chunk_bounds_nest[1].lb0_u64, + chunk_bounds_nest[0].ub0_u64, chunk_bounds_nest[1].ub0_u64, iter_current, iter_total); +#endif +} //----------Init API for non-rectangular loops-------------------------------- // Init API for collapsed loops (static, no chunks defined). @@ -1334,6 +1632,19 @@ __kmpc_for_collapsed_init(ident_t *loc, kmp_int32 gtid, KMP_DEBUG_ASSERT(tid < nth); + // Handle special cases + nested_loop_type_t loop_type = + kmp_identify_nested_loop_structure(original_bounds_nest, n); + if (loop_type == nested_loop_type_lower_triangular_matrix) { + kmp_handle_lower_triangle_matrix(nth, tid, n, original_bounds_nest, + chunk_bounds_nest); + return TRUE; + } else if (loop_type == nested_loop_type_upper_triangular_matrix) { + kmp_handle_upper_triangle_matrix(nth, tid, n, original_bounds_nest, + chunk_bounds_nest); + return TRUE; + } + CollapseAllocator original_ivs_start(n); if (!kmp_calc_original_ivs_for_start(original_bounds_nest, n, diff --git a/openmp/runtime/src/kmp_collapse.h b/openmp/runtime/src/kmp_collapse.h index e4870185645de0..1044478554a022 100644 --- a/openmp/runtime/src/kmp_collapse.h +++ b/openmp/runtime/src/kmp_collapse.h @@ -45,6 +45,13 @@ enum loop_type_t : kmp_int32 { loop_type_int64 = 7 }; +// Defining loop types to handle special cases +enum nested_loop_type_t : kmp_int32 { + nested_loop_type_unkown = 0, + nested_loop_type_lower_triangular_matrix = 1, + nested_loop_type_upper_triangular_matrix = 2 +}; + /*! @ingroup WORK_SHARING * Describes the structure for rectangular nested loops. @@ -124,14 +131,14 @@ struct bounds_info_t { // It's represented in kmp_uint64, but each dimention is calculated in // that loop IV type. 
Also dimentions have to be converted to those types // when used in generated code. -typedef kmp_uint64* kmp_point_t; +typedef kmp_uint64 *kmp_point_t; // Array: Number of loop iterations on each nesting level to achieve some point, // in expanded space or in original space. // OMPTODO: move from using iterations to using offsets (iterations multiplied // by steps). For those we need to be careful with the types, as step can be // negative, but it'll remove multiplications and divisions in several places. -typedef kmp_loop_nest_iv_t* kmp_iterations_t; +typedef kmp_loop_nest_iv_t *kmp_iterations_t; // Internal struct with additional info: template struct bounds_info_internalXX_template { diff --git a/openmp/runtime/test/worksharing/for/omp_for_collapse_LowerTriangularLess.c b/openmp/runtime/test/worksharing/for/omp_for_collapse_LowerTriangularLess.c new file mode 100644 index 00000000000000..9d742066cf1fc2 --- /dev/null +++ b/openmp/runtime/test/worksharing/for/omp_for_collapse_LowerTriangularLess.c @@ -0,0 +1,124 @@ +// RUN: %libomp-compile-and-run +#include +#include +#include +#include "omp.h" + +#ifndef MAX_BOUND +#define MAX_BOUND 64 +#endif +#ifndef _MSC_VER +#define NO_EFFICIENCY_CHECK +#endif + +/* To ensure Correctness, only valid iterations are executed and are executed + only once. Stores the number of times an iteration is executed. */ +unsigned *execution_count = NULL; +/* Stores the number of iterations executed by each thread. 
*/ +unsigned *iterations_per_thread = NULL; + +unsigned *Alloc(unsigned bound1, unsigned bound2) { + return (unsigned *)(malloc(bound1 * bound2 * sizeof(unsigned))); +} + +void ZeroOut(unsigned *p, unsigned bound1, unsigned bound2) { + memset(p, 0, bound1 * bound2 * sizeof(unsigned)); +} + +void Free(unsigned *p) { free((void *)p); } + +unsigned *Index(unsigned *p, unsigned i, unsigned j, unsigned bound2) { + return &p[i * bound2 + j]; +} + +int test(unsigned upper_bound) { + + unsigned total_iterations = upper_bound * (upper_bound - 1) / 2; + unsigned num_threads = omp_get_max_threads(); + unsigned lower_per_chunk = total_iterations / num_threads; + unsigned upper_per_chunk = + lower_per_chunk + ((total_iterations % num_threads) ? 1 : 0); + int i, j; + + omp_set_num_threads(num_threads); + + ZeroOut(execution_count, upper_bound, upper_bound); + ZeroOut(iterations_per_thread, num_threads, 1); + +#ifdef VERBOSE + fprintf(stderr, + "INFO: Using %6d threads for %6d outer iterations with %6d [%6d:%6d] " + "chunks " + "loop type lower triangle <,< - ", + num_threads, upper_bound, total_iterations, lower_per_chunk, + upper_per_chunk); +#endif + +#pragma omp parallel shared(iterations_per_thread, execution_count) + { /* begin of parallel */ + /* Lower triangular execution_count matrix */ +#pragma omp for schedule(static) collapse(2) + for (i = 0; i < upper_bound; i++) { + for (j = 0; j < i; j++) { + (*Index(iterations_per_thread, omp_get_thread_num(), 0, 1))++; + (*Index(execution_count, i, j, upper_bound))++; + } + } /* end of for*/ + } /* end of parallel */ + + /* check the execution_count array */ + for (i = 0; i < upper_bound; i++) { + for (j = 0; j < i; j++) { + unsigned value = *Index(execution_count, i, j, upper_bound); + /* iteration with j<=i are valid, but should have been executed only once + */ + if (value != 1) { + fprintf(stderr, "ERROR: valid iteration [%i,%i] executed %i times.\n", + i, j, value); + return 0; + } + } + for (j = i; j < upper_bound; j++) { + 
unsigned value = *Index(execution_count, i, j, upper_bound); + /* iteration with j>=i are invalid and should not have been executed + */ + if (value > 0) { + fprintf(stderr, "ERROR: invalid iteration [%i,%i] executed %i times.\n", + i, j, value); + return 0; + } + } + } + +#ifndef NO_EFFICIENCY_CHECK + /* Ensure the number of iterations executed by each thread is within bounds */ + for (i = 0; i < num_threads; i++) { + unsigned value = *Index(iterations_per_thread, i, 0, 1); + if (value < lower_per_chunk || value > upper_per_chunk) { + fprintf(stderr, + "ERROR: Inefficient Collapse thread %d of %d assigned %i " + "iterations; must be between %d and %d\n", + i, num_threads, value, lower_per_chunk, upper_per_chunk); + return 0; + } + } +#endif +#ifdef VERBOSE + fprintf(stderr, "PASSED\r\n"); +#endif + return 1; +} + +int main() { + + execution_count = Alloc(MAX_BOUND, MAX_BOUND); + iterations_per_thread = Alloc(omp_get_max_threads(), 1); + + for (unsigned j = 0; j < MAX_BOUND; j++) { + if (!test(j)) + return 1; + } + Free(execution_count); + Free(iterations_per_thread); + return 0; +} diff --git a/openmp/runtime/test/worksharing/for/omp_for_collapse_LowerTriangularLessEqual.c b/openmp/runtime/test/worksharing/for/omp_for_collapse_LowerTriangularLessEqual.c new file mode 100644 index 00000000000000..154ee0f69daa56 --- /dev/null +++ b/openmp/runtime/test/worksharing/for/omp_for_collapse_LowerTriangularLessEqual.c @@ -0,0 +1,124 @@ +// RUN: %libomp-compile-and-run +#include +#include +#include +#include "omp.h" + +#ifndef MAX_BOUND +#define MAX_BOUND 64 +#endif +#ifndef _MSC_VER +#define NO_EFFICIENCY_CHECK +#endif + +/* To ensure Correctness, only valid iterations are executed and are executed + only once. Stores the number of times an iteration is executed. */ +unsigned *execution_count = NULL; +/* Stores the number of iterations executed by each thread. 
*/ +unsigned *iterations_per_thread = NULL; + +unsigned *Alloc(unsigned bound1, unsigned bound2) { + return (unsigned *)(malloc(bound1 * bound2 * sizeof(unsigned))); +} + +void ZeroOut(unsigned *p, unsigned bound1, unsigned bound2) { + memset(p, 0, bound1 * bound2 * sizeof(unsigned)); +} + +void Free(unsigned *p) { free((void *)p); } + +unsigned *Index(unsigned *p, unsigned i, unsigned j, unsigned bound2) { + return &p[i * bound2 + j]; +} + +int test(int upper_bound) { + + unsigned total_iterations = upper_bound * (upper_bound + 1) / 2; + unsigned num_threads = omp_get_max_threads(); + unsigned lower_per_chunk = total_iterations / num_threads; + unsigned upper_per_chunk = + lower_per_chunk + ((total_iterations % num_threads) ? 1 : 0); + int i, j; + + omp_set_num_threads(num_threads); + + ZeroOut(execution_count, upper_bound, upper_bound); + ZeroOut(iterations_per_thread, num_threads, 1); + +#ifdef VERBOSE + fprintf(stderr, + "INFO: Using %6d threads for %6d outer iterations with %6d [%6d:%6d] " + "chunks " + "loop type lower triangle <,<= - ", + num_threads, upper_bound, total_iterations, lower_per_chunk, + upper_per_chunk); +#endif + +#pragma omp parallel shared(iterations_per_thread, execution_count) + { /* begin of parallel */ + /* Lower triangular execution_count matrix */ +#pragma omp for schedule(static) collapse(2) + for (i = 0; i < upper_bound; i++) { + for (j = 0; j <= i; j++) { + (*Index(iterations_per_thread, omp_get_thread_num(), 0, 1))++; + (*Index(execution_count, i, j, upper_bound))++; + } + } /* end of for*/ + } /* end of parallel */ + + /* check the execution_count array */ + for (i = 0; i < upper_bound; i++) { + for (j = 0; j <= i; j++) { + unsigned value = *Index(execution_count, i, j, upper_bound); + /* iteration with j<=i are valid, but should have been executed only once + */ + if (value != 1) { + fprintf(stderr, "ERROR: valid iteration [%i,%i] executed %i times.\n", + i, j, value); + return 0; + } + } + for (j = i + 1; j < upper_bound; j++) { 
+ unsigned value = *Index(execution_count, i, j, upper_bound); + /* iteration with j>=i are invalid and should not have been executed + */ + if (value > 0) { + fprintf(stderr, "ERROR: invalid iteration [%i,%i] executed %i times.\n", + i, j, value); + return 0; + } + } + } + +#ifndef NO_EFFICIENCY_CHECK + /* Ensure the number of iterations executed by each thread is within bounds */ + for (i = 0; i < num_threads; i++) { + unsigned value = *Index(iterations_per_thread, i, 0, 1); + if (value < lower_per_chunk || value > upper_per_chunk) { + fprintf(stderr, + "ERROR: Inefficient Collapse thread %d of %d assigned %i " + "iterations; must be between %d and %d\n", + i, num_threads, value, lower_per_chunk, upper_per_chunk); + return 0; + } + } +#endif +#ifdef VERBOSE + fprintf(stderr, "PASSED\r\n"); +#endif + return 1; +} + +int main() { + + execution_count = Alloc(MAX_BOUND, MAX_BOUND); + iterations_per_thread = Alloc(omp_get_max_threads(), 1); + + for (unsigned j = 0; j < MAX_BOUND; j++) { + if (!test(j)) + return 1; + } + Free(execution_count); + Free(iterations_per_thread); + return 0; +} diff --git a/openmp/runtime/test/worksharing/for/omp_for_collapse_UpperTriangular.c b/openmp/runtime/test/worksharing/for/omp_for_collapse_UpperTriangular.c new file mode 100644 index 00000000000000..452410025be0c9 --- /dev/null +++ b/openmp/runtime/test/worksharing/for/omp_for_collapse_UpperTriangular.c @@ -0,0 +1,124 @@ +// RUN: %libomp-compile-and-run +#include +#include +#include +#include "omp.h" + +#ifndef MAX_BOUND +#define MAX_BOUND 64 +#endif +#ifndef _MSC_VER +#define NO_EFFICIENCY_CHECK +#endif + +/* To ensure Correctness, only valid iterations are executed and are executed + only once. Stores the number of times an iteration is executed. */ +unsigned *execution_count = NULL; +/* Stores the number of iterations executed by each thread. 
*/ +unsigned *iterations_per_thread = NULL; + +unsigned *Alloc(unsigned bound1, unsigned bound2) { + return (unsigned *)(malloc(bound1 * bound2 * sizeof(unsigned))); +} + +void ZeroOut(unsigned *p, unsigned bound1, unsigned bound2) { + memset(p, 0, bound1 * bound2 * sizeof(unsigned)); +} + +void Free(unsigned *p) { free((void *)p); } + +unsigned *Index(unsigned *p, unsigned i, unsigned j, unsigned bound2) { + return &p[i * bound2 + j]; +} + +int test(unsigned upper_bound) { + + unsigned total_iterations = upper_bound * (upper_bound + 1) / 2; + unsigned num_threads = omp_get_max_threads(); + unsigned lower_per_chunk = total_iterations / num_threads; + unsigned upper_per_chunk = + lower_per_chunk + ((total_iterations % num_threads) ? 1 : 0); + int i, j; + + omp_set_num_threads(num_threads); + + ZeroOut(execution_count, upper_bound, upper_bound); + ZeroOut(iterations_per_thread, num_threads, 1); + +#ifdef VERBOSE + fprintf(stderr, + "INFO: Using %6d threads for %6d outer iterations with %6d [%6d:%6d] " + "chunks " + "loop type upper triangle <,< - ", + num_threads, upper_bound, total_iterations, lower_per_chunk, + upper_per_chunk); +#endif + +#pragma omp parallel shared(iterations_per_thread, execution_count) + { /* begin of parallel */ + /* Lower triangular execution_count matrix */ +#pragma omp for schedule(static) collapse(2) + for (i = 0; i < upper_bound; i++) { + for (j = i; j < upper_bound; j++) { + (*Index(iterations_per_thread, omp_get_thread_num(), 0, 1))++; + (*Index(execution_count, i, j, upper_bound))++; + } + } /* end of for*/ + } /* end of parallel */ + + /* check the execution_count array */ + for (i = 0; i < upper_bound; i++) { + for (j = i; j < upper_bound; j++) { + unsigned value = *Index(execution_count, i, j, upper_bound); + /* iteration with j<=i are valid, but should have been executed only once + */ + if (value != 1) { + fprintf(stderr, "ERROR: valid iteration [%i,%i] executed %i times.\n", + i, j, value); + return 0; + } + } + for (j = 0; j < 
i; j++) { + unsigned value = *Index(execution_count, i, j, upper_bound); + /* iteration with j>=i are invalid and should not have been executed + */ + if (value > 0) { + fprintf(stderr, "ERROR: invalid iteration [%i,%i] executed %i times.\n", + i, j, value); + return 0; + } + } + } + +#ifndef NO_EFFICIENCY_CHECK + /* Ensure the number of iterations executed by each thread is within bounds */ + for (i = 0; i < num_threads; i++) { + unsigned value = *Index(iterations_per_thread, i, 0, 1); + if (value < lower_per_chunk || value > upper_per_chunk) { + fprintf(stderr, + "ERROR: Inefficient Collapse thread %d of %d assigned %i " + "iterations; must be between %d and %d\n", + i, num_threads, value, lower_per_chunk, upper_per_chunk); + return 0; + } + } +#endif +#ifdef VERBOSE + fprintf(stderr, "PASSED\r\n"); +#endif + return 1; +} + +int main() { + + execution_count = Alloc(MAX_BOUND, MAX_BOUND); + iterations_per_thread = Alloc(omp_get_max_threads(), 1); + + for (unsigned j = 0; j < MAX_BOUND; j++) { + if (!test(j)) + return 1; + } + Free(execution_count); + Free(iterations_per_thread); + return 0; +} diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 49a454379e1c7a..5c6cf761ebe7de 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -507,6 +507,7 @@ libc_support_library( ":__support_cpp_span", ":__support_cpp_string_view", ":__support_cpp_type_traits", + ":__support_uint", ], ) diff --git a/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel index a5c18fbb68b398..44692947af7c08 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel @@ -18,6 +18,7 @@ libc_support_library( "//libc:__support_cpp_string", "//libc:__support_cpp_string_view", "//libc:__support_osutil_io", + 
"//libc:__support_uint", "//libc:__support_uint128", ], ) diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel index 8e94a84f586f4c..19d4c7869799a0 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel @@ -13,7 +13,11 @@ licenses(["notice"]) libc_test( name = "math_extras_test", srcs = ["math_extras_test.cpp"], - deps = ["//libc:__support_math_extras"], + deps = [ + "//libc:__support_integer_literals", + "//libc:__support_math_extras", + "//libc:__support_uint128", + ], ) # This test is currently disabled because of an issue in diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index e7d7c926b3fe2d..47bd665f31f41e 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -542,6 +542,7 @@ mlir_c_api_cc_library( includes = ["include"], deps = [ ":LLVMDialect", + "//llvm:Support", ], ) @@ -10864,6 +10865,7 @@ cc_library( ":MemRefDialect", ":Parser", ":SCFDialect", + ":MeshShardingInterface", ":SideEffectInterfaces", ":SparseTensorDialect", ":Support", @@ -11017,10 +11019,13 @@ cc_library( ":MathDialect", ":MemRefDialect", ":MemRefTransforms", + ":MeshDialect", + ":MeshTransforms", ":Pass", ":SCFDialect", ":SCFTransforms", ":SCFUtils", + ":MeshShardingInterface", ":SparseTensorDialect", ":SubsetOpInterface", ":Support",