diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index a5fb3901428d9d..765372aa9e402f 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -1412,13 +1412,14 @@ class MCPlusBuilder {
     return false;
   }
 
-  /// Modify a direct call instruction \p Inst with an indirect call taking
-  /// a destination from a memory location pointed by \p TargetLocation symbol.
-  virtual bool convertCallToIndirectCall(MCInst &Inst,
-                                         const MCSymbol *TargetLocation,
-                                         MCContext *Ctx) {
+  /// Creates an indirect call to the function within the \p DirectCall PLT
+  /// stub. The function's memory location is pointed to by the
+  /// \p TargetLocation symbol.
+  virtual InstructionListType
+  createIndirectPltCall(const MCInst &DirectCall,
+                        const MCSymbol *TargetLocation, MCContext *Ctx) {
     llvm_unreachable("not implemented");
-    return false;
+    return {};
   }
 
   /// Morph an indirect call into a load where \p Reg holds the call target.
diff --git a/bolt/include/bolt/Rewrite/DWARFRewriter.h b/bolt/include/bolt/Rewrite/DWARFRewriter.h
index 3cc9d823c815b2..4559ff5ff51592 100644
--- a/bolt/include/bolt/Rewrite/DWARFRewriter.h
+++ b/bolt/include/bolt/Rewrite/DWARFRewriter.h
@@ -150,9 +150,6 @@ class DWARFRewriter {
   /// blocks) to be updated.
   void updateDebugAddressRanges();
 
-  /// Rewrite .gdb_index section if present.
-  void updateGdbIndexSection(CUOffsetMap &CUMap, uint32_t NumCUs);
-
   /// DWARFDie contains a pointer to a DIE and hence gets invalidated once the
   /// embedded DIE is destroyed. This wrapper class stores a DIE internally and
   /// could be cast to a DWARFDie that is valid even after the initial DIE is
@@ -194,14 +191,6 @@ class DWARFRewriter {
     DwoRangesBase[DWOId] = RangesBase;
   }
 
-  /// Adds an GDBIndexTUEntry if .gdb_index seciton exists.
-  void addGDBTypeUnitEntry(const GDBIndexTUEntry &&Entry);
-
-  /// Returns all entries needed for Types CU list
-  const GDBIndexTUEntryType &getGDBIndexTUEntryVector() const {
-    return GDBIndexTUEntryVector;
-  }
-
   using OverriddenSectionsMap = std::unordered_map<DWARFSectionKind, StringRef>;
 
   /// Output .dwo files.
   void writeDWOFiles(DWARFUnit &, const OverriddenSectionsMap &,
diff --git a/bolt/lib/Passes/PLTCall.cpp b/bolt/lib/Passes/PLTCall.cpp
index d0276f22e14ef8..2ed996fadbb99e 100644
--- a/bolt/lib/Passes/PLTCall.cpp
+++ b/bolt/lib/Passes/PLTCall.cpp
@@ -48,8 +48,8 @@ Error PLTCall::runOnFunctions(BinaryContext &BC) {
     return Error::success();
 
   uint64_t NumCallsOptimized = 0;
-  for (auto &It : BC.getBinaryFunctions()) {
-    BinaryFunction &Function = It.second;
+  for (auto &BFI : BC.getBinaryFunctions()) {
+    BinaryFunction &Function = BFI.second;
     if (!shouldOptimize(Function))
       continue;
@@ -61,18 +61,21 @@ Error PLTCall::runOnFunctions(BinaryContext &BC) {
       if (opts::PLT == OT_HOT && !BB.getKnownExecutionCount())
         continue;
 
-      for (MCInst &Instr : BB) {
-        if (!BC.MIB->isCall(Instr))
+      for (auto II = BB.begin(); II != BB.end(); II++) {
+        if (!BC.MIB->isCall(*II))
           continue;
-        const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr);
+        const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(*II);
         if (!CallSymbol)
           continue;
         const BinaryFunction *CalleeBF = BC.getFunctionForSymbol(CallSymbol);
         if (!CalleeBF || !CalleeBF->isPLTFunction())
           continue;
-        BC.MIB->convertCallToIndirectCall(Instr, CalleeBF->getPLTSymbol(),
-                                          BC.Ctx.get());
-        BC.MIB->addAnnotation(Instr, "PLTCall", true);
+        const InstructionListType NewCode = BC.MIB->createIndirectPltCall(
+            *II, CalleeBF->getPLTSymbol(), BC.Ctx.get());
+        II = BB.replaceInstruction(II, NewCode);
+        assert(!NewCode.empty() && "PLT Call replacement must be non-empty");
+        std::advance(II, NewCode.size() - 1);
+        BC.MIB->addAnnotation(*II, "PLTCall", true);
         ++NumCallsOptimized;
       }
     }
diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp
index e1b3762a316606..0e475031eae4f3 100644
--- a/bolt/lib/Rewrite/DWARFRewriter.cpp
+++ b/bolt/lib/Rewrite/DWARFRewriter.cpp
@@ -2060,177 +2060,6 @@ void DWARFRewriter::writeDWOFiles(
   TempOut->keep();
 }
 
-void DWARFRewriter::addGDBTypeUnitEntry(const GDBIndexTUEntry &&Entry) {
-  std::lock_guard<std::mutex> Lock(DWARFRewriterMutex);
-  if (!BC.getGdbIndexSection())
-    return;
-  GDBIndexTUEntryVector.emplace_back(Entry);
-}
-
-void DWARFRewriter::updateGdbIndexSection(CUOffsetMap &CUMap,
-                                          uint32_t NumCUs) {
-  if (!BC.getGdbIndexSection())
-    return;
-
-  // See https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html
-  // for .gdb_index section format.
-
-  StringRef GdbIndexContents = BC.getGdbIndexSection()->getContents();
-
-  const char *Data = GdbIndexContents.data();
-
-  // Parse the header.
-  const uint32_t Version = read32le(Data);
-  if (Version != 7 && Version != 8) {
-    errs() << "BOLT-ERROR: can only process .gdb_index versions 7 and 8\n";
-    exit(1);
-  }
-
-  // Some .gdb_index generators use file offsets while others use section
-  // offsets. Hence we can only rely on offsets relative to each other,
-  // and ignore their absolute values.
-  const uint32_t CUListOffset = read32le(Data + 4);
-  const uint32_t CUTypesOffset = read32le(Data + 8);
-  const uint32_t AddressTableOffset = read32le(Data + 12);
-  const uint32_t SymbolTableOffset = read32le(Data + 16);
-  const uint32_t ConstantPoolOffset = read32le(Data + 20);
-  Data += 24;
-
-  // Map CUs offsets to indices and verify existing index table.
-  std::map<uint32_t, uint32_t> OffsetToIndexMap;
-  const uint32_t CUListSize = CUTypesOffset - CUListOffset;
-  const uint32_t TUListSize = AddressTableOffset - CUTypesOffset;
-  const unsigned NUmCUsEncoded = CUListSize / 16;
-  unsigned MaxDWARFVersion = BC.DwCtx->getMaxVersion();
-  unsigned NumDWARF5TUs =
-      getGDBIndexTUEntryVector().size() - BC.DwCtx->getNumTypeUnits();
-  bool SkipTypeUnits = false;
-  // For DWARF5 Types are in .debug_info.
-  // LLD doesn't generate Types CU List, and in CU list offset
-  // only includes CUs.
-  // GDB 11+ includes only CUs in CU list and generates Types
-  // list.
-  // GDB 9 includes CUs and TUs in CU list and generates TYpes
-  // list. The NumCUs is CUs + TUs, so need to modify the check.
-  // For split-dwarf
-  // GDB-11, DWARF5: TU units from dwo are not included.
-  // GDB-11, DWARF4: TU units from dwo are included.
-  if (MaxDWARFVersion >= 5)
-    SkipTypeUnits = !TUListSize ? true
-                                : ((NUmCUsEncoded + NumDWARF5TUs) ==
-                                   BC.DwCtx->getNumCompileUnits());
-
-  if (!((CUListSize == NumCUs * 16) ||
-        (CUListSize == (NumCUs + NumDWARF5TUs) * 16))) {
-    errs() << "BOLT-ERROR: .gdb_index: CU count mismatch\n";
-    exit(1);
-  }
-  DenseSet<uint64_t> OriginalOffsets;
-  for (unsigned Index = 0, Units = BC.DwCtx->getNumCompileUnits();
-       Index < Units; ++Index) {
-    const DWARFUnit *CU = BC.DwCtx->getUnitAtIndex(Index);
-    if (SkipTypeUnits && CU->isTypeUnit())
-      continue;
-    const uint64_t Offset = read64le(Data);
-    Data += 16;
-    if (CU->getOffset() != Offset) {
-      errs() << "BOLT-ERROR: .gdb_index CU offset mismatch\n";
-      exit(1);
-    }
-
-    OriginalOffsets.insert(Offset);
-    OffsetToIndexMap[Offset] = Index;
-  }
-
-  // Ignore old address table.
-  const uint32_t OldAddressTableSize = SymbolTableOffset - AddressTableOffset;
-  // Move Data to the beginning of symbol table.
-  Data += SymbolTableOffset - CUTypesOffset;
-
-  // Calculate the size of the new address table.
-  uint32_t NewAddressTableSize = 0;
-  for (const auto &CURangesPair : ARangesSectionWriter->getCUAddressRanges()) {
-    const SmallVector<DebugAddressRange, 2> &Ranges = CURangesPair.second;
-    NewAddressTableSize += Ranges.size() * 20;
-  }
-
-  // Difference between old and new table (and section) sizes.
-  // Could be negative.
-  int32_t Delta = NewAddressTableSize - OldAddressTableSize;
-
-  size_t NewGdbIndexSize = GdbIndexContents.size() + Delta;
-
-  // Free'd by ExecutableFileMemoryManager.
-  auto *NewGdbIndexContents = new uint8_t[NewGdbIndexSize];
-  uint8_t *Buffer = NewGdbIndexContents;
-
-  write32le(Buffer, Version);
-  write32le(Buffer + 4, CUListOffset);
-  write32le(Buffer + 8, CUTypesOffset);
-  write32le(Buffer + 12, AddressTableOffset);
-  write32le(Buffer + 16, SymbolTableOffset + Delta);
-  write32le(Buffer + 20, ConstantPoolOffset + Delta);
-  Buffer += 24;
-
-  using MapEntry = std::pair<uint32_t, CUInfo>;
-  std::vector<MapEntry> CUVector(CUMap.begin(), CUMap.end());
-  // Need to sort since we write out all of TUs in .debug_info before CUs.
-  std::sort(CUVector.begin(), CUVector.end(),
-            [](const MapEntry &E1, const MapEntry &E2) -> bool {
-              return E1.second.Offset < E2.second.Offset;
-            });
-  // Writing out CU List
-  for (auto &CUInfo : CUVector) {
-    // Skipping TU for DWARF5 when they are not included in CU list.
-    if (!OriginalOffsets.count(CUInfo.first))
-      continue;
-    write64le(Buffer, CUInfo.second.Offset);
-    // Length encoded in CU doesn't contain first 4 bytes that encode length.
-    write64le(Buffer + 8, CUInfo.second.Length + 4);
-    Buffer += 16;
-  }
-
-  // Rewrite TU CU List, since abbrevs can be different.
-  // Entry example:
-  // 0: offset = 0x00000000, type_offset = 0x0000001e, type_signature =
-  // 0x418503b8111e9a7b Spec says " triplet, the first value is the CU offset,
-  // the second value is the type offset in the CU, and the third value is the
-  // type signature" Looking at what is being generated by gdb-add-index. The
-  // first entry is TU offset, second entry is offset from it, and third entry
-  // is the type signature.
-  if (TUListSize)
-    for (const GDBIndexTUEntry &Entry : getGDBIndexTUEntryVector()) {
-      write64le(Buffer, Entry.UnitOffset);
-      write64le(Buffer + 8, Entry.TypeDIERelativeOffset);
-      write64le(Buffer + 16, Entry.TypeHash);
-      Buffer += sizeof(GDBIndexTUEntry);
-    }
-
-  // Generate new address table.
-  for (const std::pair<const uint64_t, DebugAddressRangesVector> &CURangesPair :
-       ARangesSectionWriter->getCUAddressRanges()) {
-    const uint32_t CUIndex = OffsetToIndexMap[CURangesPair.first];
-    const DebugAddressRangesVector &Ranges = CURangesPair.second;
-    for (const DebugAddressRange &Range : Ranges) {
-      write64le(Buffer, Range.LowPC);
-      write64le(Buffer + 8, Range.HighPC);
-      write32le(Buffer + 16, CUIndex);
-      Buffer += 20;
-    }
-  }
-
-  const size_t TrailingSize =
-      GdbIndexContents.data() + GdbIndexContents.size() - Data;
-  assert(Buffer + TrailingSize == NewGdbIndexContents + NewGdbIndexSize &&
-         "size calculation error");
-
-  // Copy over the rest of the original data.
-  memcpy(Buffer, Data, TrailingSize);
-
-  // Register the new section.
-  BC.registerOrUpdateNoteSection(".gdb_index", NewGdbIndexContents,
-                                 NewGdbIndexSize);
-}
-
 std::unique_ptr<DebugBufferVector>
 DWARFRewriter::makeFinalLocListsSection(DWARFVersion Version) {
   auto LocBuffer = std::make_unique<DebugBufferVector>();
diff --git a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
index b2c8b2446f7e1e..6b3f5bce9f0f58 100644
--- a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
+++ b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
@@ -273,6 +273,8 @@ class LinuxKernelRewriter final : public MetadataRewriter {
   /// Handle alternative instruction info from .altinstructions.
   Error readAltInstructions();
+  Error tryReadAltInstructions(uint32_t AltInstFeatureSize,
+                               bool AltInstHasPadLen, bool ParseOnly);
   Error rewriteAltInstructions();
 
   /// Read .pci_fixup
@@ -1319,12 +1321,69 @@ Error LinuxKernelRewriter::rewriteBugTable() {
 ///   u8 padlen;          // present in older kernels
 /// } __packed;
 ///
-/// Note the structures is packed.
+/// Note that the structure is packed.
+///
+/// Since the size of the "feature" field could be either u16 or u32, and
+/// "padlen" presence is unknown, we attempt to parse the .altinstructions
+/// section using all possible combinations (four at this time). Since we
+/// validate the contents of the section and its size, the detection works
+/// quite well. Still, we leave the user the opportunity to specify these
+/// features on the command line and skip the guesswork.
 Error LinuxKernelRewriter::readAltInstructions() {
   AltInstrSection = BC.getUniqueSectionByName(".altinstructions");
   if (!AltInstrSection)
     return Error::success();
 
+  // Presence of "padlen" field.
+  std::vector<bool> PadLenVariants;
+  if (opts::AltInstHasPadLen.getNumOccurrences())
+    PadLenVariants.push_back(opts::AltInstHasPadLen);
+  else
+    PadLenVariants = {false, true};
+
+  // Size (in bytes) variants of "feature" field.
+  std::vector<uint32_t> FeatureSizeVariants;
+  if (opts::AltInstFeatureSize.getNumOccurrences())
+    FeatureSizeVariants.push_back(opts::AltInstFeatureSize);
+  else
+    FeatureSizeVariants = {2, 4};
+
+  for (bool AltInstHasPadLen : PadLenVariants) {
+    for (uint32_t AltInstFeatureSize : FeatureSizeVariants) {
+      LLVM_DEBUG({
+        dbgs() << "BOLT-DEBUG: trying AltInstHasPadLen = " << AltInstHasPadLen
+               << "; AltInstFeatureSize = " << AltInstFeatureSize << ";\n";
+      });
+      if (Error E = tryReadAltInstructions(AltInstFeatureSize, AltInstHasPadLen,
+                                           /*ParseOnly*/ true)) {
+        consumeError(std::move(E));
+        continue;
+      }
+
+      LLVM_DEBUG(dbgs() << "Matched .altinstructions format\n");
+
+      if (!opts::AltInstHasPadLen.getNumOccurrences())
+        BC.outs() << "BOLT-INFO: setting --" << opts::AltInstHasPadLen.ArgStr
+                  << '=' << AltInstHasPadLen << '\n';
+
+      if (!opts::AltInstFeatureSize.getNumOccurrences())
+        BC.outs() << "BOLT-INFO: setting --" << opts::AltInstFeatureSize.ArgStr
+                  << '=' << AltInstFeatureSize << '\n';
+
+      return tryReadAltInstructions(AltInstFeatureSize, AltInstHasPadLen,
+                                    /*ParseOnly*/ false);
+    }
+  }
+
+  // We couldn't match the format. Read again to properly propagate the error
+  // to the user.
+  return tryReadAltInstructions(opts::AltInstFeatureSize,
+                                opts::AltInstHasPadLen, /*ParseOnly*/ false);
+}
+
+Error LinuxKernelRewriter::tryReadAltInstructions(uint32_t AltInstFeatureSize,
+                                                  bool AltInstHasPadLen,
+                                                  bool ParseOnly) {
   const uint64_t Address = AltInstrSection->getAddress();
   DataExtractor DE = DataExtractor(AltInstrSection->getContents(),
                                    BC.AsmInfo->isLittleEndian(),
@@ -1336,12 +1395,12 @@ Error LinuxKernelRewriter::readAltInstructions() {
         Address + Cursor.tell() + (int32_t)DE.getU32(Cursor);
     const uint64_t AltInstAddress =
         Address + Cursor.tell() + (int32_t)DE.getU32(Cursor);
-    const uint64_t Feature = DE.getUnsigned(Cursor, opts::AltInstFeatureSize);
+    const uint64_t Feature = DE.getUnsigned(Cursor, AltInstFeatureSize);
     const uint8_t OrgSize = DE.getU8(Cursor);
     const uint8_t AltSize = DE.getU8(Cursor);
 
     // Older kernels may have the padlen field.
-    const uint8_t PadLen = opts::AltInstHasPadLen ? DE.getU8(Cursor) : 0;
+    const uint8_t PadLen = AltInstHasPadLen ? DE.getU8(Cursor) : 0;
 
     if (!Cursor)
       return createStringError(
@@ -1358,7 +1417,7 @@ Error LinuxKernelRewriter::readAltInstructions() {
                 << "\n\tFeature: 0x" << Twine::utohexstr(Feature)
                 << "\n\tOrgSize: " << (int)OrgSize
                 << "\n\tAltSize: " << (int)AltSize << '\n';
-      if (opts::AltInstHasPadLen)
+      if (AltInstHasPadLen)
         BC.outs() << "\tPadLen: " << (int)PadLen << '\n';
     }
 
@@ -1375,7 +1434,7 @@ Error LinuxKernelRewriter::readAltInstructions() {
 
     BinaryFunction *AltBF =
         BC.getBinaryFunctionContainingAddress(AltInstAddress);
-    if (AltBF && BC.shouldEmit(*AltBF)) {
+    if (!ParseOnly && AltBF && BC.shouldEmit(*AltBF)) {
      BC.errs()
          << "BOLT-WARNING: alternative instruction sequence found in function "
          << *AltBF << '\n';
@@ -1397,6 +1456,9 @@ Error LinuxKernelRewriter::readAltInstructions() {
           " referenced by .altinstructions entry %d",
           OrgInstAddress, EntryID);
 
+    if (ParseOnly)
+      continue;
+
     // There could be more than one alternative instruction sequences for the
     // same original instruction. Annotate each alternative separately.
     std::string AnnotationName = "AltInst";
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index a74eda8e4a566e..5220d305b838d5 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -1054,6 +1054,47 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     return true;
   }
 
+  InstructionListType createIndirectPltCall(const MCInst &DirectCall,
+                                            const MCSymbol *TargetLocation,
+                                            MCContext *Ctx) override {
+    const bool IsTailCall = isTailCall(DirectCall);
+    assert((DirectCall.getOpcode() == AArch64::BL ||
+            (DirectCall.getOpcode() == AArch64::B && IsTailCall)) &&
+           "64-bit direct (tail) call instruction expected");
+
+    InstructionListType Code;
+    // Code sequence for indirect plt call:
+    // adrp x16, <symbol>
+    // ldr  x17, [x16, #<offset>]
+    // blr  x17  ; or 'br' for tail calls
+
+    MCInst InstAdrp;
+    InstAdrp.setOpcode(AArch64::ADRP);
+    InstAdrp.addOperand(MCOperand::createReg(AArch64::X16));
+    InstAdrp.addOperand(MCOperand::createImm(0));
+    setOperandToSymbolRef(InstAdrp, /* OpNum */ 1, TargetLocation,
+                          /* Addend */ 0, Ctx, ELF::R_AARCH64_ADR_GOT_PAGE);
+    Code.emplace_back(InstAdrp);
+
+    MCInst InstLoad;
+    InstLoad.setOpcode(AArch64::LDRXui);
+    InstLoad.addOperand(MCOperand::createReg(AArch64::X17));
+    InstLoad.addOperand(MCOperand::createReg(AArch64::X16));
+    InstLoad.addOperand(MCOperand::createImm(0));
+    setOperandToSymbolRef(InstLoad, /* OpNum */ 2, TargetLocation,
+                          /* Addend */ 0, Ctx, ELF::R_AARCH64_LD64_GOT_LO12_NC);
+    Code.emplace_back(InstLoad);
+
+    MCInst InstCall;
+    InstCall.setOpcode(IsTailCall ? AArch64::BR : AArch64::BLR);
+    InstCall.addOperand(MCOperand::createReg(AArch64::X17));
+    if (IsTailCall)
+      setTailCall(InstCall);
+    Code.emplace_back(InstCall);
+
+    return Code;
+  }
+
   bool lowerTailCall(MCInst &Inst) override {
     removeAnnotation(Inst, MCPlus::MCAnnotation::kTailCall);
     if (getConditionalTailCall(Inst))
diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
index e350e701c7b7ba..515c9a94c58cd4 100644
--- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
+++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
@@ -1639,11 +1639,16 @@ class X86MCPlusBuilder : public MCPlusBuilder {
     return true;
   }
 
-  bool convertCallToIndirectCall(MCInst &Inst, const MCSymbol *TargetLocation,
-                                 MCContext *Ctx) override {
-    assert((Inst.getOpcode() == X86::CALL64pcrel32 ||
-            (Inst.getOpcode() == X86::JMP_4 && isTailCall(Inst))) &&
+  InstructionListType createIndirectPltCall(const MCInst &DirectCall,
+                                            const MCSymbol *TargetLocation,
+                                            MCContext *Ctx) override {
+    assert((DirectCall.getOpcode() == X86::CALL64pcrel32 ||
+            (DirectCall.getOpcode() == X86::JMP_4 && isTailCall(DirectCall))) &&
            "64-bit direct (tail) call instruction expected");
+
+    InstructionListType Code;
+    // Create a new indirect call by converting the previous direct call.
+    MCInst Inst = DirectCall;
     const auto NewOpcode = (Inst.getOpcode() == X86::CALL64pcrel32) ?
                                X86::CALL64m : X86::JMP32m;
     Inst.setOpcode(NewOpcode);
@@ -1664,7 +1669,8 @@ class X86MCPlusBuilder : public MCPlusBuilder {
 
     Inst.insert(Inst.begin(),
                 MCOperand::createReg(X86::RIP)); // BaseReg
 
-    return true;
+    Code.emplace_back(Inst);
+    return Code;
   }
 
   void convertIndirectCallToLoad(MCInst &Inst, MCPhysReg Reg) override {
diff --git a/bolt/test/AArch64/plt-call.test b/bolt/test/AArch64/plt-call.test
new file mode 100644
index 00000000000000..da307d4a6c01e6
--- /dev/null
+++ b/bolt/test/AArch64/plt-call.test
@@ -0,0 +1,15 @@
+// Verify that PLTCall optimization works.
+
+RUN: %clang %cflags %p/../Inputs/plt-tailcall.c \
+RUN:    -o %t -Wl,-q
+RUN: llvm-bolt %t -o %t.bolt --plt=all --print-plt --print-only=foo | FileCheck %s
+
+// Call to printf
+CHECK: adrp x16, printf@GOT
+CHECK: ldr x17, [x16, :lo12:printf@GOT]
+CHECK: blr x17 # PLTCall: 1
+
+// Call to puts that was tail-call optimized
+CHECK: adrp x16, puts@GOT
+CHECK: ldr x17, [x16, :lo12:puts@GOT]
+CHECK: br x17 # TAILCALL # PLTCall: 1
diff --git a/bolt/test/Inputs/plt-tailcall.c b/bolt/test/Inputs/plt-tailcall.c
new file mode 100644
index 00000000000000..13f6e29c607747
--- /dev/null
+++ b/bolt/test/Inputs/plt-tailcall.c
@@ -0,0 +1,8 @@
+#include "stub.h"
+
+int foo(char *c) {
+  printf("");
+  __attribute__((musttail)) return puts(c);
+}
+
+int main() { return foo("a"); }
diff --git a/bolt/test/X86/linux-alt-instruction.s b/bolt/test/X86/linux-alt-instruction.s
index 2cdf31519682a8..66cd33a711b89b 100644
--- a/bolt/test/X86/linux-alt-instruction.s
+++ b/bolt/test/X86/linux-alt-instruction.s
@@ -12,24 +12,30 @@
 ## Older kernels used to have padlen field in alt_instr. Check compatibility.
 # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --defsym PADLEN=1 \
-# RUN:   %s -o %t.o
-# RUN: %clang %cflags -nostdlib %t.o -o %t.exe \
+# RUN:   %s -o %t.padlen.o
+# RUN: %clang %cflags -nostdlib %t.padlen.o -o %t.padlen.exe \
 # RUN:   -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr,--no-pie
-# RUN: llvm-bolt %t.exe --print-normalized --alt-inst-has-padlen -o %t.out \
+# RUN: llvm-bolt %t.padlen.exe --print-normalized --alt-inst-has-padlen -o %t.padlen.out \
 # RUN:   | FileCheck %s
 
 ## Check with a larger size of "feature" field in alt_instr.
 # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
-# RUN:   --defsym FEATURE_SIZE_4=1 %s -o %t.o
-# RUN: %clang %cflags -nostdlib %t.o -o %t.exe \
+# RUN:   --defsym FEATURE_SIZE_4=1 %s -o %t.fs4.o
+# RUN: %clang %cflags -nostdlib %t.fs4.o -o %t.fs4.exe \
 # RUN:   -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr,--no-pie
-# RUN: llvm-bolt %t.exe --print-normalized --alt-inst-feature-size=4 -o %t.out \
+# RUN: llvm-bolt %t.fs4.exe --print-normalized --alt-inst-feature-size=4 -o %t.fs4.out \
 # RUN:   | FileCheck %s
 
 ## Check that out-of-bounds read is handled properly.
-# RUN: not llvm-bolt %t.exe --print-normalized --alt-inst-feature-size=2 -o %t.out
+# RUN: not llvm-bolt %t.fs4.exe --alt-inst-feature-size=2 -o %t.fs4.out
+
+## Check that BOLT automatically detects structure fields in .altinstructions.
+
+# RUN: llvm-bolt %t.exe --print-normalized -o %t.out | FileCheck %s
+# RUN: llvm-bolt %t.padlen.exe --print-normalized -o %t.padlen.out | FileCheck %s
+# RUN: llvm-bolt %t.fs4.exe --print-normalized -o %t.fs4.out | FileCheck %s
 
 # CHECK:      BOLT-INFO: Linux kernel binary detected
 # CHECK:      BOLT-INFO: parsed 2 alternative instruction entries
diff --git a/bolt/test/X86/plt-call.test b/bolt/test/X86/plt-call.test
new file mode 100644
index 00000000000000..e6ae86c179d279
--- /dev/null
+++ b/bolt/test/X86/plt-call.test
@@ -0,0 +1,11 @@
+// Verify that PLTCall optimization works.
+
+RUN: %clang %cflags %p/../Inputs/plt-tailcall.c \
+RUN:    -o %t -Wl,-q
+RUN: llvm-bolt %t -o %t.bolt --plt=all --print-plt --print-only=foo | FileCheck %s
+
+// Call to printf
+CHECK: callq *printf@GOT(%rip) # PLTCall: 1
+
+// Call to puts that was tail-call optimized
+CHECK: jmpl *puts@GOT(%rip) # TAILCALL # PLTCall: 1
diff --git a/clang-tools-extra/clangd/TidyProvider.cpp b/clang-tools-extra/clangd/TidyProvider.cpp
index b658a80559937c..a4121df30d3dfa 100644
--- a/clang-tools-extra/clangd/TidyProvider.cpp
+++ b/clang-tools-extra/clangd/TidyProvider.cpp
@@ -221,13 +221,7 @@ TidyProvider disableUnusableChecks(llvm::ArrayRef<std::string> ExtraBadChecks) {
       "-hicpp-invalid-access-moved",
       // Check uses dataflow analysis, which might hang/crash unexpectedly on
       // incomplete code.
-      "-bugprone-unchecked-optional-access",
-
-      // ----- Performance problems -----
-
-      // This check runs expensive analysis for each variable.
-      // It has been observed to increase reparse time by 10x.
-      "-misc-const-correctness");
+      "-bugprone-unchecked-optional-access");
 
   size_t Size = BadChecks.size();
   for (const std::string &Str : ExtraBadChecks) {
diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index 0385ca4b3a0639..820e4cc44a7bcd 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -1494,6 +1494,9 @@ bool ByteCodeExprGen<Emitter>::VisitMemberExpr(const MemberExpr *E) {
       return false;
   }
 
+  if (!isa<FieldDecl>(Member))
+    return this->discard(Base) && this->visitDeclRef(Member, E);
+
   if (Initializing) {
     if (!this->delegate(Base))
       return false;
@@ -1503,19 +1506,16 @@ bool ByteCodeExprGen<Emitter>::VisitMemberExpr(const MemberExpr *E) {
   }
 
   // Base above gives us a pointer on the stack.
-  if (const auto *FD = dyn_cast<FieldDecl>(Member)) {
-    const RecordDecl *RD = FD->getParent();
-    const Record *R = getRecord(RD);
-    if (!R)
-      return false;
-    const Record::Field *F = R->getField(FD);
-    // Leave a pointer to the field on the stack.
-    if (F->Decl->getType()->isReferenceType())
-      return this->emitGetFieldPop(PT_Ptr, F->Offset, E) && maybeLoadValue();
-    return this->emitGetPtrFieldPop(F->Offset, E) && maybeLoadValue();
-  }
-
-  return false;
+  const auto *FD = cast<FieldDecl>(Member);
+  const RecordDecl *RD = FD->getParent();
+  const Record *R = getRecord(RD);
+  if (!R)
+    return false;
+  const Record::Field *F = R->getField(FD);
+  // Leave a pointer to the field on the stack.
+  if (F->Decl->getType()->isReferenceType())
+    return this->emitGetFieldPop(PT_Ptr, F->Offset, E) && maybeLoadValue();
+  return this->emitGetPtrFieldPop(F->Offset, E) && maybeLoadValue();
 }
 
 template <class Emitter>
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 11e2d549d8a450..99e12da0081afc 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -5746,78 +5746,6 @@ void CGDebugInfo::EmitExternalVariable(llvm::GlobalVariable *Var,
   Var->addDebugInfo(GVE);
 }
 
-void CGDebugInfo::EmitPseudoVariable(CGBuilderTy &Builder,
-                                     llvm::Instruction *Value, QualType Ty) {
-  // Only when -g2 or above is specified, debug info for variables will be
-  // generated.
-  if (CGM.getCodeGenOpts().getDebugInfo() <=
-      llvm::codegenoptions::DebugLineTablesOnly)
-    return;
-
-  llvm::DebugLoc SaveDebugLoc = Builder.getCurrentDebugLocation();
-  if (!SaveDebugLoc.get())
-    return;
-
-  llvm::DIFile *Unit = SaveDebugLoc->getFile();
-  llvm::DIType *Type = getOrCreateType(Ty, Unit);
-
-  // Check if Value is already a declared variable and has debug info, in this
-  // case we have nothing to do. Clang emits declared variable as alloca, and
-  // it is loaded upon use, so we identify such pattern here.
-  if (llvm::LoadInst *Load = dyn_cast<llvm::LoadInst>(Value)) {
-    llvm::Value *Var = Load->getPointerOperand();
-    // There can be implicit type cast applied on a variable if it is an opaque
-    // ptr, in this case its debug info may not match the actual type of object
-    // being used as in the next instruction, so we will need to emit a pseudo
-    // variable for type-casted value.
-    auto DeclareTypeMatches = [&](auto *DbgDeclare) {
-      return DbgDeclare->getVariable()->getType() == Type;
-    };
-    if (any_of(llvm::findDbgDeclares(Var), DeclareTypeMatches) ||
-        any_of(llvm::findDVRDeclares(Var), DeclareTypeMatches))
-      return;
-  }
-
-  // Find the correct location to insert a sequence of instructions to
-  // materialize Value on the stack.
-  auto SaveInsertionPoint = Builder.saveIP();
-  if (llvm::InvokeInst *Invoke = dyn_cast<llvm::InvokeInst>(Value))
-    Builder.SetInsertPoint(Invoke->getNormalDest()->begin());
-  else if (llvm::Instruction *Next = Value->getIterator()->getNextNode())
-    Builder.SetInsertPoint(Next);
-  else
-    Builder.SetInsertPoint(Value->getParent());
-  llvm::DebugLoc DL = Value->getDebugLoc();
-  if (DL.get())
-    Builder.SetCurrentDebugLocation(DL);
-  else if (!Builder.getCurrentDebugLocation().get())
-    Builder.SetCurrentDebugLocation(SaveDebugLoc);
-
-  llvm::AllocaInst *PseudoVar = Builder.CreateAlloca(Value->getType());
-  Address PseudoVarAddr(PseudoVar, Value->getType(),
-                        CharUnits::fromQuantity(PseudoVar->getAlign()));
-  llvm::LoadInst *Load = Builder.CreateLoad(PseudoVarAddr);
-  Value->replaceAllUsesWith(Load);
-  Builder.SetInsertPoint(Load);
-  Builder.CreateStore(Value, PseudoVarAddr);
-
-  // Emit debug info for materialized Value.
-  unsigned Line = Builder.getCurrentDebugLocation().getLine();
-  unsigned Column = Builder.getCurrentDebugLocation().getCol();
-  llvm::DILocalVariable *D = DBuilder.createAutoVariable(
-      LexicalBlockStack.back(), "", nullptr, 0, Type, false,
-      llvm::DINode::FlagArtificial);
-  llvm::DILocation *DIL =
-      llvm::DILocation::get(CGM.getLLVMContext(), Line, Column,
-                            LexicalBlockStack.back(), CurInlinedAt);
-  SmallVector<uint64_t, 1> Expr;
-  DBuilder.insertDeclare(PseudoVar, D, DBuilder.createExpression(Expr), DIL,
-                         Load);
-
-  Builder.restoreIP(SaveInsertionPoint);
-  Builder.SetCurrentDebugLocation(SaveDebugLoc);
-}
-
 void CGDebugInfo::EmitGlobalAlias(const llvm::GlobalValue *GV,
                                   const GlobalDecl GD) {
diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h
index da466837aa3c34..8fe738be215687 100644
--- a/clang/lib/CodeGen/CGDebugInfo.h
+++ b/clang/lib/CodeGen/CGDebugInfo.h
@@ -530,12 +530,6 @@ class CGDebugInfo {
   /// Emit information about an external variable.
   void EmitExternalVariable(llvm::GlobalVariable *GV, const VarDecl *Decl);
 
-  /// Emit a pseudo variable and debug info for an intermediate value if it does
-  /// not correspond to a variable in the source code, so that a profiler can
-  /// track more accurate usage of certain instructions of interest.
-  void EmitPseudoVariable(CGBuilderTy &Builder, llvm::Instruction *Value,
-                          QualType Ty);
-
   /// Emit information about global variable alias.
   void EmitGlobalAlias(const llvm::GlobalValue *GV, const GlobalDecl Decl);
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 58f0a3113b4f81..1b144c178ce960 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -1937,26 +1937,7 @@ Value *ScalarExprEmitter::VisitMemberExpr(MemberExpr *E) {
     }
   }
 
-  llvm::Value *Result = EmitLoadOfLValue(E);
-
-  // If -fdebug-info-for-profiling is specified, emit a pseudo variable and its
-  // debug info for the pointer, even if there is no variable associated with
-  // the pointer's expression.
-  if (CGF.CGM.getCodeGenOpts().DebugInfoForProfiling && CGF.getDebugInfo()) {
-    if (llvm::LoadInst *Load = dyn_cast<llvm::LoadInst>(Result)) {
-      if (llvm::GetElementPtrInst *GEP =
-              dyn_cast<llvm::GetElementPtrInst>(Load->getPointerOperand())) {
-        if (llvm::Instruction *Pointer =
-                dyn_cast<llvm::Instruction>(GEP->getPointerOperand())) {
-          QualType Ty = E->getBase()->getType();
-          if (!E->isArrow())
-            Ty = CGF.getContext().getPointerType(Ty);
-          CGF.getDebugInfo()->EmitPseudoVariable(Builder, Pointer, Ty);
-        }
-      }
-    }
-  }
-  return Result;
+  return EmitLoadOfLValue(E);
 }
 
 Value *ScalarExprEmitter::VisitArraySubscriptExpr(ArraySubscriptExpr *E) {
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 4b9b735f1cfb43..95a6fe66babae9 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -1493,7 +1493,7 @@ void Sema::ActOnExitFunctionContext() {
 ///
 /// This routine determines whether overloading is possible, not
 /// whether a new declaration actually overloads a previous one.
-/// It will return true in C++ (where overloads are alway permitted)
+/// It will return true in C++ (where overloads are always permitted)
 /// or, as a C extension, when either the new declaration or a
 /// previous one is declared with the 'overloadable' attribute.
 static bool AllowOverloadingOfFunction(const LookupResult &Previous,
@@ -4147,7 +4147,7 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, NamedDecl *&OldD, Scope *S,
 
   // If we are merging two functions where only one of them has a prototype,
   // we may have enough information to decide to issue a diagnostic that the
-  // function without a protoype will change behavior in C23. This handles
+  // function without a prototype will change behavior in C23. This handles
   // cases like:
   //   void i(); void i(int j);
   //   void i(int j); void i();
@@ -10553,7 +10553,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
   if (getLangOpts().CUDA && !isFunctionTemplateSpecialization)
     CUDA().maybeAddHostDeviceAttrs(NewFD, Previous);
 
-  // Handle explict specializations of function templates
+  // Handle explicit specializations of function templates
   // and friend function declarations with an explicit
   // template argument list.
   if (isFunctionTemplateSpecialization) {
@@ -12601,7 +12601,7 @@ void Sema::CheckMSVCRTEntryPoint(FunctionDecl *FD) {
     if (FD->getName() != "DllMain")
       FD->setHasImplicitReturnZero(true);
 
-  // Explicity specified calling conventions are applied to MSVC entry points
+  // Explicitly specified calling conventions are applied to MSVC entry points
   if (!hasExplicitCallingConv(T)) {
     if (isDefaultStdCall(FD, *this)) {
       if (FT->getCallConv() != CC_X86StdCall) {
@@ -13674,12 +13674,12 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) {
           CreateRecoveryExpr(Init->getBeginLoc(), Init->getEndLoc(), Args);
       if (RecoveryExpr.get())
         VDecl->setInit(RecoveryExpr.get());
-      // In general, for error recovery purposes, the initalizer doesn't play
+      // In general, for error recovery purposes, the initializer doesn't play
       // part in the valid bit of the declaration. There are a few exceptions:
       //  1) if the var decl has a deduced auto type, and the type cannot be
       //     deduced by an invalid initializer;
-      //  2) if the var decl is decompsition decl with a non-deduced type, and
-      //     the initialization fails (e.g. `int [a] = {1, 2};`);
+      //  2) if the var decl is a decomposition decl with a non-deduced type,
+      //     and the initialization fails (e.g. `int [a] = {1, 2};`);
       // Case 1) was already handled elsewhere.
       if (isa<DecompositionDecl>(VDecl)) // Case 2)
         VDecl->setInvalidDecl();
@@ -13897,9 +13897,9 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) {
     }
   } else if (VDecl->isFileVarDecl()) {
     // In C, extern is typically used to avoid tentative definitions when
-    // declaring variables in headers, but adding an intializer makes it a
+    // declaring variables in headers, but adding an initializer makes it a
     // definition. This is somewhat confusing, so GCC and Clang both warn on it.
-    // In C++, extern is often used to give implictly static const variables
+    // In C++, extern is often used to give implicitly static const variables
     // external linkage, so don't warn in that case. If selectany is present,
     // this might be header code intended for C and C++ inclusion, so apply the
    // C++ rules.
@@ -14093,7 +14093,7 @@ void Sema::ActOnUninitializedDecl(Decl *RealDecl) {
         return;
       }
     }
-    // The declaration is unitialized, no need for further checks.
+    // The declaration is uninitialized, no need for further checks.
     return;
  }
 
@@ -16324,7 +16324,7 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body,
       FSI->ObjCWarnForNoDesignatedInitChain = false;
     }
     if (FSI->ObjCWarnForNoInitDelegation) {
-      // Don't issue this warning for unavaialable inits.
+      // Don't issue this warning for unavailable inits.
       if (!MD->isUnavailable())
         Diag(MD->getLocation(),
              diag::warn_objc_secondary_init_missing_init_call);
@@ -17876,7 +17876,7 @@ Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK, SourceLocation KWLoc,
             SkipBody->Previous = Def;
           makeMergedDefinitionVisible(Hidden);
           // Carry on and handle it like a normal definition. We'll
-          // skip starting the definitiion later.
+          // skip starting the definition later.
         }
       } else if (!IsExplicitSpecializationAfterInstantiation) {
         // A redeclaration in function prototype scope in C isn't
@@ -20475,7 +20475,7 @@ Sema::FunctionEmissionStatus Sema::getEmissionStatus(const FunctionDecl *FD,
   } else if (LangOpts.OpenMP > 45) {
     // In OpenMP host compilation prior to 5.0 everything was an emitted host
     // function. In 5.0, no_host was introduced which might cause a function to
-    // be ommitted.
+    // be omitted.
     std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
         OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl());
     if (DevTy)
diff --git a/clang/test/CodeGenCXX/debug-info-ptr-to-ptr.cpp b/clang/test/CodeGenCXX/debug-info-ptr-to-ptr.cpp
deleted file mode 100644
index baf791487771c0..00000000000000
--- a/clang/test/CodeGenCXX/debug-info-ptr-to-ptr.cpp
+++ /dev/null
@@ -1,120 +0,0 @@
-// Test debug info for intermediate value of a chained pointer deferencing
-// expression when the flag -fdebug-info-for-pointer-type is enabled.
-// RUN: %clang_cc1 -emit-llvm -triple x86_64-linux-gnu %s -fdebug-info-for-profiling -debug-info-kind=constructor -o - | FileCheck %s
-
-class A {
-public:
-  int i;
-  char c;
-  void *p;
-  int arr[3];
-};
-
-class B {
-public:
-  A* a;
-};
-
-class C {
-public:
-  B* b;
-  A* a;
-  A arr[10];
-};
-
-// CHECK-LABEL: define dso_local noundef i32 @{{.*}}func1{{.*}}(
-// CHECK: [[A_ADDR:%.*]] = getelementptr inbounds %class.B, ptr {{%.*}}, i32 0, i32 0, !dbg [[DBG1:![0-9]+]]
-// CHECK-NEXT: [[A:%.*]] = load ptr, ptr [[A_ADDR]], align {{.*}}, !dbg [[DBG1]]
-// CHECK-NEXT: [[PSEUDO1:%.*]] = alloca ptr, align {{.*}}, !dbg [[DBG1]]
-// CHECK-NEXT: store ptr [[A]], ptr [[PSEUDO1]], align {{.*}}, !dbg [[DBG1]]
-// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[PSEUDO1]], metadata [[META1:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1]]
-// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PSEUDO1]], align {{.*}}, !dbg [[DBG1]]
-// CHECK-NEXT: {{%.*}} = getelementptr inbounds %class.A, ptr [[TMP1]], i32 0, i32 0,
-int func1(B *b) {
-  return b->a->i;
-}
-
-// Should generate a pseudo variable when pointer is type-casted.
-// CHECK-LABEL: define dso_local noundef ptr @{{.*}}func2{{.*}}(
-// CHECK: call void @llvm.dbg.declare(metadata ptr [[B_ADDR:%.*]], metadata [[META2:![0-9]+]], metadata !DIExpression())
-// CHECK-NEXT: [[B:%.*]] = load ptr, ptr [[B_ADDR]],
-// CHECK-NEXT: [[PSEUDO1:%.*]] = alloca ptr,
-// CHECK-NEXT: store ptr [[B]], ptr [[PSEUDO1]],
-// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[PSEUDO1]], metadata [[META3:![0-9]+]], metadata !DIExpression())
-// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PSEUDO1]],
-// CHECK-NEXT: {{%.*}} = getelementptr inbounds %class.B, ptr [[TMP1]], i32 0,
-A* func2(void *b) {
-  return ((B*)b)->a;
-}
-
-// Should not generate pseudo variable in this case.
-// CHECK-LABEL: define dso_local noundef i32 @{{.*}}func3{{.*}}(
-// CHECK: call void @llvm.dbg.declare(metadata ptr [[B_ADDR:%.*]], metadata [[META4:![0-9]+]], metadata !DIExpression())
-// CHECK: call void @llvm.dbg.declare(metadata ptr [[LOCAL1:%.*]], metadata [[META5:![0-9]+]], metadata !DIExpression())
-// CHECK-NOT: call void @llvm.dbg.declare(metadata ptr
-int func3(B *b) {
-  A *local1 = b->a;
-  return local1->i;
-}
-
-// CHECK-LABEL: define dso_local noundef signext i8 @{{.*}}func4{{.*}}(
-// CHECK: [[A_ADDR:%.*]] = getelementptr inbounds %class.C, ptr {{%.*}}, i32 0, i32 1
-// CHECK-NEXT: [[A:%.*]] = load ptr, ptr [[A_ADDR]],
-// CHECK-NEXT: [[PSEUDO1:%.*]] = alloca ptr,
-// CHECK-NEXT: store ptr [[A]], ptr [[PSEUDO1]],
-// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[PSEUDO1]], metadata [[META6:![0-9]+]], metadata !DIExpression())
-// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PSEUDO1]],
-// CHECK-NEXT: {{%.*}} = getelementptr inbounds %class.A, ptr [[TMP1]], i32 0, i32 0,
-// CHECK: [[CALL:%.*]] = call noundef ptr @{{.*}}foo{{.*}}(
-// CHECK-NEXT: [[PSEUDO2:%.*]] = alloca ptr,
-// CHECK-NEXT: store ptr [[CALL]], ptr [[PSEUDO2]]
-// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[PSEUDO2]], metadata [[META6]], metadata !DIExpression())
-// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PSEUDO2]]
-// CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds %class.A, ptr [[TMP2]], i32 0, i32 1
-char func4(C *c) {
-  extern A* foo(int x);
-  return foo(c->a->i)->c;
-}
-
-// CHECK-LABEL: define dso_local noundef signext i8 @{{.*}}func5{{.*}}(
-// CHECK: call void @llvm.dbg.declare(metadata ptr {{%.*}}, metadata [[META7:![0-9]+]], metadata !DIExpression())
-// CHECK: call void @llvm.dbg.declare(metadata ptr {{%.*}}, metadata [[META8:![0-9]+]], metadata !DIExpression())
-// CHECK: [[A_ADDR:%.*]] = getelementptr inbounds %class.A, ptr {{%.*}}, i64 {{%.*}},
-// CHECK-NEXT: [[PSEUDO1:%.*]] = alloca ptr,
-// CHECK-NEXT: store ptr [[A_ADDR]], ptr [[PSEUDO1]],
-// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[PSEUDO1]], metadata [[META9:![0-9]+]], metadata !DIExpression())
-// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PSEUDO1]],
-// CHECK-NEXT: {{%.*}} = getelementptr inbounds %class.A, ptr [[TMP1]], i32 0, i32 1,
-char func5(void *arr, int n) {
-  return ((A*)arr)[n].c;
-}
-
-// CHECK-LABEL: define dso_local noundef i32 @{{.*}}func6{{.*}}(
-// CHECK: call void @llvm.dbg.declare(metadata ptr {{%.*}}, metadata [[META10:![0-9]+]], metadata !DIExpression())
-// CHECK: call void @llvm.dbg.declare(metadata ptr {{%.*}}, metadata [[META11:![0-9]+]], metadata !DIExpression())
-int func6(B &b) {
-  return reinterpret_cast<A&>(b).i;
-}
-
-// CHECK-DAG: [[META_A:![0-9]+]] = distinct !DICompositeType(tag: DW_TAG_class_type, name: "A",
-// CHECK-DAG: [[META_AP:![0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META_A]],
-// CHECK-DAG: [[META_B:![0-9]+]] = distinct !DICompositeType(tag: DW_TAG_class_type, name: "B",
-// CHECK-DAG: [[META_BP:![0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META_B]],
-// CHECK-DAG: [[META_C:![0-9]+]] = distinct !DICompositeType(tag: DW_TAG_class_type, name: "C",
-// CHECK-DAG: [[META_CP:![0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META_C]],
-// CHECK-DAG: [[META_VP:![0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null,
-// CHECK-DAG: [[META_I32:![0-9]+]] = !DIBasicType(name: "int", size: 32,
-// CHECK-DAG: [[META_BR:![0-9]+]] = !DIDerivedType(tag: DW_TAG_reference_type, baseType: [[META_B]],
-
-// CHECK-DAG: [[DBG1]] = !DILocation(line: 34, column: 13,
-// CHECK-DAG: [[META1]] = !DILocalVariable(scope: {{.*}}, type: [[META_AP]], flags: DIFlagArtificial)
-// CHECK-DAG: [[META2]] = !DILocalVariable(name: "b", arg: 1, scope: {{.*}}, file: {{.*}}, line: 46, type: [[META_VP]])
-// CHECK-DAG: [[META3]] = !DILocalVariable(scope: {{.*}}, type: [[META_BP]], flags: DIFlagArtificial)
-// CHECK-DAG: [[META4]] = !DILocalVariable(name: "b", arg: 1, scope: {{.*}}, file: {{.*}}, line: 55, type: [[META_BP]])
-// CHECK-DAG: [[META5]] = !DILocalVariable(name: "local1", scope: {{.*}}, file: {{.*}}, line: 56, type: [[META_AP]])
-// CHECK-DAG: [[META6]] = !DILocalVariable(scope: {{.*}}, type: [[META_AP]], flags: DIFlagArtificial)
-// CHECK-DAG: [[META7]] = !DILocalVariable(name: "arr", arg: 1, scope: {{.*}}, file: {{.*}}, line: 88, type: [[META_VP]])
-// CHECK-DAG: [[META8]] = !DILocalVariable(name: "n", arg: 2, scope: {{.*}}, file: {{.*}}, line: 88, type: [[META_I32]])
-// CHECK-DAG: [[META9]] = !DILocalVariable(scope: {{.*}}, type: [[META_AP]], flags: DIFlagArtificial)
-// CHECK-DAG: [[META10]] = !DILocalVariable(name: "b", arg: 1, scope: {{.*}}, file: {{.*}}, line: 95, type: [[META_BR]])
-// CHECK-DAG: [[META11]] = !DILocalVariable(scope: {{.*}}, type: [[META_AP]], flags: DIFlagArtificial)
diff --git a/clang/test/CodeGenHLSL/convergence/for.hlsl b/clang/test/CodeGenHLSL/convergence/for.hlsl
index 180fae74ba7514..95f9a196bdb676 100644
--- a/clang/test/CodeGenHLSL/convergence/for.hlsl
+++ b/clang/test/CodeGenHLSL/convergence/for.hlsl
@@ -92,7 +92,7 @@ void test6() {
 // CHECK: [[C1:%[a-zA-Z0-9]+]] = call spir_func noundef i1 @_Z4condv() [[A3]] [ "convergencectrl"(token [[T1]]) ]
 // CHECK: br i1 [[C1]], label %if.then, label %if.end
 // CHECK: if.then:
-// CHECK call spir_func void @_Z3foov() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK: call spir_func void @_Z3foov() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
 // CHECK: br label %for.end
 // CHECK: if.end:
 // CHECK: br label %for.inc
diff --git a/clang/test/SemaCXX/ms-const-member-expr.cpp b/clang/test/SemaCXX/ms-const-member-expr.cpp
index 72cfe76fbe43a2..8312f84b550f00 100644
--- a/clang/test/SemaCXX/ms-const-member-expr.cpp
+++ b/clang/test/SemaCXX/ms-const-member-expr.cpp
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 %s -std=c++11 -fms-compatibility -fsyntax-only -verify
+// RUN: %clang_cc1 %s -std=c++11 -fms-compatibility -fsyntax-only -verify -fexperimental-new-constant-interpreter
 
 struct S {
   enum { E = 1 };
diff --git a/clang/test/SemaHLSL/standard_conversion_sequences.hlsl b/clang/test/SemaHLSL/standard_conversion_sequences.hlsl
index a0d398105f15d6..256981d2c1e2e0 100644
--- a/clang/test/SemaHLSL/standard_conversion_sequences.hlsl
+++ b/clang/test/SemaHLSL/standard_conversion_sequences.hlsl
@@ -4,9 +4,8 @@
 void test() {
 
   // CHECK: VarDecl {{.*}} used f3 'vector<float, 3>':'float __attribute__((ext_vector_type(3)))' cinit
-  // CHECK-NEXt: ImplicitCastExpr {{.*}} 'vector<float, 3>':'float __attribute__((ext_vector_type(3)))'
-  // CHECK-NEXt: ImplicitCastExpr {{.*}} 'float'
-  // CHECK-NEXt: FloatingLiteral {{.*}} 'double' 1.000000e+00
+  // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector<float, 3>':'float __attribute__((ext_vector_type(3)))'
+  // CHECK-NEXT: FloatingLiteral {{.*}} 'float' 1.000000e+00
   vector<float,3> f3 = 1.0; // No warning for splatting to a vector from a literal.
diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index bed2ccb8b992a2..8a583bacb4a934 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -1392,7 +1392,7 @@ template <typename Config> class SizeClassAllocator64 {
         continue;
       }
 
-      const uptr PushedBytesDelta = BG->BytesInBGAtLastCheckpoint - BytesInBG;
+      const uptr PushedBytesDelta = BytesInBG - BG->BytesInBGAtLastCheckpoint;
 
       // Given the randomness property, we try to release the pages only if the
      // bytes used by free blocks exceed certain proportion of group size. Note
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index 52f2034b8707a3..ec1fb411ff0e25 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -204,6 +204,8 @@ struct IntrinsicLibrary {
                                           llvm::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genCAssociatedCPtr(mlir::Type,
                                         llvm::ArrayRef<fir::ExtendedValue>);
+  mlir::Value genErfcScaled(mlir::Type resultType,
+                            llvm::ArrayRef<mlir::Value> args);
   void genCFPointer(llvm::ArrayRef<fir::ExtendedValue>);
   void genCFProcPointer(llvm::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genCFunLoc(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Numeric.h b/flang/include/flang/Optimizer/Builder/Runtime/Numeric.h
index 558358257b5134..6857650ce52b74 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Numeric.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Numeric.h
@@ -18,6 +18,10 @@ class FirOpBuilder;
 
 namespace fir::runtime {
 
+/// Generate call to ErfcScaled intrinsic runtime routine.
+mlir::Value genErfcScaled(fir::FirOpBuilder &builder, mlir::Location loc,
+                          mlir::Value x);
+
 /// Generate call to Exponent intrinsic runtime routine.
 mlir::Value genExponent(fir::FirOpBuilder &builder, mlir::Location loc,
                         mlir::Type resultType, mlir::Value x);
diff --git a/flang/include/flang/Runtime/numeric.h b/flang/include/flang/Runtime/numeric.h
index 7d3f91360c8cfb..e051e864316630 100644
--- a/flang/include/flang/Runtime/numeric.h
+++ b/flang/include/flang/Runtime/numeric.h
@@ -73,6 +73,20 @@ CppTypeFor<TypeCategory::Integer, 16> RTDECL(Ceiling16_16)(
 #endif
 #endif
 
+// ERFC_SCALED
+CppTypeFor<TypeCategory::Real, 4> RTDECL(ErfcScaled4)(
+    CppTypeFor<TypeCategory::Real, 4>);
+CppTypeFor<TypeCategory::Real, 8> RTDECL(ErfcScaled8)(
+    CppTypeFor<TypeCategory::Real, 8>);
+#if LDBL_MANT_DIG == 64
+CppTypeFor<TypeCategory::Real, 10> RTDECL(ErfcScaled10)(
+    CppTypeFor<TypeCategory::Real, 10>);
+#endif
+#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
+CppTypeFor<TypeCategory::Real, 16> RTDECL(ErfcScaled16)(
+    CppTypeFor<TypeCategory::Real, 16>);
+#endif
+
 // EXPONENT is defined to return default INTEGER; support INTEGER(4 & 8)
 CppTypeFor<TypeCategory::Integer, 4> RTDECL(Exponent4_4)(
     CppTypeFor<TypeCategory::Real, 4>);
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 4cdf1f2d98caa4..c438ae1250e450 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -224,6 +224,7 @@ static constexpr IntrinsicHandler handlers[]{
      {{{"boundary", asBox, handleDynamicOptional},
        {"dim", asValue}}},
      /*isElemental=*/false},
+    {"erfc_scaled", &I::genErfcScaled},
     {"etime",
      &I::genEtime,
      {{{"values", asBox}, {"time", asBox}}},
@@ -5777,15 +5778,19 @@ IntrinsicLibrary::genReduce(mlir::Type resultType,
     return builder.create<fir::LoadOp>(loc, result);
   }
   if (fir::isa_char(eleTy)) {
-    // Create mutable fir.box to be passed to the runtime for the result.
-    fir::MutableBoxValue resultMutableBox =
-        fir::factory::createTempMutableBox(builder, loc, eleTy);
-    mlir::Value resultIrBox =
-        fir::factory::getMutableIRBox(builder, loc, resultMutableBox);
+    auto charTy = mlir::dyn_cast_or_null<fir::CharacterType>(resultType);
+    assert(charTy && "expect CharacterType");
+    fir::factory::CharacterExprHelper charHelper(builder, loc);
+    mlir::Value len;
+    if (charTy.hasDynamicLen())
+      len = charHelper.readLengthFromBox(fir::getBase(arrayTmp), charTy);
+    else
+      len = builder.createIntegerConstant(loc, builder.getI32Type(),
+                                          charTy.getLen());
+    fir::CharBoxValue temp = charHelper.createCharacterTemp(eleTy, len);
     fir::runtime::genReduce(builder, loc, array, operation, mask, identity,
-                            ordered, resultIrBox);
-    // Handle cleanup of allocatable result descriptor and return
-    return readAndAddCleanUp(resultMutableBox, resultType, "REDUCE");
+                            ordered, temp.getBuffer());
+    return temp;
   }
   return fir::runtime::genReduce(builder, loc, array, operation, mask, identity,
                                  ordered);
@@ -5878,6 +5883,16 @@ mlir::Value IntrinsicLibrary::genRRSpacing(mlir::Type resultType,
       fir::runtime::genRRSpacing(builder, loc, fir::getBase(args[0])));
 }
 
+// ERFC_SCALED
+mlir::Value IntrinsicLibrary::genErfcScaled(mlir::Type resultType,
+                                            llvm::ArrayRef<mlir::Value> args) {
+  assert(args.size() == 1);
+
+  return builder.createConvert(
+      loc, resultType,
+      fir::runtime::genErfcScaled(builder, loc, fir::getBase(args[0])));
+}
+
 // SAME_TYPE_AS
 fir::ExtendedValue
 IntrinsicLibrary::genSameTypeAs(mlir::Type resultType,
diff --git a/flang/lib/Optimizer/Builder/Runtime/Numeric.cpp b/flang/lib/Optimizer/Builder/Runtime/Numeric.cpp
index 8ac9d64f576b6a..1d13248db59841 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Numeric.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Numeric.cpp
@@ -22,6 +22,28 @@ using namespace Fortran::runtime;
 // may not have them in their runtime library. This can occur in the
 // case of cross compilation, for example.
 
+/// Placeholder for real*10 version of ErfcScaled Intrinsic
+struct ForcedErfcScaled10 {
+  static constexpr const char *name = ExpandAndQuoteKey(RTNAME(ErfcScaled10));
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+    return [](mlir::MLIRContext *ctx) {
+      auto ty = mlir::FloatType::getF80(ctx);
+      return mlir::FunctionType::get(ctx, {ty}, {ty});
+    };
+  }
+};
+
+/// Placeholder for real*16 version of ErfcScaled Intrinsic
+struct ForcedErfcScaled16 {
+  static constexpr const char *name = ExpandAndQuoteKey(RTNAME(ErfcScaled16));
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+    return [](mlir::MLIRContext *ctx) {
+      auto ty = mlir::FloatType::getF128(ctx);
+      return mlir::FunctionType::get(ctx, {ty}, {ty});
+    };
+  }
+};
+
 /// Placeholder for real*10 version of Exponent Intrinsic
 struct ForcedExponent10_4 {
   static constexpr const char *name = ExpandAndQuoteKey(RTNAME(Exponent10_4));
@@ -444,6 +466,30 @@ mlir::Value fir::runtime::genRRSpacing(fir::FirOpBuilder &builder,
   return builder.create<fir::CallOp>(loc, func, args).getResult(0);
 }
 
+/// Generate call to ErfcScaled intrinsic runtime routine.
+mlir::Value fir::runtime::genErfcScaled(fir::FirOpBuilder &builder,
+                                        mlir::Location loc, mlir::Value x) {
+  mlir::func::FuncOp func;
+  mlir::Type fltTy = x.getType();
+
+  if (fltTy.isF32())
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ErfcScaled4)>(loc, builder);
+  else if (fltTy.isF64())
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ErfcScaled8)>(loc, builder);
+  else if (fltTy.isF80())
+    func = fir::runtime::getRuntimeFunc<ForcedErfcScaled10>(loc, builder);
+  else if (fltTy.isF128())
+    func = fir::runtime::getRuntimeFunc<ForcedErfcScaled16>(loc, builder);
+  else
+    fir::intrinsicTypeTODO(builder, fltTy, loc, "ERFC_SCALED");
+
+  auto funcTy = func.getFunctionType();
+  llvm::SmallVector<mlir::Value> args = {
+      builder.createConvert(loc, funcTy.getInput(0), x)};
+
+  return builder.create<fir::CallOp>(loc, func, args).getResult(0);
+}
+
 /// Generate call to Scale intrinsic runtime routine.
 mlir::Value fir::runtime::genScale(fir::FirOpBuilder &builder,
                                    mlir::Location loc, mlir::Value x,
diff --git a/flang/runtime/numeric-templates.h b/flang/runtime/numeric-templates.h
index 4936e7738a663e..1b5395df945193 100644
--- a/flang/runtime/numeric-templates.h
+++ b/flang/runtime/numeric-templates.h
@@ -354,6 +354,110 @@ template <typename T> inline RT_API_ATTRS T Spacing(T x) {
   }
 }
 
+// ERFC_SCALED (16.9.71)
+template <typename T> inline RT_API_ATTRS T ErfcScaled(T arg) {
+  // Coefficients for approximation to erfc in the first interval.
+  static const T a[5] = {3.16112374387056560e00, 1.13864154151050156e02,
+      3.77485237685302021e02, 3.20937758913846947e03, 1.85777706184603153e-1};
+  static const T b[4] = {2.36012909523441209e01, 2.44024637934444173e02,
+      1.28261652607737228e03, 2.84423683343917062e03};
+
+  // Coefficients for approximation to erfc in the second interval.
+  static const T c[9] = {5.64188496988670089e-1, 8.88314979438837594e00,
+      6.61191906371416295e01, 2.98635138197400131e02, 8.81952221241769090e02,
+      1.71204761263407058e03, 2.05107837782607147e03, 1.23033935479799725e03,
+      2.15311535474403846e-8};
+  static const T d[8] = {1.57449261107098347e01, 1.17693950891312499e02,
+      5.37181101862009858e02, 1.62138957456669019e03, 3.29079923573345963e03,
+      4.36261909014324716e03, 3.43936767414372164e03, 1.23033935480374942e03};
+
+  // Coefficients for approximation to erfc in the third interval.
+  static const T p[6] = {3.05326634961232344e-1, 3.60344899949804439e-1,
+      1.25781726111229246e-1, 1.60837851487422766e-2, 6.58749161529837803e-4,
+      1.63153871373020978e-2};
+  static const T q[5] = {2.56852019228982242e00, 1.87295284992346047e00,
+      5.27905102951428412e-1, 6.05183413124413191e-2, 2.33520497626869185e-3};
+
+  constexpr T sqrtpi{1.7724538509078120380404576221783883301349L};
+  constexpr T rsqrtpi{0.5641895835477562869480794515607725858440L};
+  constexpr T epsilonby2{std::numeric_limits<T>::epsilon() * 0.5};
+  constexpr T xneg{-26.628e0};
+  constexpr T xhuge{6.71e7};
+  constexpr T thresh{0.46875e0};
+  constexpr T zero{0.0};
+  constexpr T one{1.0};
+  constexpr T four{4.0};
+  constexpr T sixteen{16.0};
+  constexpr T xmax{1.0 / (sqrtpi * std::numeric_limits<T>::min())};
+  static_assert(xmax > xhuge, "xmax must be greater than xhuge");
+
+  T ysq;
+  T xnum;
+  T xden;
+  T del;
+  T result;
+
+  auto x{arg};
+  auto y{std::fabs(x)};
+
+  if (y <= thresh) {
+    // evaluate erf for |x| <= 0.46875
+    ysq = zero;
+    if (y > epsilonby2) {
+      ysq = y * y;
+    }
+    xnum = a[4] * ysq;
+    xden = ysq;
+    for (int i{0}; i < 3; i++) {
+      xnum = (xnum + a[i]) * ysq;
+      xden = (xden + b[i]) * ysq;
+    }
+    result = x * (xnum + a[3]) / (xden + b[3]);
+    result = one - result;
+    result = std::exp(ysq) * result;
+    return result;
+  } else if (y <= four) {
+    // evaluate erfc for 0.46875 < |x| <= 4.0
+    xnum = c[8] * y;
+    xden = y;
+    for (int i{0}; i < 7; ++i) {
+      xnum = (xnum + c[i]) * y;
+      xden = (xden + d[i]) * y;
+    }
+    result = (xnum + c[7]) / (xden + d[7]);
+  } else {
+    // evaluate erfc for |x| > 4.0
+    result = zero;
+    if (y >= xhuge) {
+      if (y < xmax) {
+        result = rsqrtpi / y;
+      }
+    } else {
+      ysq = one / (y * y);
+      xnum = p[5] * ysq;
+      xden = ysq;
+      for (int i{0}; i < 4; ++i) {
+        xnum = (xnum + p[i]) * ysq;
+        xden = (xden + q[i]) * ysq;
+      }
+      result = ysq * (xnum + p[4]) / (xden + q[4]);
+      result = (rsqrtpi - result) / y;
+    }
+  }
+  // fix up for negative argument, erf, etc.
+  if (x < zero) {
+    if (x < xneg) {
+      result = std::numeric_limits<T>::max();
+    } else {
+      ysq = trunc(x * sixteen) / sixteen;
+      del = (x - ysq) * (x + ysq);
+      y = std::exp((ysq * ysq)) * std::exp((del));
+      result = (y + y) - result;
+    }
+  }
+  return result;
+}
+
 } // namespace Fortran::runtime
 #endif // FORTRAN_RUNTIME_NUMERIC_TEMPLATES_H_
diff --git a/flang/runtime/numeric.cpp b/flang/runtime/numeric.cpp
index 2225473c4690e2..7c40beb31083ff 100644
--- a/flang/runtime/numeric.cpp
+++ b/flang/runtime/numeric.cpp
@@ -316,6 +316,27 @@ CppTypeFor<TypeCategory::Integer, 16> RTDEF(Ceiling16_16)(
 #endif
 #endif
 
+CppTypeFor<TypeCategory::Real, 4> RTDEF(ErfcScaled4)(
+    CppTypeFor<TypeCategory::Real, 4> x) {
+  return ErfcScaled(x);
+}
+CppTypeFor<TypeCategory::Real, 8> RTDEF(ErfcScaled8)(
+    CppTypeFor<TypeCategory::Real, 8> x) {
+  return ErfcScaled(x);
+}
+#if LDBL_MANT_DIG == 64
+CppTypeFor<TypeCategory::Real, 10> RTDEF(ErfcScaled10)(
+    CppTypeFor<TypeCategory::Real, 10> x) {
+  return ErfcScaled(x);
+}
+#endif
+#if LDBL_MANT_DIG == 113
+CppTypeFor<TypeCategory::Real, 16> RTDEF(ErfcScaled16)(
+    CppTypeFor<TypeCategory::Real, 16> x) {
+  return ErfcScaled(x);
+}
+#endif
+
 CppTypeFor<TypeCategory::Integer, 4> RTDEF(Exponent4_4)(
     CppTypeFor<TypeCategory::Real, 4> x) {
   return Exponent<CppTypeFor<TypeCategory::Integer, 4>>(x);
}
diff --git a/flang/test/Lower/Intrinsics/erfc_scaled.f90 b/flang/test/Lower/Intrinsics/erfc_scaled.f90
new file mode 100644
index 00000000000000..ab5e90cb2409ea
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/erfc_scaled.f90
@@ -0,0 +1,23 @@
+! RUN: bbc -emit-fir -hlfir=false %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir %s -o - | FileCheck %s
+
+! CHECK-LABEL: func @_QPerfc_scaled4(
+! CHECK-SAME: %[[x:[^:]+]]: !fir.ref<f32>{{.*}}) -> f32
+function erfc_scaled4(x)
+  real(kind=4) :: erfc_scaled4
+  real(kind=4) :: x
+  erfc_scaled4 = erfc_scaled(x);
+! CHECK: %[[a1:.*]] = fir.load %[[x]] : !fir.ref<f32>
+! CHECK: %{{.*}} = fir.call @_FortranAErfcScaled4(%[[a1]]) {{.*}}: (f32) -> f32
+end function erfc_scaled4
+
+
+! CHECK-LABEL: func @_QPerfc_scaled8(
+! CHECK-SAME: %[[x:[^:]+]]: !fir.ref<f64>{{.*}}) -> f64
+function erfc_scaled8(x)
+  real(kind=8) :: erfc_scaled8
+  real(kind=8) :: x
+  erfc_scaled8 = erfc_scaled(x);
+! CHECK: %[[a1:.*]] = fir.load %[[x]] : !fir.ref<f64>
+! CHECK: %{{.*}} = fir.call @_FortranAErfcScaled8(%[[a1]]) {{.*}}: (f64) -> f64
+end function erfc_scaled8
diff --git a/flang/test/Lower/Intrinsics/reduce.f90 b/flang/test/Lower/Intrinsics/reduce.f90
index 842e626d7cc397..8d7b7798a94c56 100644
--- a/flang/test/Lower/Intrinsics/reduce.f90
+++ b/flang/test/Lower/Intrinsics/reduce.f90
@@ -348,21 +348,25 @@ subroutine char1(a)
   res = reduce(a, red_char1)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceChar1
+! CHECK: %[[CHRTMP:.*]] = fir.alloca !fir.char<1> {bindc_name = ".chrtmp"}
+! CHECK: %[[RESULT:.*]] = fir.convert %[[CHRTMP]] : (!fir.ref<!fir.char<1>>) -> !fir.ref<i8>
+! CHECK: fir.call @_FortranAReduceChar1(%[[RESULT]], {{.*}})
 
 pure function red_char2(a,b)
-  character(kind=2), intent(in) :: a, b
-  character(kind=2) :: red_char2
+  character(kind=2, len=10), intent(in) :: a, b
+  character(kind=2, len=10) :: red_char2
   red_char2 = a // b
 end function
 
 subroutine char2(a)
-  character(kind=2), intent(in) :: a(:)
-  character(kind=2) :: res
+  character(kind=2, len=10), intent(in) :: a(:)
+  character(kind=2, len=10) :: res
   res = reduce(a, red_char2)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceChar2
+! CHECK: %[[CHRTMP:.*]] = fir.alloca !fir.char<2,10> {bindc_name = ".chrtmp"}
+! CHECK: %[[RESULT:.*]] = fir.convert %[[CHRTMP]] : (!fir.ref<!fir.char<2,10>>) -> !fir.ref<i16>
+! CHECK: fir.call @_FortranAReduceChar2(%[[RESULT]], {{.*}})
 
 pure function red_char4(a,b)
   character(kind=4), intent(in) :: a, b
@@ -598,8 +602,8 @@ subroutine char1dim(a)
 ! CHECK: fir.call @_FortranAReduceCharacter1Dim
 
 subroutine char2dim(a)
-  character(kind=2), intent(in) :: a(:, :)
-  character(kind=2), allocatable :: res(:)
+  character(kind=2, len=10), intent(in) :: a(:, :)
+  character(kind=2, len=10), allocatable :: res(:)
   res = reduce(a, red_char2, 2)
 end subroutine
 
@@ -613,4 +617,22 @@ subroutine char4dim(a)
 
 ! CHECK: fir.call @_FortranAReduceCharacter4Dim
 
+pure function red_char_dyn(a, b)
+  character(*), intent(In) :: a, b
+  character(max(len(a),len(b))) :: red_char_dyn
+  red_char_dyn = max(a, b)
+end function
+
+subroutine charDyn()
+  character(5) :: res
+  character(:), allocatable :: a(:)
+  allocate(character(10)::a(10))
+  res = reduce(a, red_char_dyn)
+end subroutine
+
+! CHECK: %[[BOX_ELESIZE:.*]] = fir.box_elesize %{{.*}} : (!fir.box<!fir.heap<!fir.array<?x!fir.char<1,?>>>>) -> index
+! CHECK: %[[CHRTMP:.*]] = fir.alloca !fir.char<1,?>(%[[BOX_ELESIZE]] : index) {bindc_name = ".chrtmp"}
+! CHECK: %[[RESULT:.*]] = fir.convert %[[CHRTMP]] : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
CHECK: fir.call @_FortranAReduceChar1(%[[RESULT]], {{.*}}) + end module diff --git a/flang/unittests/Runtime/Numeric.cpp b/flang/unittests/Runtime/Numeric.cpp index b69ff21ea79fb0..9f77e165707834 100644 --- a/flang/unittests/Runtime/Numeric.cpp +++ b/flang/unittests/Runtime/Numeric.cpp @@ -31,6 +31,14 @@ TEST(Numeric, Floor) { EXPECT_EQ(RTNAME(Floor4_1)(Real<4>{0}), 0); } +TEST(Numeric, Erfc_scaled) { + EXPECT_NEAR(RTNAME(ErfcScaled4)(Real<4>{20.0}), 0.02817434874, 1.0e-8); + EXPECT_NEAR(RTNAME(ErfcScaled8)(Real<8>{20.0}), 0.02817434874, 1.0e-11); +#if LDBL_MANT_DIG == 64 + EXPECT_NEAR(RTNAME(ErfcScaled10)(Real<10>{20.0}), 0.02817434874, 1.0e-8); +#endif +} + TEST(Numeric, Exponent) { EXPECT_EQ(RTNAME(Exponent4_4)(Real<4>{0}), 0); EXPECT_EQ(RTNAME(Exponent4_8)(Real<4>{1.0}), 1); diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 381061ce3fcbf0..db96a80051a8dd 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -541,6 +541,8 @@ if(LIBC_TYPES_HAS_FLOAT16) libc.src.math.rintf16 libc.src.math.roundf16 libc.src.math.roundevenf16 + libc.src.math.totalorderf16 + libc.src.math.totalordermagf16 libc.src.math.truncf16 libc.src.math.ufromfpf16 libc.src.math.ufromfpxf16 diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index e99960b12441da..355eaf33ace6d1 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -571,6 +571,8 @@ if(LIBC_TYPES_HAS_FLOAT16) libc.src.math.rintf16 libc.src.math.roundf16 libc.src.math.roundevenf16 + libc.src.math.totalorderf16 + libc.src.math.totalordermagf16 libc.src.math.truncf16 libc.src.math.ufromfpf16 libc.src.math.ufromfpxf16 diff --git a/libc/docs/c23.rst b/libc/docs/c23.rst index fec9b24bbd5815..4134befd1ed358 100644 --- a/libc/docs/c23.rst +++ b/libc/docs/c23.rst @@ -42,8 +42,8 @@ Additions: * rsqrt* * __STDC_IEC_60559_DFP__ functions (_Decimal32, _Decimal64, _Decimal128) * compoundn* - * totalorder* - * totalordermag* + * totalorder* |check| + * totalordermag* |check| * getpayload* * setpayload* * iscannonical diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst index f83a646c34b57c..d556885eda6223 100644 --- a/libc/docs/math/index.rst +++ b/libc/docs/math/index.rst @@ -210,6 +210,10 @@ Basic Operations +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | scalbn | |check| | |check| | |check| | | |check| | 7.12.6.19 | F.10.3.19 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| totalorder | | | | |check| | | F.10.12.1 | N/A | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| totalordermag | | | | |check| | | F.10.12.2 | N/A | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | trunc | |check| | |check| | |check| | |check| | |check| | 7.12.9.9 | F.10.6.9 | 
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | ufromfp          | |check|          | |check|         | |check|                | |check|              | |check|                | 7.12.9.10              | F.10.6.10                  |
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index 34169948fc6d27..b134ec00a7d7a2 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -710,6 +710,10 @@ def StdC : StandardSpec<"stdc"> {
           FunctionSpec<"canonicalizel", RetValSpec<IntType>, [ArgSpec<LongDoublePtr>, ArgSpec<LongDoublePtr>]>,
           GuardedFunctionSpec<"canonicalizef16", RetValSpec<IntType>, [ArgSpec<Float16Ptr>, ArgSpec<Float16Ptr>], "LIBC_TYPES_HAS_FLOAT16">,
           GuardedFunctionSpec<"canonicalizef128", RetValSpec<IntType>, [ArgSpec<Float128Ptr>, ArgSpec<Float128Ptr>], "LIBC_TYPES_HAS_FLOAT128">,
+
+          GuardedFunctionSpec<"totalorderf16", RetValSpec<IntType>, [ArgSpec<Float16Ptr>, ArgSpec<Float16Ptr>], "LIBC_TYPES_HAS_FLOAT16">,
+
+          GuardedFunctionSpec<"totalordermagf16", RetValSpec<IntType>, [ArgSpec<Float16Ptr>, ArgSpec<Float16Ptr>], "LIBC_TYPES_HAS_FLOAT16">,
       ]
   >;
diff --git a/libc/src/__support/FPUtil/BasicOperations.h b/libc/src/__support/FPUtil/BasicOperations.h
index e5ac101fedc0e3..beb8e48db8f51b 100644
--- a/libc/src/__support/FPUtil/BasicOperations.h
+++ b/libc/src/__support/FPUtil/BasicOperations.h
@@ -240,6 +240,31 @@ LIBC_INLINE int canonicalize(T &cx, const T &x) {
   return 0;
 }

+template <typename T>
+LIBC_INLINE cpp::enable_if_t<cpp::is_floating_point_v<T>, bool>
+totalorder(T x, T y) {
+  using FPBits = FPBits<T>;
+  FPBits x_bits(x);
+  FPBits y_bits(y);
+
+  using StorageType = typename FPBits::StorageType;
+  StorageType x_u = x_bits.uintval();
+  StorageType y_u = y_bits.uintval();
+
+  using signed_t = cpp::make_signed_t<StorageType>;
+  signed_t x_signed = static_cast<signed_t>(x_u);
+  signed_t y_signed = static_cast<signed_t>(y_u);
+
+  bool both_neg = (x_u & y_u & FPBits::SIGN_MASK) != 0;
+  return x_signed == y_signed || ((x_signed <= y_signed) != both_neg);
+}
+
+template <typename T>
+LIBC_INLINE cpp::enable_if_t<cpp::is_floating_point_v<T>, bool>
+totalordermag(T x, T y) {
+  return FPBits<T>(x).abs().uintval() <= FPBits<T>(y).abs().uintval();
+}
+
 } // namespace fputil
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index 82dfdaf479ff00..2446c293b8ef5a 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -369,6 +369,10 @@ add_math_entrypoint_object(tanhf)
 add_math_entrypoint_object(tgamma)
 add_math_entrypoint_object(tgammaf)

+add_math_entrypoint_object(totalorderf16)
+
+add_math_entrypoint_object(totalordermagf16)
+
 add_math_entrypoint_object(trunc)
 add_math_entrypoint_object(truncf)
 add_math_entrypoint_object(truncl)
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index f4f683e61bd658..673bef516b13d1 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -3577,3 +3577,27 @@ add_entrypoint_object(
   COMPILE_OPTIONS
     -O3
 )
+
+add_entrypoint_object(
+  totalorderf16
+  SRCS
+    totalorderf16.cpp
+  HDRS
+    ../totalorderf16.h
+  DEPENDS
+    libc.src.__support.FPUtil.basic_operations
+  COMPILE_OPTIONS
+    -O3
+)
+
+add_entrypoint_object(
+  totalordermagf16
+  SRCS
+    totalordermagf16.cpp
+  HDRS
+    ../totalordermagf16.h
+  DEPENDS
+    libc.src.__support.FPUtil.basic_operations
+  COMPILE_OPTIONS
+    -O3
+)
diff --git a/libc/src/math/generic/totalorderf16.cpp b/libc/src/math/generic/totalorderf16.cpp
new file mode 100644
index 00000000000000..e43beb33d2fd3d
--- /dev/null
+++ b/libc/src/math/generic/totalorderf16.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of totalorderf16 function --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/totalorderf16.h" +#include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(int, totalorderf16, (const float16 *x, const float16 *y)) { + return static_cast(fputil::totalorder(*x, *y)); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/math/generic/totalordermagf16.cpp b/libc/src/math/generic/totalordermagf16.cpp new file mode 100644 index 00000000000000..09d04fbeb2d2c6 --- /dev/null +++ b/libc/src/math/generic/totalordermagf16.cpp @@ -0,0 +1,20 @@ +//===-- Implementation of totalordermagf16 function -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/totalordermagf16.h" +#include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(int, totalordermagf16, + (const float16 *x, const float16 *y)) { + return static_cast(fputil::totalordermag(*x, *y)); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/math/totalorderf16.h b/libc/src/math/totalorderf16.h new file mode 100644 index 00000000000000..f5390140c4dc2e --- /dev/null +++ b/libc/src/math/totalorderf16.h @@ -0,0 +1,20 @@ +//===-- Implementation header for totalorderf16 -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_TOTALORDERF16_H +#define LLVM_LIBC_SRC_MATH_TOTALORDERF16_H + +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE { + +int totalorderf16(const float16 *x, const float16 *y); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_MATH_TOTALORDERF16_H diff --git a/libc/src/math/totalordermagf16.h b/libc/src/math/totalordermagf16.h new file mode 100644 index 00000000000000..8c6621b9783dfb --- /dev/null +++ b/libc/src/math/totalordermagf16.h @@ -0,0 +1,20 @@ +//===-- Implementation header for totalordermagf16 --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_TOTALORDERMAGF16_H +#define LLVM_LIBC_SRC_MATH_TOTALORDERMAGF16_H + +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE { + +int totalordermagf16(const float16 *x, const float16 *y); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_MATH_TOTALORDERMAGF16_H diff --git a/libc/test/UnitTest/FPMatcher.h b/libc/test/UnitTest/FPMatcher.h index 26af5cec02b587..86b823260e1979 100644 --- a/libc/test/UnitTest/FPMatcher.h +++ b/libc/test/UnitTest/FPMatcher.h @@ -97,8 +97,10 @@ template struct FPTest : public Test { LIBC_NAMESPACE::cpp::numeric_limits::max(); \ const T zero = FPBits::zero(Sign::POS).get_val(); \ const T neg_zero = FPBits::zero(Sign::NEG).get_val(); \ - const T aNaN = FPBits::quiet_nan().get_val(); \ - const T sNaN = FPBits::signaling_nan().get_val(); \ + const T aNaN = FPBits::quiet_nan(Sign::POS).get_val(); \ + const T neg_aNaN = FPBits::quiet_nan(Sign::NEG).get_val(); \ + const T sNaN = FPBits::signaling_nan(Sign::POS).get_val(); \ + const T neg_sNaN = FPBits::signaling_nan(Sign::NEG).get_val(); \ const T inf = FPBits::inf(Sign::POS).get_val(); \ const T neg_inf = FPBits::inf(Sign::NEG).get_val(); \ const T min_normal = FPBits::min_normal().get_val(); \ diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt index 75e2bdd7be100a..68cd412b14e9d3 100644 --- a/libc/test/src/math/smoke/CMakeLists.txt +++ b/libc/test/src/math/smoke/CMakeLists.txt @@ -3519,3 +3519,27 @@ add_fp_unittest( libc.src.math.powf libc.src.__support.FPUtil.fp_bits ) + +add_fp_unittest( + totalorderf16_test + SUITE + libc-math-smoke-tests + SRCS + totalorderf16_test.cpp + HDRS + TotalOrderTest.h + DEPENDS + libc.src.math.totalorderf16 +) + +add_fp_unittest( + totalordermagf16_test + SUITE + libc-math-smoke-tests + SRCS + totalordermagf16_test.cpp + HDRS + TotalOrderMagTest.h + DEPENDS + libc.src.math.totalordermagf16 +) diff --git a/libc/test/src/math/smoke/TotalOrderMagTest.h b/libc/test/src/math/smoke/TotalOrderMagTest.h new file mode 100644 index 00000000000000..5fe2983a0e678b --- /dev/null +++ b/libc/test/src/math/smoke/TotalOrderMagTest.h @@ -0,0 +1,142 @@ +//===-- Utility class to test flavors of totalordermag ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LIBC_TEST_SRC_MATH_SMOKE_TOTALORDERMAGTEST_H +#define LIBC_TEST_SRC_MATH_SMOKE_TOTALORDERMAGTEST_H + +#include "test/UnitTest/FEnvSafeTest.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" + +template +class TotalOrderMagTestTemplate : public LIBC_NAMESPACE::testing::FEnvSafeTest { + + DECLARE_SPECIAL_CONSTANTS(T) + +public: + typedef int (*TotalOrderMagFunc)(const T *, const T *); + + bool funcWrapper(TotalOrderMagFunc func, T x, T y) { + return func(&x, &y) != 0; + } + + void testXLesserThanY(TotalOrderMagFunc func) { + EXPECT_TRUE(funcWrapper(func, neg_inf, inf)); + + EXPECT_TRUE(funcWrapper(func, T(0.0), T(0.1))); + EXPECT_TRUE(funcWrapper(func, T(0.0), T(123.38))); + + EXPECT_FALSE(funcWrapper(func, T(-0.1), T(0.0))); + EXPECT_FALSE(funcWrapper(func, T(-123.38), T(0.0))); + + EXPECT_TRUE(funcWrapper(func, T(-0.1), T(0.1))); + EXPECT_TRUE(funcWrapper(func, T(-123.38), T(123.38))); + } + + void testXGreaterThanY(TotalOrderMagFunc func) { + EXPECT_TRUE(funcWrapper(func, inf, neg_inf)); + + EXPECT_TRUE(funcWrapper(func, T(0.0), T(-0.1))); + EXPECT_TRUE(funcWrapper(func, T(0.0), T(-123.38))); + + EXPECT_FALSE(funcWrapper(func, T(0.1), T(0.0))); + EXPECT_FALSE(funcWrapper(func, T(123.38), T(0.0))); + + EXPECT_TRUE(funcWrapper(func, T(0.1), T(-0.1))); + EXPECT_TRUE(funcWrapper(func, T(123.38), T(-123.38))); + } + + void testXEqualToY(TotalOrderMagFunc func) { + EXPECT_TRUE(funcWrapper(func, inf, inf)); + EXPECT_TRUE(funcWrapper(func, neg_inf, neg_inf)); + + EXPECT_TRUE(funcWrapper(func, T(-0.0), T(0.0))); + EXPECT_TRUE(funcWrapper(func, T(0.0), T(-0.0))); + + EXPECT_TRUE(funcWrapper(func, T(0.0), T(0.0))); + EXPECT_TRUE(funcWrapper(func, T(-0.0), T(-0.0))); + EXPECT_TRUE(funcWrapper(func, T(0.1), T(0.1))); + EXPECT_TRUE(funcWrapper(func, T(-0.1), T(-0.1))); + EXPECT_TRUE(funcWrapper(func, T(123.38), T(123.38))); + EXPECT_TRUE(funcWrapper(func, T(-123.38), T(-123.38))); + } + + void testSingleNaN(TotalOrderMagFunc func) { + EXPECT_FALSE(funcWrapper(func, neg_aNaN, T(0.0))); + EXPECT_FALSE(funcWrapper(func, neg_aNaN, T(0.1))); + EXPECT_FALSE(funcWrapper(func, neg_aNaN, T(123.38))); + + EXPECT_TRUE(funcWrapper(func, T(0.0), neg_aNaN)); + EXPECT_TRUE(funcWrapper(func, T(0.1), neg_aNaN)); + EXPECT_TRUE(funcWrapper(func, T(123.38), neg_aNaN)); + + EXPECT_TRUE(funcWrapper(func, T(0.0), aNaN)); + EXPECT_TRUE(funcWrapper(func, T(0.1), aNaN)); + EXPECT_TRUE(funcWrapper(func, T(123.38), aNaN)); + + EXPECT_FALSE(funcWrapper(func, aNaN, T(0.0))); + EXPECT_FALSE(funcWrapper(func, aNaN, T(0.1))); + EXPECT_FALSE(funcWrapper(func, aNaN, T(123.38))); + } + + void testNaNSigns(TotalOrderMagFunc func) { + EXPECT_TRUE(funcWrapper(func, neg_aNaN, aNaN)); + EXPECT_FALSE(funcWrapper(func, neg_aNaN, sNaN)); + EXPECT_TRUE(funcWrapper(func, neg_sNaN, aNaN)); + EXPECT_TRUE(funcWrapper(func, neg_sNaN, sNaN)); + + EXPECT_TRUE(funcWrapper(func, aNaN, neg_aNaN)); + EXPECT_FALSE(funcWrapper(func, aNaN, neg_sNaN)); + EXPECT_TRUE(funcWrapper(func, sNaN, neg_aNaN)); + EXPECT_TRUE(funcWrapper(func, sNaN, neg_sNaN)); + } + + void testQuietVsSignalingNaN(TotalOrderMagFunc func) { + EXPECT_FALSE(funcWrapper(func, neg_aNaN, neg_sNaN)); + EXPECT_TRUE(funcWrapper(func, neg_sNaN, neg_aNaN)); + EXPECT_TRUE(funcWrapper(func, sNaN, aNaN)); + EXPECT_FALSE(funcWrapper(func, aNaN, sNaN)); + } + + void testNaNPayloads(TotalOrderMagFunc func) { + T qnan_123 = 
FPBits::quiet_nan(Sign::POS, 0x123).get_val(); + T neg_qnan_123 = FPBits::quiet_nan(Sign::NEG, 0x123).get_val(); + T snan_123 = FPBits::signaling_nan(Sign::POS, 0x123).get_val(); + T neg_snan_123 = FPBits::signaling_nan(Sign::NEG, 0x123).get_val(); + + EXPECT_TRUE(funcWrapper(func, aNaN, aNaN)); + EXPECT_TRUE(funcWrapper(func, sNaN, sNaN)); + EXPECT_TRUE(funcWrapper(func, aNaN, qnan_123)); + EXPECT_TRUE(funcWrapper(func, sNaN, snan_123)); + EXPECT_FALSE(funcWrapper(func, qnan_123, aNaN)); + EXPECT_FALSE(funcWrapper(func, snan_123, sNaN)); + + EXPECT_TRUE(funcWrapper(func, neg_aNaN, neg_aNaN)); + EXPECT_TRUE(funcWrapper(func, neg_sNaN, neg_sNaN)); + EXPECT_TRUE(funcWrapper(func, neg_aNaN, neg_qnan_123)); + EXPECT_TRUE(funcWrapper(func, neg_sNaN, neg_snan_123)); + EXPECT_FALSE(funcWrapper(func, neg_qnan_123, neg_aNaN)); + EXPECT_FALSE(funcWrapper(func, neg_snan_123, neg_sNaN)); + } +}; + +#define LIST_TOTALORDERMAG_TESTS(T, func) \ + using LlvmLibcTotalOrderMagTest = TotalOrderMagTestTemplate; \ + TEST_F(LlvmLibcTotalOrderMagTest, XLesserThanY) { testXLesserThanY(&func); } \ + TEST_F(LlvmLibcTotalOrderMagTest, XGreaterThanY) { \ + testXGreaterThanY(&func); \ + } \ + TEST_F(LlvmLibcTotalOrderMagTest, XEqualToY) { testXEqualToY(&func); } \ + TEST_F(LlvmLibcTotalOrderMagTest, SingleNaN) { testSingleNaN(&func); } \ + TEST_F(LlvmLibcTotalOrderMagTest, NaNSigns) { testNaNSigns(&func); } \ + TEST_F(LlvmLibcTotalOrderMagTest, QuietVsSignalingNaN) { \ + testQuietVsSignalingNaN(&func); \ + } \ + TEST_F(LlvmLibcTotalOrderMagTest, NaNPayloads) { testNaNPayloads(&func); } + +#endif // LIBC_TEST_SRC_MATH_SMOKE_TOTALORDERMAGTEST_H diff --git a/libc/test/src/math/smoke/TotalOrderTest.h b/libc/test/src/math/smoke/TotalOrderTest.h new file mode 100644 index 00000000000000..281b2a59f930db --- /dev/null +++ b/libc/test/src/math/smoke/TotalOrderTest.h @@ -0,0 +1,138 @@ +//===-- Utility class to test different flavors of totalorder ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LIBC_TEST_SRC_MATH_SMOKE_TOTALORDERTEST_H +#define LIBC_TEST_SRC_MATH_SMOKE_TOTALORDERTEST_H + +#include "test/UnitTest/FEnvSafeTest.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" + +template +class TotalOrderTestTemplate : public LIBC_NAMESPACE::testing::FEnvSafeTest { + + DECLARE_SPECIAL_CONSTANTS(T) + +public: + typedef int (*TotalOrderFunc)(const T *, const T *); + + bool funcWrapper(TotalOrderFunc func, T x, T y) { return func(&x, &y) != 0; } + + void testXLesserThanY(TotalOrderFunc func) { + EXPECT_TRUE(funcWrapper(func, neg_inf, inf)); + + EXPECT_TRUE(funcWrapper(func, T(0.0), T(0.1))); + EXPECT_TRUE(funcWrapper(func, T(0.0), T(123.38))); + + EXPECT_TRUE(funcWrapper(func, T(-0.1), T(0.0))); + EXPECT_TRUE(funcWrapper(func, T(-123.38), T(0.0))); + + EXPECT_TRUE(funcWrapper(func, T(-0.1), T(0.1))); + EXPECT_TRUE(funcWrapper(func, T(-123.38), T(123.38))); + } + + void testXGreaterThanY(TotalOrderFunc func) { + EXPECT_FALSE(funcWrapper(func, inf, neg_inf)); + + EXPECT_FALSE(funcWrapper(func, T(0.0), T(-0.1))); + EXPECT_FALSE(funcWrapper(func, T(0.0), T(-123.38))); + + EXPECT_FALSE(funcWrapper(func, T(0.1), T(0.0))); + EXPECT_FALSE(funcWrapper(func, T(123.38), T(0.0))); + + EXPECT_FALSE(funcWrapper(func, T(0.1), T(-0.1))); + EXPECT_FALSE(funcWrapper(func, T(123.38), T(-123.38))); + } + + void testXEqualToY(TotalOrderFunc func) { + EXPECT_TRUE(funcWrapper(func, inf, inf)); + EXPECT_TRUE(funcWrapper(func, neg_inf, neg_inf)); + + EXPECT_TRUE(funcWrapper(func, T(-0.0), T(0.0))); + EXPECT_FALSE(funcWrapper(func, T(0.0), T(-0.0))); + + EXPECT_TRUE(funcWrapper(func, T(0.0), T(0.0))); + EXPECT_TRUE(funcWrapper(func, T(-0.0), T(-0.0))); + EXPECT_TRUE(funcWrapper(func, T(0.1), T(0.1))); + EXPECT_TRUE(funcWrapper(func, T(-0.1), T(-0.1))); + EXPECT_TRUE(funcWrapper(func, T(123.38), T(123.38))); + EXPECT_TRUE(funcWrapper(func, T(-123.38), T(-123.38))); + } + + void testSingleNaN(TotalOrderFunc func) { + EXPECT_TRUE(funcWrapper(func, neg_aNaN, T(0.0))); + EXPECT_TRUE(funcWrapper(func, neg_aNaN, T(0.1))); + EXPECT_TRUE(funcWrapper(func, neg_aNaN, T(123.38))); + + EXPECT_FALSE(funcWrapper(func, T(0.0), neg_aNaN)); + EXPECT_FALSE(funcWrapper(func, T(0.1), neg_aNaN)); + EXPECT_FALSE(funcWrapper(func, T(123.38), neg_aNaN)); + + EXPECT_TRUE(funcWrapper(func, T(0.0), aNaN)); + EXPECT_TRUE(funcWrapper(func, T(0.1), aNaN)); + EXPECT_TRUE(funcWrapper(func, T(123.38), aNaN)); + + EXPECT_FALSE(funcWrapper(func, aNaN, T(0.0))); + EXPECT_FALSE(funcWrapper(func, aNaN, T(0.1))); + EXPECT_FALSE(funcWrapper(func, aNaN, T(123.38))); + } + + void testNaNSigns(TotalOrderFunc func) { + EXPECT_TRUE(funcWrapper(func, neg_aNaN, aNaN)); + EXPECT_TRUE(funcWrapper(func, neg_aNaN, sNaN)); + EXPECT_TRUE(funcWrapper(func, neg_sNaN, aNaN)); + EXPECT_TRUE(funcWrapper(func, neg_sNaN, sNaN)); + + EXPECT_FALSE(funcWrapper(func, aNaN, neg_aNaN)); + EXPECT_FALSE(funcWrapper(func, aNaN, neg_sNaN)); + EXPECT_FALSE(funcWrapper(func, sNaN, neg_aNaN)); + EXPECT_FALSE(funcWrapper(func, sNaN, neg_sNaN)); + } + + void testQuietVsSignalingNaN(TotalOrderFunc func) { + EXPECT_TRUE(funcWrapper(func, neg_aNaN, neg_sNaN)); + EXPECT_FALSE(funcWrapper(func, neg_sNaN, neg_aNaN)); + EXPECT_TRUE(funcWrapper(func, sNaN, aNaN)); + EXPECT_FALSE(funcWrapper(func, aNaN, sNaN)); + } + + void testNaNPayloads(TotalOrderFunc func) { + T qnan_123 = FPBits::quiet_nan(Sign::POS, 
0x123).get_val(); + T neg_qnan_123 = FPBits::quiet_nan(Sign::NEG, 0x123).get_val(); + T snan_123 = FPBits::signaling_nan(Sign::POS, 0x123).get_val(); + T neg_snan_123 = FPBits::signaling_nan(Sign::NEG, 0x123).get_val(); + + EXPECT_TRUE(funcWrapper(func, aNaN, aNaN)); + EXPECT_TRUE(funcWrapper(func, sNaN, sNaN)); + EXPECT_TRUE(funcWrapper(func, aNaN, qnan_123)); + EXPECT_TRUE(funcWrapper(func, sNaN, snan_123)); + EXPECT_FALSE(funcWrapper(func, qnan_123, aNaN)); + EXPECT_FALSE(funcWrapper(func, snan_123, sNaN)); + + EXPECT_TRUE(funcWrapper(func, neg_aNaN, neg_aNaN)); + EXPECT_TRUE(funcWrapper(func, neg_sNaN, neg_sNaN)); + EXPECT_FALSE(funcWrapper(func, neg_aNaN, neg_qnan_123)); + EXPECT_FALSE(funcWrapper(func, neg_sNaN, neg_snan_123)); + EXPECT_TRUE(funcWrapper(func, neg_qnan_123, neg_aNaN)); + EXPECT_TRUE(funcWrapper(func, neg_snan_123, neg_sNaN)); + } +}; + +#define LIST_TOTALORDER_TESTS(T, func) \ + using LlvmLibcTotalOrderTest = TotalOrderTestTemplate; \ + TEST_F(LlvmLibcTotalOrderTest, XLesserThanY) { testXLesserThanY(&func); } \ + TEST_F(LlvmLibcTotalOrderTest, XGreaterThanY) { testXGreaterThanY(&func); } \ + TEST_F(LlvmLibcTotalOrderTest, XEqualToY) { testXEqualToY(&func); } \ + TEST_F(LlvmLibcTotalOrderTest, SingleNaN) { testSingleNaN(&func); } \ + TEST_F(LlvmLibcTotalOrderTest, NaNSigns) { testNaNSigns(&func); } \ + TEST_F(LlvmLibcTotalOrderTest, QuietVsSignalingNaN) { \ + testQuietVsSignalingNaN(&func); \ + } \ + TEST_F(LlvmLibcTotalOrderTest, NaNPayloads) { testNaNPayloads(&func); } + +#endif // LIBC_TEST_SRC_MATH_SMOKE_TOTALORDERTEST_H diff --git a/libc/test/src/math/smoke/totalorderf16_test.cpp b/libc/test/src/math/smoke/totalorderf16_test.cpp new file mode 100644 index 00000000000000..410c70c47c51d8 --- /dev/null +++ b/libc/test/src/math/smoke/totalorderf16_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for totalorderf16 ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "TotalOrderTest.h" + +#include "src/math/totalorderf16.h" + +LIST_TOTALORDER_TESTS(float16, LIBC_NAMESPACE::totalorderf16) diff --git a/libc/test/src/math/smoke/totalordermagf16_test.cpp b/libc/test/src/math/smoke/totalordermagf16_test.cpp new file mode 100644 index 00000000000000..b09eb11cd9c3bb --- /dev/null +++ b/libc/test/src/math/smoke/totalordermagf16_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for totalordermagf16 ------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "TotalOrderMagTest.h"
+
+#include "src/math/totalordermagf16.h"
+
+LIST_TOTALORDERMAG_TESTS(float16, LIBC_NAMESPACE::totalordermagf16)
diff --git a/libcxx/docs/Status/Cxx2cIssues.csv b/libcxx/docs/Status/Cxx2cIssues.csv
index 8d24457186310c..28359b7bb49ac4 100644
--- a/libcxx/docs/Status/Cxx2cIssues.csv
+++ b/libcxx/docs/Status/Cxx2cIssues.csv
@@ -65,4 +65,5 @@
 "`3343 <https://wg21.link/LWG3343>`__","Ordering of calls to ``unlock()`` and ``notify_all()`` in Effects element of ``notify_all_at_thread_exit()`` should be reversed","Not Yet Adopted","|Complete|","16.0",""
 "XXXX","","The sys_info range should be affected by save","Not Yet Adopted","|Complete|","19.0"
 "`4071 <https://wg21.link/LWG4071>`__","","``reference_wrapper`` comparisons are not SFINAE-friendly","Not Yet Adopted","|Complete|","19.0"
+"`4110 <https://wg21.link/LWG4110>`__","","``shared_ptr(nullptr_t, Deleter)`` is overconstrained, breaking some sensible deleters","Not Yet Adopted","|Complete|","19.0"
 "","","","","",""
diff --git a/libcxx/include/__memory/shared_ptr.h b/libcxx/include/__memory/shared_ptr.h
index 00db96185be7c6..7b5002cb95d32b 100644
--- a/libcxx/include/__memory/shared_ptr.h
+++ b/libcxx/include/__memory/shared_ptr.h
@@ -404,7 +404,7 @@ struct __shared_ptr_deleter_ctor_reqs {
 };

 template <class _Dp, class _Tp>
-using __shared_ptr_nullptr_deleter_ctor_reqs = _And<is_move_constructible<_Dp>, __well_formed_deleter<_Dp, nullptr_t> >;
+using __shared_ptr_nullptr_deleter_ctor_reqs = _And<is_move_constructible<_Dp>, __well_formed_deleter<_Dp, _Tp*> >;

 #if defined(_LIBCPP_ABI_ENABLE_SHARED_PTR_TRIVIAL_ABI)
 #  define _LIBCPP_SHARED_PTR_TRIVIAL_ABI __attribute__((__trivial_abi__))
diff --git a/libcxx/include/string b/libcxx/include/string
index 1db803e822d727..751af8f1476d0d 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -868,23 +868,9 @@ private:
   static_assert(sizeof(__short) == (sizeof(value_type) * (__min_cap + 1)), "__short has an unexpected size.");

-  union __ulx {
-    __long __lx;
-    __short __lxx;
-  };
-
-  enum { __n_words = sizeof(__ulx) / sizeof(size_type) };
-
-  struct __raw {
-    size_type __words[__n_words];
-  };
-
-  struct __rep {
-    union {
-      __short __s;
-      __long __l;
-      __raw __r;
-    };
+  union __rep {
+    __short __s;
+    __long __l;
   };

   __compressed_pair<__rep, allocator_type> __r_;
@@ -3746,17 +3732,10 @@ template <class _Allocator>
 inline _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool
 operator==(const basic_string<char, char_traits<char>, _Allocator>& __lhs,
            const basic_string<char, char_traits<char>, _Allocator>& __rhs) _NOEXCEPT {
-  size_t __lhs_sz = __lhs.size();
-  if (__lhs_sz != __rhs.size())
+  size_t __sz = __lhs.size();
+  if (__sz != __rhs.size())
     return false;
-  const char* __lp = __lhs.data();
-  const char* __rp = __rhs.data();
-  if (__lhs.__is_long())
-    return char_traits<char>::compare(__lp, __rp, __lhs_sz) == 0;
-  for (; __lhs_sz != 0; --__lhs_sz, ++__lp, ++__rp)
-    if (*__lp != *__rp)
-      return false;
-  return true;
+  return char_traits<char>::compare(__lhs.data(), __rhs.data(), __sz) == 0;
 }

 #if _LIBCPP_STD_VER <= 17
diff --git a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/get_info.local_time.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/get_info.local_time.pass.cpp
index a8c468a6c6fd4d..ec3e490c0ed790 100644
--- a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/get_info.local_time.pass.cpp
+++ b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/get_info.local_time.pass.cpp
@@ -12,7 +12,7 @@
 // XFAIL: libcpp-has-no-experimental-tzdb
 // XFAIL:
availability-tzdb-missing // Times out under HWASan -// XFAIL: hwasan +// UNSUPPORTED: hwasan // diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter.pass.cpp index 13340ed5294c05..4ea752b36bd018 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter.pass.cpp @@ -32,17 +32,16 @@ int A::count = 0; // LWG 3233. Broken requirements for shared_ptr converting constructors // https://cplusplus.github.io/LWG/issue3233 static_assert( std::is_constructible, std::nullptr_t, test_deleter >::value, ""); -static_assert(!std::is_constructible, std::nullptr_t, bad_deleter>::value, ""); -static_assert(!std::is_constructible, std::nullptr_t, no_nullptr_deleter>::value, ""); +static_assert(!std::is_constructible, std::nullptr_t, bad_deleter>::value, ""); static_assert(!std::is_constructible, std::nullptr_t, no_move_deleter>::value, ""); #if TEST_STD_VER >= 17 -static_assert( std::is_constructible, std::nullptr_t, test_deleter >::value, ""); +static_assert(std::is_constructible, std::nullptr_t, test_deleter >::value, ""); static_assert(!std::is_constructible, std::nullptr_t, bad_deleter>::value, ""); static_assert(!std::is_constructible, std::nullptr_t, no_nullptr_deleter>::value, ""); static_assert(!std::is_constructible, std::nullptr_t, no_move_deleter>::value, ""); -static_assert( std::is_constructible, std::nullptr_t, test_deleter >::value, ""); +static_assert(std::is_constructible, std::nullptr_t, test_deleter >::value, ""); static_assert(!std::is_constructible, std::nullptr_t, bad_deleter>::value, ""); static_assert(!std::is_constructible, std::nullptr_t, no_nullptr_deleter>::value, ""); static_assert(!std::is_constructible, std::nullptr_t, no_move_deleter>::value, ""); diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator.pass.cpp index 53ca6fb5b234d4..a479b24c4595ab 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator.pass.cpp @@ -33,17 +33,21 @@ int A::count = 0; // LWG 3233. 
Broken requirements for shared_ptr converting constructors // https://cplusplus.github.io/LWG/issue3233 static_assert( std::is_constructible, std::nullptr_t, test_deleter, test_allocator >::value, ""); -static_assert(!std::is_constructible, std::nullptr_t, bad_deleter, test_allocator >::value, ""); -static_assert(!std::is_constructible, std::nullptr_t, no_nullptr_deleter, test_allocator >::value, ""); +static_assert(!std::is_constructible, std::nullptr_t, bad_deleter, test_allocator >::value, + ""); static_assert(!std::is_constructible, std::nullptr_t, no_move_deleter, test_allocator >::value, ""); #if TEST_STD_VER >= 17 -static_assert( std::is_constructible, std::nullptr_t, test_deleter, test_allocator >::value, ""); +static_assert( + std::is_constructible, std::nullptr_t, test_deleter, test_allocator >::value, + ""); static_assert(!std::is_constructible, std::nullptr_t, bad_deleter, test_allocator >::value, ""); static_assert(!std::is_constructible, std::nullptr_t, no_nullptr_deleter, test_allocator >::value, ""); static_assert(!std::is_constructible, std::nullptr_t, no_move_deleter, test_allocator >::value, ""); -static_assert( std::is_constructible, std::nullptr_t, test_deleter, test_allocator >::value, ""); +static_assert( + std::is_constructible, std::nullptr_t, test_deleter, test_allocator >::value, + ""); static_assert(!std::is_constructible, std::nullptr_t, bad_deleter, test_allocator >::value, ""); static_assert(!std::is_constructible, std::nullptr_t, no_nullptr_deleter, test_allocator >::value, ""); static_assert(!std::is_constructible, std::nullptr_t, no_move_deleter, test_allocator >::value, ""); diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp index 562acf56d96fe1..95dcb92b51993c 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp @@ -115,6 +115,14 @@ int main(int, char**) } #endif // TEST_STD_VER >= 11 +#if TEST_STD_VER >= 14 + { + // LWG 4110 + auto deleter = [](auto pointer) { delete pointer; }; + std::shared_ptr p(new int, deleter); + } +#endif + test_function_type(); return 0; } diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp index 9dffbcdd59a735..89e7d0b02d421b 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp @@ -165,5 +165,13 @@ int main(int, char**) test_allocator >::value, ""); } +#if TEST_STD_VER >= 14 + { + // LWG 4110 + auto deleter = [](auto pointer) { delete pointer; }; + std::shared_ptr p(new int, deleter, std::allocator()); + } +#endif + return 0; } diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp index 0ac2f39a6180c7..7033481d6014b5 100644 --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -298,19 +298,31 @@ void 
UnwindInfoSectionImpl::prepareRelocations(ConcatInputSection *isec) { assert(!isCoalescedWeak(referentIsec)); // Personality functions can be referenced via section relocations // if they live in the same object file. Create placeholder synthetic - // symbols for them in the GOT. + // symbols for them in the GOT. If the corresponding symbol is already + // in the GOT, use that to avoid creating a duplicate entry. All GOT + // entries needed by non-unwind sections will have already been added + // by this point. Symbol *&s = personalityTable[{referentIsec, r.addend}]; if (s == nullptr) { - // This runs after dead stripping, so the noDeadStrip argument does not - // matter. - s = make("", /*file=*/nullptr, referentIsec, - r.addend, /*size=*/0, /*isWeakDef=*/false, - /*isExternal=*/false, /*isPrivateExtern=*/false, - /*includeInSymtab=*/true, - /*isReferencedDynamically=*/false, - /*noDeadStrip=*/false); - s->used = true; - in.got->addEntry(s); + Defined *const *gotEntry = + llvm::find_if(referentIsec->symbols, [&](Defined const *d) { + return d->value == static_cast(r.addend) && + d->isInGot(); + }); + if (gotEntry != referentIsec->symbols.end()) { + s = *gotEntry; + } else { + // This runs after dead stripping, so the noDeadStrip argument does + // not matter. + s = make("", /*file=*/nullptr, referentIsec, + r.addend, /*size=*/0, /*isWeakDef=*/false, + /*isExternal=*/false, /*isPrivateExtern=*/false, + /*includeInSymtab=*/true, + /*isReferencedDynamically=*/false, + /*noDeadStrip=*/false); + s->used = true; + in.got->addEntry(s); + } } r.referent = s; r.addend = 0; diff --git a/lld/test/MachO/compact-unwind-both-local-and-dylib-personality.s b/lld/test/MachO/compact-unwind-both-local-and-dylib-personality.s index 676577d6b17e9f..35f39ba5fb1e21 100644 --- a/lld/test/MachO/compact-unwind-both-local-and-dylib-personality.s +++ b/lld/test/MachO/compact-unwind-both-local-and-dylib-personality.s @@ -42,19 +42,20 @@ # RUN: llvm-objdump --macho --indirect-symbols --unwind-info --bind %t/d.out | FileCheck %s --check-prefixes=D -D#%x,OFF=0x100000000 -# A: Indirect symbols for (__DATA_CONST,__got) +# A: Indirect symbols for (__DATA_CONST,__got) 4 entries # A-NEXT: address index name # A: 0x[[#%x,GXX_PERSONALITY_LO:]] [[#]] ___gxx_personality_v0 +# A: 0x[[#%x,PERSONALITY_1:]] [[#]] _personality_1 +# A: 0x[[#%x,PERSONALITY_2:]] [[#]] _personality_2 # A: 0x[[#%x,GXX_PERSONALITY_HI:]] [[#]] ___gxx_personality_v0 -# A: 0x[[#%x,PERSONALITY_1:]] LOCAL -# A: 0x[[#%x,PERSONALITY_2:]] LOCAL # BC: Indirect symbols for (__DATA_CONST,__got) # BC-NEXT: address index name -# C: 0x[[#%x,GXX_PERSONALITY_HI:]] LOCAL # BC: 0x[[#%x,GXX_PERSONALITY_LO:]] LOCAL -# BC: 0x[[#%x,PERSONALITY_1:]] LOCAL -# BC: 0x[[#%x,PERSONALITY_2:]] LOCAL +# C: 0x[[#%x,GXX_PERSONALITY_HI:]] [[#]] ___gxx_personality_v0 +# BC: 0x[[#%x,PERSONALITY_1:]] [[#]] _personality_1 +# BC: 0x[[#%x,PERSONALITY_2:]] [[#]] _personality_2 +# BC-EMPTY: # CHECK: Personality functions: (count = 3) # CHECK-DAG: personality[{{[0-9]+}}]: 0x{{0*}}[[#GXX_PERSONALITY_LO-OFF]] @@ -66,7 +67,7 @@ # A-NEXT: __DATA_CONST __got 0x[[#GXX_PERSONALITY_LO-0]] pointer 0 libc++abi ___gxx_personality_v0 -# D: Indirect symbols for (__DATA_CONST,__got) +# D: Indirect symbols for (__DATA_CONST,__got) 6 entries # D-NEXT: address index name # D: 0x[[#%x,GXX_PERSONALITY_HI:]] [[#]] ___gxx_personality_v0 # D: 0x[[#%x,PERSONALITY_1:]] [[#]] _personality_1 diff --git a/lld/test/MachO/compact-unwind.s b/lld/test/MachO/compact-unwind.s index fa73ccb10a32a2..27e4b44dc0b09f 100644 --- 
a/lld/test/MachO/compact-unwind.s +++ b/lld/test/MachO/compact-unwind.s @@ -29,7 +29,7 @@ # FIRST: Indirect symbols for (__DATA_CONST,__got) # FIRST-NEXT: address index name # FIRST-DAG: 0x[[#%x,GXX_PERSONALITY:]] [[#]] ___gxx_personality_v0 -# FIRST-DAG: 0x[[#%x,MY_PERSONALITY:]] LOCAL +# FIRST-DAG: 0x[[#%x,MY_PERSONALITY:]] # SECOND: Indirect symbols for (__DATA_CONST,__got) # SECOND-NEXT: address index name diff --git a/lldb/source/Commands/CommandObjectThread.cpp b/lldb/source/Commands/CommandObjectThread.cpp index db96ee2cec383e..bb2be560ebfff3 100644 --- a/lldb/source/Commands/CommandObjectThread.cpp +++ b/lldb/source/Commands/CommandObjectThread.cpp @@ -383,7 +383,7 @@ class CommandObjectThreadStepWithTypeAndScope : public CommandObjectParsed { eCommandProcessMustBePaused), m_step_type(step_type), m_step_scope(step_scope), m_class_options("scripted step") { - AddSimpleArgumentList(eArgTypeThreadID, eArgRepeatOptional); + AddSimpleArgumentList(eArgTypeThreadIndex, eArgRepeatOptional); if (step_type == eStepTypeScripted) { m_all_options.Append(&m_class_options, LLDB_OPT_SET_1 | LLDB_OPT_SET_2, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp index 7cf92adc6ef578..992d814793f9d2 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp @@ -367,20 +367,6 @@ lldb_private::Type *DWARFDIE::ResolveTypeUID(const DWARFDIE &die) const { return nullptr; } -std::vector DWARFDIE::GetDeclContextDIEs() const { - if (!IsValid()) - return {}; - - std::vector result; - DWARFDIE parent = GetParentDeclContextDIE(); - while (parent.IsValid() && parent.GetDIE() != GetDIE()) { - result.push_back(std::move(parent)); - parent = parent.GetParentDeclContextDIE(); - } - - return result; -} - static void GetDeclContextImpl(DWARFDIE die, llvm::SmallSet &seen, std::vector &context) { @@ -491,6 +477,18 @@ static void GetTypeLookupContextImpl(DWARFDIE die, case DW_TAG_base_type: push_ctx(CompilerContextKind::Builtin, name); break; + // If any of the tags below appear in the parent chain, stop the decl + // context and return. Prior to these being in here, if a type existed in a + // namespace "a" like "a::my_struct", but we also have a function in that + // same namespace "a" which contained a type named "my_struct", both would + // return "a::my_struct" as the declaration context since the + // DW_TAG_subprogram would be skipped and its parent would be found. + case DW_TAG_compile_unit: + case DW_TAG_type_unit: + case DW_TAG_subprogram: + case DW_TAG_lexical_block: + case DW_TAG_inlined_subroutine: + return; default: break; } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h index 511ca62d0197a8..c74a82061fccf2 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h @@ -69,9 +69,6 @@ class DWARFDIE : public DWARFBaseDIE { DWARFDIE GetParentDeclContextDIE() const; - // DeclContext related functions - std::vector GetDeclContextDIEs() const; - /// Return this DIE's decl context as it is needed to look up types /// in Clang modules. This context will include any modules or functions that /// the type is declared in so an exact module match can be efficiently made. 
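The early-return tags added to GetTypeLookupContextImpl above are easiest to see against a concrete source. A minimal sketch (illustration only, not from the patch; the names are invented) of the shadowing scenario the new comment describes:

    // Two distinct "my_struct" types whose DIE parent chains differ only
    // below a DW_TAG_subprogram.
    namespace a {
    struct my_struct {};   // parents: DW_TAG_namespace -> DW_TAG_compile_unit,
                           // so its lookup context is "a::my_struct"
    void f() {
      struct my_struct {}; // parents include DW_TAG_subprogram; the walk now
                           // returns there, leaving the context as "my_struct"
    }
    } // namespace a

Before this change both types would have been reported as "a::my_struct", because the walk skipped the DW_TAG_subprogram and kept climbing to the enclosing namespace.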
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index a52a7d67673742..d9e81f9c105b2d 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -3039,95 +3039,6 @@ TypeSP SymbolFileDWARF::FindCompleteObjCDefinitionTypeForDIE( return type_sp; } -// This function helps to ensure that the declaration contexts match for two -// different DIEs. Often times debug information will refer to a forward -// declaration of a type (the equivalent of "struct my_struct;". There will -// often be a declaration of that type elsewhere that has the full definition. -// When we go looking for the full type "my_struct", we will find one or more -// matches in the accelerator tables and we will then need to make sure the -// type was in the same declaration context as the original DIE. This function -// can efficiently compare two DIEs and will return true when the declaration -// context matches, and false when they don't. -bool SymbolFileDWARF::DIEDeclContextsMatch(const DWARFDIE &die1, - const DWARFDIE &die2) { - if (die1 == die2) - return true; - - std::vector decl_ctx_1; - std::vector decl_ctx_2; - // The declaration DIE stack is a stack of the declaration context DIEs all - // the way back to the compile unit. If a type "T" is declared inside a class - // "B", and class "B" is declared inside a class "A" and class "A" is in a - // namespace "lldb", and the namespace is in a compile unit, there will be a - // stack of DIEs: - // - // [0] DW_TAG_class_type for "B" - // [1] DW_TAG_class_type for "A" - // [2] DW_TAG_namespace for "lldb" - // [3] DW_TAG_compile_unit or DW_TAG_partial_unit for the source file. - // - // We grab both contexts and make sure that everything matches all the way - // back to the compiler unit. - - // First lets grab the decl contexts for both DIEs - decl_ctx_1 = die1.GetDeclContextDIEs(); - decl_ctx_2 = die2.GetDeclContextDIEs(); - // Make sure the context arrays have the same size, otherwise we are done - const size_t count1 = decl_ctx_1.size(); - const size_t count2 = decl_ctx_2.size(); - if (count1 != count2) - return false; - - // Make sure the DW_TAG values match all the way back up the compile unit. If - // they don't, then we are done. - DWARFDIE decl_ctx_die1; - DWARFDIE decl_ctx_die2; - size_t i; - for (i = 0; i < count1; i++) { - decl_ctx_die1 = decl_ctx_1[i]; - decl_ctx_die2 = decl_ctx_2[i]; - if (decl_ctx_die1.Tag() != decl_ctx_die2.Tag()) - return false; - } -#ifndef NDEBUG - - // Make sure the top item in the decl context die array is always - // DW_TAG_compile_unit or DW_TAG_partial_unit. If it isn't then - // something went wrong in the DWARFDIE::GetDeclContextDIEs() - // function. - dw_tag_t cu_tag = decl_ctx_1[count1 - 1].Tag(); - UNUSED_IF_ASSERT_DISABLED(cu_tag); - assert(cu_tag == DW_TAG_compile_unit || cu_tag == DW_TAG_partial_unit); - -#endif - // Always skip the compile unit when comparing by only iterating up to "count - // - 1". Here we compare the names as we go. - for (i = 0; i < count1 - 1; i++) { - decl_ctx_die1 = decl_ctx_1[i]; - decl_ctx_die2 = decl_ctx_2[i]; - const char *name1 = decl_ctx_die1.GetName(); - const char *name2 = decl_ctx_die2.GetName(); - // If the string was from a DW_FORM_strp, then the pointer will often be - // the same! - if (name1 == name2) - continue; - - // Name pointers are not equal, so only compare the strings if both are not - // NULL. 
- if (name1 && name2) { - // If the strings don't compare, we are done... - if (strcmp(name1, name2) != 0) - return false; - } else { - // One name was NULL while the other wasn't - return false; - } - } - // We made it through all of the checks and the declaration contexts are - // equal. - return true; -} - TypeSP SymbolFileDWARF::FindDefinitionTypeForDWARFDeclContext(const DWARFDIE &die) { TypeSP type_sp; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h index 7282c08c6857c9..5d3654efcce544 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h @@ -461,8 +461,6 @@ class SymbolFileDWARF : public SymbolFileCommon { FindBlockContainingSpecification(const DWARFDIE &die, dw_offset_t spec_block_die_offset); - bool DIEDeclContextsMatch(const DWARFDIE &die1, const DWARFDIE &die2); - bool ClassContainsSelector(const DWARFDIE &class_die, ConstString selector); /// Parse call site entries (DW_TAG_call_site), including any nested call site diff --git a/lldb/test/API/commands/process/attach/attach_denied/TestAttachDenied.py b/lldb/test/API/commands/process/attach/attach_denied/TestAttachDenied.py index 22dca62045022e..d72a710e8127bf 100644 --- a/lldb/test/API/commands/process/attach/attach_denied/TestAttachDenied.py +++ b/lldb/test/API/commands/process/attach/attach_denied/TestAttachDenied.py @@ -18,6 +18,7 @@ class AttachDeniedTestCase(TestBase): @skipIfWindows @skipIfiOSSimulator @skipIfDarwinEmbedded # ptrace(ATTACH_REQUEST...) won't work on ios/tvos/etc + @skipIfAsan # Times out inconsistently under asan def test_attach_to_process_by_id_denied(self): """Test attach by process id denied""" self.build() diff --git a/lldb/test/API/functionalities/type_types/Makefile b/lldb/test/API/functionalities/type_types/Makefile new file mode 100644 index 00000000000000..3d0b98f13f3d7b --- /dev/null +++ b/lldb/test/API/functionalities/type_types/Makefile @@ -0,0 +1,2 @@ +CXX_SOURCES := main.cpp +include Makefile.rules diff --git a/lldb/test/API/functionalities/type_types/TestFindTypes.py b/lldb/test/API/functionalities/type_types/TestFindTypes.py new file mode 100644 index 00000000000000..42b5c4cfaaf77c --- /dev/null +++ b/lldb/test/API/functionalities/type_types/TestFindTypes.py @@ -0,0 +1,66 @@ +""" +Test the SBModule and SBTarget type lookup APIs to find multiple types. +""" + +import lldb +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class TypeFindFirstTestCase(TestBase): + def test_find_first_type(self): + """ + Test SBTarget::FindTypes() and SBModule::FindTypes() APIs. + + We had issues where our declaration context when finding types was + incorrectly calculated where a type in a namepace, and a type in a + function that was also in the same namespace would match a lookup. For + example: + + namespace a { + struct Foo { + int foo; + }; + + unsigned foo() { + typedef unsigned Foo; + Foo foo = 12; + return foo; + } + } // namespace a + + + Previously LLDB would calculate the declaration context of "a::Foo" + correctly, but incorrectly calculate the declaration context of "Foo" + from within the foo() function as "a::Foo". Adding tests to ensure this + works correctly. 
+ """ + self.build() + target = self.createTestTarget() + exe_module = target.GetModuleAtIndex(0) + self.assertTrue(exe_module.IsValid()) + # Test the SBTarget and SBModule APIs for FindFirstType + for api in [target, exe_module]: + # We should find the "a::Foo" but not the "Foo" type in the function + types = api.FindTypes("a::Foo") + self.assertEqual(types.GetSize(), 1) + type_str0 = str(types.GetTypeAtIndex(0)) + self.assertIn('struct Foo {', type_str0) + + # When we search by type basename, we should find any type whose + # basename matches "Foo", so "a::Foo" and the "Foo" type in the + # function. + types = api.FindTypes("Foo") + self.assertEqual(types.GetSize(), 2) + type_str0 = str(types.GetTypeAtIndex(0)) + type_str1 = str(types.GetTypeAtIndex(1)) + # We don't know which order the types will come back as, so + self.assertEqual(set([str(t).split('\n')[0] for t in types]), set(["typedef Foo", "struct Foo {"])) + + # When we search by type basename with "::" prepended, we should + # only types in the root namespace which means only "Foo" type in + # the function. + types = api.FindTypes("::Foo") + self.assertEqual(types.GetSize(), 1) + type_str0 = str(types.GetTypeAtIndex(0)) + self.assertIn('typedef Foo', type_str0) diff --git a/lldb/test/API/functionalities/type_types/main.cpp b/lldb/test/API/functionalities/type_types/main.cpp new file mode 100644 index 00000000000000..095328932cdc46 --- /dev/null +++ b/lldb/test/API/functionalities/type_types/main.cpp @@ -0,0 +1,15 @@ +namespace a { +struct Foo {}; + +unsigned foo() { + typedef unsigned Foo; + Foo foo = 12; + return foo; +} +} // namespace a + +int main() { + a::Foo f = {}; + a::foo(); + return 0; +} diff --git a/lldb/unittests/SymbolFile/DWARF/DWARFDIETest.cpp b/lldb/unittests/SymbolFile/DWARF/DWARFDIETest.cpp index bea07dfa27cc6a..65da7de1ba2d8a 100644 --- a/lldb/unittests/SymbolFile/DWARF/DWARFDIETest.cpp +++ b/lldb/unittests/SymbolFile/DWARF/DWARFDIETest.cpp @@ -258,3 +258,110 @@ TEST(DWARFDIETest, GetContext) { struct_die.GetTypeLookupContext(), testing::ElementsAre(make_namespace("NAMESPACE"), make_struct("STRUCT"))); } + +TEST(DWARFDIETest, GetContextInFunction) { + // Make sure we get the right context fo each "struct_t" type. The first + // should be "a::struct_t" and the one defined in the "foo" function should be + // "struct_t". Previous DWARFDIE::GetTypeLookupContext() function calls would + // have the "struct_t" in "foo" be "a::struct_t" because it would traverse the + // entire die parent tree and ignore DW_TAG_subprogram and keep traversing the + // parents. 
+ // + // 0x0000000b: DW_TAG_compile_unit + // 0x0000000c: DW_TAG_namespace + // DW_AT_name("a") + // 0x0000000f: DW_TAG_structure_type + // DW_AT_name("struct_t") + // 0x00000019: DW_TAG_subprogram + // DW_AT_name("foo") + // 0x0000001e: DW_TAG_structure_type + // DW_AT_name("struct_t") + // 0x00000028: NULL + // 0x00000029: NULL + // 0x0000002a: NULL + const char *yamldata = R"( +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_386 +DWARF: + debug_str: + - '' + debug_abbrev: + - ID: 0 + Table: + - Code: 0x1 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_yes + - Code: 0x2 + Tag: DW_TAG_namespace + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_string + - Code: 0x3 + Tag: DW_TAG_structure_type + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_string + - Code: 0x4 + Tag: DW_TAG_subprogram + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_string + debug_info: + - Length: 0x27 + Version: 4 + AbbrevTableID: 0 + AbbrOffset: 0x0 + AddrSize: 8 + Entries: + - AbbrCode: 0x1 + - AbbrCode: 0x2 + Values: + - Value: 0xDEADBEEFDEADBEEF + CStr: a + - AbbrCode: 0x3 + Values: + - Value: 0xDEADBEEFDEADBEEF + CStr: struct_t + - AbbrCode: 0x4 + Values: + - Value: 0xDEADBEEFDEADBEEF + CStr: foo + - AbbrCode: 0x3 + Values: + - Value: 0xDEADBEEFDEADBEEF + CStr: struct_t + - AbbrCode: 0x0 + - AbbrCode: 0x0 + - AbbrCode: 0x0)"; + + YAMLModuleTester t(yamldata); + auto *symbol_file = + llvm::cast(t.GetModule()->GetSymbolFile()); + DWARFUnit *unit = symbol_file->DebugInfo().GetUnitAtIndex(0); + ASSERT_TRUE(unit); + + auto make_namespace = [](llvm::StringRef name) { + return CompilerContext(CompilerContextKind::Namespace, ConstString(name)); + }; + auto make_struct = [](llvm::StringRef name) { + return CompilerContext(CompilerContextKind::Struct, ConstString(name)); + }; + // Grab the "a::struct_t" type from the "a" namespace + DWARFDIE a_struct_die = unit->DIE().GetFirstChild().GetFirstChild(); + ASSERT_TRUE(a_struct_die); + EXPECT_THAT( + a_struct_die.GetDeclContext(), + testing::ElementsAre(make_namespace("a"), make_struct("struct_t"))); + // Grab the "struct_t" defined in the "foo" function. + DWARFDIE foo_struct_die = + unit->DIE().GetFirstChild().GetFirstChild().GetSibling().GetFirstChild(); + EXPECT_THAT(foo_struct_die.GetTypeLookupContext(), + testing::ElementsAre(make_struct("struct_t"))); +} diff --git a/llvm/include/llvm/Analysis/CGSCCPassManager.h b/llvm/include/llvm/Analysis/CGSCCPassManager.h index 5654ad46d6eab0..b19d53621ac867 100644 --- a/llvm/include/llvm/Analysis/CGSCCPassManager.h +++ b/llvm/include/llvm/Analysis/CGSCCPassManager.h @@ -306,6 +306,10 @@ struct CGSCCUpdateResult { SmallDenseSet, 4> &InlinedInternalEdges; + /// Functions that a pass has considered to be dead to be removed at the end + /// of the call graph walk in batch. + SmallVector &DeadFunctions; + /// Weak VHs to keep track of indirect calls for the purposes of detecting /// devirtualization. /// diff --git a/llvm/include/llvm/Analysis/LazyCallGraph.h b/llvm/include/llvm/Analysis/LazyCallGraph.h index ac8ca207d312b7..a8bbf2c578af9b 100644 --- a/llvm/include/llvm/Analysis/LazyCallGraph.h +++ b/llvm/include/llvm/Analysis/LazyCallGraph.h @@ -832,7 +832,7 @@ class LazyCallGraph { /// self-edges and edge removals which result in a spanning tree with no /// more cycles. 
[[nodiscard]] SmallVector - removeInternalRefEdge(Node &SourceN, ArrayRef TargetNs); + removeInternalRefEdges(ArrayRef> Edges); /// A convenience wrapper around the above to handle trivial cases of /// inserting a new call edge. @@ -1056,18 +1056,18 @@ class LazyCallGraph { /// once SCCs have started to be formed. These routines have strict contracts /// but may be called at any point. - /// Remove a dead function from the call graph (typically to delete it). + /// Remove dead functions from the call graph. /// - /// Note that the function must have an empty use list, and the call graph - /// must be up-to-date prior to calling this. That means it is by itself in - /// a maximal SCC which is by itself in a maximal RefSCC, etc. No structural - /// changes result from calling this routine other than potentially removing - /// entry points into the call graph. + /// These functions should have already been passed to markDeadFunction(). + /// This is done as a batch to prevent compile time blowup as a result of + /// handling a single function at a time. + void removeDeadFunctions(ArrayRef DeadFs); + + /// Mark a function as dead to be removed later by removeDeadFunctions(). /// - /// If SCC formation has begun, this function must not be part of the current - /// DFS in order to call this safely. Typically, the function will have been - /// fully visited by the DFS prior to calling this routine. - void removeDeadFunction(Function &F); + /// The function body should have no incoming or outgoing call or ref edges. + /// For example, a function with a single "unreachable" instruction. + void markDeadFunction(Function &F); /// Add a new function split/outlined from an existing function. /// diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index b9f385f4c4b8fa..7a54fe55014be1 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -704,9 +704,10 @@ class LoopAccessInfo { const PredicatedScalarEvolution &getPSE() const { return *PSE; } private: - /// Analyze the loop. - void analyzeLoop(AAResults *AA, LoopInfo *LI, - const TargetLibraryInfo *TLI, DominatorTree *DT); + /// Analyze the loop. Returns true if all memory access in the loop can be + /// vectorized. + bool analyzeLoop(AAResults *AA, LoopInfo *LI, const TargetLibraryInfo *TLI, + DominatorTree *DT); /// Check if the structure of the loop allows it to be analyzed by this /// pass. diff --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h index 67e10c35894954..45599c940659e4 100644 --- a/llvm/include/llvm/MC/MCFragment.h +++ b/llvm/include/llvm/MC/MCFragment.h @@ -23,12 +23,15 @@ namespace llvm { +class MCAssembler; class MCSection; class MCSubtargetInfo; class MCSymbol; -class MCFragment : public ilist_node_with_parent { +class MCFragment { friend class MCAsmLayout; + friend class MCAssembler; + friend class MCSection; public: enum FragmentType : uint8_t { @@ -51,6 +54,9 @@ class MCFragment : public ilist_node_with_parent { }; private: + // The next fragment within the section. + MCFragment *Next = nullptr; + /// The data for the section this fragment is in. MCSection *Parent; @@ -64,10 +70,6 @@ class MCFragment : public ilist_node_with_parent { /// The layout order of this fragment. unsigned LayoutOrder; - /// The subsection this fragment belongs to. This is 0 if the fragment is not - // in any subsection. 
-  unsigned SubsectionNumber = 0;
-
   FragmentType Kind;
 
 protected:
@@ -88,6 +90,8 @@ class MCFragment : public ilist_node_with_parent<MCFragment, MCSection> {
   /// This method will dispatch to the appropriate subclass.
   void destroy();
 
+  MCFragment *getNext() const { return Next; }
+
   FragmentType getKind() const { return Kind; }
 
   MCSection *getParent() const { return Parent; }
@@ -104,9 +108,6 @@ class MCFragment : public ilist_node_with_parent<MCFragment, MCSection> {
   bool hasInstructions() const { return HasInstructions; }
 
   void dump() const;
-
-  void setSubsectionNumber(unsigned Value) { SubsectionNumber = Value; }
-  unsigned getSubsectionNumber() const { return SubsectionNumber; }
 };
 
 class MCDummyFragment : public MCFragment {
diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h
index e212d546139808..c0a337f5ea45e5 100644
--- a/llvm/include/llvm/MC/MCObjectStreamer.h
+++ b/llvm/include/llvm/MC/MCObjectStreamer.h
@@ -41,7 +41,6 @@ class raw_pwrite_stream;
 /// implementation.
 class MCObjectStreamer : public MCStreamer {
   std::unique_ptr<MCAssembler> Assembler;
-  MCSection::iterator CurInsertionPoint;
   bool EmitEHFrame;
   bool EmitDebugFrame;
   SmallVector<MCSymbol *, 2> PendingLabels;
@@ -94,7 +93,7 @@ class MCObjectStreamer : public MCStreamer {
   void insert(MCFragment *F) {
     flushPendingLabels(F);
     MCSection *CurSection = getCurrentSectionOnly();
-    CurSection->getFragmentList().insert(CurInsertionPoint, F);
+    CurSection->addFragment(*F);
     F->setParent(CurSection);
   }
 
diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h
index 217b9b4b5bc52b..e5455292d5c625 100644
--- a/llvm/include/llvm/MC/MCSection.h
+++ b/llvm/include/llvm/MC/MCSection.h
@@ -14,7 +14,6 @@
 #define LLVM_MC_MCSECTION_H
 
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/ilist.h"
 #include "llvm/MC/MCFragment.h"
 #include "llvm/MC/SectionKind.h"
 #include "llvm/Support/Alignment.h"
@@ -24,20 +23,18 @@
 namespace llvm {
 
 class MCAsmInfo;
+class MCAssembler;
 class MCContext;
 class MCExpr;
 class MCSymbol;
 class raw_ostream;
 class Triple;
 
-template <> struct ilist_alloc_traits<MCFragment> {
-  static void deleteNode(MCFragment *V);
-};
-
 /// Instances of this class represent a uniqued identifier for a section in the
 /// current translation unit. The MCContext class uniques and creates these.
 class MCSection {
 public:
+  friend MCAssembler;
   static constexpr unsigned NonUniqueID = ~0U;
 
   enum SectionVariant {
@@ -58,12 +55,29 @@ class MCSection {
     BundleLockedAlignToEnd
   };
 
-  using FragmentListType = iplist<MCFragment>;
-
-  using const_iterator = FragmentListType::const_iterator;
-  using iterator = FragmentListType::iterator;
+  struct iterator {
+    MCFragment *F = nullptr;
+    iterator() = default;
+    explicit iterator(MCFragment *F) : F(F) {}
+    MCFragment &operator*() const { return *F; }
+    bool operator==(const iterator &O) const { return F == O.F; }
+    bool operator!=(const iterator &O) const { return F != O.F; }
+    iterator &operator++() {
+      F = F->Next;
+      return *this;
+    }
+    iterator operator++(int) {
+      iterator T = *this;
+      F = F->Next;
+      return T;
+    }
+  };
+
+  struct FragList {
+    MCFragment *Head = nullptr;
+    MCFragment *Tail = nullptr;
+  };
 
 private:
+  // At parse time, this holds the fragment list of the current subsection. At
+  // layout time, this holds the concatenated fragment lists of all
+  // subsections.
+  FragList *CurFragList;
   MCSymbol *Begin;
   MCSymbol *End = nullptr;
   /// The alignment requirement of this section.
@@ -92,11 +106,10 @@ class MCSection {
 
   MCDummyFragment DummyFragment;
 
-  FragmentListType Fragments;
-
-  /// Mapping from subsection number to insertion point for subsection numbers
-  /// below that number.
-  SmallVector<std::pair<unsigned, MCFragment *>, 1> SubsectionFragmentMap;
+  // Mapping from subsection number to fragment list. At layout time, the
+  // subsection 0 list is replaced with concatenated fragments from all
+  // subsections.
+  SmallVector<std::pair<unsigned, FragList>, 1> Subsections;
 
   /// State for tracking labels that don't yet have Fragments
   struct PendingLabel {
@@ -171,29 +184,27 @@ class MCSection {
   bool isRegistered() const { return IsRegistered; }
   void setIsRegistered(bool Value) { IsRegistered = Value; }
 
-  MCSection::FragmentListType &getFragmentList() { return Fragments; }
-  const MCSection::FragmentListType &getFragmentList() const {
-    return const_cast<MCSection *>(this)->getFragmentList();
-  }
-
-  /// Support for MCFragment::getNextNode().
-  static FragmentListType MCSection::*getSublistAccess(MCFragment *) {
-    return &MCSection::Fragments;
-  }
-
   const MCDummyFragment &getDummyFragment() const { return DummyFragment; }
   MCDummyFragment &getDummyFragment() { return DummyFragment; }
 
-  iterator begin() { return Fragments.begin(); }
-  const_iterator begin() const { return Fragments.begin(); }
-
-  iterator end() { return Fragments.end(); }
-  const_iterator end() const { return Fragments.end(); }
-  bool empty() const { return Fragments.empty(); }
-
-  void addFragment(MCFragment &F) { Fragments.push_back(&F); }
+  FragList *curFragList() const { return CurFragList; }
+  iterator begin() const { return iterator(CurFragList->Head); }
+  iterator end() const { return {}; }
+  bool empty() const { return !CurFragList->Head; }
+
+  void addFragment(MCFragment &F) {
+    // The formal layout order will be finalized in MCAssembler::layout.
+    if (CurFragList->Tail) {
+      CurFragList->Tail->Next = &F;
+      F.setLayoutOrder(CurFragList->Tail->getLayoutOrder() + 1);
+    } else {
+      CurFragList->Head = &F;
+      assert(F.getLayoutOrder() == 0);
+    }
+    CurFragList->Tail = &F;
+  }
 
-  MCSection::iterator getSubsectionInsertionPoint(unsigned Subsection);
+  void switchSubsection(unsigned Subsection);
 
   void dump() const;
 
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index d6831eeaa794b7..dae2caf0181e46 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -797,7 +797,7 @@ struct InstrProfValueSiteRecord {
   /// Value profiling data pairs at a given value site.
   std::list<InstrProfValueData> ValueData;
 
-  InstrProfValueSiteRecord() { ValueData.clear(); }
+  InstrProfValueSiteRecord() = default;
   template <class InputIterator>
   InstrProfValueSiteRecord(InputIterator F, InputIterator L)
       : ValueData(F, L) {}
diff --git a/llvm/lib/Analysis/CGSCCPassManager.cpp b/llvm/lib/Analysis/CGSCCPassManager.cpp
index 8ae5c3dee6103e..2ed1d98f800688 100644
--- a/llvm/lib/Analysis/CGSCCPassManager.cpp
+++ b/llvm/lib/Analysis/CGSCCPassManager.cpp
@@ -158,10 +158,12 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) {
   SmallDenseSet<std::pair<LazyCallGraph::Node *, LazyCallGraph::Node *>, 4>
       InlinedInternalEdges;
 
+  SmallVector<Function *, 4> DeadFunctions;
+
   CGSCCUpdateResult UR = {
-      RCWorklist, CWorklist, InvalidRefSCCSet,
-      InvalidSCCSet, nullptr, PreservedAnalyses::all(),
-      InlinedInternalEdges, {}};
+      RCWorklist, CWorklist, InvalidRefSCCSet,
+      InvalidSCCSet, nullptr, PreservedAnalyses::all(),
+      InlinedInternalEdges, DeadFunctions, {}};
 
   // Request PassInstrumentation from analysis manager, will use it to run
   // instrumenting callbacks for the passes later.
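The DeadFunctions vector added here threads through CGSCCUpdateResult so that passes queue deletions instead of mutating the graph mid-walk. A minimal C++ sketch of the pass-side protocol, assuming a function that already has zero live uses and whose body has been reduced to a single unreachable; the helper name retireDeadFunction is hypothetical and not part of this patch:

#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/IR/Function.h"
using namespace llvm;

// Queue F for batch removal: markDeadFunction() demotes its remaining call
// edges to ref edges now; the adaptor later calls CG.removeDeadFunctions()
// on the whole batch and erases each Function from the module (see the next
// hunk).
static void retireDeadFunction(LazyCallGraph &CG, CGSCCUpdateResult &UR,
                               Function &F) {
  CG.markDeadFunction(F);
  UR.DeadFunctions.push_back(&F);
}

This mirrors what the Inliner changes later in this patch do after calling makeFunctionBodyUnreachable().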
@@ -340,6 +342,10 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) {
     } while (!RCWorklist.empty());
   }
 
+  CG.removeDeadFunctions(DeadFunctions);
+  for (Function *DeadF : DeadFunctions)
+    DeadF->eraseFromParent();
+
 #if defined(EXPENSIVE_CHECKS)
   // Verify that the call graph is still valid.
   CG.verify();
@@ -1030,36 +1036,6 @@ static LazyCallGraph::SCC &updateCGAndAnalysisManagerForPass(
     return true;
   });
 
-  // Now do a batch removal of the internal ref edges left.
-  auto NewRefSCCs = RC->removeInternalRefEdge(N, DeadTargets);
-  if (!NewRefSCCs.empty()) {
-    // The old RefSCC is dead, mark it as such.
-    UR.InvalidatedRefSCCs.insert(RC);
-
-    // Note that we don't bother to invalidate analyses as ref-edge
-    // connectivity is not really observable in any way and is intended
-    // exclusively to be used for ordering of transforms rather than for
-    // analysis conclusions.
-
-    // Update RC to the "bottom".
-    assert(G.lookupSCC(N) == C && "Changed the SCC when splitting RefSCCs!");
-    RC = &C->getOuterRefSCC();
-    assert(G.lookupRefSCC(N) == RC && "Failed to update current RefSCC!");
-
-    // The RC worklist is in reverse postorder, so we enqueue the new ones in
-    // RPO except for the one which contains the source node as that is the
-    // "bottom" we will continue processing in the bottom-up walk.
-    assert(NewRefSCCs.front() == RC &&
-           "New current RefSCC not first in the returned list!");
-    for (RefSCC *NewRC : llvm::reverse(llvm::drop_begin(NewRefSCCs))) {
-      assert(NewRC != RC && "Should not encounter the current RefSCC further "
-                            "in the postorder list of new RefSCCs.");
-      UR.RCWorklist.insert(NewRC);
-      LLVM_DEBUG(dbgs() << "Enqueuing a new RefSCC in the update worklist: "
-                        << *NewRC << "\n");
-    }
-  }
-
   // Next demote all the call edges that are now ref edges. This helps make
   // the SCCs small which should minimize the work below as we don't want to
   // form cycles that this would break.
diff --git a/llvm/lib/Analysis/LazyCallGraph.cpp b/llvm/lib/Analysis/LazyCallGraph.cpp
index 48a7ca0061600b..e6bf8c9cbb289f 100644
--- a/llvm/lib/Analysis/LazyCallGraph.cpp
+++ b/llvm/lib/Analysis/LazyCallGraph.cpp
@@ -1160,8 +1160,8 @@ void LazyCallGraph::RefSCC::removeOutgoingEdge(Node &SourceN, Node &TargetN) {
 }
 
 SmallVector<LazyCallGraph::RefSCC *, 1>
-LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN,
-                                             ArrayRef<Node *> TargetNs) {
+LazyCallGraph::RefSCC::removeInternalRefEdges(
+    ArrayRef<std::pair<Node *, Node *>> Edges) {
   // We return a list of the resulting *new* RefSCCs in post-order.
   SmallVector<RefSCC *, 1> Result;
 
@@ -1179,25 +1179,21 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN,
 #endif
 
   // First remove the actual edges.
-  for (Node *TargetN : TargetNs) {
-    assert(!(*SourceN)[*TargetN].isCall() &&
+  for (auto [SourceN, TargetN] : Edges) {
+    assert(!(**SourceN)[*TargetN].isCall() &&
            "Cannot remove a call edge, it must first be made a ref edge");
 
-    bool Removed = SourceN->removeEdgeInternal(*TargetN);
+    bool Removed = (*SourceN)->removeEdgeInternal(*TargetN);
     (void)Removed;
     assert(Removed && "Target not in the edge set for this caller?");
   }
 
   // Direct self references don't impact the ref graph at all.
-  if (llvm::all_of(TargetNs,
-                   [&](Node *TargetN) { return &SourceN == TargetN; }))
-    return Result;
-
   // If all targets are in the same SCC as the source, because no call edges
   // were removed there is no RefSCC structure change.
-  SCC &SourceC = *G->lookupSCC(SourceN);
-  if (llvm::all_of(TargetNs, [&](Node *TargetN) {
-        return G->lookupSCC(*TargetN) == &SourceC;
+  if (llvm::all_of(Edges, [&](std::pair<Node *, Node *> E) {
+        return E.first == E.second ||
+               G->lookupSCC(*E.first) == G->lookupSCC(*E.second);
       }))
     return Result;
 
@@ -1499,7 +1495,7 @@ void LazyCallGraph::removeEdge(Node &SourceN, Node &TargetN) {
   assert(Removed && "Target not in the edge set for this caller?");
 }
 
-void LazyCallGraph::removeDeadFunction(Function &F) {
+void LazyCallGraph::markDeadFunction(Function &F) {
   // FIXME: This is unnecessarily restrictive. We should be able to remove
   // functions which recursively call themselves.
   assert(F.hasZeroLiveUses() &&
@@ -1515,57 +1511,66 @@ void LazyCallGraph::removeDeadFunction(Function &F) {
 
   Node &N = *NI->second;
 
-  // Cannot remove a function which has yet to be visited in the DFS walk, so
-  // if we have a node at all then we must have an SCC and RefSCC.
-  auto CI = SCCMap.find(&N);
-  assert(CI != SCCMap.end() &&
-         "Tried to remove a node without an SCC after DFS walk started!");
-  SCC &C = *CI->second;
-  RefSCC *RC = &C.getOuterRefSCC();
-
-  // In extremely rare cases, we can delete a dead function which is still in a
-  // non-trivial RefSCC. This can happen due to spurious ref edges sticking
-  // around after an IR function reference is removed.
-  if (RC->size() != 1) {
-    SmallVector<Node *> NodesInRC;
-    for (SCC &OtherC : *RC) {
-      for (Node &OtherN : OtherC)
-        NodesInRC.push_back(&OtherN);
+  // Remove all call edges out of the dead function.
+  for (Edge E : *N) {
+    if (E.isCall())
+      N->setEdgeKind(E.getNode(), Edge::Ref);
+  }
+}
+
+void LazyCallGraph::removeDeadFunctions(ArrayRef<Function *> DeadFs) {
+  if (DeadFs.empty())
+    return;
+
+  // Group dead functions by the RefSCC they're in.
+  DenseMap<RefSCC *, SmallVector<Node *>> RCs;
+  for (Function *DeadF : DeadFs) {
+    Node *N = lookup(*DeadF);
+#ifndef NDEBUG
+    for (Edge &E : **N) {
+      assert(!E.isCall() &&
+             "dead function shouldn't have any outgoing call edges");
     }
-    for (Node *OtherN : NodesInRC) {
-      if ((*OtherN)->lookup(N)) {
-        auto NewRefSCCs =
-            RC->removeInternalRefEdge(*OtherN, ArrayRef(&N));
-        // If we've split into multiple RefSCCs, RC is now invalid and the
-        // RefSCC containing C will be different.
-        if (!NewRefSCCs.empty())
-          RC = &C.getOuterRefSCC();
+#endif
+    RefSCC *RC = lookupRefSCC(*N);
+    RCs[RC].push_back(N);
+  }
+  // Remove outgoing edges from all dead functions. Dead functions should
+  // already have had their call edges removed in markDeadFunction(), so we
+  // only need to worry about spurious ref edges.
+  for (auto [RC, DeadNs] : RCs) {
+    SmallVector<std::pair<Node *, Node *>> InternalEdgesToRemove;
+    for (Node *DeadN : DeadNs) {
+      for (Edge &E : **DeadN) {
+        if (lookupRefSCC(E.getNode()) == RC)
+          InternalEdgesToRemove.push_back({DeadN, &E.getNode()});
+        else
+          RC->removeOutgoingEdge(*DeadN, E.getNode());
       }
     }
+    // We ignore the returned RefSCCs since at this point we're done with CGSCC
+    // iteration and don't need to add them to any worklists.
+    (void)RC->removeInternalRefEdges(InternalEdgesToRemove);
+    for (Node *DeadN : DeadNs) {
+      RefSCC *DeadRC = lookupRefSCC(*DeadN);
+      assert(DeadRC->size() == 1);
+      assert(DeadRC->begin()->size() == 1);
+      DeadRC->clear();
+      DeadRC->G = nullptr;
+    }
   }
+  // Clean up data structures.
+ for (Function *DeadF : DeadFs) { + Node &N = *lookup(*DeadF); + + EntryEdges.removeEdgeInternal(N); + SCCMap.erase(SCCMap.find(&N)); + NodeMap.erase(NodeMap.find(DeadF)); - NodeMap.erase(NI); - EntryEdges.removeEdgeInternal(N); - SCCMap.erase(CI); - - // This node must be the only member of its SCC as it has no callers, and - // that SCC must be the only member of a RefSCC as it has no references. - // Validate these properties first. - assert(C.size() == 1 && "Dead functions must be in a singular SCC"); - assert(RC->size() == 1 && "Dead functions must be in a singular RefSCC"); - - // Finally clear out all the data structures from the node down through the - // components. postorder_ref_scc_iterator will skip empty RefSCCs, so no need - // to adjust LazyCallGraph data structures. - N.clear(); - N.G = nullptr; - N.F = nullptr; - C.clear(); - RC->clear(); - RC->G = nullptr; - - // Nothing to delete as all the objects are allocated in stable bump pointer - // allocators. + N.clear(); + N.G = nullptr; + N.F = nullptr; + } } // Gets the Edge::Kind from one function to another by looking at the function's diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 93b8d28ef749f3..fd8919fff6ff96 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -2389,7 +2389,7 @@ bool LoopAccessInfo::canAnalyzeLoop() { return true; } -void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, +bool LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, const TargetLibraryInfo *TLI, DominatorTree *DT) { // Holds the Load and Store instructions. @@ -2430,10 +2430,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, // With both a non-vectorizable memory instruction and a convergent // operation, found in this loop, no reason to continue the search. - if (HasComplexMemInst && HasConvergentOp) { - CanVecMem = false; - return; - } + if (HasComplexMemInst && HasConvergentOp) + return false; // Avoid hitting recordAnalysis multiple times. if (HasComplexMemInst) @@ -2508,10 +2506,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, } // Next instr. } // Next block. - if (HasComplexMemInst) { - CanVecMem = false; - return; - } + if (HasComplexMemInst) + return false; // Now we have two lists that hold the loads and the stores. // Next, we find the pointers that they use. @@ -2520,8 +2516,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, // care if the pointers are *restrict*. if (!Stores.size()) { LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n"); - CanVecMem = true; - return; + return true; } MemoryDepChecker::DepCandidates DependentAccesses; @@ -2574,8 +2569,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, LLVM_DEBUG( dbgs() << "LAA: A loop annotated parallel, ignore memory dependency " << "checks.\n"); - CanVecMem = true; - return; + return true; } for (LoadInst *LD : Loads) { @@ -2622,8 +2616,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, // other reads in this loop then is it safe to vectorize. 
if (NumReadWrites == 1 && NumReads == 0) { LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n"); - CanVecMem = true; - return; + return true; } // Build dependence sets and check whether we need a runtime pointer bounds @@ -2642,21 +2635,20 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, << "cannot identify array bounds"; LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " << "the array bounds.\n"); - CanVecMem = false; - return; + return false; } LLVM_DEBUG( dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n"); - CanVecMem = true; + bool DepsAreSafe = true; if (Accesses.isDependencyCheckNeeded()) { LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n"); - CanVecMem = DepChecker->areDepsSafe(DependentAccesses, - Accesses.getDependenciesToCheck(), - Accesses.getUnderlyingObjects()); + DepsAreSafe = DepChecker->areDepsSafe(DependentAccesses, + Accesses.getDependenciesToCheck(), + Accesses.getUnderlyingObjects()); - if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) { + if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeCheck()) { LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n"); // Clear the dependency checks. We assume they are not needed. @@ -2676,30 +2668,30 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, recordAnalysis("CantCheckMemDepsAtRunTime", I) << "cannot check memory dependencies at runtime"; LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n"); - CanVecMem = false; - return; + return false; } - - CanVecMem = true; + DepsAreSafe = true; } } if (HasConvergentOp) { recordAnalysis("CantInsertRuntimeCheckWithConvergent") - << "cannot add control dependency to convergent operation"; + << "cannot add control dependency to convergent operation"; LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check " "would be needed with a convergent operation\n"); - CanVecMem = false; - return; + return false; } - if (CanVecMem) + if (DepsAreSafe) { LLVM_DEBUG( dbgs() << "LAA: No unsafe dependent memory operations in loop. We" << (PtrRtChecking->Need ? "" : " don't") << " need runtime memory checks.\n"); - else - emitUnsafeDependenceRemark(); + return true; + } + + emitUnsafeDependenceRemark(); + return false; } void LoopAccessInfo::emitUnsafeDependenceRemark() { @@ -3048,7 +3040,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, MaxTargetVectorWidthInBits); PtrRtChecking = std::make_unique(*DepChecker, SE); if (canAnalyzeLoop()) - analyzeLoop(AA, LI, TLI, DT); + CanVecMem = analyzeLoop(AA, LI, TLI, DT); } void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp index 8490853eda87c2..4ff606d3732388 100644 --- a/llvm/lib/MC/MCAssembler.cpp +++ b/llvm/lib/MC/MCAssembler.cpp @@ -831,6 +831,19 @@ void MCAssembler::layout(MCAsmLayout &Layout) { MCSection *Sec = Layout.getSectionOrder()[i]; Sec->setLayoutOrder(i); + // Chain together fragments from all subsections. 
+  MCDummyFragment Dummy(Sec);
+  MCFragment *Tail = &Dummy;
+  for (auto &[_, List] : Sec->Subsections) {
+    if (!List.Head)
+      continue;
+    Tail->Next = List.Head;
+    Tail = List.Tail;
+  }
+  Sec->Subsections.clear();
+  Sec->Subsections.push_back({0u, {Dummy.getNext(), Tail}});
+  Sec->CurFragList = &Sec->Subsections[0].second;
+
   unsigned FragmentIndex = 0;
   for (MCFragment &Frag : *Sec)
     Frag.setLayoutOrder(FragmentIndex++);
@@ -1094,7 +1107,7 @@ bool MCAssembler::relaxBoundaryAlign(MCAsmLayout &Layout,
   uint64_t AlignedOffset = Layout.getFragmentOffset(&BF);
   uint64_t AlignedSize = 0;
-  for (const MCFragment *F = BF.getNextNode();; F = F->getNextNode()) {
+  for (const MCFragment *F = BF.getNext();; F = F->getNext()) {
     AlignedSize += computeFragmentSize(Layout, *F);
     if (F == BF.getLastFragment())
       break;
diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp
index b70ac86c18ccf1..2eecdb82d30bb0 100644
--- a/llvm/lib/MC/MCExpr.cpp
+++ b/llvm/lib/MC/MCExpr.cpp
@@ -661,25 +661,16 @@ static void AttemptToFoldSymbolOffsetDifference(
   // this is important when the Subtarget is changed and a new MCDataFragment
   // is created in the case of foo: instr; .arch_extension ext; instr .if . -
   // foo.
-  if (SA.isVariable() || SB.isVariable() ||
-      FA->getSubsectionNumber() != FB->getSubsectionNumber())
+  if (SA.isVariable() || SB.isVariable())
     return;
 
   // Try to find a constant displacement from FA to FB, add the displacement
   // between the offset in FA of SA and the offset in FB of SB.
   bool Reverse = false;
-  if (FA == FB) {
+  if (FA == FB)
     Reverse = SA.getOffset() < SB.getOffset();
-  } else if (!isa<MCDummyFragment>(FA)) {
-    // Testing FA < FB is slow. Use setLayoutOrder to speed up computation.
-    // The formal layout order will be finalized in MCAssembler::layout.
-    if (FA->getLayoutOrder() == 0 || FB->getLayoutOrder() == 0) {
-      unsigned LayoutOrder = 0;
-      for (MCFragment &F : *FA->getParent())
-        F.setLayoutOrder(++LayoutOrder);
-    }
+  else if (!isa<MCDummyFragment>(FA))
     Reverse = FA->getLayoutOrder() < FB->getLayoutOrder();
-  }
 
   uint64_t SAOffset = SA.getOffset(), SBOffset = SB.getOffset();
   int64_t Displacement = SA.getOffset() - SB.getOffset();
@@ -695,7 +686,7 @@ static void AttemptToFoldSymbolOffsetDifference(
   // instruction, the difference cannot be resolved as it may be changed by
   // the linker.
   bool BBeforeRelax = false, AAfterRelax = false;
-  for (auto FI = FB->getIterator(), FE = SecA.end(); FI != FE; ++FI) {
+  for (auto FI = FB; FI; FI = FI->getNext()) {
     auto DF = dyn_cast<MCDataFragment>(FI);
     if (DF && DF->isLinkerRelaxable()) {
       if (&*FI != FB || SBOffset != DF->getContents().size())
@@ -726,12 +717,14 @@ static void AttemptToFoldSymbolOffsetDifference(
         return;
       }
     }
-    // If the previous loop does not find FA, FA must be a dummy fragment not in
-    // the fragment list (which means SA is a pending label (see
-    // flushPendingLabels)). In either case, we can resolve the difference.
-    assert(Found || isa<MCDummyFragment>(FA));
-    Addend += Reverse ? -Displacement : Displacement;
-    FinalizeFolding();
+    // If FA and FB belong to the same subsection, the previous loop will find
+    // FA; otherwise FA is a dummy fragment not in the fragment list (which
+    // means SA is a pending label, see flushPendingLabels). In either case we
+    // can resolve the difference; if FA and FB are in different subsections,
+    // we cannot.
+    if (Found || isa<MCDummyFragment>(FA)) {
+      Addend += Reverse ? -Displacement : Displacement;
+      FinalizeFolding();
+    }
   }
 }
 
diff --git a/llvm/lib/MC/MCFragment.cpp b/llvm/lib/MC/MCFragment.cpp
index 84a587164c788d..6d97e8ce552baf 100644
--- a/llvm/lib/MC/MCFragment.cpp
+++ b/llvm/lib/MC/MCFragment.cpp
@@ -141,7 +141,7 @@ const MCSymbol *MCAsmLayout::getBaseSymbol(const MCSymbol &Symbol) const {
 
 uint64_t MCAsmLayout::getSectionAddressSize(const MCSection *Sec) const {
   // The size is the last fragment's end offset.
-  const MCFragment &F = Sec->getFragmentList().back();
+  const MCFragment &F = *Sec->curFragList()->Tail;
   return getFragmentOffset(&F) + getAssembler().computeFragmentSize(*this, F);
 }
 
@@ -197,8 +197,6 @@ uint64_t llvm::computeBundlePadding(const MCAssembler &Assembler,
 
 /* *** */
 
-void ilist_alloc_traits<MCFragment>::deleteNode(MCFragment *V) { V->destroy(); }
-
 MCFragment::MCFragment(FragmentType Kind, bool HasInstructions,
                        MCSection *Parent)
     : Parent(Parent), Atom(nullptr), Offset(~UINT64_C(0)), LayoutOrder(0),
diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp
index ae4e6915fa294c..bf1ce76cdc14bd 100644
--- a/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/llvm/lib/MC/MCObjectStreamer.cpp
@@ -180,7 +180,6 @@ void MCObjectStreamer::reset() {
     if (getContext().getTargetOptions())
       Assembler->setRelaxAll(getContext().getTargetOptions()->MCRelaxAll);
   }
-  CurInsertionPoint = MCSection::iterator();
  EmitEHFrame = true;
   EmitDebugFrame = false;
   PendingLabels.clear();
@@ -200,12 +199,7 @@ void MCObjectStreamer::emitFrames(MCAsmBackend *MAB) {
 }
 
 MCFragment *MCObjectStreamer::getCurrentFragment() const {
-  assert(getCurrentSectionOnly() && "No current section!");
-
-  if (CurInsertionPoint != getCurrentSectionOnly()->begin())
-    return &*std::prev(CurInsertionPoint);
-
-  return nullptr;
+  return getCurrentSectionOnly()->curFragList()->Tail;
 }
 
 static bool canReuseDataFragment(const MCDataFragment &F,
@@ -391,8 +385,7 @@ bool MCObjectStreamer::changeSectionImpl(MCSection *Section,
   }
   CurSubsectionIdx = unsigned(IntSubsection);
-  CurInsertionPoint =
-      Section->getSubsectionInsertionPoint(CurSubsectionIdx);
+  Section->switchSubsection(CurSubsectionIdx);
   return Created;
 }
 
diff --git a/llvm/lib/MC/MCSection.cpp b/llvm/lib/MC/MCSection.cpp
index 9848d7fafe764a..1d9fe2cafd6174 100644
--- a/llvm/lib/MC/MCSection.cpp
+++ b/llvm/lib/MC/MCSection.cpp
@@ -24,7 +24,10 @@ MCSection::MCSection(SectionVariant V, StringRef Name, SectionKind K,
                      MCSymbol *Begin)
     : Begin(Begin), BundleGroupBeforeFirstInst(false), HasInstructions(false),
       HasLayout(false), IsRegistered(false), DummyFragment(this), Name(Name),
-      Variant(V), Kind(K) {}
+      Variant(V), Kind(K) {
+  // The initial subsection number is 0. Create a fragment list.
+  CurFragList = &Subsections.emplace_back(0u, FragList{}).second;
+}
 
 MCSymbol *MCSection::getEndSymbol(MCContext &Ctx) {
   if (!End)
@@ -34,7 +37,14 @@ MCSymbol *MCSection::getEndSymbol(MCContext &Ctx) {
 
 bool MCSection::hasEnded() const { return End && End->isInSection(); }
 
-MCSection::~MCSection() = default;
+MCSection::~MCSection() {
+  for (auto &[_, Chain] : Subsections) {
+    for (MCFragment *X = Chain.Head, *Y; X; X = Y) {
+      Y = X->Next;
+      X->destroy();
+    }
+  }
+}
 
 void MCSection::setBundleLockState(BundleLockStateType NewState) {
   if (NewState == NotBundleLocked) {
@@ -55,35 +65,15 @@ void MCSection::setBundleLockState(BundleLockStateType NewState) {
   ++BundleLockNestingDepth;
 }
 
-MCSection::iterator
-MCSection::getSubsectionInsertionPoint(unsigned Subsection) {
-  if (Subsection == 0 && SubsectionFragmentMap.empty())
-    return end();
-
-  SmallVectorImpl<std::pair<unsigned, MCFragment *>>::iterator MI = lower_bound(
-      SubsectionFragmentMap, std::make_pair(Subsection, (MCFragment *)nullptr));
-  bool ExactMatch = false;
-  if (MI != SubsectionFragmentMap.end()) {
-    ExactMatch = MI->first == Subsection;
-    if (ExactMatch)
-      ++MI;
-  }
-  iterator IP;
-  if (MI == SubsectionFragmentMap.end())
-    IP = end();
-  else
-    IP = MI->second->getIterator();
-  if (!ExactMatch && Subsection != 0) {
-    // The GNU as documentation claims that subsections have an alignment of 4,
-    // although this appears not to be the case.
-    MCFragment *F = new MCDataFragment();
-    SubsectionFragmentMap.insert(MI, std::make_pair(Subsection, F));
-    getFragmentList().insert(IP, F);
-    F->setParent(this);
-    F->setSubsectionNumber(Subsection);
-  }
-
-  return IP;
+void MCSection::switchSubsection(unsigned Subsection) {
+  size_t I = 0, E = Subsections.size();
+  while (I != E && Subsections[I].first < Subsection)
+    ++I;
+  // If the subsection number is not in the sorted Subsections list, create a
+  // new fragment list.
+  if (I == E || Subsections[I].first != Subsection)
+    Subsections.insert(Subsections.begin() + I, {Subsection, FragList{}});
+  CurFragList = &Subsections[I].second;
 }
 
 StringRef MCSection::getVirtualSectionKind() const { return "virtual"; }
@@ -111,13 +101,11 @@ void MCSection::flushPendingLabels() {
   // creating new empty data fragments for each Subsection with labels pending.
   while (!PendingLabels.empty()) {
     PendingLabel& Label = PendingLabels[0];
-    iterator CurInsertionPoint =
-        this->getSubsectionInsertionPoint(Label.Subsection);
-    const MCSymbol *Atom = nullptr;
-    if (CurInsertionPoint != begin())
-      Atom = std::prev(CurInsertionPoint)->getAtom();
+    switchSubsection(Label.Subsection);
+    const MCSymbol *Atom =
+        CurFragList->Tail ? CurFragList->Tail->getAtom() : nullptr;
     MCFragment *F = new MCDataFragment();
-    getFragmentList().insert(CurInsertionPoint, F);
+    addFragment(*F);
     F->setParent(this);
     F->setAtom(Atom);
     flushPendingLabels(F, 0, Label.Subsection);
diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp
index 451269608f1799..522e268156aa3c 100644
--- a/llvm/lib/MC/WasmObjectWriter.cpp
+++ b/llvm/lib/MC/WasmObjectWriter.cpp
@@ -1864,15 +1864,14 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm,
       if (EmptyFrag.getKind() != MCFragment::FT_Data)
         report_fatal_error(".init_array section should be aligned");
 
-      IT = std::next(IT);
-      const MCFragment &AlignFrag = *IT;
+      const MCFragment &AlignFrag = *EmptyFrag.getNext();
       if (AlignFrag.getKind() != MCFragment::FT_Align)
         report_fatal_error(".init_array section should be aligned");
 
       if (cast<MCAlignFragment>(AlignFrag).getAlignment() !=
          Align(is64Bit() ? 8 : 4))
         report_fatal_error(".init_array section should be aligned for pointers");
 
-      const MCFragment &Frag = *std::next(IT);
+      const MCFragment &Frag = *AlignFrag.getNext();
       if (Frag.hasInstructions() || Frag.getKind() != MCFragment::FT_Data)
         report_fatal_error("only data supported in .init_array section");
 
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index 3b6ea81cdf10ed..54efe4bc25efe7 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -712,17 +712,20 @@ class HexagonAsmBackend : public MCAsmBackend {
 
   void finishLayout(MCAssembler const &Asm,
                     MCAsmLayout &Layout) const override {
+    SmallVector<MCFragment *> Frags;
     for (auto *I : Layout.getSectionOrder()) {
-      for (auto &J : *I) {
-        switch (J.getKind()) {
+      Frags.clear();
+      for (MCFragment &F : *I)
+        Frags.push_back(&F);
+      for (size_t J = 0, E = Frags.size(); J != E; ++J) {
+        switch (Frags[J]->getKind()) {
         default:
           break;
         case MCFragment::FT_Align: {
-          auto Size = Asm.computeFragmentSize(Layout, J);
-          for (auto K = J.getIterator();
-               K != I->begin() && Size >= HEXAGON_PACKET_SIZE;) {
+          auto Size = Asm.computeFragmentSize(Layout, *Frags[J]);
+          for (auto K = J; K != 0 && Size >= HEXAGON_PACKET_SIZE;) {
             --K;
-            switch (K->getKind()) {
+            switch (Frags[K]->getKind()) {
             default:
               break;
             case MCFragment::FT_Align: {
@@ -732,7 +735,7 @@ class HexagonAsmBackend : public MCAsmBackend {
             }
             case MCFragment::FT_Relaxable: {
               MCContext &Context = Asm.getContext();
-              auto &RF = cast<MCRelaxableFragment>(*K);
+              auto &RF = cast<MCRelaxableFragment>(*Frags[K]);
               auto &Inst = const_cast<MCInst &>(RF.getInst());
               while (Size > 0 &&
                      HexagonMCInstrInfo::bundleSize(Inst) < MaxPacketSize) {
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
index b8e0f3a867f402..d83dadd3016193 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
@@ -62,7 +62,7 @@ const MCFixup *RISCVMCExpr::getPCRelHiFixup(const MCFragment **DFOut) const {
 
   uint64_t Offset = AUIPCSymbol->getOffset();
   if (DF->getContents().size() == Offset) {
-    DF = dyn_cast_or_null<MCDataFragment>(DF->getNextNode());
+    DF = dyn_cast_or_null<MCDataFragment>(DF->getNext());
     if (!DF)
       return nullptr;
     Offset = 0;
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index 49838e685a6d2b..6bb3e215240a87 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -300,6 +300,72 @@ lookupBuiltin(StringRef DemangledCall,
   return nullptr;
 }
 
+static MachineInstr *getBlockStructInstr(Register ParamReg,
+                                         MachineRegisterInfo *MRI) {
+  // We expect the following sequence of instructions:
+  //   %0:_(pN) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.spv.alloca)
+  //   or       = G_GLOBAL_VALUE @block_literal_global
+  //   %1:_(pN) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.spv.bitcast), %0
+  //   %2:_(p4) = G_ADDRSPACE_CAST %1:_(pN)
+  MachineInstr *MI = MRI->getUniqueVRegDef(ParamReg);
+  assert(MI->getOpcode() == TargetOpcode::G_ADDRSPACE_CAST &&
+         MI->getOperand(1).isReg());
+  Register BitcastReg = MI->getOperand(1).getReg();
+  MachineInstr *BitcastMI = MRI->getUniqueVRegDef(BitcastReg);
+  assert(isSpvIntrinsic(*BitcastMI, Intrinsic::spv_bitcast) &&
+         BitcastMI->getOperand(2).isReg());
+  Register ValueReg = BitcastMI->getOperand(2).getReg();
+  MachineInstr *ValueMI = MRI->getUniqueVRegDef(ValueReg);
+  return ValueMI;
+}
+
+// Return an integer constant corresponding to the given register and
+// defined in spv_track_constant.
+// TODO: maybe unify with prelegalizer pass.
+static unsigned getConstFromIntrinsic(Register Reg, MachineRegisterInfo *MRI) {
+  MachineInstr *DefMI = MRI->getUniqueVRegDef(Reg);
+  assert(isSpvIntrinsic(*DefMI, Intrinsic::spv_track_constant) &&
+         DefMI->getOperand(2).isReg());
+  MachineInstr *DefMI2 = MRI->getUniqueVRegDef(DefMI->getOperand(2).getReg());
+  assert(DefMI2->getOpcode() == TargetOpcode::G_CONSTANT &&
+         DefMI2->getOperand(1).isCImm());
+  return DefMI2->getOperand(1).getCImm()->getValue().getZExtValue();
+}
+
+// Return type of the instruction result from spv_assign_type intrinsic.
+// TODO: maybe unify with prelegalizer pass.
+static const Type *getMachineInstrType(MachineInstr *MI) {
+  MachineInstr *NextMI = MI->getNextNode();
+  if (!NextMI)
+    return nullptr;
+  if (isSpvIntrinsic(*NextMI, Intrinsic::spv_assign_name))
+    if ((NextMI = NextMI->getNextNode()) == nullptr)
+      return nullptr;
+  Register ValueReg = MI->getOperand(0).getReg();
+  if ((!isSpvIntrinsic(*NextMI, Intrinsic::spv_assign_type) &&
+       !isSpvIntrinsic(*NextMI, Intrinsic::spv_assign_ptr_type)) ||
+      NextMI->getOperand(1).getReg() != ValueReg)
+    return nullptr;
+  Type *Ty = getMDOperandAsType(NextMI->getOperand(2).getMetadata(), 0);
+  assert(Ty && "Type is expected");
+  return Ty;
+}
+
+static const Type *getBlockStructType(Register ParamReg,
+                                      MachineRegisterInfo *MRI) {
+  // In principle, this information should be passed to us from Clang via
+  // an elementtype attribute. However, said attribute requires that
+  // the function call be an intrinsic, which is not. Instead, we rely on being
+  // able to trace this to the declaration of a variable: OpenCL C specification
+  // section 6.12.5 should guarantee that we can do this.
+ MachineInstr *MI = getBlockStructInstr(ParamReg, MRI); + if (MI->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) + return MI->getOperand(1).getGlobal()->getType(); + assert(isSpvIntrinsic(*MI, Intrinsic::spv_alloca) && + "Blocks in OpenCL C must be traceable to allocation site"); + return getMachineInstrType(MI); +} + //===----------------------------------------------------------------------===// // Helper functions for building misc instructions //===----------------------------------------------------------------------===// @@ -1371,6 +1437,14 @@ static bool generateBarrierInst(const SPIRV::IncomingCall *Call, return buildBarrierInst(Call, Opcode, MIRBuilder, GR); } +static bool generateCastToPtrInst(const SPIRV::IncomingCall *Call, + MachineIRBuilder &MIRBuilder) { + MIRBuilder.buildInstr(TargetOpcode::G_ADDRSPACE_CAST) + .addDef(Call->ReturnRegister) + .addUse(Call->Arguments[0]); + return true; +} + static bool generateDotOrFMulInst(const SPIRV::IncomingCall *Call, MachineIRBuilder &MIRBuilder, SPIRVGlobalRegistry *GR) { @@ -1847,68 +1921,6 @@ static bool buildNDRange(const SPIRV::IncomingCall *Call, .addUse(TmpReg); } -static MachineInstr *getBlockStructInstr(Register ParamReg, - MachineRegisterInfo *MRI) { - // We expect the following sequence of instructions: - // %0:_(pN) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.spv.alloca) - // or = G_GLOBAL_VALUE @block_literal_global - // %1:_(pN) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.spv.bitcast), %0 - // %2:_(p4) = G_ADDRSPACE_CAST %1:_(pN) - MachineInstr *MI = MRI->getUniqueVRegDef(ParamReg); - assert(MI->getOpcode() == TargetOpcode::G_ADDRSPACE_CAST && - MI->getOperand(1).isReg()); - Register BitcastReg = MI->getOperand(1).getReg(); - MachineInstr *BitcastMI = MRI->getUniqueVRegDef(BitcastReg); - assert(isSpvIntrinsic(*BitcastMI, Intrinsic::spv_bitcast) && - BitcastMI->getOperand(2).isReg()); - Register ValueReg = BitcastMI->getOperand(2).getReg(); - MachineInstr *ValueMI = MRI->getUniqueVRegDef(ValueReg); - return ValueMI; -} - -// Return an integer constant corresponding to the given register and -// defined in spv_track_constant. -// TODO: maybe unify with prelegalizer pass. -static unsigned getConstFromIntrinsic(Register Reg, MachineRegisterInfo *MRI) { - MachineInstr *DefMI = MRI->getUniqueVRegDef(Reg); - assert(isSpvIntrinsic(*DefMI, Intrinsic::spv_track_constant) && - DefMI->getOperand(2).isReg()); - MachineInstr *DefMI2 = MRI->getUniqueVRegDef(DefMI->getOperand(2).getReg()); - assert(DefMI2->getOpcode() == TargetOpcode::G_CONSTANT && - DefMI2->getOperand(1).isCImm()); - return DefMI2->getOperand(1).getCImm()->getValue().getZExtValue(); -} - -// Return type of the instruction result from spv_assign_type intrinsic. -// TODO: maybe unify with prelegalizer pass. -static const Type *getMachineInstrType(MachineInstr *MI) { - MachineInstr *NextMI = MI->getNextNode(); - if (isSpvIntrinsic(*NextMI, Intrinsic::spv_assign_name)) - NextMI = NextMI->getNextNode(); - Register ValueReg = MI->getOperand(0).getReg(); - if (!isSpvIntrinsic(*NextMI, Intrinsic::spv_assign_type) || - NextMI->getOperand(1).getReg() != ValueReg) - return nullptr; - Type *Ty = getMDOperandAsType(NextMI->getOperand(2).getMetadata(), 0); - assert(Ty && "Type is expected"); - return Ty; -} - -static const Type *getBlockStructType(Register ParamReg, - MachineRegisterInfo *MRI) { - // In principle, this information should be passed to us from Clang via - // an elementtype attribute. 
However, said attribute requires that - // the function call be an intrinsic, which is not. Instead, we rely on being - // able to trace this to the declaration of a variable: OpenCL C specification - // section 6.12.5 should guarantee that we can do this. - MachineInstr *MI = getBlockStructInstr(ParamReg, MRI); - if (MI->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) - return MI->getOperand(1).getGlobal()->getType(); - assert(isSpvIntrinsic(*MI, Intrinsic::spv_alloca) && - "Blocks in OpenCL C must be traceable to allocation site"); - return getMachineInstrType(MI); -} - // TODO: maybe move to the global register. static SPIRVType * getOrCreateSPIRVDeviceEventPointer(MachineIRBuilder &MIRBuilder, @@ -2322,6 +2334,8 @@ std::optional lowerBuiltin(const StringRef DemangledCall, return generateAtomicFloatingInst(Call.get(), MIRBuilder, GR); case SPIRV::Barrier: return generateBarrierInst(Call.get(), MIRBuilder, GR); + case SPIRV::CastToPtr: + return generateCastToPtrInst(Call.get(), MIRBuilder); case SPIRV::Dot: return generateDotOrFMulInst(Call.get(), MIRBuilder, GR); case SPIRV::Wave: diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td index edc9e1a33d9f5a..2edd2992425bd0 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td @@ -59,6 +59,7 @@ def IntelSubgroups : BuiltinGroup; def AtomicFloating : BuiltinGroup; def GroupUniform : BuiltinGroup; def KernelClock : BuiltinGroup; +def CastToPtr : BuiltinGroup; //===----------------------------------------------------------------------===// // Class defining a demangled builtin record. The information in the record @@ -595,6 +596,17 @@ defm : DemangledNativeBuiltin<"__spirv_GroupWaitEvents", OpenCL_std, AsyncCopy, defm : DemangledNativeBuiltin<"__spirv_Load", OpenCL_std, LoadStore, 1, 3, OpLoad>; defm : DemangledNativeBuiltin<"__spirv_Store", OpenCL_std, LoadStore, 2, 4, OpStore>; +// Address Space Qualifier Functions/Pointers Conversion Instructions: +defm : DemangledNativeBuiltin<"to_global", OpenCL_std, CastToPtr, 1, 1, OpGenericCastToPtr>; +defm : DemangledNativeBuiltin<"to_local", OpenCL_std, CastToPtr, 1, 1, OpGenericCastToPtr>; +defm : DemangledNativeBuiltin<"to_private", OpenCL_std, CastToPtr, 1, 1, OpGenericCastToPtr>; +defm : DemangledNativeBuiltin<"__spirv_GenericCastToPtr_ToGlobal", OpenCL_std, CastToPtr, 2, 2, OpGenericCastToPtr>; +defm : DemangledNativeBuiltin<"__spirv_GenericCastToPtr_ToLocal", OpenCL_std, CastToPtr, 2, 2, OpGenericCastToPtr>; +defm : DemangledNativeBuiltin<"__spirv_GenericCastToPtr_ToPrivate", OpenCL_std, CastToPtr, 2, 2, OpGenericCastToPtr>; +defm : DemangledNativeBuiltin<"__spirv_OpGenericCastToPtrExplicit_ToGlobal", OpenCL_std, CastToPtr, 2, 2, OpGenericCastToPtr>; +defm : DemangledNativeBuiltin<"__spirv_OpGenericCastToPtrExplicit_ToLocal", OpenCL_std, CastToPtr, 2, 2, OpGenericCastToPtr>; +defm : DemangledNativeBuiltin<"__spirv_OpGenericCastToPtrExplicit_ToPrivate", OpenCL_std, CastToPtr, 2, 2, OpGenericCastToPtr>; + //===----------------------------------------------------------------------===// // Class defining a work/sub group builtin that should be translated into a // SPIR-V instruction using the defined properties. 
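All nine spellings registered above map to the single CastToPtr group, so the lowering needs no per-name cases. A condensed C++ sketch of that shared path, assembled here only for illustration from the generateCastToPtrInst() and lowerBuiltin() changes earlier in this patch (the function name lowerCastToPtrSketch is hypothetical, and the snippet is written as it would appear file-local inside SPIRVBuiltins.cpp rather than as a standalone unit):

// Both the OpenCL forms (to_global/to_local/to_private) and the
// __spirv_GenericCastToPtr* forms reach this one generator, which emits a
// plain address-space cast from the generic pointer argument; instruction
// selection then produces OpGenericCastToPtr, which finalizeLowering()
// validates the same way as OpPtrCastToGeneric.
static bool lowerCastToPtrSketch(const SPIRV::IncomingCall *Call,
                                 MachineIRBuilder &MIRBuilder) {
  MIRBuilder.buildInstr(TargetOpcode::G_ADDRSPACE_CAST)
      .addDef(Call->ReturnRegister)
      .addUse(Call->Arguments[0]);
  return true;
}

The result storage class (CrossWorkgroup, Workgroup, or Function) falls out of the call's declared return pointer type, as exercised by the OpGenericCastToPtr.ll test added later in this patch.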
diff --git a/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h
index 2ec3fb35ca0451..3c8405fadd44e9 100644
--- a/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h
+++ b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h
@@ -16,6 +16,7 @@
 
 #include "MCTargetDesc/SPIRVBaseInfo.h"
 #include "MCTargetDesc/SPIRVMCTargetDesc.h"
+#include "SPIRVUtils.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@@ -285,10 +286,13 @@ class SPIRVGeneralDuplicatesTracker {
     TT.add(Ty, MF, R);
   }
 
-  void add(const Type *PointerElementType, unsigned AddressSpace,
+  void add(const Type *PointeeTy, unsigned AddressSpace,
            const MachineFunction *MF, Register R) {
-    ST.add(SPIRV::PointerTypeDescriptor(PointerElementType, AddressSpace), MF,
-           R);
+    if (isUntypedPointerTy(PointeeTy))
+      PointeeTy =
+          TypedPointerType::get(IntegerType::getInt8Ty(PointeeTy->getContext()),
+                                getPointerAddressSpace(PointeeTy));
+    ST.add(SPIRV::PointerTypeDescriptor(PointeeTy, AddressSpace), MF, R);
   }
 
   void add(const Constant *C, const MachineFunction *MF, Register R) {
@@ -320,10 +324,13 @@ class SPIRVGeneralDuplicatesTracker {
     return TT.find(const_cast<Type *>(Ty), MF);
   }
 
-  Register find(const Type *PointerElementType, unsigned AddressSpace,
+  Register find(const Type *PointeeTy, unsigned AddressSpace,
                 const MachineFunction *MF) {
-    return ST.find(
-        SPIRV::PointerTypeDescriptor(PointerElementType, AddressSpace), MF);
+    if (isUntypedPointerTy(PointeeTy))
+      PointeeTy =
+          TypedPointerType::get(IntegerType::getInt8Ty(PointeeTy->getContext()),
+                                getPointerAddressSpace(PointeeTy));
+    return ST.find(SPIRV::PointerTypeDescriptor(PointeeTy, AddressSpace), MF);
   }
 
   Register find(const Constant *C, const MachineFunction *MF) {
diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index 7b8e3230bf5534..5c10e04325d515 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -69,7 +69,7 @@ class SPIRVEmitIntrinsics
   DenseSet<Instruction *> AggrStores;
 
   // deduce element type of untyped pointers
-  Type *deduceElementType(Value *I);
+  Type *deduceElementType(Value *I, bool UnknownElemTypeI8);
   Type *deduceElementTypeHelper(Value *I);
   Type *deduceElementTypeHelper(Value *I, std::unordered_set<Value *> &Visited);
   Type *deduceElementTypeByValueDeep(Type *ValueTy, Value *Operand,
@@ -105,7 +105,8 @@ class SPIRVEmitIntrinsics
   void replaceMemInstrUses(Instruction *Old, Instruction *New, IRBuilder<> &B);
   void processInstrAfterVisit(Instruction *I, IRBuilder<> &B);
-  void insertAssignPtrTypeIntrs(Instruction *I, IRBuilder<> &B);
+  bool insertAssignPtrTypeIntrs(Instruction *I, IRBuilder<> &B,
+                                bool UnknownElemTypeI8);
   void insertAssignTypeIntrs(Instruction *I, IRBuilder<> &B);
   void insertAssignPtrTypeTargetExt(TargetExtType *AssignedType, Value *V,
                                     IRBuilder<> &B);
@@ -367,6 +368,23 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(
       if (Ty)
         break;
     }
+  } else if (auto *CI = dyn_cast<CallInst>(I)) {
+    static StringMap<unsigned> ResTypeByArg = {
+        {"to_global", 0},
+        {"to_local", 0},
+        {"to_private", 0},
+        {"__spirv_GenericCastToPtr_ToGlobal", 0},
+        {"__spirv_GenericCastToPtr_ToLocal", 0},
+        {"__spirv_GenericCastToPtr_ToPrivate", 0}};
+    // TODO: maybe improve performance by caching demangled names
+    if (Function *CalledF = CI->getCalledFunction()) {
+      std::string DemangledName =
+          getOclOrSpirvBuiltinDemangledName(CalledF->getName());
+      auto AsArgIt = ResTypeByArg.find(DemangledName);
+      if (AsArgIt != ResTypeByArg.end())
+        Ty = deduceElementTypeHelper(CI->getArgOperand(AsArgIt->second),
+                                     Visited);
+    }
   }
 
   // remember the found relationship
@@ -460,10 +478,10 @@ Type *SPIRVEmitIntrinsics::deduceNestedTypeHelper(
   return OrigTy;
 }
 
-Type *SPIRVEmitIntrinsics::deduceElementType(Value *I) {
+Type *SPIRVEmitIntrinsics::deduceElementType(Value *I, bool UnknownElemTypeI8) {
   if (Type *Ty = deduceElementTypeHelper(I))
     return Ty;
-  return IntegerType::getInt8Ty(I->getContext());
+  return UnknownElemTypeI8 ? IntegerType::getInt8Ty(I->getContext()) : nullptr;
 }
 
 // If the Instruction has Pointer operands with unresolved types, this function
@@ -1152,16 +1170,23 @@ void SPIRVEmitIntrinsics::processGlobalValue(GlobalVariable &GV,
     B.CreateIntrinsic(Intrinsic::spv_unref_global, GV.getType(), &GV);
 }
 
-void SPIRVEmitIntrinsics::insertAssignPtrTypeIntrs(Instruction *I,
-                                                   IRBuilder<> &B) {
+// Return true if we can't decide what the pointee type is now and will get
+// back to the question later. Return false if spv_assign_ptr_type is not
+// needed or can be inserted immediately.
+bool SPIRVEmitIntrinsics::insertAssignPtrTypeIntrs(Instruction *I,
+                                                   IRBuilder<> &B,
+                                                   bool UnknownElemTypeI8) {
   reportFatalOnTokenType(I);
   if (!isPointerTy(I->getType()) || !requireAssignType(I) ||
       isa<BitCastInst>(I))
-    return;
+    return false;
   setInsertPointAfterDef(B, I);
-  Type *ElemTy = deduceElementType(I);
-  buildAssignPtr(B, ElemTy, I);
+  if (Type *ElemTy = deduceElementType(I, UnknownElemTypeI8)) {
+    buildAssignPtr(B, ElemTy, I);
+    return false;
+  }
+  return true;
 }
 
 void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I,
@@ -1199,7 +1224,7 @@ void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I,
       buildAssignPtr(B, PType->getElementType(), Op);
     } else if (isPointerTy(OpTy)) {
       Type *ElemTy = GR->findDeducedElementType(Op);
-      buildAssignPtr(B, ElemTy ? ElemTy : deduceElementType(Op), Op);
+      buildAssignPtr(B, ElemTy ?
ElemTy : deduceElementType(Op, true), Op); } else { CallInst *AssignCI = buildIntrWithMD(Intrinsic::spv_assign_type, {OpTy}, Op, Op, {}, B); @@ -1395,10 +1420,15 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) { if (isConvergenceIntrinsic(I)) continue; - insertAssignPtrTypeIntrs(I, B); + bool Postpone = insertAssignPtrTypeIntrs(I, B, false); + // if Postpone is true, we can't decide on pointee type yet insertAssignTypeIntrs(I, B); insertPtrCastOrAssignTypeInstr(I, B); insertSpirvDecorations(I, B); + // if instruction requires a pointee type set, let's check if we know it + // already, and force it to be i8 if not + if (Postpone && !GR->findAssignPtrTypeInstr(I)) + insertAssignPtrTypeIntrs(I, B, true); } for (auto &I : instructions(Func)) diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp index 5ccbaf12ddee2e..4383d1c5c0e25d 100644 --- a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp @@ -339,6 +339,7 @@ void SPIRVTargetLowering::finalizeLowering(MachineFunction &MF) const { GR.getSPIRVTypeForVReg(MI.getOperand(1).getReg())); break; case SPIRV::OpPtrCastToGeneric: + case SPIRV::OpGenericCastToPtr: validateAccessChain(STI, MRI, GR, MI); break; case SPIRV::OpInBoundsPtrAccessChain: diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.h b/llvm/lib/Target/SPIRV/SPIRVISelLowering.h index 6fc200abf46279..77356b7512a739 100644 --- a/llvm/lib/Target/SPIRV/SPIRVISelLowering.h +++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.h @@ -68,6 +68,11 @@ class SPIRVTargetLowering : public TargetLowering { // extra instructions required to preserve validity of SPIR-V code imposed by // the standard. void finalizeLowering(MachineFunction &MF) const override; + + MVT getPreferredSwitchConditionType(LLVMContext &Context, + EVT ConditionVT) const override { + return ConditionVT.getSimpleVT(); + } }; } // namespace llvm diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp index adc5b36af6f182..53e0432192ca91 100644 --- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp @@ -271,6 +271,21 @@ static SPIRVType *propagateSPIRVType(MachineInstr *MI, SPIRVGlobalRegistry *GR, return SpirvTy; } +// To support current approach and limitations wrt. bit width here we widen a +// scalar register with a bit width greater than 1 to valid sizes and cap it to +// 64 width. 
+static void widenScalarLLTNextPow2(Register Reg, MachineRegisterInfo &MRI) { + LLT RegType = MRI.getType(Reg); + if (!RegType.isScalar()) + return; + unsigned Sz = RegType.getScalarSizeInBits(); + if (Sz == 1) + return; + unsigned NewSz = std::min(std::max(1u << Log2_32_Ceil(Sz), 8u), 64u); + if (NewSz != Sz) + MRI.setType(Reg, LLT::scalar(NewSz)); +} + static std::pair createNewIdReg(SPIRVType *SpvType, Register SrcReg, MachineRegisterInfo &MRI, const SPIRVGlobalRegistry &GR) { @@ -406,6 +421,11 @@ generateAssignInstrs(MachineFunction &MF, SPIRVGlobalRegistry *GR, MachineInstr &MI = *MII; unsigned MIOp = MI.getOpcode(); + // validate bit width of scalar registers + for (const auto &MOP : MI.operands()) + if (MOP.isReg()) + widenScalarLLTNextPow2(MOP.getReg(), MRI); + if (isSpvIntrinsic(MI, Intrinsic::spv_assign_ptr_type)) { Register Reg = MI.getOperand(1).getReg(); MIB.setInsertPt(*MI.getParent(), MI.getIterator()); @@ -475,11 +495,6 @@ generateAssignInstrs(MachineFunction &MF, SPIRVGlobalRegistry *GR, insertAssignInstr(Reg, Ty, nullptr, GR, MIB, MRI); } else if (MIOp == TargetOpcode::G_GLOBAL_VALUE) { propagateSPIRVType(&MI, GR, MRI, MIB); - } else if (MIOp == TargetOpcode::G_BITREVERSE) { - Register Reg = MI.getOperand(0).getReg(); - LLT RegType = MRI.getType(Reg); - if (RegType.getSizeInBits() < 32) - MRI.setType(Reg, LLT::scalar(32)); } if (MII == Begin) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp index d9936557776ba1..20e50c8c9e1ae0 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp @@ -201,6 +201,9 @@ struct RuntimeLibcallSignatureTable { Table[RTLIB::COS_F32] = f32_func_f32; Table[RTLIB::COS_F64] = f64_func_f64; Table[RTLIB::COS_F128] = i64_i64_func_i64_i64; + Table[RTLIB::TAN_F32] = f32_func_f32; + Table[RTLIB::TAN_F64] = f64_func_f64; + Table[RTLIB::TAN_F128] = i64_i64_func_i64_i64; Table[RTLIB::SINCOS_F32] = func_f32_iPTR_iPTR; Table[RTLIB::SINCOS_F64] = func_f64_iPTR_iPTR; Table[RTLIB::SINCOS_F128] = func_i64_i64_iPTR_iPTR; diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index bc2eb6dcd541c7..1b8462f2d258ca 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -530,7 +530,7 @@ void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS, if (!canPadInst(Inst, OS)) return; - if (PendingBA && PendingBA->getNextNode() == OS.getCurrentFragment()) { + if (PendingBA && PendingBA->getNext() == OS.getCurrentFragment()) { // Macro fusion actually happens and there is no other fragment inserted // after the previous instruction. // @@ -978,8 +978,8 @@ void X86AsmBackend::finishLayout(MCAssembler const &Asm, // The layout is done. Mark every fragment as valid. 
for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) { MCSection &Section = *Layout.getSectionOrder()[i]; - Layout.getFragmentOffset(&*Section.getFragmentList().rbegin()); - Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin()); + Layout.getFragmentOffset(&*Section.curFragList()->Tail); + Asm.computeFragmentSize(Layout, *Section.curFragList()->Tail); } } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index cad3ea4716db3e..0d79e4eb3f75a0 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -23816,6 +23816,20 @@ SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1, } } + // Look for X == INT_MIN or X != INT_MIN. We can use NEG and test for + // overflow. + if (isMinSignedConstant(Op1)) { + EVT VT = Op0.getValueType(); + if (VT == MVT::i32 || VT == MVT::i64 || Op0->hasOneUse()) { + SDVTList CmpVTs = DAG.getVTList(VT, MVT::i32); + X86::CondCode CondCode = CC == ISD::SETEQ ? X86::COND_O : X86::COND_NO; + X86CC = DAG.getTargetConstant(CondCode, dl, MVT::i8); + SDValue Neg = DAG.getNode(X86ISD::SUB, dl, CmpVTs, + DAG.getConstant(0, dl, VT), Op0); + return SDValue(Neg.getNode(), 1); + } + } + // Try to use the carry flag from the add in place of an separate CMP for: // (seteq (add X, -1), -1). Similar for setne. if (isAllOnesConstant(Op1) && Op0.getOpcode() == ISD::ADD && diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp index a9747aebf67bbc..1a7b9bc8e3e770 100644 --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -197,6 +197,14 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM, return *IAA->getAdvisor(); } +void makeFunctionBodyUnreachable(Function &F) { + F.dropAllReferences(); + for (BasicBlock &BB : make_early_inc_range(F)) + BB.eraseFromParent(); + BasicBlock *BB = BasicBlock::Create(F.getContext(), "", &F); + new UnreachableInst(F.getContext(), BB); +} + PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR) { @@ -448,11 +456,9 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, }), Calls.end()); - // Clear the body and queue the function itself for deletion when we - // finish inlining and call graph updates. - // Note that after this point, it is an error to do anything other - // than use the callee's address or delete it. - Callee.dropAllReferences(); + // Clear the body and queue the function itself for call graph + // updating when we finish inlining. + makeFunctionBodyUnreachable(Callee); assert(!is_contained(DeadFunctions, &Callee) && "Cannot put cause a function to become dead twice!"); DeadFunctions.push_back(&Callee); @@ -530,7 +536,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, if (!DeadFunctionsInComdats.empty()) { filterDeadComdatFunctions(DeadFunctionsInComdats); for (auto *Callee : DeadFunctionsInComdats) - Callee->dropAllReferences(); + makeFunctionBodyUnreachable(*Callee); DeadFunctions.append(DeadFunctionsInComdats); } @@ -542,25 +548,18 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, // that is OK as all we do is delete things and add pointers to unordered // sets. for (Function *DeadF : DeadFunctions) { + CG.markDeadFunction(*DeadF); // Get the necessary information out of the call graph and nuke the // function there. Also, clear out any cached analyses. 
auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF)); FAM.clear(*DeadF, DeadF->getName()); AM.clear(DeadC, DeadC.getName()); - auto &DeadRC = DeadC.getOuterRefSCC(); - CG.removeDeadFunction(*DeadF); // Mark the relevant parts of the call graph as invalid so we don't visit // them. UR.InvalidatedSCCs.insert(&DeadC); - UR.InvalidatedRefSCCs.insert(&DeadRC); - - // If the updated SCC was the one containing the deleted function, clear it. - if (&DeadC == UR.UpdatedC) - UR.UpdatedC = nullptr; - // And delete the actual function from the module. - M.getFunctionList().erase(DeadF); + UR.DeadFunctions.push_back(DeadF); ++NumDeleted; } diff --git a/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp b/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp index d0b9884aa9099b..e9f37d4044cb02 100644 --- a/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp +++ b/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp @@ -67,16 +67,20 @@ bool CallGraphUpdater::finalize() { FAM.clear(*DeadFn, DeadFn->getName()); AM->clear(*DeadSCC, DeadSCC->getName()); - LCG->removeDeadFunction(*DeadFn); + LCG->markDeadFunction(*DeadFn); // Mark the relevant parts of the call graph as invalid so we don't // visit them. UR->InvalidatedSCCs.insert(DeadSCC); UR->InvalidatedRefSCCs.insert(&DeadRC); + UR->DeadFunctions.push_back(DeadFn); + } else { + // The CGSCC infrastructure batch deletes functions at the end of the + // call graph walk, so only erase the function if we're not using that + // infrastructure. + // The function is now really dead and de-attached from everything. + DeadFn->eraseFromParent(); } - - // The function is now really dead and de-attached from everything. - DeadFn->eraseFromParent(); } } diff --git a/llvm/test/CodeGen/SPIRV/optimizations/switch-condition-type.ll b/llvm/test/CodeGen/SPIRV/optimizations/switch-condition-type.ll new file mode 100644 index 00000000000000..054520d2021b99 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/optimizations/switch-condition-type.ll @@ -0,0 +1,18 @@ +; RUN: llc -O2 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O2 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK: %[[#INT16:]] = OpTypeInt 16 0 +; CHECK: %[[#PARAM:]] = OpFunctionParameter %[[#INT16]] +; CHECK: OpSwitch %[[#PARAM]] %[[#]] 1 %[[#]] 2 %[[#]] + +define i32 @test_switch(i16 %cond) { +entry: + switch i16 %cond, label %default [ i16 1, label %case_one + i16 2, label %case_two ] +case_one: + ret i32 1 +case_two: + ret i32 2 +default: + ret i32 3 +} diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpGenericCastToPtr.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpGenericCastToPtr.ll new file mode 100644 index 00000000000000..e3a82b3577701b --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/OpGenericCastToPtr.ll @@ -0,0 +1,138 @@ +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK-SPIRV +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-SPIRV-DAG: %[[#Char:]] = OpTypeInt 8 0 +; CHECK-SPIRV-DAG: %[[#GlobalCharPtr:]] = OpTypePointer CrossWorkgroup %[[#Char]] +; CHECK-SPIRV-DAG: %[[#LocalCharPtr:]] = OpTypePointer Workgroup %[[#Char]] +; CHECK-SPIRV-DAG: %[[#PrivateCharPtr:]] = OpTypePointer Function %[[#Char]] +; CHECK-SPIRV-DAG: %[[#GenericCharPtr:]] = OpTypePointer Generic %[[#Char]] + +; CHECK-SPIRV-DAG: %[[#Int:]] = OpTypeInt 32 0 +; CHECK-SPIRV-DAG: %[[#GlobalIntPtr:]] = OpTypePointer CrossWorkgroup %[[#Int]] +; CHECK-SPIRV-DAG: %[[#PrivateIntPtr:]] = 
OpTypePointer Function %[[#Int]] +; CHECK-SPIRV-DAG: %[[#GenericIntPtr:]] = OpTypePointer Generic %[[#Int]] + +%id = type { %arr } +%arr = type { [1 x i64] } + +@__spirv_BuiltInGlobalInvocationId = external local_unnamed_addr addrspace(1) constant <3 x i64> + +; Mangling + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: OpPtrCastToGeneric %[[#GenericIntPtr]] +; CHECK-SPIRV: OpPtrCastToGeneric %[[#GenericCharPtr]] +; CHECK-SPIRV: OpPtrCastToGeneric %[[#GenericIntPtr]] +; CHECK-SPIRV: OpGenericCastToPtr %[[#GlobalCharPtr]] +; CHECK-SPIRV: OpGenericCastToPtr %[[#LocalCharPtr]] +; CHECK-SPIRV: OpGenericCastToPtr %[[#PrivateCharPtr]] +; CHECK-SPIRV: OpFunctionEnd + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: OpPtrCastToGeneric %[[#GenericIntPtr]] +; CHECK-SPIRV: OpPtrCastToGeneric %[[#GenericCharPtr]] +; CHECK-SPIRV: OpPtrCastToGeneric %[[#GenericIntPtr]] +; CHECK-SPIRV: OpGenericCastToPtr %[[#GlobalCharPtr]] +; CHECK-SPIRV: OpGenericCastToPtr %[[#LocalCharPtr]] +; CHECK-SPIRV: OpGenericCastToPtr %[[#PrivateCharPtr]] +; CHECK-SPIRV: OpFunctionEnd + +define spir_kernel void @test1(ptr addrspace(1) %_arg_GlobalA, ptr byval(%id) %_arg_GlobalId, ptr addrspace(3) %_arg_LocalA) { +entry: + %var = alloca i32 + %p0 = load i64, ptr %_arg_GlobalId + %add = getelementptr inbounds i32, ptr addrspace(1) %_arg_GlobalA, i64 %p0 + %p2 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId + %idx = getelementptr inbounds i32, ptr addrspace(1) %add, i64 %p2 + %var1 = addrspacecast ptr addrspace(1) %idx to ptr addrspace(4) + %var2 = addrspacecast ptr addrspace(3) %_arg_LocalA to ptr addrspace(4) + %var3 = addrspacecast ptr %var to ptr addrspace(4) + %G = call spir_func ptr addrspace(1) @_Z33__spirv_GenericCastToPtr_ToGlobalPvi(ptr addrspace(4) %var1, i32 5) + %L = call spir_func ptr addrspace(3) @_Z32__spirv_GenericCastToPtr_ToLocalPvi(ptr addrspace(4) %var2, i32 4) + %P = call spir_func ptr @_Z34__spirv_GenericCastToPtr_ToPrivatePvi(ptr addrspace(4) %var3, i32 7) + ret void +} + +define spir_kernel void @test2(ptr addrspace(1) %_arg_GlobalA, ptr byval(%id) %_arg_GlobalId, ptr addrspace(3) %_arg_LocalA) { +entry: + %var = alloca i32 + %p0 = load i64, ptr %_arg_GlobalId + %add = getelementptr inbounds i32, ptr addrspace(1) %_arg_GlobalA, i64 %p0 + %p2 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId + %idx = getelementptr inbounds i32, ptr addrspace(1) %add, i64 %p2 + %var1 = addrspacecast ptr addrspace(1) %idx to ptr addrspace(4) + %var2 = addrspacecast ptr addrspace(3) %_arg_LocalA to ptr addrspace(4) + %var3 = addrspacecast ptr %var to ptr addrspace(4) + %G = call spir_func ptr addrspace(1) @_Z9to_globalPv(ptr addrspace(4) %var1) + %L = call spir_func ptr addrspace(3) @_Z8to_localPv(ptr addrspace(4) %var2) + %P = call spir_func ptr @_Z10to_privatePv(ptr addrspace(4) %var3) + ret void +} + +declare spir_func ptr addrspace(1) @_Z33__spirv_GenericCastToPtr_ToGlobalPvi(ptr addrspace(4), i32) +declare spir_func ptr addrspace(3) @_Z32__spirv_GenericCastToPtr_ToLocalPvi(ptr addrspace(4), i32) +declare spir_func ptr @_Z34__spirv_GenericCastToPtr_ToPrivatePvi(ptr addrspace(4), i32) + +declare spir_func ptr addrspace(1) @_Z9to_globalPv(ptr addrspace(4)) +declare spir_func ptr addrspace(3) @_Z8to_localPv(ptr addrspace(4)) +declare spir_func ptr @_Z10to_privatePv(ptr addrspace(4)) + +; No mangling + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: OpPtrCastToGeneric %[[#GenericIntPtr]] +; CHECK-SPIRV: OpPtrCastToGeneric %[[#GenericCharPtr]] +; CHECK-SPIRV: OpPtrCastToGeneric %[[#GenericIntPtr]] +; CHECK-SPIRV: 
OpGenericCastToPtr %[[#GlobalIntPtr]] +; CHECK-SPIRV: OpGenericCastToPtr %[[#LocalCharPtr]] +; CHECK-SPIRV: OpGenericCastToPtr %[[#PrivateIntPtr]] +; CHECK-SPIRV: OpFunctionEnd + +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV: OpPtrCastToGeneric %[[#GenericIntPtr]] +; CHECK-SPIRV: OpPtrCastToGeneric %[[#GenericCharPtr]] +; CHECK-SPIRV: OpPtrCastToGeneric %[[#GenericIntPtr]] +; CHECK-SPIRV: OpGenericCastToPtr %[[#GlobalIntPtr]] +; CHECK-SPIRV: OpGenericCastToPtr %[[#LocalCharPtr]] +; CHECK-SPIRV: OpGenericCastToPtr %[[#PrivateIntPtr]] +; CHECK-SPIRV: OpFunctionEnd + +define spir_kernel void @test3(ptr addrspace(1) %_arg_GlobalA, ptr byval(%id) %_arg_GlobalId, ptr addrspace(3) %_arg_LocalA) { +entry: + %var = alloca i32 + %p0 = load i64, ptr %_arg_GlobalId + %add = getelementptr inbounds i32, ptr addrspace(1) %_arg_GlobalA, i64 %p0 + %p2 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId + %idx = getelementptr inbounds i32, ptr addrspace(1) %add, i64 %p2 + %var1 = addrspacecast ptr addrspace(1) %idx to ptr addrspace(4) + %var2 = addrspacecast ptr addrspace(3) %_arg_LocalA to ptr addrspace(4) + %var3 = addrspacecast ptr %var to ptr addrspace(4) + %G = call spir_func ptr addrspace(1) @__spirv_GenericCastToPtr_ToGlobal(ptr addrspace(4) %var1, i32 5) + %L = call spir_func ptr addrspace(3) @__spirv_GenericCastToPtr_ToLocal(ptr addrspace(4) %var2, i32 4) + %P = call spir_func ptr @__spirv_GenericCastToPtr_ToPrivate(ptr addrspace(4) %var3, i32 7) + ret void +} + +define spir_kernel void @test4(ptr addrspace(1) %_arg_GlobalA, ptr byval(%id) %_arg_GlobalId, ptr addrspace(3) %_arg_LocalA) { +entry: + %var = alloca i32 + %p0 = load i64, ptr %_arg_GlobalId + %add = getelementptr inbounds i32, ptr addrspace(1) %_arg_GlobalA, i64 %p0 + %p2 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId + %idx = getelementptr inbounds i32, ptr addrspace(1) %add, i64 %p2 + %var1 = addrspacecast ptr addrspace(1) %idx to ptr addrspace(4) + %var2 = addrspacecast ptr addrspace(3) %_arg_LocalA to ptr addrspace(4) + %var3 = addrspacecast ptr %var to ptr addrspace(4) + %G = call spir_func ptr addrspace(1) @to_global(ptr addrspace(4) %var1) + %L = call spir_func ptr addrspace(3) @to_local(ptr addrspace(4) %var2) + %P = call spir_func ptr @to_private(ptr addrspace(4) %var3) + ret void +} + +declare spir_func ptr addrspace(1) @__spirv_GenericCastToPtr_ToGlobal(ptr addrspace(4), i32) +declare spir_func ptr addrspace(3) @__spirv_GenericCastToPtr_ToLocal(ptr addrspace(4), i32) +declare spir_func ptr @__spirv_GenericCastToPtr_ToPrivate(ptr addrspace(4), i32) + +declare spir_func ptr addrspace(1) @to_global(ptr addrspace(4)) +declare spir_func ptr addrspace(3) @to_local(ptr addrspace(4)) +declare spir_func ptr @to_private(ptr addrspace(4)) diff --git a/llvm/test/CodeGen/SPIRV/trunc-nonstd-bitwidth.ll b/llvm/test/CodeGen/SPIRV/trunc-nonstd-bitwidth.ll new file mode 100644 index 00000000000000..437e161864eca5 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/trunc-nonstd-bitwidth.ll @@ -0,0 +1,56 @@ +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOEXT +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s --spirv-ext=+SPV_INTEL_arbitrary_precision_integers -o - | FileCheck %s --check-prefixes=CHECK,CHECK-EXT + +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOEXT +; RUN: %if spirv-tools %{ llc -O0 
-mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s --spirv-ext=+SPV_INTEL_arbitrary_precision_integers -o - | FileCheck %s --check-prefixes=CHECK,CHECK-EXT + +; CHECK-DAG: OpName %[[#Struct:]] "struct" +; CHECK-DAG: OpName %[[#Arg:]] "arg" +; CHECK-DAG: OpName %[[#QArg:]] "qarg" +; CHECK-DAG: OpName %[[#R:]] "r" +; CHECK-DAG: OpName %[[#Q:]] "q" +; CHECK-DAG: OpName %[[#Tr:]] "tr" +; CHECK-DAG: OpName %[[#Tq:]] "tq" +; CHECK-DAG: %[[#Struct]] = OpTypeStruct %[[#]] %[[#]] %[[#]] +; CHECK-DAG: %[[#PtrStruct:]] = OpTypePointer CrossWorkgroup %[[#Struct]] +; CHECK-EXT-DAG: %[[#Int40:]] = OpTypeInt 40 0 +; CHECK-EXT-DAG: %[[#Int50:]] = OpTypeInt 50 0 +; CHECK-NOEXT-DAG: %[[#Int40:]] = OpTypeInt 64 0 +; CHECK-DAG: %[[#PtrInt40:]] = OpTypePointer CrossWorkgroup %[[#Int40]] + +; CHECK: OpFunction + +; CHECK-EXT: %[[#Tr]] = OpUConvert %[[#Int40]] %[[#R]] +; CHECK-EXT: %[[#Store:]] = OpInBoundsPtrAccessChain %[[#PtrStruct]] %[[#Arg]] %[[#]] +; CHECK-EXT: %[[#StoreAsInt40:]] = OpBitcast %[[#PtrInt40]] %[[#Store]] +; CHECK-EXT: OpStore %[[#StoreAsInt40]] %[[#Tr]] + +; CHECK-NOEXT: %[[#Store:]] = OpInBoundsPtrAccessChain %[[#PtrStruct]] %[[#Arg]] %[[#]] +; CHECK-NOEXT: %[[#StoreAsInt40:]] = OpBitcast %[[#PtrInt40]] %[[#Store]] +; CHECK-NOEXT: OpStore %[[#StoreAsInt40]] %[[#R]] + +; CHECK: OpFunction + +; CHECK-EXT: %[[#Tq]] = OpUConvert %[[#Int40]] %[[#Q]] +; CHECK-EXT: OpStore %[[#QArg]] %[[#Tq]] + +; CHECK-NOEXT: OpStore %[[#QArg]] %[[#Q]] + +%struct = type <{ i32, i8, [3 x i8] }> + +define spir_kernel void @foo(ptr addrspace(1) %arg, i64 %r) { + %tr = trunc i64 %r to i40 + %addr = getelementptr inbounds %struct, ptr addrspace(1) %arg, i64 0 + store i40 %tr, ptr addrspace(1) %addr + ret void +} + +define spir_kernel void @bar(ptr addrspace(1) %qarg, i50 %q) { + %tq = trunc i50 %q to i40 + store i40 %tq, ptr addrspace(1) %qarg + ret void +} diff --git a/llvm/test/CodeGen/WebAssembly/libcalls.ll b/llvm/test/CodeGen/WebAssembly/libcalls.ll index 4f57c347a1a335..70f000664d388a 100644 --- a/llvm/test/CodeGen/WebAssembly/libcalls.ll +++ b/llvm/test/CodeGen/WebAssembly/libcalls.ll @@ -12,6 +12,7 @@ declare fp128 @llvm.nearbyint.f128(fp128) declare fp128 @llvm.pow.f128(fp128, fp128) declare fp128 @llvm.powi.f128.i32(fp128, i32) +declare double @llvm.tan.f64(double) declare double @llvm.cos.f64(double) declare double @llvm.log10.f64(double) declare double @llvm.pow.f64(double, double) @@ -240,42 +241,44 @@ define double @f64libcalls(double %x, double %y, i32 %z) { ; CHECK: .functype f64libcalls (f64, f64, i32) -> (f64) ; CHECK-NEXT: .local i32 ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: global.get $push11=, __stack_pointer -; CHECK-NEXT: i32.const $push12=, 16 -; CHECK-NEXT: i32.sub $push18=, $pop11, $pop12 -; CHECK-NEXT: local.tee $push17=, 3, $pop18 -; CHECK-NEXT: global.set __stack_pointer, $pop17 -; CHECK-NEXT: local.get $push22=, 0 -; CHECK-NEXT: local.get $push19=, 0 -; CHECK-NEXT: call $push0=, cos, $pop19 -; CHECK-NEXT: call $push1=, log10, $pop0 -; CHECK-NEXT: local.get $push20=, 1 -; CHECK-NEXT: call $push2=, pow, $pop1, $pop20 -; CHECK-NEXT: local.get $push21=, 2 -; CHECK-NEXT: call $push3=, __powidf2, $pop2, $pop21 -; CHECK-NEXT: call $push4=, log, $pop3 -; CHECK-NEXT: call $push5=, exp, $pop4 -; CHECK-NEXT: call $push6=, exp10, $pop5 -; CHECK-NEXT: call $push7=, cbrt, $pop6 -; CHECK-NEXT: call $push8=, lround, $pop7 -; CHECK-NEXT: call $push9=, ldexp, $pop22, $pop8 -; CHECK-NEXT: local.get $push23=, 3 -; CHECK-NEXT: i32.const 
$push15=, 12 -; CHECK-NEXT: i32.add $push16=, $pop23, $pop15 -; CHECK-NEXT: call $push24=, frexp, $pop9, $pop16 -; CHECK-NEXT: local.set 0, $pop24 -; CHECK-NEXT: local.get $push25=, 3 -; CHECK-NEXT: i32.load $push10=, 12($pop25) -; CHECK-NEXT: call escape_value, $pop10 -; CHECK-NEXT: local.get $push26=, 3 +; CHECK-NEXT: global.get $push12=, __stack_pointer ; CHECK-NEXT: i32.const $push13=, 16 -; CHECK-NEXT: i32.add $push14=, $pop26, $pop13 -; CHECK-NEXT: global.set __stack_pointer, $pop14 -; CHECK-NEXT: local.get $push27=, 0 -; CHECK-NEXT: return $pop27 +; CHECK-NEXT: i32.sub $push19=, $pop12, $pop13 +; CHECK-NEXT: local.tee $push18=, 3, $pop19 +; CHECK-NEXT: global.set __stack_pointer, $pop18 +; CHECK-NEXT: local.get $push23=, 0 +; CHECK-NEXT: local.get $push20=, 0 +; CHECK-NEXT: call $push0=, tan, $pop20 +; CHECK-NEXT: call $push1=, cos, $pop0 +; CHECK-NEXT: call $push2=, log10, $pop1 +; CHECK-NEXT: local.get $push21=, 1 +; CHECK-NEXT: call $push3=, pow, $pop2, $pop21 +; CHECK-NEXT: local.get $push22=, 2 +; CHECK-NEXT: call $push4=, __powidf2, $pop3, $pop22 +; CHECK-NEXT: call $push5=, log, $pop4 +; CHECK-NEXT: call $push6=, exp, $pop5 +; CHECK-NEXT: call $push7=, exp10, $pop6 +; CHECK-NEXT: call $push8=, cbrt, $pop7 +; CHECK-NEXT: call $push9=, lround, $pop8 +; CHECK-NEXT: call $push10=, ldexp, $pop23, $pop9 +; CHECK-NEXT: local.get $push24=, 3 +; CHECK-NEXT: i32.const $push16=, 12 +; CHECK-NEXT: i32.add $push17=, $pop24, $pop16 +; CHECK-NEXT: call $push25=, frexp, $pop10, $pop17 +; CHECK-NEXT: local.set 0, $pop25 +; CHECK-NEXT: local.get $push26=, 3 +; CHECK-NEXT: i32.load $push11=, 12($pop26) +; CHECK-NEXT: call escape_value, $pop11 +; CHECK-NEXT: local.get $push27=, 3 +; CHECK-NEXT: i32.const $push14=, 16 +; CHECK-NEXT: i32.add $push15=, $pop27, $pop14 +; CHECK-NEXT: global.set __stack_pointer, $pop15 +; CHECK-NEXT: local.get $push28=, 0 +; CHECK-NEXT: return $pop28 - %a = call double @llvm.cos.f64(double %x) + %k = call double @llvm.tan.f64(double %x) + %a = call double @llvm.cos.f64(double %k) %b = call double @llvm.log10.f64(double %a) %c = call double @llvm.pow.f64(double %b, double %y) %d = call double @llvm.powi.f64.i32(double %c, i32 %z) diff --git a/llvm/test/CodeGen/X86/2008-06-16-SubregsBug.ll b/llvm/test/CodeGen/X86/2008-06-16-SubregsBug.ll index feaa38a7600a21..00ffea903079ef 100644 --- a/llvm/test/CodeGen/X86/2008-06-16-SubregsBug.ll +++ b/llvm/test/CodeGen/X86/2008-06-16-SubregsBug.ll @@ -8,8 +8,8 @@ define i16 @test(ptr %tmp179) nounwind { ; CHECK-NEXT: movzwl (%eax), %eax ; CHECK-NEXT: movl %eax, %ecx ; CHECK-NEXT: andl $64512, %ecx ## imm = 0xFC00 -; CHECK-NEXT: cmpl $32768, %ecx ## imm = 0x8000 -; CHECK-NEXT: jne LBB0_2 +; CHECK-NEXT: negw %cx +; CHECK-NEXT: jno LBB0_2 ; CHECK-NEXT: ## %bb.1: ## %bb189 ; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retl diff --git a/llvm/test/CodeGen/X86/abs.ll b/llvm/test/CodeGen/X86/abs.ll index dde877c5bb61e5..bae140abdf6b1a 100644 --- a/llvm/test/CodeGen/X86/abs.ll +++ b/llvm/test/CodeGen/X86/abs.ll @@ -783,8 +783,7 @@ define i32 @test_minsigned_i32(i32 %a0, i32 %a1) nounwind { ; X64-NEXT: movl %edi, %eax ; X64-NEXT: negl %eax ; X64-NEXT: cmovsl %edi, %eax -; X64-NEXT: cmpl $-2147483648, %edi # imm = 0x80000000 -; X64-NEXT: cmovel %esi, %eax +; X64-NEXT: cmovol %esi, %eax ; X64-NEXT: retq ; ; X86-LABEL: test_minsigned_i32: @@ -793,11 +792,7 @@ define i32 @test_minsigned_i32(i32 %a0, i32 %a1) nounwind { ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: negl %eax ; X86-NEXT: cmovsl %ecx, %eax -; X86-NEXT: cmpl 
$-2147483648, %ecx # imm = 0x80000000 -; X86-NEXT: jne .LBB19_2 -; X86-NEXT: # %bb.1: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: .LBB19_2: +; X86-NEXT: cmovol {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl %lim = icmp eq i32 %a0, -2147483648 %abs = tail call i32 @llvm.abs.i32(i32 %a0, i1 false) @@ -811,9 +806,7 @@ define i64 @test_minsigned_i64(i64 %a0, i64 %a1) nounwind { ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: negq %rax ; X64-NEXT: cmovsq %rdi, %rax -; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; X64-NEXT: cmpq %rcx, %rdi -; X64-NEXT: cmoveq %rsi, %rax +; X64-NEXT: cmovoq %rsi, %rax ; X64-NEXT: retq ; ; X86-LABEL: test_minsigned_i64: diff --git a/llvm/test/CodeGen/X86/combine-sdiv.ll b/llvm/test/CodeGen/X86/combine-sdiv.ll index 49797fbefa5973..5c5487815b3360 100644 --- a/llvm/test/CodeGen/X86/combine-sdiv.ll +++ b/llvm/test/CodeGen/X86/combine-sdiv.ll @@ -58,8 +58,8 @@ define i32 @combine_sdiv_by_minsigned(i32 %x) { ; CHECK-LABEL: combine_sdiv_by_minsigned: ; CHECK: # %bb.0: ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: cmpl $-2147483648, %edi # imm = 0x80000000 -; CHECK-NEXT: sete %al +; CHECK-NEXT: negl %edi +; CHECK-NEXT: seto %al ; CHECK-NEXT: retq %1 = sdiv i32 %x, -2147483648 ret i32 %1 diff --git a/llvm/test/CodeGen/X86/is_fpclass.ll b/llvm/test/CodeGen/X86/is_fpclass.ll index 999be0f98b6fc5..532b2c09a9175f 100644 --- a/llvm/test/CodeGen/X86/is_fpclass.ll +++ b/llvm/test/CodeGen/X86/is_fpclass.ll @@ -937,15 +937,16 @@ entry: define i1 @is_minus_zero_f(float %x) { ; X86-LABEL: is_minus_zero_f: ; X86: # %bb.0: # %entry -; X86-NEXT: cmpl $-2147483648, {{[0-9]+}}(%esp) # imm = 0x80000000 -; X86-NEXT: sete %al +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; X86-NEXT: seto %al ; X86-NEXT: retl ; ; X64-LABEL: is_minus_zero_f: ; X64: # %bb.0: # %entry ; X64-NEXT: movd %xmm0, %eax -; X64-NEXT: cmpl $-2147483648, %eax # imm = 0x80000000 -; X64-NEXT: sete %al +; X64-NEXT: negl %eax +; X64-NEXT: seto %al ; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 32) ; 0x20 = "-zero" @@ -955,15 +956,16 @@ entry: define i1 @not_is_minus_zero_f(float %x) { ; X86-LABEL: not_is_minus_zero_f: ; X86: # %bb.0: # %entry -; X86-NEXT: cmpl $-2147483648, {{[0-9]+}}(%esp) # imm = 0x80000000 -; X86-NEXT: setne %al +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; X86-NEXT: setno %al ; X86-NEXT: retl ; ; X64-LABEL: not_is_minus_zero_f: ; X64: # %bb.0: # %entry ; X64-NEXT: movd %xmm0, %eax -; X64-NEXT: cmpl $-2147483648, %eax # imm = 0x80000000 -; X64-NEXT: setne %al +; X64-NEXT: negl %eax +; X64-NEXT: setno %al ; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 991) ; ~0x20 = ~"-zero" diff --git a/llvm/test/CodeGen/X86/lsr-overflow.ll b/llvm/test/CodeGen/X86/lsr-overflow.ll index 09c1c07ef3de01..79440c282be758 100644 --- a/llvm/test/CodeGen/X86/lsr-overflow.ll +++ b/llvm/test/CodeGen/X86/lsr-overflow.ll @@ -4,9 +4,9 @@ ; The comparison uses the pre-inc value, which could lead LSR to ; try to compute -INT64_MIN. 
-; CHECK: movabsq $-9223372036854775808, %rax -; CHECK: cmpq %rax, -; CHECK: sete %al +; CHECK-NOT: movabsq $-9223372036854775808, %rax +; CHECK: negq %r +; CHECK-NEXT: seto %al declare i64 @bar() diff --git a/llvm/test/CodeGen/X86/shrink-compare-pgso.ll b/llvm/test/CodeGen/X86/shrink-compare-pgso.ll index 254b8fe3fc6e30..5a15ee36c07263 100644 --- a/llvm/test/CodeGen/X86/shrink-compare-pgso.ll +++ b/llvm/test/CodeGen/X86/shrink-compare-pgso.ll @@ -265,8 +265,8 @@ if.end: define dso_local void @test_sext_i8_icmp_neg128(i8 %x) nounwind !prof !14 { ; CHECK-LABEL: test_sext_i8_icmp_neg128: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cmpb $-128, %dil -; CHECK-NEXT: je bar # TAILCALL +; CHECK-NEXT: negb %dil +; CHECK-NEXT: jo bar # TAILCALL ; CHECK-NEXT: # %bb.1: # %if.end ; CHECK-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/shrink-compare.ll b/llvm/test/CodeGen/X86/shrink-compare.ll index 840167ff9f4a0c..1a61451c26a03c 100644 --- a/llvm/test/CodeGen/X86/shrink-compare.ll +++ b/llvm/test/CodeGen/X86/shrink-compare.ll @@ -265,8 +265,8 @@ if.end: define dso_local void @test_sext_i8_icmp_neg128(i8 %x) nounwind minsize { ; CHECK-LABEL: test_sext_i8_icmp_neg128: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cmpb $-128, %dil -; CHECK-NEXT: je bar # TAILCALL +; CHECK-NEXT: negb %dil +; CHECK-NEXT: jo bar # TAILCALL ; CHECK-NEXT: # %bb.1: # %if.end ; CHECK-NEXT: retq entry: diff --git a/llvm/test/DebugInfo/symbolize-gnu-debuglink-fallback.test b/llvm/test/DebugInfo/symbolize-gnu-debuglink-fallback.test index 43d5a2c818fac3..c0d6f83e4af0d5 100644 --- a/llvm/test/DebugInfo/symbolize-gnu-debuglink-fallback.test +++ b/llvm/test/DebugInfo/symbolize-gnu-debuglink-fallback.test @@ -1,4 +1,4 @@ -# REQUIRES: shell +# UNSUPPORTED: system-windows # Ensures that .debuglink can fallback to a separate location. This is normally # /usr/lib/debug (or /usr/libdata/debug for NetBSD), but can be configured on # the command line (mainly for testing). 
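The X86 test updates above (abs, combine-sdiv, is_fpclass, lsr-overflow, shrink-compare) all exercise one identity: `neg x` overflows exactly when `x` is the minimum signed value, so `x == INT_MIN` can be lowered as NEG plus a check of the overflow flag instead of comparing against a wide immediate. A self-contained sanity check of that identity; the helper name is mine, and `__builtin_sub_overflow` is a GCC/Clang builtin:

```cpp
// Verifies the identity behind the new lowering: 0 - x overflows in a
// signed type iff x is the minimum signed value.
#include <cassert>
#include <climits>

static bool isIntMinViaNeg(int x) {
  int neg;
  return __builtin_sub_overflow(0, x, &neg); // mirrors the OF bit of `negl`
}

int main() {
  assert(isIntMinViaNeg(INT_MIN));   // only INT_MIN makes NEG overflow
  assert(!isIntMinViaNeg(INT_MAX));
  assert(!isIntMinViaNeg(0));
  assert(!isIntMinViaNeg(-1));
  return 0;
}
```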
diff --git a/llvm/test/Other/cgscc-refscc-mutation-order.ll b/llvm/test/Other/cgscc-refscc-mutation-order.ll index 13a46503c1f4c9..aa207357157633 100644 --- a/llvm/test/Other/cgscc-refscc-mutation-order.ll +++ b/llvm/test/Other/cgscc-refscc-mutation-order.ll @@ -15,8 +15,6 @@ ; CHECK-NOT: InstCombinePass ; CHECK: Running pass: InstCombinePass on f4 ; CHECK-NOT: InstCombinePass -; CHECK: Running pass: InstCombinePass on f1 -; CHECK-NOT: InstCombinePass @a1 = alias void (), ptr @f1 @a2 = alias void (), ptr @f2 diff --git a/llvm/test/Other/devirt-invalidated.ll b/llvm/test/Other/devirt-invalidated.ll index c3ed5e53b3b04c..7926641dda97bb 100644 --- a/llvm/test/Other/devirt-invalidated.ll +++ b/llvm/test/Other/devirt-invalidated.ll @@ -1,8 +1,6 @@ ; RUN: opt -passes='devirt<0>(inline)' < %s -S | FileCheck %s -; CHECK-NOT: internal ; CHECK: define void @e() -; CHECK-NOT: internal define void @e() { entry: diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll index 2df81d6cb1832d..1c34fff8dd7554 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll @@ -37,7 +37,6 @@ define internal i32 @caller(ptr %B) { ; CGSCC-LABEL: define {{[^@]+}}@caller ; CGSCC-SAME: () #[[ATTR0]] { ; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4 -; CGSCC-NEXT: [[A2:%.*]] = alloca i8, i32 0, align 4 ; CGSCC-NEXT: [[A1:%.*]] = alloca i8, i32 0, align 4 ; CGSCC-NEXT: ret i32 0 ; diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll index 7c28de24beea27..b42647840f7cfc 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll @@ -54,7 +54,6 @@ define internal i32 @caller(ptr %B) { ; CGSCC-LABEL: define {{[^@]+}}@caller ; CGSCC-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4 -; CGSCC-NEXT: [[A2:%.*]] = alloca i8, i32 0, align 4 ; CGSCC-NEXT: [[A1:%.*]] = alloca i8, i32 0, align 4 ; CGSCC-NEXT: [[C:%.*]] = call i32 @test(ptr noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) #[[ATTR2:[0-9]+]] ; CGSCC-NEXT: ret i32 0 diff --git a/llvm/test/Transforms/Inline/cgscc-cycle-debug.ll b/llvm/test/Transforms/Inline/cgscc-cycle-debug.ll index 40a6b0577e7dd0..e79700e8dac624 100644 --- a/llvm/test/Transforms/Inline/cgscc-cycle-debug.ll +++ b/llvm/test/Transforms/Inline/cgscc-cycle-debug.ll @@ -13,7 +13,6 @@ ; CHECK: Running an SCC pass across the RefSCC: [(test1_a, test1_b, test1_c)] ; CHECK: Enqueuing the existing SCC in the worklist:(test1_b) ; CHECK: Enqueuing a newly formed SCC:(test1_c) -; CHECK: Enqueuing a new RefSCC in the update worklist: [(test1_b)] ; CHECK: Switch an internal ref edge to a call edge from 'test1_a' to 'test1_c' ; CHECK: Switch an internal ref edge to a call edge from 'test1_a' to 'test1_a' ; CHECK: Re-running SCC passes after a refinement of the current SCC: (test1_c, test1_a) diff --git a/llvm/test/tools/llvm-cov/gcov/basic.test b/llvm/test/tools/llvm-cov/gcov/basic.test index 5313fe2d7a69a1..7557739add8ba5 100644 --- a/llvm/test/tools/llvm-cov/gcov/basic.test +++ b/llvm/test/tools/llvm-cov/gcov/basic.test @@ -3,7 +3,7 @@ # 
Test fails on Windows where internal shell is used due to path separator # mismatches. -REQUIRES: shell +UNSUPPORTED: system-windows RUN: rm -rf %t RUN: mkdir %t diff --git a/llvm/test/tools/llvm-rc/windres-prefix.test b/llvm/test/tools/llvm-rc/windres-prefix.test index 7dda51d0635232..4c53fdfc3db65e 100644 --- a/llvm/test/tools/llvm-rc/windres-prefix.test +++ b/llvm/test/tools/llvm-rc/windres-prefix.test @@ -1,3 +1,5 @@ +; REQUIRES: shell + ; RUN: rm -rf %t && mkdir %t ; Check that a triple prefix on the executable gets picked up as target triple. diff --git a/llvm/test/tools/llvm-rc/windres-preproc.test b/llvm/test/tools/llvm-rc/windres-preproc.test index 13f82299a074bb..52427862e760b8 100644 --- a/llvm/test/tools/llvm-rc/windres-preproc.test +++ b/llvm/test/tools/llvm-rc/windres-preproc.test @@ -1,7 +1,6 @@ ;; Some quoted arguments below don't work properly on Windows when llvm-lit -;; invokes the cmd shell to run the commands. Just require running in a -;; posix shell, to keep being able to test this corner case on Unix at least. -; REQUIRES: shell +;; invokes the cmd shell to run the commands. +; UNSUPPORTED: system-windows ; RUN: llvm-windres -### --include-dir %p/incdir1 --include %p/incdir2 "-DFOO1=\\\"foo bar\\\"" -UFOO2 -D FOO3 --preprocessor-arg "-DFOO4=\\\"baz baz\\\"" -DFOO5=\"bar\" %p/Inputs/empty.rc %t.res | FileCheck %s --check-prefix=CHECK1 ; RUN: llvm-windres -### --include-dir %p/incdir1 --include %p/incdir2 "-DFOO1=\"foo bar\"" -UFOO2 -D FOO3 --preprocessor-arg "-DFOO4=\"baz baz\"" "-DFOO5=bar" %p/Inputs/empty.rc %t.res --use-temp-file | FileCheck %s --check-prefix=CHECK1 diff --git a/llvm/unittests/Analysis/CGSCCPassManagerTest.cpp b/llvm/unittests/Analysis/CGSCCPassManagerTest.cpp index b33567dd602b0e..aab148c12c4164 100644 --- a/llvm/unittests/Analysis/CGSCCPassManagerTest.cpp +++ b/llvm/unittests/Analysis/CGSCCPassManagerTest.cpp @@ -1659,18 +1659,16 @@ TEST_F(CGSCCPassManagerTest, TestUpdateCGAndAnalysisManagerForPasses9) { Function *FnF = M->getFunction("f"); // Use the CallGraphUpdater to update the call graph. - { - CallGraphUpdater CGU; - CGU.initialize(CG, C, AM, UR); - ASSERT_NO_FATAL_FAILURE(CGU.removeFunction(*FnF)); - ASSERT_EQ(M->getFunctionList().size(), 6U); - } - ASSERT_EQ(M->getFunctionList().size(), 5U); + CallGraphUpdater CGU; + CGU.initialize(CG, C, AM, UR); + ASSERT_NO_FATAL_FAILURE(CGU.removeFunction(*FnF)); + ASSERT_EQ(M->getFunctionList().size(), 6U); })); ModulePassManager MPM; MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); MPM.run(*M, MAM); + ASSERT_EQ(M->getFunctionList().size(), 5U); } TEST_F(CGSCCPassManagerTest, TestUpdateCGAndAnalysisManagerForPasses10) { diff --git a/llvm/unittests/Analysis/LazyCallGraphTest.cpp b/llvm/unittests/Analysis/LazyCallGraphTest.cpp index 69af7d92c7cf0d..64b6ccddc53b0c 100644 --- a/llvm/unittests/Analysis/LazyCallGraphTest.cpp +++ b/llvm/unittests/Analysis/LazyCallGraphTest.cpp @@ -1169,7 +1169,7 @@ TEST(LazyCallGraphTest, InlineAndDeleteFunction) { LazyCallGraph::SCC &NewDC = *NewCs.begin(); EXPECT_EQ(&NewDC, CG.lookupSCC(D1)); EXPECT_EQ(&NewDC, CG.lookupSCC(D3)); - auto NewRCs = DRC.removeInternalRefEdge(D1, {&D2}); + auto NewRCs = DRC.removeInternalRefEdges({{&D1, &D2}}); ASSERT_EQ(2u, NewRCs.size()); LazyCallGraph::RefSCC &NewDRC = *NewRCs[0]; EXPECT_EQ(&NewDRC, CG.lookupRefSCC(D1)); @@ -1186,7 +1186,8 @@ TEST(LazyCallGraphTest, InlineAndDeleteFunction) { EXPECT_TRUE(D2RC.isParentOf(NewDRC)); // Now that we've updated the call graph, D2 is dead, so remove it. 
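The unit-test churn below follows from splitting function removal into two steps: `markDeadFunction` detaches the node from the lazy call graph immediately, while `removeDeadFunctions` erases the IR for a whole batch later. A hedged sketch of the calling convention, assuming only the signatures this patch introduces and a function already known to be dead:

```cpp
// Sketch (signatures as introduced by this patch): retire a dead function
// in two phases so the IR deletion can be batched after the SCC walk.
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/IR/Function.h"
using namespace llvm;

void retireDeadFunction(LazyCallGraph &CG, Function &F) {
  CG.markDeadFunction(F);        // detach F from the call graph right away
  CG.removeDeadFunctions({&F});  // later: erase the IR, possibly in a batch
}
```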
- CG.removeDeadFunction(D2F); + CG.markDeadFunction(D2F); + CG.removeDeadFunctions({&D2F}); // Check that the graph still looks the same. EXPECT_EQ(&ARC, CG.lookupRefSCC(A1)); @@ -1344,7 +1345,7 @@ TEST(LazyCallGraphTest, InternalEdgeRemoval) { // Remove the edge from b -> a, which should leave the 3 functions still in // a single connected component because of a -> b -> c -> a. SmallVector NewRCs = - RC.removeInternalRefEdge(B, {&A}); + RC.removeInternalRefEdges({{&B, &A}}); EXPECT_EQ(0u, NewRCs.size()); EXPECT_EQ(&RC, CG.lookupRefSCC(A)); EXPECT_EQ(&RC, CG.lookupRefSCC(B)); @@ -1360,7 +1361,7 @@ TEST(LazyCallGraphTest, InternalEdgeRemoval) { // Remove the edge from c -> a, which should leave 'a' in the original RefSCC // and form a new RefSCC for 'b' and 'c'. - NewRCs = RC.removeInternalRefEdge(C, {&A}); + NewRCs = RC.removeInternalRefEdges({{&C, &A}}); ASSERT_EQ(2u, NewRCs.size()); LazyCallGraph::RefSCC &BCRC = *NewRCs[0]; LazyCallGraph::RefSCC &ARC = *NewRCs[1]; @@ -1425,7 +1426,7 @@ TEST(LazyCallGraphTest, InternalMultiEdgeRemoval) { // Remove the edges from b -> a and b -> c, leaving b in its own RefSCC. SmallVector NewRCs = - RC.removeInternalRefEdge(B, {&A, &C}); + RC.removeInternalRefEdges({{&B, &A}, {&B, &C}}); ASSERT_EQ(2u, NewRCs.size()); LazyCallGraph::RefSCC &BRC = *NewRCs[0]; @@ -1494,7 +1495,7 @@ TEST(LazyCallGraphTest, InternalNoOpEdgeRemoval) { // Remove the edge from a -> c which doesn't change anything. SmallVector NewRCs = - RC.removeInternalRefEdge(AN, {&CN}); + RC.removeInternalRefEdges({{&AN, &CN}}); EXPECT_EQ(0u, NewRCs.size()); EXPECT_EQ(&RC, CG.lookupRefSCC(AN)); EXPECT_EQ(&RC, CG.lookupRefSCC(BN)); @@ -1509,8 +1510,8 @@ TEST(LazyCallGraphTest, InternalNoOpEdgeRemoval) { // Remove the edge from b -> a and c -> b; again this doesn't change // anything. - NewRCs = RC.removeInternalRefEdge(BN, {&AN}); - NewRCs = RC.removeInternalRefEdge(CN, {&BN}); + NewRCs = RC.removeInternalRefEdges({{&BN, &AN}}); + NewRCs = RC.removeInternalRefEdges({{&CN, &BN}}); EXPECT_EQ(0u, NewRCs.size()); EXPECT_EQ(&RC, CG.lookupRefSCC(AN)); EXPECT_EQ(&RC, CG.lookupRefSCC(BN)); @@ -2163,7 +2164,8 @@ TEST(LazyCallGraphTest, RemoveFunctionWithSpuriousRef) { // Now delete 'dead'. There are no uses of this function but there are // spurious references. - CG.removeDeadFunction(DeadN.getFunction()); + CG.markDeadFunction(DeadN.getFunction()); + CG.removeDeadFunctions({&DeadN.getFunction()}); // The only observable change should be that the RefSCC is gone from the // postorder sequence. @@ -2212,7 +2214,8 @@ TEST(LazyCallGraphTest, RemoveFunctionWithSpuriousRefRecursive) { // Now delete 'a'. There are no uses of this function but there are // spurious references. - CG.removeDeadFunction(AN.getFunction()); + CG.markDeadFunction(AN.getFunction()); + CG.removeDeadFunctions({&AN.getFunction()}); // The only observable change should be that the RefSCC is gone from the // postorder sequence. @@ -2269,7 +2272,8 @@ TEST(LazyCallGraphTest, RemoveFunctionWithSpuriousRefRecursive2) { // Now delete 'a'. There are no uses of this function but there are // spurious references. - CG.removeDeadFunction(AN.getFunction()); + CG.markDeadFunction(AN.getFunction()); + CG.removeDeadFunctions({&AN.getFunction()}); // The only observable change should be that the RefSCC is gone from the // postorder sequence. @@ -2320,7 +2324,8 @@ TEST(LazyCallGraphTest, RemoveFunctionWithSpuriousRefRecursive3) { // Now delete 'a'. There are no uses of this function but there are // spurious references. 
- CG.removeDeadFunction(AN.getFunction()); + CG.markDeadFunction(AN.getFunction()); + CG.removeDeadFunctions({&AN.getFunction()}); // The only observable change should be that the RefSCC is gone from the // postorder sequence. diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp index 323470940fec5c..60a0402103ce05 100644 --- a/llvm/utils/TableGen/SubtargetEmitter.cpp +++ b/llvm/utils/TableGen/SubtargetEmitter.cpp @@ -32,6 +32,7 @@ #include <cstdint> #include <iterator> #include <map> +#include <set> #include <string> #include <vector> @@ -255,6 +256,9 @@ unsigned SubtargetEmitter::FeatureKeyValues( llvm::sort(FeatureList, LessRecordFieldName()); + // Check that there are no duplicate keys + std::set<StringRef> UniqueKeys; + // Begin feature table OS << "// Sorted (by key) array of values for CPU features.\n" << "extern const llvm::SubtargetFeatureKV " << Target @@ -283,6 +287,10 @@ unsigned SubtargetEmitter::FeatureKeyValues( OS << " },\n"; ++NumFeatures; + + if (!UniqueKeys.insert(CommandLineName).second) + PrintFatalError("Duplicate key in SubtargetFeatureKV: " + + CommandLineName); } // End feature table diff --git a/mlir/include/mlir-c/Bindings/Python/Interop.h b/mlir/include/mlir-c/Bindings/Python/Interop.h index 0a36e97c2ae683..a33190c380d37d 100644 --- a/mlir/include/mlir-c/Bindings/Python/Interop.h +++ b/mlir/include/mlir-c/Bindings/Python/Interop.h @@ -39,6 +39,7 @@ #include "mlir-c/IR.h" #include "mlir-c/IntegerSet.h" #include "mlir-c/Pass.h" +#include "mlir-c/Rewrite.h" // The 'mlir' Python package is relocatable and supports co-existing in multiple // projects. Each project must define its outer package prefix with this define @@ -284,6 +285,26 @@ static inline MlirModule mlirPythonCapsuleToModule(PyObject *capsule) { return module; } +/** Creates a capsule object encapsulating the raw C-API + * MlirFrozenRewritePatternSet. + * The returned capsule does not extend or affect ownership of any Python + * objects that reference the pattern set in any way. */ +static inline PyObject * +mlirPythonFrozenRewritePatternSetToCapsule(MlirFrozenRewritePatternSet pm) { + return PyCapsule_New(MLIR_PYTHON_GET_WRAPPED_POINTER(pm), + MLIR_PYTHON_CAPSULE_PASS_MANAGER, NULL); +} + +/** Extracts an MlirFrozenRewritePatternSet from a capsule as produced from + * mlirPythonFrozenRewritePatternSetToCapsule. If the capsule is not of the + * right type, then a null pattern set is returned. */ +static inline MlirFrozenRewritePatternSet +mlirPythonCapsuleToFrozenRewritePatternSet(PyObject *capsule) { + void *ptr = PyCapsule_GetPointer(capsule, MLIR_PYTHON_CAPSULE_PASS_MANAGER); + MlirFrozenRewritePatternSet pm = {ptr}; + return pm; +} + /** Creates a capsule object encapsulating the raw C-API MlirPassManager. * The returned capsule does not extend or affect ownership of any Python * objects that reference the module in any way. */ diff --git a/mlir/include/mlir-c/Rewrite.h b/mlir/include/mlir-c/Rewrite.h new file mode 100644 index 00000000000000..45218a1cd4ebd5 --- /dev/null +++ b/mlir/include/mlir-c/Rewrite.h @@ -0,0 +1,60 @@ +//===-- mlir-c/Rewrite.h - Helpers for C API to Rewrites ----------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM +// Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header declares the registration and creation methods for +// rewrite patterns.
+// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_C_REWRITE_H +#define MLIR_C_REWRITE_H + +#include "mlir-c/IR.h" +#include "mlir-c/Support.h" +#include "mlir/Config/mlir-config.h" + +//===----------------------------------------------------------------------===// +/// Opaque type declarations (see mlir-c/IR.h for more details). +//===----------------------------------------------------------------------===// + +#define DEFINE_C_API_STRUCT(name, storage) \ + struct name { \ + storage *ptr; \ + }; \ + typedef struct name name + +DEFINE_C_API_STRUCT(MlirFrozenRewritePatternSet, void); +DEFINE_C_API_STRUCT(MlirGreedyRewriteDriverConfig, void); +DEFINE_C_API_STRUCT(MlirRewritePatternSet, void); + +MLIR_CAPI_EXPORTED MlirFrozenRewritePatternSet +mlirFreezeRewritePattern(MlirRewritePatternSet op); + +MLIR_CAPI_EXPORTED void +mlirFrozenRewritePatternSetDestroy(MlirFrozenRewritePatternSet op); + +MLIR_CAPI_EXPORTED MlirLogicalResult mlirApplyPatternsAndFoldGreedily( + MlirModule op, MlirFrozenRewritePatternSet patterns, + MlirGreedyRewriteDriverConfig); + +#if MLIR_ENABLE_PDL_IN_PATTERNMATCH +DEFINE_C_API_STRUCT(MlirPDLPatternModule, void); + +MLIR_CAPI_EXPORTED MlirPDLPatternModule +mlirPDLPatternModuleFromModule(MlirModule op); + +MLIR_CAPI_EXPORTED void mlirPDLPatternModuleDestroy(MlirPDLPatternModule op); + +MLIR_CAPI_EXPORTED MlirRewritePatternSet +mlirRewritePatternSetFromPDLPatternModule(MlirPDLPatternModule op); +#endif // MLIR_ENABLE_PDL_IN_PATTERNMATCH + +#undef DEFINE_C_API_STRUCT + +#endif // MLIR_C_REWRITE_H diff --git a/mlir/include/mlir/Bindings/Python/PybindAdaptors.h b/mlir/include/mlir/Bindings/Python/PybindAdaptors.h index d8f22c7aa17096..ebf50109f72f23 100644 --- a/mlir/include/mlir/Bindings/Python/PybindAdaptors.h +++ b/mlir/include/mlir/Bindings/Python/PybindAdaptors.h @@ -198,6 +198,27 @@ struct type_caster { }; }; +/// Casts object <-> MlirFrozenRewritePatternSet. +template <> +struct type_caster { + PYBIND11_TYPE_CASTER(MlirFrozenRewritePatternSet, + _("MlirFrozenRewritePatternSet")); + bool load(handle src, bool) { + py::object capsule = mlirApiObjectToCapsule(src); + value = mlirPythonCapsuleToFrozenRewritePatternSet(capsule.ptr()); + return value.ptr != nullptr; + } + static handle cast(MlirFrozenRewritePatternSet v, return_value_policy, + handle) { + py::object capsule = py::reinterpret_steal( + mlirPythonFrozenRewritePatternSetToCapsule(v)); + return py::module::import(MAKE_MLIR_PYTHON_QUALNAME("rewrite")) + .attr("FrozenRewritePatternSet") + .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) + .release(); + }; +}; + /// Casts object <-> MlirOperation. 
template <> struct type_caster<MlirOperation> { diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h index d6d038ef65bdf4..3043a0c4dc4109 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h @@ -248,6 +248,12 @@ std::unique_ptr<Pass> createSparsificationAndBufferizationPass( bool enableBufferInitialization, unsigned vectorLength, bool enableVLAVectorization, bool enableSIMDIndex32, bool enableGPULibgen); +//===----------------------------------------------------------------------===// +// Sparse Iteration Transform Passes +//===----------------------------------------------------------------------===// + +std::unique_ptr<Pass> createSparseSpaceCollapsePass(); + //===----------------------------------------------------------------------===// // Registration. //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td index 2f844cee5ff528..196110f55571d2 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td @@ -462,6 +462,34 @@ def SparsificationAndBufferization : Pass<"sparsification-and-bufferization", "M "sparse_tensor::SparseTensorDialect", "vector::VectorDialect" ]; + // Important optimization options are made visible to the mini-pipeline + // so that clients can set these (when not using the full pipeline). + let options = [ + Option<"vectorLength", "vl", "int32_t", "0", + "Set the vector length (use 0 to disable vectorization)">, + Option<"enableVLAVectorization", "enable-vla-vectorization", "bool", "false", + "Enable vector length agnostic vectorization">, + Option<"enableSIMDIndex32", "enable-simd-index32", "bool", "false", + "Enable i32 indexing into vectors (for efficient gather/scatter)">, + Option<"enableGPULibgen", "enable-gpu-libgen", "bool", "false", + "Enable GPU acceleration by means of direct library calls">, + ]; +} + +//===----------------------------------------------------------------------===// +// Sparse Iteration Transform Passes +//===----------------------------------------------------------------------===// + +def SparseSpaceCollapse : Pass<"sparse-space-collapse", "func::FuncOp"> { + let summary = "sparse space collapsing pass"; + let description = [{ + This pass collapses consecutive sparse spaces (extracted from the same tensor) + into one multi-dimensional space. The pass is not yet stabilized.
+ }]; + let constructor = "mlir::createSparseSpaceCollapsePass()"; + let dependentDialects = [ + "sparse_tensor::SparseTensorDialect", + ]; } #endif // MLIR_DIALECT_SPARSETENSOR_TRANSFORMS_PASSES diff --git a/mlir/lib/Bindings/Python/IRModule.h b/mlir/lib/Bindings/Python/IRModule.h index 60cd39302c6387..172898cfda0c52 100644 --- a/mlir/lib/Bindings/Python/IRModule.h +++ b/mlir/lib/Bindings/Python/IRModule.h @@ -22,6 +22,7 @@ #include "mlir-c/Diagnostics.h" #include "mlir-c/IR.h" #include "mlir-c/IntegerSet.h" +#include "mlir-c/Transforms.h" #include "mlir/Bindings/Python/PybindAdaptors.h" #include "llvm/ADT/DenseMap.h" diff --git a/mlir/lib/Bindings/Python/MainModule.cpp b/mlir/lib/Bindings/Python/MainModule.cpp index 17272472ccca42..8da1ab16a4514b 100644 --- a/mlir/lib/Bindings/Python/MainModule.cpp +++ b/mlir/lib/Bindings/Python/MainModule.cpp @@ -11,6 +11,7 @@ #include "Globals.h" #include "IRModule.h" #include "Pass.h" +#include "Rewrite.h" namespace py = pybind11; using namespace mlir; @@ -116,6 +117,9 @@ PYBIND11_MODULE(_mlir, m) { populateIRInterfaces(irModule); populateIRTypes(irModule); + auto rewriteModule = m.def_submodule("rewrite", "MLIR Rewrite Bindings"); + populateRewriteSubmodule(rewriteModule); + // Define and populate PassManager submodule. auto passModule = m.def_submodule("passmanager", "MLIR Pass Management Bindings"); diff --git a/mlir/lib/Bindings/Python/Rewrite.cpp b/mlir/lib/Bindings/Python/Rewrite.cpp new file mode 100644 index 00000000000000..1d8128be9f0826 --- /dev/null +++ b/mlir/lib/Bindings/Python/Rewrite.cpp @@ -0,0 +1,110 @@ +//===- Rewrite.cpp - Rewrite ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Rewrite.h" + +#include "IRModule.h" +#include "mlir-c/Bindings/Python/Interop.h" +#include "mlir-c/Rewrite.h" +#include "mlir/Config/mlir-config.h" + +namespace py = pybind11; +using namespace mlir; +using namespace py::literals; +using namespace mlir::python; + +namespace { + +#if MLIR_ENABLE_PDL_IN_PATTERNMATCH +/// Owning Wrapper around a PDLPatternModule. +class PyPDLPatternModule { +public: + PyPDLPatternModule(MlirPDLPatternModule module) : module(module) {} + PyPDLPatternModule(PyPDLPatternModule &&other) noexcept + : module(other.module) { + other.module.ptr = nullptr; + } + ~PyPDLPatternModule() { + if (module.ptr != nullptr) + mlirPDLPatternModuleDestroy(module); + } + MlirPDLPatternModule get() { return module; } + +private: + MlirPDLPatternModule module; +}; +#endif // MLIR_ENABLE_PDL_IN_PATTERNMATCH + +/// Owning Wrapper around a FrozenRewritePatternSet. 
+class PyFrozenRewritePatternSet { +public: + PyFrozenRewritePatternSet(MlirFrozenRewritePatternSet set) : set(set) {} + PyFrozenRewritePatternSet(PyFrozenRewritePatternSet &&other) noexcept + : set(other.set) { + other.set.ptr = nullptr; + } + ~PyFrozenRewritePatternSet() { + if (set.ptr != nullptr) + mlirFrozenRewritePatternSetDestroy(set); + } + MlirFrozenRewritePatternSet get() { return set; } + + pybind11::object getCapsule() { + return py::reinterpret_steal<py::object>( + mlirPythonFrozenRewritePatternSetToCapsule(get())); + } + + static pybind11::object createFromCapsule(pybind11::object capsule) { + MlirFrozenRewritePatternSet rawPm = + mlirPythonCapsuleToFrozenRewritePatternSet(capsule.ptr()); + if (rawPm.ptr == nullptr) + throw py::error_already_set(); + return py::cast(PyFrozenRewritePatternSet(rawPm), + py::return_value_policy::move); + } + +private: + MlirFrozenRewritePatternSet set; +}; + +} // namespace + +/// Create the `mlir.rewrite` submodule here. +void mlir::python::populateRewriteSubmodule(py::module &m) { + //---------------------------------------------------------------------------- + // Mapping of the PDL pattern module and frozen rewrite pattern set. + //---------------------------------------------------------------------------- +#if MLIR_ENABLE_PDL_IN_PATTERNMATCH + py::class_<PyPDLPatternModule>(m, "PDLModule", py::module_local()) + .def(py::init<>([](MlirModule module) { + return mlirPDLPatternModuleFromModule(module); + }), + "module"_a, "Create a PDL module from the given module.") + .def("freeze", [](PyPDLPatternModule &self) { + return new PyFrozenRewritePatternSet(mlirFreezeRewritePattern( + mlirRewritePatternSetFromPDLPatternModule(self.get()))); + }); +#endif // MLIR_ENABLE_PDL_IN_PATTERNMATCH + py::class_<PyFrozenRewritePatternSet>(m, "FrozenRewritePatternSet", + py::module_local()) + .def_property_readonly(MLIR_PYTHON_CAPI_PTR_ATTR, + &PyFrozenRewritePatternSet::getCapsule) + .def(MLIR_PYTHON_CAPI_FACTORY_ATTR, + &PyFrozenRewritePatternSet::createFromCapsule); + m.def( + "apply_patterns_and_fold_greedily", + [](MlirModule module, MlirFrozenRewritePatternSet set) { + auto status = mlirApplyPatternsAndFoldGreedily(module, set, {}); + if (mlirLogicalResultIsFailure(status)) + // FIXME: Not sure this is the right error to throw here. + throw py::value_error("pattern application failed to converge"); + }, + "module"_a, "set"_a, + "Applies the given patterns to the given module greedily while folding " + "results."); +} diff --git a/mlir/lib/Bindings/Python/Rewrite.h b/mlir/lib/Bindings/Python/Rewrite.h new file mode 100644 index 00000000000000..997b80adda3038 --- /dev/null +++ b/mlir/lib/Bindings/Python/Rewrite.h @@ -0,0 +1,22 @@ +//===- Rewrite.h - Rewrite Submodules of pybind module --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_BINDINGS_PYTHON_REWRITE_H +#define MLIR_BINDINGS_PYTHON_REWRITE_H + +#include "PybindUtils.h" + +namespace mlir { +namespace python { + +void populateRewriteSubmodule(pybind11::module &m); + +} // namespace python +} // namespace mlir + +#endif // MLIR_BINDINGS_PYTHON_REWRITE_H diff --git a/mlir/lib/CAPI/Transforms/CMakeLists.txt b/mlir/lib/CAPI/Transforms/CMakeLists.txt index 2638025a8c359a..6c67aa09fdf402 100644 --- a/mlir/lib/CAPI/Transforms/CMakeLists.txt +++ b/mlir/lib/CAPI/Transforms/CMakeLists.txt @@ -1,6 +1,9 @@ add_mlir_upstream_c_api_library(MLIRCAPITransforms Passes.cpp + Rewrite.cpp LINK_LIBS PUBLIC + MLIRIR MLIRTransforms + MLIRTransformUtils ) diff --git a/mlir/lib/CAPI/Transforms/Rewrite.cpp b/mlir/lib/CAPI/Transforms/Rewrite.cpp new file mode 100644 index 00000000000000..0de1958398f63e --- /dev/null +++ b/mlir/lib/CAPI/Transforms/Rewrite.cpp @@ -0,0 +1,83 @@ +//===- Rewrite.cpp - C API for Rewrite Patterns ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir-c/Rewrite.h" +#include "mlir-c/Transforms.h" +#include "mlir/CAPI/IR.h" +#include "mlir/CAPI/Support.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Rewrite/FrozenRewritePatternSet.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" + +using namespace mlir; + +inline mlir::RewritePatternSet &unwrap(MlirRewritePatternSet module) { + assert(module.ptr && "unexpected null module"); + return *(static_cast<mlir::RewritePatternSet *>(module.ptr)); +} + +inline MlirRewritePatternSet wrap(mlir::RewritePatternSet *module) { + return {module}; +} + +inline mlir::FrozenRewritePatternSet * +unwrap(MlirFrozenRewritePatternSet module) { + assert(module.ptr && "unexpected null module"); + return static_cast<mlir::FrozenRewritePatternSet *>(module.ptr); +} + +inline MlirFrozenRewritePatternSet wrap(mlir::FrozenRewritePatternSet *module) { + return {module}; +} + +MlirFrozenRewritePatternSet mlirFreezeRewritePattern(MlirRewritePatternSet op) { + auto *m = new mlir::FrozenRewritePatternSet(std::move(unwrap(op))); + op.ptr = nullptr; + return wrap(m); +} + +void mlirFrozenRewritePatternSetDestroy(MlirFrozenRewritePatternSet op) { + delete unwrap(op); + op.ptr = nullptr; +} + +MlirLogicalResult +mlirApplyPatternsAndFoldGreedily(MlirModule op, + MlirFrozenRewritePatternSet patterns, + MlirGreedyRewriteDriverConfig) { + return wrap( + mlir::applyPatternsAndFoldGreedily(unwrap(op), *unwrap(patterns))); +} + +#if MLIR_ENABLE_PDL_IN_PATTERNMATCH +inline mlir::PDLPatternModule *unwrap(MlirPDLPatternModule module) { + assert(module.ptr && "unexpected null module"); + return static_cast<mlir::PDLPatternModule *>(module.ptr); +} + +inline MlirPDLPatternModule wrap(mlir::PDLPatternModule *module) { + return {module}; +} + +MlirPDLPatternModule mlirPDLPatternModuleFromModule(MlirModule op) { + return wrap(new mlir::PDLPatternModule( + mlir::OwningOpRef<mlir::ModuleOp>(unwrap(op)))); +} + +void mlirPDLPatternModuleDestroy(MlirPDLPatternModule op) { + delete unwrap(op); + op.ptr = nullptr; +} + +MlirRewritePatternSet +mlirRewritePatternSetFromPDLPatternModule(MlirPDLPatternModule op) { + auto *m = new mlir::RewritePatternSet(std::move(*unwrap(op))); + op.ptr = nullptr; + return wrap(m); +}
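Read together, the new C entry points compose into a freeze-then-apply flow. A hypothetical end-to-end use from C++ follows; `pdlMod` and `payload` are placeholder handles owned by the caller, error handling is elided, and note that `mlirRewritePatternSetFromPDLPatternModule` and `mlirFreezeRewritePattern` both consume their argument (each nulls the incoming `ptr`):

```cpp
// Hypothetical usage sketch (not part of the patch): run frozen PDL
// patterns over a payload module via the new C API.
#include "mlir-c/IR.h"
#include "mlir-c/Rewrite.h"
#include "mlir-c/Support.h"

void runPdlPatterns(MlirModule pdlMod, MlirModule payload) {
  MlirPDLPatternModule pdl = mlirPDLPatternModuleFromModule(pdlMod);
  MlirRewritePatternSet set = mlirRewritePatternSetFromPDLPatternModule(pdl);
  MlirFrozenRewritePatternSet frozen = mlirFreezeRewritePattern(set);
  MlirGreedyRewriteDriverConfig config = {nullptr}; // driver defaults
  if (mlirLogicalResultIsFailure(
          mlirApplyPatternsAndFoldGreedily(payload, frozen, config))) {
    // The greedy driver did not converge; the payload may be partially
    // rewritten.
  }
  mlirFrozenRewritePatternSetDestroy(frozen);
}
```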
+#endif // MLIR_ENABLE_PDL_IN_PATTERNMATCH diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt b/mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt index af3a1b48f45af9..2a29ee8a7a87cb 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt @@ -6,6 +6,7 @@ add_mlir_dialect_library(MLIRSparseTensorTransforms SparseGPUCodegen.cpp SparseReinterpretMap.cpp SparseStorageSpecifierToLLVM.cpp + SparseSpaceCollapse.cpp SparseTensorCodegen.cpp SparseTensorConversion.cpp SparseTensorPasses.cpp diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseSpaceCollapse.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseSpaceCollapse.cpp new file mode 100644 index 00000000000000..924046fcd9961f --- /dev/null +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseSpaceCollapse.cpp @@ -0,0 +1,199 @@ +//===--------- SparseSpaceCollapse.cpp - Collapse Sparse Space Pass -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/IRMapping.h" +#include "mlir/Transforms/Passes.h" + +#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" +#include "mlir/Dialect/SparseTensor/Transforms/Passes.h" + +namespace mlir { +#define GEN_PASS_DEF_SPARSESPACECOLLAPSE +#include "mlir/Dialect/SparseTensor/Transforms/Passes.h.inc" +} // namespace mlir + +#define DEBUG_TYPE "sparse-space-collapse" + +using namespace mlir; +using namespace sparse_tensor; + +namespace { + +struct CollapseSpaceInfo { + ExtractIterSpaceOp space; + IterateOp loop; +}; + +bool isCollapsableLoops(LoopLikeOpInterface parent, LoopLikeOpInterface node) { + auto pIterArgs = parent.getRegionIterArgs(); + auto nInitArgs = node.getInits(); + if (pIterArgs.size() != nInitArgs.size()) + return false; + + // Two loops are collapsable if they are perfectly nested. + auto pYields = parent.getYieldedValues(); + auto nResult = node.getLoopResults().value(); + + bool yieldEq = + llvm::all_of(llvm::zip_equal(pYields, nResult), [](auto zipped) { + return std::get<0>(zipped) == std::get<1>(zipped); + }); + + // Parent iter_args should be passed directly to the node's init_args. + bool iterArgEq = + llvm::all_of(llvm::zip_equal(pIterArgs, nInitArgs), [](auto zipped) { + return std::get<0>(zipped) == std::get<1>(zipped); + }); + + return yieldEq && iterArgEq; +} + +bool legalToCollapse(SmallVectorImpl<CollapseSpaceInfo> &toCollapse, + ExtractIterSpaceOp curSpace) { + + auto getIterateOpOverSpace = [](ExtractIterSpaceOp space) -> IterateOp { + Value spaceVal = space.getExtractedSpace(); + if (spaceVal.hasOneUse()) + return llvm::dyn_cast<IterateOp>(*spaceVal.getUsers().begin()); + return nullptr; + }; + + if (toCollapse.empty()) { + // Collapse root. + if (auto itOp = getIterateOpOverSpace(curSpace)) { + CollapseSpaceInfo &info = toCollapse.emplace_back(); + info.space = curSpace; + info.loop = itOp; + return true; + } + return false; + } + + auto parent = toCollapse.back().space; + auto pItOp = toCollapse.back().loop; + auto nItOp = getIterateOpOverSpace(curSpace); + + // Can only collapse spaces extracted from the same tensor.
+ if (parent.getTensor() != curSpace.getTensor()) { + LLVM_DEBUG({ + llvm::dbgs() + << "failed to collapse spaces extracted from different tensors."; + }); + return false; + } + + // Can only collapse consecutive simple iterations over one tensor (i.e., no + // coiteration). + if (!nItOp || nItOp->getBlock() != curSpace->getBlock() || + pItOp.getIterator() != curSpace.getParentIter() || + curSpace->getParentOp() != pItOp.getOperation()) { + LLVM_DEBUG( + { llvm::dbgs() << "failed to collapse non-consecutive IterateOps."; }); + return false; + } + + if (pItOp && !isCollapsableLoops(pItOp, nItOp)) { + LLVM_DEBUG({ + llvm::dbgs() + << "failed to collapse IterateOps that are not perfectly nested."; + }); + return false; + } + + CollapseSpaceInfo &info = toCollapse.emplace_back(); + info.space = curSpace; + info.loop = nItOp; + return true; +} + +void collapseSparseSpace(MutableArrayRef<CollapseSpaceInfo> toCollapse) { + if (toCollapse.size() < 2) + return; + + ExtractIterSpaceOp root = toCollapse.front().space; + ExtractIterSpaceOp leaf = toCollapse.back().space; + Location loc = root.getLoc(); + + assert(root->hasOneUse() && leaf->hasOneUse()); + + // Insert collapsed operation at the same scope as root operation. + OpBuilder builder(root); + + // Construct the collapsed iteration space. + auto collapsedSpace = builder.create<ExtractIterSpaceOp>( + loc, root.getTensor(), root.getParentIter(), root.getLoLvl(), + leaf.getHiLvl()); + + auto rItOp = llvm::cast<IterateOp>(*root->getUsers().begin()); + auto innermost = toCollapse.back().loop; + + IRMapping mapper; + mapper.map(leaf, collapsedSpace.getExtractedSpace()); + for (auto z : llvm::zip_equal(innermost.getInitArgs(), rItOp.getInitArgs())) + mapper.map(std::get<0>(z), std::get<1>(z)); + + auto cloned = llvm::cast<IterateOp>(builder.clone(*innermost, mapper)); + builder.setInsertionPointToStart(cloned.getBody()); + + LevelSet crdUsedLvls; + unsigned shift = 0, argIdx = 1; + for (auto info : toCollapse.drop_back()) { + LevelSet set = info.loop.getCrdUsedLvls(); + crdUsedLvls |= set.lshift(shift); + shift += info.loop.getSpaceDim(); + for (BlockArgument crd : info.loop.getCrds()) { + BlockArgument collapsedCrd = cloned.getBody()->insertArgument( + argIdx++, builder.getIndexType(), crd.getLoc()); + crd.replaceAllUsesWith(collapsedCrd); + } + } + crdUsedLvls |= innermost.getCrdUsedLvls().lshift(shift); + cloned.getIterator().setType(collapsedSpace.getType().getIteratorType()); + cloned.setCrdUsedLvls(crdUsedLvls); + + rItOp.replaceAllUsesWith(cloned.getResults()); + // Erase collapsed loops. + rItOp.erase(); + root.erase(); +} + +struct SparseSpaceCollapsePass + : public impl::SparseSpaceCollapseBase<SparseSpaceCollapsePass> { + SparseSpaceCollapsePass() = default; + + void runOnOperation() override { + func::FuncOp func = getOperation(); + + // A naive (experimental) implementation to collapse consecutive sparse + // spaces. It does NOT handle complex cases where multiple spaces are + // extracted in the same basic block. E.g., + // + // %space1 = extract_space %t1 ... + // %space2 = extract_space %t2 ... + // sparse_tensor.iterate(%sp1) ... + // + SmallVector<CollapseSpaceInfo> toCollapse; + func->walk([&](ExtractIterSpaceOp op) { + if (!legalToCollapse(toCollapse, op)) { + // If it is not legal to collapse one more space, collapse the + // existing ones and clear.
+ collapseSparseSpace(toCollapse); + toCollapse.clear(); + } + }); + + collapseSparseSpace(toCollapse); + } +}; + +} // namespace + +std::unique_ptr<Pass> mlir::createSparseSpaceCollapsePass() { + return std::make_unique<SparseSpaceCollapsePass>(); +} diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp index 3a8972072ac3b1..13c750e83d0454 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp @@ -61,20 +61,34 @@ class SparsificationAndBufferizationPass : public impl::SparsificationAndBufferizationBase< SparsificationAndBufferizationPass> { public: + // Private pass options only. SparsificationAndBufferizationPass( const bufferization::OneShotBufferizationOptions &bufferizationOptions, const SparsificationOptions &sparsificationOptions, bool createSparseDeallocs, bool enableRuntimeLibrary, - bool enableBufferInitialization, unsigned vectorLength, - bool enableVLAVectorization, bool enableSIMDIndex32, bool enableGPULibgen) + bool enableBufferInitialization) : bufferizationOptions(bufferizationOptions), sparsificationOptions(sparsificationOptions), createSparseDeallocs(createSparseDeallocs), enableRuntimeLibrary(enableRuntimeLibrary), - enableBufferInitialization(enableBufferInitialization), - vectorLength(vectorLength), - enableVLAVectorization(enableVLAVectorization), - enableSIMDIndex32(enableSIMDIndex32), enableGPULibgen(enableGPULibgen) { + enableBufferInitialization(enableBufferInitialization) {} + // Private pass options and visible pass options. + SparsificationAndBufferizationPass( + const bufferization::OneShotBufferizationOptions &bufferizationOptions, + const SparsificationOptions &sparsificationOptions, + bool createSparseDeallocs, bool enableRuntimeLibrary, + bool enableBufferInitialization, unsigned vl, bool vla, bool index32, + bool gpu) + : bufferizationOptions(bufferizationOptions), + sparsificationOptions(sparsificationOptions), + createSparseDeallocs(createSparseDeallocs), + enableRuntimeLibrary(enableRuntimeLibrary), + enableBufferInitialization(enableBufferInitialization) { + // Set the visible pass options explicitly. + vectorLength = vl; + enableVLAVectorization = vla; + enableSIMDIndex32 = index32; + enableGPULibgen = gpu; } /// Bufferize all dense ops.
@@ -178,10 +192,6 @@ class SparsificationAndBufferizationPass
   bool createSparseDeallocs;
   bool enableRuntimeLibrary;
   bool enableBufferInitialization;
-  unsigned vectorLength;
-  bool enableVLAVectorization;
-  bool enableSIMDIndex32;
-  bool enableGPULibgen;
 };
 
 } // namespace sparse_tensor
@@ -213,16 +223,13 @@ mlir::getBufferizationOptionsForSparsification(bool analysisOnly) {
 
 std::unique_ptr<mlir::Pass> mlir::createSparsificationAndBufferizationPass() {
   SparsificationOptions sparseOptions;
-  return createSparsificationAndBufferizationPass(
+  return std::make_unique<
+      mlir::sparse_tensor::SparsificationAndBufferizationPass>(
       getBufferizationOptionsForSparsification(/*analysisOnly=*/false),
       sparseOptions,
       /*createSparseDeallocs=*/false,
       /*enableRuntimeLibrary=*/false,
-      /*enableBufferInitialization=*/false,
-      /*vectorLength=*/0,
-      /*enableVLAVectorization=*/false,
-      /*enableSIMDIndex32=*/false,
-      /*enableGPULibgen=*/false);
+      /*enableBufferInitialization=*/false);
 }
 
 std::unique_ptr<mlir::Pass> mlir::createSparsificationAndBufferizationPass(
diff --git a/mlir/python/CMakeLists.txt b/mlir/python/CMakeLists.txt
index d8f2d1989fdea7..d03036e17749d4 100644
--- a/mlir/python/CMakeLists.txt
+++ b/mlir/python/CMakeLists.txt
@@ -21,6 +21,7 @@ declare_mlir_python_sources(MLIRPythonSources.Core.Python
     _mlir_libs/__init__.py
     ir.py
     passmanager.py
+    rewrite.py
     dialects/_ods_common.py
 
     # The main _mlir module has submodules: include stubs from each.
@@ -448,6 +449,7 @@ declare_mlir_python_extension(MLIRPythonExtension.Core
     IRModule.cpp
     IRTypes.cpp
     Pass.cpp
+    Rewrite.cpp
 
     # Headers must be included explicitly so they are installed.
     Globals.h
diff --git a/mlir/python/mlir/dialects/pdl.py b/mlir/python/mlir/dialects/pdl.py
index db07dc50aabd79..b7b8430cebd07a 100644
--- a/mlir/python/mlir/dialects/pdl.py
+++ b/mlir/python/mlir/dialects/pdl.py
@@ -6,7 +6,7 @@
 from ._pdl_ops_gen import _Dialect
 from .._mlir_libs._mlirDialectsPDL import *
 from .._mlir_libs._mlirDialectsPDL import OperationType
-
+from ..extras.meta import region_op
 
 try:
     from ..ir import *
@@ -127,6 +127,9 @@ def body(self):
         return self.regions[0].blocks[0]
 
 
+pattern = region_op(PatternOp.__base__)
+
+
 @_ods_cext.register_operation(_Dialect, replace=True)
 class ReplaceOp(ReplaceOp):
     """Specialization for PDL replace op class."""
@@ -195,6 +198,9 @@ def body(self):
         return self.regions[0].blocks[0]
 
 
+rewrite = region_op(RewriteOp)
+
+
 @_ods_cext.register_operation(_Dialect, replace=True)
 class TypeOp(TypeOp):
     """Specialization for PDL type op class."""
diff --git a/mlir/python/mlir/rewrite.py b/mlir/python/mlir/rewrite.py
new file mode 100644
index 00000000000000..5bc1bba7ae9a72
--- /dev/null
+++ b/mlir/python/mlir/rewrite.py
@@ -0,0 +1,5 @@
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+from ._mlir_libs._mlir.rewrite import *
diff --git a/mlir/test/Dialect/SparseTensor/minipipeline_vector.mlir b/mlir/test/Dialect/SparseTensor/minipipeline_vector.mlir
new file mode 100755
index 00000000000000..2475aa5139da48
--- /dev/null
+++ b/mlir/test/Dialect/SparseTensor/minipipeline_vector.mlir
@@ -0,0 +1,43 @@
+// RUN: mlir-opt %s --sparsification-and-bufferization | FileCheck %s --check-prefix=CHECK-NOVEC
+// RUN: mlir-opt %s --sparsification-and-bufferization="vl=8" | FileCheck %s --check-prefix=CHECK-VEC
+
+// Test to ensure we can pass optimization flags into
+// the mini sparsification and bufferization pipeline.
+
+#SV = #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }>
+
+#trait_sum_reduction = {
+  indexing_maps = [
+    affine_map<(i) -> (i)>,  // a
+    affine_map<(i) -> ()>    // x (scalar out)
+  ],
+  iterator_types = ["reduction"],
+  doc = "x += SUM_i a(i)"
+}
+
+//
+// CHECK-NOVEC-LABEL: func.func @sum_reduction
+// CHECK-NOVEC:       scf.for
+// CHECK-NOVEC:         arith.addf %{{.*}} %{{.*}} : f32
+// CHECK-NOVEC:       }
+//
+// CHECK-VEC-LABEL: func.func @sum_reduction
+// CHECK-VEC:       vector.insertelement
+// CHECK-VEC:       scf.for
+// CHECK-VEC:         vector.create_mask
+// CHECK-VEC:         vector.maskedload
+// CHECK-VEC:         arith.addf %{{.*}} %{{.*}} : vector<8xf32>
+// CHECK-VEC:       }
+// CHECK-VEC:       vector.reduction
+//
+func.func @sum_reduction(%arga: tensor<?xf32, #SV>,
+                         %argx: tensor<f32>) -> tensor<f32> {
+  %0 = linalg.generic #trait_sum_reduction
+    ins(%arga: tensor<?xf32, #SV>)
+    outs(%argx: tensor<f32>) {
+      ^bb(%a: f32, %x: f32):
+        %0 = arith.addf %x, %a : f32
+        linalg.yield %0 : f32
+  } -> tensor<f32>
+  return %0 : tensor<f32>
+}
diff --git a/mlir/test/Dialect/SparseTensor/sparse_space_collapse.mlir b/mlir/test/Dialect/SparseTensor/sparse_space_collapse.mlir
new file mode 100644
index 00000000000000..baa6199f12bc38
--- /dev/null
+++ b/mlir/test/Dialect/SparseTensor/sparse_space_collapse.mlir
@@ -0,0 +1,37 @@
+// RUN: mlir-opt %s --sparse-space-collapse | FileCheck %s
+
+#COO = #sparse_tensor.encoding<{
+  map = (i, j) -> (
+    i : compressed(nonunique),
+    j : singleton(soa)
+  )
+}>
+
+// CHECK-LABEL:   func.func @sparse_sparse_collapse(
+// CHECK-SAME:      %[[VAL_0:.*]]: tensor<4x8xf32, #sparse>,
+// CHECK-SAME:      %[[VAL_1:.*]]: index) {
+// CHECK:           %[[VAL_3:.*]] = sparse_tensor.extract_iteration_space %[[VAL_0]] lvls = 0 to 2 : tensor<4x8xf32, #sparse>
+// CHECK:           %[[VAL_4:.*]] = sparse_tensor.iterate %[[VAL_5:.*]] in %[[VAL_3]] at(%[[VAL_6:.*]], _) iter_args(%[[VAL_7:.*]] = %[[VAL_1]])
+// CHECK:             %[[VAL_8:.*]] = "test.op"(%[[VAL_7]]) : (index) -> index
+// CHECK:             sparse_tensor.yield %[[VAL_8]] : index
+// CHECK:           }
+// CHECK:           "test.sink"(%[[VAL_4]]) : (index) -> ()
+// CHECK:           return
+// CHECK:         }
func.func @sparse_sparse_collapse(%sp : tensor<4x8xf32, #COO>, %i : index) {
+  %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0
+      : tensor<4x8xf32, #COO>
+     -> !sparse_tensor.iter_space<#COO, lvls = 0>
+  %r1 = sparse_tensor.iterate %it1 in %l1 at(%crd0) iter_args(%outer = %i): !sparse_tensor.iter_space<#COO, lvls = 0 to 1> -> index {
+    %l2 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 1
+        : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 0 to 1>
+       -> !sparse_tensor.iter_space<#COO, lvls = 1>
+    %r2 = sparse_tensor.iterate %it2 in %l2 iter_args(%inner = %outer): !sparse_tensor.iter_space<#COO, lvls = 1 to 2> -> index {
+      %k = "test.op"(%inner) : (index) -> index
+      sparse_tensor.yield %k : index
+    }
+    sparse_tensor.yield %r2 : index
+  }
+  "test.sink"(%r1) : (index) -> ()
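+  // The two nested extract/iterate pairs above are expected to fold into the
+  // single `lvls = 0 to 2` iteration verified by the CHECK lines.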
+  return
+}
diff --git a/mlir/test/python/integration/dialects/pdl.py b/mlir/test/python/integration/dialects/pdl.py
new file mode 100644
index 00000000000000..923af29a71ad75
--- /dev/null
+++ b/mlir/test/python/integration/dialects/pdl.py
@@ -0,0 +1,67 @@
+# RUN: %PYTHON %s 2>&1 | FileCheck %s
+
+from mlir.dialects import arith, func, pdl
+from mlir.dialects.builtin import module
+from mlir.ir import *
+from mlir.rewrite import *
+
+
+def construct_and_print_in_module(f):
+    print("\nTEST:", f.__name__)
+    with Context(), Location.unknown():
+        module = Module.create()
+        with InsertionPoint(module.body):
+            module = f(module)
+        if module is not None:
+            print(module)
+    return f
+
+
+# CHECK-LABEL: TEST: test_add_to_mul
+# CHECK: arith.muli
+@construct_and_print_in_module
+def test_add_to_mul(module_):
+    index_type = IndexType.get()
+
+    # Create a test case.
+    @module(sym_name="ir")
+    def ir():
+        @func.func(index_type, index_type)
+        def add_func(a, b):
+            return arith.addi(a, b)
+
+    # Create a rewrite from add to mul. This will match:
+    # - the operation name is arith.addi,
+    # - the operands are index types,
+    # - there are two operands.
+    with Location.unknown():
+        m = Module.create()
+        with InsertionPoint(m.body):
+            # Change all arith.addi with index types to arith.muli.
+            @pdl.pattern(benefit=1, sym_name="addi_to_mul")
+            def pat():
+                # Match arith.addi with index types.
+                index_type = pdl.TypeOp(IndexType.get())
+                operand0 = pdl.OperandOp(index_type)
+                operand1 = pdl.OperandOp(index_type)
+                op0 = pdl.OperationOp(
+                    name="arith.addi", args=[operand0, operand1], types=[index_type]
+                )
+
+                # Replace the matched op with arith.muli.
+                @pdl.rewrite()
+                def rew():
+                    newOp = pdl.OperationOp(
+                        name="arith.muli", args=[operand0, operand1], types=[index_type]
+                    )
+                    pdl.ReplaceOp(op0, with_op=newOp)
+
+    # Create a PDL module from the module and freeze it. At this point the
+    # ownership of the module is transferred to the PDL module. This ownership
+    # transfer is not yet captured on the Python side and has sharp edges, so
+    # it is best to construct the module and the PDL module in the same scope.
+    # FIXME: This should be made more robust.
+    frozen = PDLModule(m).freeze()
+    # The frozen pattern set could be applied multiple times.
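+    # apply_patterns_and_fold_greedily drives MLIR's greedy rewrite driver:
+    # it repeatedly matches the patterns in the frozen set against `module_`
+    # and applies them until no more rewrites fire.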
+ apply_patterns_and_fold_greedily(module_, frozen) + return module_ diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 3dfd3fd91add77..a2d3f400600d0d 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -420,6 +420,7 @@ mlir_c_api_cc_library( "include/mlir-c/Interfaces.h", "include/mlir-c/Pass.h", "include/mlir-c/RegisterEverything.h", + "include/mlir-c/Rewrite.h", "include/mlir-c/Support.h", "include/mlir/CAPI/AffineExpr.h", "include/mlir/CAPI/AffineMap.h", @@ -866,7 +867,10 @@ mlir_c_api_cc_library( mlir_c_api_cc_library( name = "CAPITransforms", - srcs = ["lib/CAPI/Transforms/Passes.cpp"], + srcs = [ + "lib/CAPI/Transforms/Passes.cpp", + "lib/CAPI/Transforms/Rewrite.cpp", + ], hdrs = ["include/mlir-c/Transforms.h"], capi_deps = [ ":CAPIIR", @@ -876,7 +880,10 @@ mlir_c_api_cc_library( ], includes = ["include"], deps = [ + ":IR", ":Pass", + ":Rewrite", + ":TransformUtils", ":Transforms", ], ) @@ -939,6 +946,7 @@ cc_library( textual_hdrs = glob(MLIR_BINDINGS_PYTHON_HEADERS), deps = [ ":CAPIIRHeaders", + ":CAPITransformsHeaders", "@local_config_python//:python_headers", "@pybind11", ], @@ -957,6 +965,7 @@ cc_library( textual_hdrs = glob(MLIR_BINDINGS_PYTHON_HEADERS), deps = [ ":CAPIIR", + ":CAPITransforms", "@local_config_python//:python_headers", "@pybind11", ], @@ -981,6 +990,7 @@ MLIR_PYTHON_BINDINGS_SOURCES = [ "lib/Bindings/Python/IRModule.cpp", "lib/Bindings/Python/IRTypes.cpp", "lib/Bindings/Python/Pass.cpp", + "lib/Bindings/Python/Rewrite.cpp", ] cc_library( diff --git a/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel index add150de69faf4..254cab0db4a5d6 100644 --- a/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel @@ -82,6 +82,13 @@ filegroup( ], ) +filegroup( + name = "RewritePyFiles", + srcs = [ + "mlir/rewrite.py", + ], +) + filegroup( name = "RuntimePyFiles", srcs = glob([